From 8af60dae0911d6d087c9fc11f27b947f867589e2 Mon Sep 17 00:00:00 2001
From: Dmitry Shmidt <dimitrysh@google.com>
Date: Tue, 24 Jun 2014 09:36:50 -0700
Subject: [PATCH 0001/1185] net: wireless: Increase scan entry expiration to
 fit new scan time

Change-Id: I0e23ce45d78d7c17633670973f49943a5ed6032d
Signed-off-by: Dmitry Shmidt <dimitrysh@google.com>
---
 net/wireless/scan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 41b0f96a933f..3ebf125bc99e 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -55,7 +55,7 @@
  * also linked into the probe response struct.
  */
 
-#define IEEE80211_SCAN_RESULT_EXPIRE	(3 * HZ)
+#define IEEE80211_SCAN_RESULT_EXPIRE	(7 * HZ)
 
 static void bss_free(struct cfg80211_internal_bss *bss)
 {

From f6f56efe7de93cd091ee456921a6043da370c22a Mon Sep 17 00:00:00 2001
From: Dmitry Shmidt <dimitrysh@google.com>
Date: Tue, 24 Jun 2014 13:19:46 -0700
Subject: [PATCH 0002/1185] net: wireless: Add NL80211_FLAG_NEED_WIPHY flag to
 vendor command

Change-Id: I52ee3bc8a422c2a4c57cccccccd6ba3e721b4c01
Signed-off-by: Dmitry Shmidt <dimitrysh@google.com>
---
 net/wireless/nl80211.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index ddb993fb0d38..150a38fc346e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -9153,7 +9153,8 @@ static struct genl_ops nl80211_ops[] = {
 		.doit = nl80211_vendor_cmd,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
-		.internal_flags = NL80211_FLAG_NEED_RTNL,
+		.internal_flags = NL80211_FLAG_NEED_WIPHY |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 };
 

From 364a09b74ecbcee9411c3677212f0392d3c876de Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Tue, 23 Jul 2013 17:38:41 -0400
Subject: [PATCH 0003/1185] SELinux: Enable setting security contexts on rootfs
 inodes.

rootfs (ramfs) can support setting of security contexts
by userspace due to the vfs fallback behavior of calling
the security module to set the in-core inode state
for security.* attributes when the filesystem does not
provide an xattr handler.  No xattr handler is required
as the inodes are pinned in memory and have no backing
store.

This is useful in allowing early userspace to label individual
files within a rootfs while still providing a policy-defined
default via genfs.

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <pmoore@redhat.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 security/selinux/hooks.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 44087388010c..a8de30bd733a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -422,6 +422,13 @@ static int sb_finish_set_opts(struct super_block *sb)
 	if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0)
 		sbsec->flags |= SE_SBLABELSUPP;
 
+	/*
+	 * Special handling for rootfs. Is genfs but supports
+	 * setting SELinux context on in-core inodes.
+	 */
+	if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0)
+		sbsec->flags |= SE_SBLABELSUPP;
+
 	/* Initialize the root inode. */
 	rc = inode_doinit_with_dentry(root_inode, root);
 

From 99a6ea48b591877d1cd6a51732c40a1d5321d961 Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Mon, 31 Mar 2014 16:23:51 +0900
Subject: [PATCH 0004/1185] net: core: Support UID-based routing.

This contains the following commits:

1. cc2f522 net: core: Add a UID range to fib rules.
2. d7ed2bd net: core: Use the socket UID in routing lookups.
3. 2f9306a net: core: Add a RTA_UID attribute to routes.
    This is so that userspace can do per-UID route lookups.
4. 8e46efb net: ipv6: Use the UID in IPv6 PMTUD
    IPv4 PMTUD already does this because ipv4_sk_update_pmtu
    uses __build_flow_key, which includes the UID.

Bug: 15413527
Change-Id: I81bd31dae655de9cce7d7a1f9a905dc1c2feba7c
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/net/fib_rules.h          |  6 +++-
 include/net/flow.h               |  9 +++++-
 include/net/ip.h                 |  1 +
 include/net/ip6_route.h          |  2 +-
 include/net/route.h              |  5 +--
 include/uapi/linux/fib_rules.h   |  2 ++
 include/uapi/linux/rtnetlink.h   |  1 +
 net/core/fib_rules.c             | 53 ++++++++++++++++++++++++++++++--
 net/ipv4/fib_frontend.c          |  1 +
 net/ipv4/inet_connection_sock.c  |  6 ++--
 net/ipv4/ip_output.c             |  3 +-
 net/ipv4/ping.c                  |  3 +-
 net/ipv4/raw.c                   |  3 +-
 net/ipv4/route.c                 | 25 +++++++++++----
 net/ipv4/syncookies.c            |  3 +-
 net/ipv4/udp.c                   |  3 +-
 net/ipv6/af_inet6.c              |  1 +
 net/ipv6/ah6.c                   |  2 +-
 net/ipv6/datagram.c              |  1 +
 net/ipv6/esp6.c                  |  2 +-
 net/ipv6/icmp.c                  |  2 +-
 net/ipv6/inet6_connection_sock.c |  2 ++
 net/ipv6/ipcomp6.c               |  2 +-
 net/ipv6/ping.c                  |  1 +
 net/ipv6/raw.c                   |  1 +
 net/ipv6/route.c                 | 12 ++++++--
 net/ipv6/syncookies.c            |  1 +
 net/ipv6/tcp_ipv6.c              |  1 +
 net/ipv6/udp.c                   |  1 +
 29 files changed, 129 insertions(+), 26 deletions(-)

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index e361f4882426..4ac12e14c6d9 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -23,6 +23,8 @@ struct fib_rule {
 	struct fib_rule __rcu	*ctarget;
 	char			iifname[IFNAMSIZ];
 	char			oifname[IFNAMSIZ];
+	kuid_t			uid_start;
+	kuid_t			uid_end;
 	struct rcu_head		rcu;
 	struct net *		fr_net;
 };
@@ -80,7 +82,9 @@ struct fib_rules_ops {
 	[FRA_FWMARK]	= { .type = NLA_U32 }, \
 	[FRA_FWMASK]	= { .type = NLA_U32 }, \
 	[FRA_TABLE]     = { .type = NLA_U32 }, \
-	[FRA_GOTO]	= { .type = NLA_U32 }
+	[FRA_GOTO]	= { .type = NLA_U32 }, \
+	[FRA_UID_START]	= { .type = NLA_U32 }, \
+	[FRA_UID_END]	= { .type = NLA_U32 }
 
 static inline void fib_rule_get(struct fib_rule *rule)
 {
diff --git a/include/net/flow.h b/include/net/flow.h
index 628e11b98c58..c91e2aae3fb1 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -10,6 +10,7 @@
 #include <linux/socket.h>
 #include <linux/in6.h>
 #include <linux/atomic.h>
+#include <linux/uidgid.h>
 
 struct flowi_common {
 	int	flowic_oif;
@@ -23,6 +24,7 @@ struct flowi_common {
 #define FLOWI_FLAG_CAN_SLEEP		0x02
 #define FLOWI_FLAG_KNOWN_NH		0x04
 	__u32	flowic_secid;
+	kuid_t	flowic_uid;
 };
 
 union flowi_uli {
@@ -59,6 +61,7 @@ struct flowi4 {
 #define flowi4_proto		__fl_common.flowic_proto
 #define flowi4_flags		__fl_common.flowic_flags
 #define flowi4_secid		__fl_common.flowic_secid
+#define flowi4_uid		__fl_common.flowic_uid
 
 	/* (saddr,daddr) must be grouped, same order as in IP header */
 	__be32			saddr;
@@ -78,7 +81,8 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
 				      __u32 mark, __u8 tos, __u8 scope,
 				      __u8 proto, __u8 flags,
 				      __be32 daddr, __be32 saddr,
-				      __be16 dport, __be16 sport)
+				      __be16 dport, __be16 sport,
+				      kuid_t uid)
 {
 	fl4->flowi4_oif = oif;
 	fl4->flowi4_iif = 0;
@@ -88,6 +92,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
 	fl4->flowi4_proto = proto;
 	fl4->flowi4_flags = flags;
 	fl4->flowi4_secid = 0;
+	fl4->flowi4_uid = uid;
 	fl4->daddr = daddr;
 	fl4->saddr = saddr;
 	fl4->fl4_dport = dport;
@@ -115,6 +120,7 @@ struct flowi6 {
 #define flowi6_proto		__fl_common.flowic_proto
 #define flowi6_flags		__fl_common.flowic_flags
 #define flowi6_secid		__fl_common.flowic_secid
+#define flowi6_uid		__fl_common.flowic_uid
 	struct in6_addr		daddr;
 	struct in6_addr		saddr;
 	__be32			flowlabel;
@@ -158,6 +164,7 @@ struct flowi {
 #define flowi_proto	u.__fl_common.flowic_proto
 #define flowi_flags	u.__fl_common.flowic_flags
 #define flowi_secid	u.__fl_common.flowic_secid
+#define flowi_uid	u.__fl_common.flowic_uid
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
diff --git a/include/net/ip.h b/include/net/ip.h
index 509b88079270..02fc145ecc42 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -153,6 +153,7 @@ struct ip_reply_arg {
 				/* -1 if not needed */ 
 	int	    bound_dev_if;
 	u8  	    tos;
+	kuid_t	    uid;
 }; 
 
 #define IP_REPLY_ARG_NOSRCCHECK 1
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 260f83f16bcf..25b4500f28c9 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -131,7 +131,7 @@ extern int			rt6_route_rcv(struct net_device *dev,
 					      const struct in6_addr *gwaddr);
 
 extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
-			    int oif, u32 mark);
+			    int oif, u32 mark, kuid_t uid);
 extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk,
 			       __be32 mtu);
 extern void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark);
diff --git a/include/net/route.h b/include/net/route.h
index 2ea40c1b5e00..b5b44875543e 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -142,7 +142,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
 	flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos,
 			   RT_SCOPE_UNIVERSE, proto,
 			   sk ? inet_sk_flowi_flags(sk) : 0,
-			   daddr, saddr, dport, sport);
+			   daddr, saddr, dport, sport, sock_i_uid(sk));
 	if (sk)
 		security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
 	return ip_route_output_flow(net, fl4, sk);
@@ -253,7 +253,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32
 		flow_flags |= FLOWI_FLAG_CAN_SLEEP;
 
 	flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
-			   protocol, flow_flags, dst, src, dport, sport);
+			   protocol, flow_flags, dst, src, dport, sport,
+			   sock_i_uid(sk));
 }
 
 static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 51da65b68b85..9dcdb6251cb8 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -49,6 +49,8 @@ enum {
 	FRA_TABLE,	/* Extended table id */
 	FRA_FWMASK,	/* mask for netfilter mark */
 	FRA_OIFNAME,
+	FRA_UID_START,	/* UID range */
+	FRA_UID_END,
 	__FRA_MAX
 };
 
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 7a2144e1afae..07c1146c1f51 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -297,6 +297,7 @@ enum rtattr_type_t {
 	RTA_TABLE,
 	RTA_MARK,
 	RTA_MFC_STATS,
+	RTA_UID,
 	__RTA_MAX
 };
 
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 0e9131195eb0..a40a876b8559 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -31,6 +31,8 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 	r->pref = pref;
 	r->table = table;
 	r->flags = flags;
+	r->uid_start = INVALID_UID;
+	r->uid_end = INVALID_UID;
 	r->fr_net = hold_net(ops->fro_net);
 
 	/* The lock is not required here, the list in unreacheable
@@ -179,6 +181,23 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
 }
 EXPORT_SYMBOL_GPL(fib_rules_unregister);
 
+static inline kuid_t fib_nl_uid(struct nlattr *nla)
+{
+	return make_kuid(current_user_ns(), nla_get_u32(nla));
+}
+
+static int nla_put_uid(struct sk_buff *skb, int idx, kuid_t uid)
+{
+	return nla_put_u32(skb, idx, from_kuid_munged(current_user_ns(), uid));
+}
+
+static int fib_uid_range_match(struct flowi *fl, struct fib_rule *rule)
+{
+	return (!uid_valid(rule->uid_start) && !uid_valid(rule->uid_end)) ||
+	       (uid_gte(fl->flowi_uid, rule->uid_start) &&
+		uid_lte(fl->flowi_uid, rule->uid_end));
+}
+
 static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
 			  struct flowi *fl, int flags)
 {
@@ -193,6 +212,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
 	if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
 		goto out;
 
+	if (!fib_uid_range_match(fl, rule))
+		goto out;
+
 	ret = ops->match(rule, fl, flags);
 out:
 	return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
@@ -363,6 +385,19 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
 	} else if (rule->action == FR_ACT_GOTO)
 		goto errout_free;
 
+	/* UID start and end must either both be valid or both unspecified. */
+	rule->uid_start = rule->uid_end = INVALID_UID;
+	if (tb[FRA_UID_START] || tb[FRA_UID_END]) {
+		if (tb[FRA_UID_START] && tb[FRA_UID_END]) {
+			rule->uid_start = fib_nl_uid(tb[FRA_UID_START]);
+			rule->uid_end = fib_nl_uid(tb[FRA_UID_END]);
+		}
+		if (!uid_valid(rule->uid_start) ||
+		    !uid_valid(rule->uid_end) ||
+		    !uid_lte(rule->uid_start, rule->uid_end))
+		goto errout_free;
+	}
+
 	err = ops->configure(rule, skb, frh, tb);
 	if (err < 0)
 		goto errout_free;
@@ -469,6 +504,14 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
 		    (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
 			continue;
 
+		if (tb[FRA_UID_START] &&
+		    !uid_eq(rule->uid_start, fib_nl_uid(tb[FRA_UID_START])))
+			continue;
+
+		if (tb[FRA_UID_END] &&
+		    !uid_eq(rule->uid_end, fib_nl_uid(tb[FRA_UID_END])))
+			continue;
+
 		if (!ops->compare(rule, frh, tb))
 			continue;
 
@@ -525,7 +568,9 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
 			 + nla_total_size(4) /* FRA_PRIORITY */
 			 + nla_total_size(4) /* FRA_TABLE */
 			 + nla_total_size(4) /* FRA_FWMARK */
-			 + nla_total_size(4); /* FRA_FWMASK */
+			 + nla_total_size(4) /* FRA_FWMASK */
+			 + nla_total_size(4) /* FRA_UID_START */
+			 + nla_total_size(4); /* FRA_UID_END */
 
 	if (ops->nlmsg_payload)
 		payload += ops->nlmsg_payload(rule);
@@ -579,7 +624,11 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	    ((rule->mark_mask || rule->mark) &&
 	     nla_put_u32(skb, FRA_FWMASK, rule->mark_mask)) ||
 	    (rule->target &&
-	     nla_put_u32(skb, FRA_GOTO, rule->target)))
+	     nla_put_u32(skb, FRA_GOTO, rule->target)) ||
+	    (uid_valid(rule->uid_start) &&
+	     nla_put_uid(skb, FRA_UID_START, rule->uid_start)) ||
+	    (uid_valid(rule->uid_end) &&
+	     nla_put_uid(skb, FRA_UID_END, rule->uid_end)))
 		goto nla_put_failure;
 	if (ops->fill(rule, skb, frh) < 0)
 		goto nla_put_failure;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c7629a209f9d..ffffeb448ec4 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -531,6 +531,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
 	[RTA_METRICS]		= { .type = NLA_NESTED },
 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
 	[RTA_FLOW]		= { .type = NLA_U32 },
+	[RTA_UID]		= { .type = NLA_U32 },
 };
 
 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 442087d371f6..6dfec2f18214 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -422,7 +422,8 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 			   sk->sk_protocol,
 			   flags,
 			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
-			   ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
+			   ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport,
+			   sock_i_uid(sk));
 	security_req_classify_flow(req, flowi4_to_flowi(fl4));
 	rt = ip_route_output_flow(net, fl4, sk);
 	if (IS_ERR(rt))
@@ -458,7 +459,8 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
 			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
-			   ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
+			   ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport,
+			   sock_i_uid(sk));
 	security_req_classify_flow(req, flowi4_to_flowi(fl4));
 	rt = ip_route_output_flow(net, fl4, sk);
 	if (IS_ERR(rt))
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c2ee385cecfb..8e20e9405582 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1503,7 +1503,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
 			   RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
 			   ip_reply_arg_flowi_flags(arg),
 			   daddr, saddr,
-			   tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
+			   tcp_hdr(skb)->source, tcp_hdr(skb)->dest,
+			   arg->uid);
 	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
 	rt = ip_route_output_key(net, &fl4);
 	if (IS_ERR(rt))
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 111e5a409594..b83d82951cad 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -768,7 +768,8 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
 			   RT_SCOPE_UNIVERSE, sk->sk_protocol,
-			   inet_sk_flowi_flags(sk), faddr, saddr, 0, 0);
+			   inet_sk_flowi_flags(sk), faddr, saddr, 0, 0,
+			   sock_i_uid(sk));
 
 	security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
 	rt = ip_route_output_flow(net, &fl4, sk);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index dd44e0ab600c..b8287330c579 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -572,7 +572,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			   RT_SCOPE_UNIVERSE,
 			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
 			   inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP,
-			   daddr, saddr, 0, 0);
+			   daddr, saddr, 0, 0,
+			   sock_i_uid(sk));
 
 	if (!inet->hdrincl) {
 		err = raw_probe_proto_opt(&fl4, msg);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c04359196ebc..ef2fe9d2e9e6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -500,7 +500,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
 }
 EXPORT_SYMBOL(__ip_select_ident);
 
-static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
+static void __build_flow_key(struct flowi4 *fl4, struct sock *sk,
 			     const struct iphdr *iph,
 			     int oif, u8 tos,
 			     u8 prot, u32 mark, int flow_flags)
@@ -516,11 +516,12 @@ static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
 	flowi4_init_output(fl4, oif, mark, tos,
 			   RT_SCOPE_UNIVERSE, prot,
 			   flow_flags,
-			   iph->daddr, iph->saddr, 0, 0);
+			   iph->daddr, iph->saddr, 0, 0,
+			   sock_i_uid(sk));
 }
 
 static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
-			       const struct sock *sk)
+			       struct sock *sk)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	int oif = skb->dev->ifindex;
@@ -531,7 +532,7 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
 	__build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
 }
 
-static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
+static void build_sk_flow_key(struct flowi4 *fl4, struct sock *sk)
 {
 	const struct inet_sock *inet = inet_sk(sk);
 	const struct ip_options_rcu *inet_opt;
@@ -545,11 +546,12 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
 			   inet_sk_flowi_flags(sk),
-			   daddr, inet->inet_saddr, 0, 0);
+			   daddr, inet->inet_saddr, 0, 0,
+			   sock_i_uid(sk));
 	rcu_read_unlock();
 }
 
-static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
+static void ip_rt_build_flow_key(struct flowi4 *fl4, struct sock *sk,
 				 const struct sk_buff *skb)
 {
 	if (skb)
@@ -2287,6 +2289,11 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
 	    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
 		goto nla_put_failure;
 
+	if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
+	    nla_put_u32(skb, RTA_UID,
+			from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
+		goto nla_put_failure;
+
 	error = rt->dst.error;
 
 	if (rt_is_input_route(rt)) {
@@ -2336,6 +2343,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
 	int err;
 	int mark;
 	struct sk_buff *skb;
+	kuid_t uid;
 
 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
 	if (err < 0)
@@ -2363,6 +2371,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
 	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
 	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
 	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
+	if (tb[RTA_UID])
+		uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
+	else
+		uid = (iif ? INVALID_UID : current_uid());
 
 	memset(&fl4, 0, sizeof(fl4));
 	fl4.daddr = dst;
@@ -2370,6 +2382,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
 	fl4.flowi4_tos = rtm->rtm_tos;
 	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
 	fl4.flowi4_mark = mark;
+	fl4.flowi4_uid = uid;
 
 	if (iif) {
 		struct net_device *dev;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 5abb45e281be..c94032b95c60 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -353,7 +353,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
 			   inet_sk_flowi_flags(sk),
 			   (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
-			   ireq->loc_addr, th->source, th->dest);
+			   ireq->loc_addr, th->source, th->dest,
+			   sock_i_uid(sk));
 	security_req_classify_flow(req, flowi4_to_flowi(&fl4));
 	rt = ip_route_output_key(sock_net(sk), &fl4);
 	if (IS_ERR(rt)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0bf5d399a03c..35ab330ed958 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -962,7 +962,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
 				   RT_SCOPE_UNIVERSE, sk->sk_protocol,
 				   inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP,
-				   faddr, saddr, dport, inet->inet_sport);
+				   faddr, saddr, dport, inet->inet_sport,
+				   sock_i_uid(sk));
 
 		security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
 		rt = ip_route_output_flow(net, fl4, sk);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a4cfde67fcb7..d29ae19ae698 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -694,6 +694,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
 		fl6.flowi6_mark = sk->sk_mark;
 		fl6.fl6_dport = inet->inet_dport;
 		fl6.fl6_sport = inet->inet_sport;
+		fl6.flowi6_uid = sock_i_uid(sk);
 		security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
 		final_p = fl6_update_dst(&fl6, np->opt, &final);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index bb02e176cb70..b903e19463c9 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -630,7 +630,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (type == NDISC_REDIRECT)
 		ip6_redirect(skb, net, 0, 0);
 	else
-		ip6_update_pmtu(skb, net, info, 0, 0);
+		ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID);
 	xfrm_state_put(x);
 }
 
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 4b56cbbc7890..00b4a5f6eea4 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -162,6 +162,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.fl6_dport = inet->inet_dport;
 	fl6.fl6_sport = inet->inet_sport;
+	fl6.flowi6_uid = sock_i_uid(sk);
 
 	if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
 		fl6.flowi6_oif = np->mcast_oif;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 40ffd72243a4..fdc81cb29e80 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -449,7 +449,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (type == NDISC_REDIRECT)
 		ip6_redirect(skb, net, 0, 0);
 	else
-		ip6_update_pmtu(skb, net, info, 0, 0);
+		ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID);
 	xfrm_state_put(x);
 }
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 28da4003e842..12b1a942dc99 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -90,7 +90,7 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	struct net *net = dev_net(skb->dev);
 
 	if (type == ICMPV6_PKT_TOOBIG)
-		ip6_update_pmtu(skb, net, info, 0, 0);
+		ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID);
 	else if (type == NDISC_REDIRECT)
 		ip6_redirect(skb, net, 0, 0);
 
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index f1493138d21e..65a46058c854 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -84,6 +84,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
 	fl6->flowi6_mark = inet_rsk(req)->ir_mark;
 	fl6->fl6_dport = inet_rsk(req)->rmt_port;
 	fl6->fl6_sport = inet_rsk(req)->loc_port;
+	fl6->flowi6_uid = sock_i_uid(sk);
 	security_req_classify_flow(req, flowi6_to_flowi(fl6));
 
 	dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
@@ -211,6 +212,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
 	fl6->flowi6_mark = sk->sk_mark;
 	fl6->fl6_sport = inet->inet_sport;
 	fl6->fl6_dport = inet->inet_dport;
+	fl6->flowi6_uid = sock_i_uid(sk);
 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
 
 	final_p = fl6_update_dst(fl6, np->opt, &final);
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 7af5aee75d98..a1beb59a841e 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -78,7 +78,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (type == NDISC_REDIRECT)
 		ip6_redirect(skb, net, 0, 0);
 	else
-		ip6_update_pmtu(skb, net, info, 0, 0);
+		ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID);
 	xfrm_state_put(x);
 }
 
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index d4edfceab36f..38ceca8a6358 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -159,6 +159,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	fl6.saddr = np->saddr;
 	fl6.daddr = *daddr;
 	fl6.flowi6_mark = sk->sk_mark;
+	fl6.flowi6_uid = sock_i_uid(sk);
 	fl6.fl6_icmp_type = user_icmph.icmp6_type;
 	fl6.fl6_icmp_code = user_icmph.icmp6_code;
 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index eedff8ccded5..dfef31581f85 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -761,6 +761,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	memset(&fl6, 0, sizeof(fl6));
 
 	fl6.flowi6_mark = sk->sk_mark;
+	fl6.flowi6_uid = sock_i_uid(sk);
 
 	if (sin6) {
 		if (addr_len < SIN6_LEN_RFC2133)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8ecf44af7c2e..bad36468dcd7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1099,7 +1099,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 }
 
 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
-		     int oif, u32 mark)
+		     int oif, u32 mark, kuid_t uid)
 {
 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
 	struct dst_entry *dst;
@@ -1112,6 +1112,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 	fl6.daddr = iph->daddr;
 	fl6.saddr = iph->saddr;
 	fl6.flowlabel = ip6_flowinfo(iph);
+	fl6.flowi6_uid = uid;
 
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (!dst->error)
@@ -1123,7 +1124,7 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
 {
 	ip6_update_pmtu(skb, sock_net(sk), mtu,
-			sk->sk_bound_dev_if, sk->sk_mark);
+			sk->sk_bound_dev_if, sk->sk_mark, sock_i_uid(sk));
 }
 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
 
@@ -2199,6 +2200,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
 	[RTA_PRIORITY]          = { .type = NLA_U32 },
 	[RTA_METRICS]           = { .type = NLA_NESTED },
 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
+	[RTA_UID]		= { .type = NLA_U32 },
 };
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2585,6 +2587,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
 	if (tb[RTA_OIF])
 		oif = nla_get_u32(tb[RTA_OIF]);
 
+	if (tb[RTA_UID])
+		fl6.flowi6_uid = make_kuid(current_user_ns(),
+					   nla_get_u32(tb[RTA_UID]));
+	else
+		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
+
 	if (iif) {
 		struct net_device *dev;
 		int flags = 0;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 1efbc6f44a6a..ba8622daffd7 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -243,6 +243,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		fl6.flowi6_mark = ireq->ir_mark;
 		fl6.fl6_dport = inet_rsk(req)->rmt_port;
 		fl6.fl6_sport = inet_sk(sk)->inet_sport;
+		fl6.flowi6_uid = sock_i_uid(sk);
 		security_req_classify_flow(req, flowi6_to_flowi(&fl6));
 
 		dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6e882dadb4f8..a4fc647deb00 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -252,6 +252,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.fl6_dport = usin->sin6_port;
 	fl6.fl6_sport = inet->inet_sport;
+	fl6.flowi6_uid = sock_i_uid(sk);
 
 	final_p = fl6_update_dst(&fl6, np->opt, &final);
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 42923b14dfa6..e6dd85da9062 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1147,6 +1147,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
 
 	fl6.flowi6_mark = sk->sk_mark;
+	fl6.flowi6_uid = sock_i_uid(sk);
 
 	if (msg->msg_controllen) {
 		opt = &opt_space;

From 7ae573c139c8a91bed58060818a2559526aee741 Mon Sep 17 00:00:00 2001
From: Dmitry Shmidt <dimitrysh@google.com>
Date: Thu, 26 Jun 2014 09:26:21 -0700
Subject: [PATCH 0005/1185] net: wireless: Fix
 cfg80211_vendor_cmd_alloc_reply_skb

Change-Id: Ia8da6cdacd5668d10f8955972d996177305b7228
Signed-off-by: Dmitry Shmidt <dimitrysh@google.com>
---
 include/net/cfg80211.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 304e41381a1f..2ebb168e5a5b 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3696,8 +3696,8 @@ void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp);
 static inline struct sk_buff *
 cfg80211_vendor_cmd_alloc_reply_skb(struct wiphy *wiphy, int approxlen)
 {
-	return __cfg80211_alloc_reply_skb(wiphy, NL80211_CMD_TESTMODE,
-					  NL80211_ATTR_TESTDATA, approxlen);
+	return __cfg80211_alloc_reply_skb(wiphy, NL80211_CMD_VENDOR,
+					  NL80211_ATTR_VENDOR_DATA, approxlen);
 }
 
 /**

From 3c9e49908b882631f3737022e707ffddf0b7c230 Mon Sep 17 00:00:00 2001
From: Minsung Kim <ms925.kim@samsung.com>
Date: Wed, 25 Jun 2014 19:44:50 +0900
Subject: [PATCH 0006/1185] cpufreq: fix sleeping in atomic context when
 realloc freq_table for all_time_in_state

Commit 40cf2f8 (cpufreq: Persist cpufreq time in state data across hotplug)
causes the following call trace to be spit on boot:

BUG: sleeping function called from invalid context at mm/slub.c:936
in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: swapper/0
CPU: 6 PID: 1 Comm: swapper/0 Not tainted 3.10.9-20140624.172707-eng-gd6c0f69-dirty #50
Backtrace:
[<c0012270>] (dump_backtrace+0x0/0x10c) from [<c001256c>] (show_stack+0x18/0x1c)
 r6:ffff1788 r5:c0c020c0 r4:e609c000 r3:00000000
[<c0012554>] (show_stack+0x0/0x1c) from [<c07a2970>] (dump_stack+0x20/0x28)
[<c07a2950>] (dump_stack+0x0/0x28) from [<c0057678>] (__might_sleep+0x104/0x120)
[<c0057574>] (__might_sleep+0x0/0x120) from [<c00ff000>] (__kmalloc_track_caller+0x144/0x274)
 r6:00000000 r5:e609c000 r4:e6802140
[<c00feebc>] (__kmalloc_track_caller+0x0/0x274) from [<c00da098>] (krealloc+0x58/0xb0)
[<c00da040>] (krealloc+0x0/0xb0) from [<c050266c>] (cpufreq_allstats_create+0x120/0x204)
 r8:e4c4ff00 r7:c0d266b8 r6:0013d620 r5:e4c4e600 r4:00000001
r3:e535d6d0
[<c050254c>] (cpufreq_allstats_create+0x0/0x204) from [<c0502e38>] (cpufreq_stat_notifier_policy+0xb8/0xd0)
[<c0502d80>] (cpufreq_stat_notifier_policy+0x0/0xd0) from [<c00517cc>] (notifier_call_chain+0x4c/0x8c)
 r5:00000000 r4:fffffffe
[<c0051780>] (notifier_call_chain+0x0/0x8c) from [<c00519fc>] (__blocking_notifier_call_chain+0x50/0x68)
 r8:c0cd4d00 r7:00000002 r6:e609dd7c r5:ffffffff r4:c0d25a4c
r3:ffffffff
[<c00519ac>] (__blocking_notifier_call_chain+0x0/0x68) from [<c0051a34>] (blocking_notifier_call_chain+0x20/0x28)
 r7:c0e24f30 r6:00000000 r5:e53e1e00 r4:e609dd7c
[<c0051a14>] (blocking_notifier_call_chain+0x0/0x28) from [<c0500fec>] (__cpufreq_set_policy+0xc0/0x1d0)
[<c0500f2c>] (__cpufreq_set_policy+0x0/0x1d0) from [<c0501308>] (cpufreq_add_dev_interface+0x20c/0x270)
 r7:00000008 r6:00000000 r5:e53e1e00 r4:e53e1e58
[<c05010fc>] (cpufreq_add_dev_interface+0x0/0x270) from [<c05016a8>] (cpufreq_add_dev+0x33c/0x420)
[<c050136c>] (cpufreq_add_dev+0x0/0x420) from [<c03604a4>] (subsys_interface_register+0x80/0xbc)
[<c0360424>] (subsys_interface_register+0x0/0xbc) from [<c050035c>] (cpufreq_register_driver+0x8c/0x194)

Change-Id: If77a656d0ea60a8fc4083283d104509fa6c07f8f
Signed-off-by: Minsung Kim <ms925.kim@samsung.com>
---
 drivers/cpufreq/cpufreq_stats.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 3f5e279ff9d8..7a2bcac3ad7f 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -424,7 +424,7 @@ static void add_all_freq_table(unsigned int freq)
 	unsigned int size;
 	size = sizeof(unsigned int) * (all_freq_table->table_size + 1);
 	all_freq_table->freq_table = krealloc(all_freq_table->freq_table,
-			size, GFP_KERNEL);
+			size, GFP_ATOMIC);
 	if (IS_ERR(all_freq_table->freq_table)) {
 		pr_warn("Could not reallocate memory for freq_table\n");
 		all_freq_table->freq_table = NULL;

From 8d0e99be244a647f8f0dd8741238446088e1c30f Mon Sep 17 00:00:00 2001
From: Yann Soubeyrand <ysoubeyrand@adeneo-embedded.com>
Date: Wed, 18 Jun 2014 14:57:29 +0200
Subject: [PATCH 0007/1185] net: wireless: fix misplaced #endif in
 net/wireless/nl80211.c

The patch "nl80211: cumulative vendor command support patch" introduced
compilation error in file net/wireless/nl80211.c. The nl80211_vendor_mcgrp
variable is defined only if the CONFIG_NL80211_TESTMODE preprocessor constant
is defined. However, this variable is later used whether
CONFIG_NL80211_TESTMODE is defined or not. The cause is a misplaced #endif.

Change-Id: I466488285578d57e6554a1f8ebe71d4f3385ecf2
Signed-off-by: Dmitry Shmidt <dimitrysh@google.com>
---
 net/wireless/nl80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 150a38fc346e..ec1356fab67e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -10973,11 +10973,11 @@ int nl80211_init(void)
 	err = genl_register_mc_group(&nl80211_fam, &nl80211_testmode_mcgrp);
 	if (err)
 		goto err_out;
-#endif
 
 	err = genl_register_mc_group(&nl80211_fam, &nl80211_vendor_mcgrp);
 	if (err)
 		goto err_out;
+#endif
 
 	err = netlink_register_notifier(&nl80211_netlink_notifier);
 	if (err)

From dd979cc254c3aeec85927dbaad669d1ecd193c6f Mon Sep 17 00:00:00 2001
From: Dmitry Shmidt <dimitrysh@google.com>
Date: Tue, 1 Jul 2014 14:48:15 -0700
Subject: [PATCH 0008/1185] net: cfg80211: Fix wiphy_vendor_command 'doit' type

Change-Id: I5b1732eed7ac4f6bc267b4baa2153f6de2e16dc8
Signed-off-by: Dmitry Shmidt <dimitrysh@google.com>
---
 include/net/cfg80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 2ebb168e5a5b..d9681a288ce6 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2497,7 +2497,7 @@ struct wiphy_vendor_command {
 	struct nl80211_vendor_cmd_info info;
 	u32 flags;
 	int (*doit)(struct wiphy *wiphy, struct wireless_dev *wdev,
-		    void *data, int data_len);
+		    const void *data, int data_len);
 };
 
 /**

From 70bcc368f145d0871379f5ffe0428e65e10345e1 Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Fri, 27 Jun 2014 16:39:35 -0700
Subject: [PATCH 0009/1185] input: Made keyreset more robust

Switched do_restart to run in a separate workqueue to handle
cases where kernel_restart hangs.

Change-Id: I1ecd61f8d0859f1a86d37c692351d644b5db9c69
Signed-off-by: Daniel Rosenberg <drosen@google.com>
---
 drivers/input/keyreset.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/input/keyreset.c b/drivers/input/keyreset.c
index eaaccde82210..7fbf7247e65f 100644
--- a/drivers/input/keyreset.c
+++ b/drivers/input/keyreset.c
@@ -27,9 +27,10 @@ struct keyreset_state {
 	int restart_requested;
 	int (*reset_fn)(void);
 	struct platform_device *pdev_child;
+	struct work_struct restart_work;
 };
 
-static void do_restart(void)
+static void do_restart(struct work_struct *unused)
 {
 	sys_sync();
 	kernel_restart(NULL);
@@ -44,7 +45,7 @@ static void do_reset_fn(void *priv)
 		state->restart_requested = state->reset_fn();
 	} else {
 		pr_info("keyboard reset\n");
-		do_restart();
+		schedule_work(&state->restart_work);
 		state->restart_requested = 1;
 	}
 }
@@ -69,6 +70,7 @@ static int keyreset_probe(struct platform_device *pdev)
 	if (!state->pdev_child)
 		return -ENOMEM;
 	state->pdev_child->dev.parent = &pdev->dev;
+	INIT_WORK(&state->restart_work, do_restart);
 
 	keyp = pdata->keys_down;
 	while ((key = *keyp++)) {

From 455b09d66a9ccfc572497ae88375ae343ff9ae66 Mon Sep 17 00:00:00 2001
From: Sreeram Ramachandran <sreeram@google.com>
Date: Tue, 8 Jul 2014 11:57:14 -0700
Subject: [PATCH 0010/1185] Handle 'sk' being NULL in UID-based routing.

Bug: 15413527
Change-Id: Iab1fae9da6053b284591628ef1de878761b137b1
Signed-off-by: Sreeram Ramachandran <sreeram@google.com>
---
 include/net/route.h | 2 +-
 net/ipv4/route.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/route.h b/include/net/route.h
index b5b44875543e..647bb2adbffd 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -142,7 +142,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
 	flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos,
 			   RT_SCOPE_UNIVERSE, proto,
 			   sk ? inet_sk_flowi_flags(sk) : 0,
-			   daddr, saddr, dport, sport, sock_i_uid(sk));
+			   daddr, saddr, dport, sport, sk ? sock_i_uid(sk) : 0);
 	if (sk)
 		security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
 	return ip_route_output_flow(net, fl4, sk);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ef2fe9d2e9e6..42cd979d1633 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -517,7 +517,7 @@ static void __build_flow_key(struct flowi4 *fl4, struct sock *sk,
 			   RT_SCOPE_UNIVERSE, prot,
 			   flow_flags,
 			   iph->daddr, iph->saddr, 0, 0,
-			   sock_i_uid(sk));
+			   sk ? sock_i_uid(sk) : 0);
 }
 
 static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,

From f8fe2735daaf662876d1333075991997c04d5359 Mon Sep 17 00:00:00 2001
From: Anson Jacob <ansonkuzhumbil@gmail.com>
Date: Mon, 23 Jun 2014 19:07:44 +0800
Subject: [PATCH 0011/1185] usb: gadget: f_accessory: Enabled Zero Length
 Packet (ZLP) for acc_write

Accessory connected to Android Device requires
Zero Length Packet (ZLP) to be written when data
transferred out from the Android device are multiples
of wMaxPacketSize (64bytes (Full-Speed) / 512bytes (High-Speed))
to end the transfer.

Change-Id: Ib2c2c0ab98ef9afa10e74a720142deca5c0ed476
Signed-off-by: Anson Jacob <ansonkuzhumbil@gmail.com>
---
 drivers/usb/gadget/f_accessory.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c
index 53e50b5e8612..a401acdceb4d 100644
--- a/drivers/usb/gadget/f_accessory.c
+++ b/drivers/usb/gadget/f_accessory.c
@@ -662,10 +662,17 @@ static ssize_t acc_write(struct file *fp, const char __user *buf,
 			break;
 		}
 
-		if (count > BULK_BUFFER_SIZE)
+		if (count > BULK_BUFFER_SIZE) {
 			xfer = BULK_BUFFER_SIZE;
-		else
+			/* ZLP, They will be more TX requests so not yet. */
+			req->zero = 0;
+		} else {
 			xfer = count;
+			/* If the data length is a multple of the
+			 * maxpacket size then send a zero length packet(ZLP).
+			*/
+			req->zero = ((xfer % dev->ep_in->maxpacket) == 0);
+		}
 		if (copy_from_user(req->buf, buf, xfer)) {
 			r = -EFAULT;
 			break;

From 2ca27aef6f907c33d85915df5f37ddee32a92742 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 28 Aug 2013 22:29:55 +0200
Subject: [PATCH 0012/1185] HID: validate HID report id size

The "Report ID" field of a HID report is used to build indexes of
reports. The kernel's index of these is limited to 256 entries, so any
malicious device that sets a Report ID greater than 255 will trigger
memory corruption on the host:

[ 1347.156239] BUG: unable to handle kernel paging request at ffff88094958a878
[ 1347.156261] IP: [<ffffffff813e4da0>] hid_register_report+0x2a/0x8b

CVE-2013-2888

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: stable@kernel.org
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 10 +++++++---
 include/linux/hid.h    |  4 +++-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 264f55099940..95a97a44fe74 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -63,6 +63,8 @@ struct hid_report *hid_register_report(struct hid_device *device, unsigned type,
 	struct hid_report_enum *report_enum = device->report_enum + type;
 	struct hid_report *report;
 
+	if (id >= HID_MAX_IDS)
+		return NULL;
 	if (report_enum->report_id_hash[id])
 		return report_enum->report_id_hash[id];
 
@@ -404,8 +406,10 @@ static int hid_parser_global(struct hid_parser *parser, struct hid_item *item)
 
 	case HID_GLOBAL_ITEM_TAG_REPORT_ID:
 		parser->global.report_id = item_udata(item);
-		if (parser->global.report_id == 0) {
-			hid_err(parser->device, "report_id 0 is invalid\n");
+		if (parser->global.report_id == 0 ||
+		    parser->global.report_id >= HID_MAX_IDS) {
+			hid_err(parser->device, "report_id %u is invalid\n",
+				parser->global.report_id);
 			return -1;
 		}
 		return 0;
@@ -575,7 +579,7 @@ static void hid_close_report(struct hid_device *device)
 	for (i = 0; i < HID_REPORT_TYPES; i++) {
 		struct hid_report_enum *report_enum = device->report_enum + i;
 
-		for (j = 0; j < 256; j++) {
+		for (j = 0; j < HID_MAX_IDS; j++) {
 			struct hid_report *report = report_enum->report_id_hash[j];
 			if (report)
 				hid_free_report(report);
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 8136c6d99037..95b6ddf0fa45 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -393,10 +393,12 @@ struct hid_report {
 	struct hid_device *device;			/* associated device */
 };
 
+#define HID_MAX_IDS 256
+
 struct hid_report_enum {
 	unsigned numbered;
 	struct list_head report_list;
-	struct hid_report *report_id_hash[256];
+	struct hid_report *report_id_hash[HID_MAX_IDS];
 };
 
 #define HID_REPORT_TYPES 3

From b8b84374b4876b2a62187102392f72a8beaf217e Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 11 Sep 2013 21:56:50 +0200
Subject: [PATCH 0013/1185] HID: provide a helper for validating hid reports

Many drivers need to validate the characteristics of their HID report
during initialization to avoid misusing the reports. This adds a common
helper to perform validation of the report existing, the field existing,
and the expected number of values within the field.

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 58 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/hid.h    |  4 +++
 2 files changed, 62 insertions(+)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 95a97a44fe74..c6d42deb9163 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -759,6 +759,64 @@ int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size)
 }
 EXPORT_SYMBOL_GPL(hid_parse_report);
 
+static const char * const hid_report_names[] = {
+	"HID_INPUT_REPORT",
+	"HID_OUTPUT_REPORT",
+	"HID_FEATURE_REPORT",
+};
+/**
+ * hid_validate_values - validate existing device report's value indexes
+ *
+ * @device: hid device
+ * @type: which report type to examine
+ * @id: which report ID to examine (0 for first)
+ * @field_index: which report field to examine
+ * @report_counts: expected number of values
+ *
+ * Validate the number of values in a given field of a given report, after
+ * parsing.
+ */
+struct hid_report *hid_validate_values(struct hid_device *hid,
+				       unsigned int type, unsigned int id,
+				       unsigned int field_index,
+				       unsigned int report_counts)
+{
+	struct hid_report *report;
+
+	if (type > HID_FEATURE_REPORT) {
+		hid_err(hid, "invalid HID report type %u\n", type);
+		return NULL;
+	}
+
+	if (id >= HID_MAX_IDS) {
+		hid_err(hid, "invalid HID report id %u\n", id);
+		return NULL;
+	}
+
+	/*
+	 * Explicitly not using hid_get_report() here since it depends on
+	 * ->numbered being checked, which may not always be the case when
+	 * drivers go to access report values.
+	 */
+	report = hid->report_enum[type].report_id_hash[id];
+	if (!report) {
+		hid_err(hid, "missing %s %u\n", hid_report_names[type], id);
+		return NULL;
+	}
+	if (report->maxfield <= field_index) {
+		hid_err(hid, "not enough fields in %s %u\n",
+			hid_report_names[type], id);
+		return NULL;
+	}
+	if (report->field[field_index]->report_count < report_counts) {
+		hid_err(hid, "not enough values in %s %u field %u\n",
+			hid_report_names[type], id, field_index);
+		return NULL;
+	}
+	return report;
+}
+EXPORT_SYMBOL_GPL(hid_validate_values);
+
 /**
  * hid_open_report - open a driver-specific device report
  *
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 95b6ddf0fa45..1f3c5f7b3bc5 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -749,6 +749,10 @@ void hid_output_report(struct hid_report *report, __u8 *data);
 struct hid_device *hid_allocate_device(void);
 struct hid_report *hid_register_report(struct hid_device *device, unsigned type, unsigned id);
 int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size);
+struct hid_report *hid_validate_values(struct hid_device *hid,
+				       unsigned int type, unsigned int id,
+				       unsigned int field_index,
+				       unsigned int report_counts);
 int hid_open_report(struct hid_device *device);
 int hid_check_keys_pressed(struct hid_device *hid);
 int hid_connect(struct hid_device *hid, unsigned int connect_mask);

From 728a564fa767d186ea1f95fbf81b43e9d865dc0b Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 11 Sep 2013 21:56:51 +0200
Subject: [PATCH 0014/1185] HID: zeroplus: validate output report details

The zeroplus HID driver was not checking the size of allocated values
in fields it used. A HID device could send a malicious output report
that would cause the driver to write beyond the output report allocation
during initialization, causing a heap overflow:

[ 1442.728680] usb 1-1: New USB device found, idVendor=0c12, idProduct=0005
...
[ 1466.243173] BUG kmalloc-192 (Tainted: G        W   ): Redzone overwritten

CVE-2013-2889

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-zpff.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/hid/hid-zpff.c b/drivers/hid/hid-zpff.c
index 6ec28a37c146..a29756c6ca02 100644
--- a/drivers/hid/hid-zpff.c
+++ b/drivers/hid/hid-zpff.c
@@ -68,21 +68,13 @@ static int zpff_init(struct hid_device *hid)
 	struct hid_report *report;
 	struct hid_input *hidinput = list_entry(hid->inputs.next,
 						struct hid_input, list);
-	struct list_head *report_list =
-			&hid->report_enum[HID_OUTPUT_REPORT].report_list;
 	struct input_dev *dev = hidinput->input;
-	int error;
+	int i, error;
 
-	if (list_empty(report_list)) {
-		hid_err(hid, "no output report found\n");
-		return -ENODEV;
-	}
-
-	report = list_entry(report_list->next, struct hid_report, list);
-
-	if (report->maxfield < 4) {
-		hid_err(hid, "not enough fields in report\n");
-		return -ENODEV;
+	for (i = 0; i < 4; i++) {
+		report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, i, 1);
+		if (!report)
+			return -ENODEV;
 	}
 
 	zpff = kzalloc(sizeof(struct zpff_device), GFP_KERNEL);

From 7e62de4584a6ad505e164ae92c9c1571da46c9bb Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 11 Sep 2013 21:56:53 +0200
Subject: [PATCH 0015/1185] HID: steelseries: validate output report details

A HID device could send a malicious output report that would cause the
steelseries HID driver to write beyond the output report allocation
during initialization, causing a heap overflow:

[  167.981534] usb 1-1: New USB device found, idVendor=1038, idProduct=1410
...
[  182.050547] BUG kmalloc-256 (Tainted: G        W   ): Redzone overwritten

CVE-2013-2891

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-steelseries.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/hid/hid-steelseries.c b/drivers/hid/hid-steelseries.c
index d16491192112..29f328f411fb 100644
--- a/drivers/hid/hid-steelseries.c
+++ b/drivers/hid/hid-steelseries.c
@@ -249,6 +249,11 @@ static int steelseries_srws1_probe(struct hid_device *hdev,
 		goto err_free;
 	}
 
+	if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, 0, 0, 16)) {
+		ret = -ENODEV;
+		goto err_free;
+	}
+
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (ret) {
 		hid_err(hdev, "hw start failed\n");

From e3c850750a74e04855fc61e9f5d2854eeca8087c Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 28 Aug 2013 22:30:49 +0200
Subject: [PATCH 0016/1185] HID: pantherlord: validate output report details

A HID device could send a malicious output report that would cause the
pantherlord HID driver to write beyond the output report allocation
during initialization, causing a heap overflow:

[  310.939483] usb 1-1: New USB device found, idVendor=0e8f, idProduct=0003
...
[  315.980774] BUG kmalloc-192 (Tainted: G        W   ): Redzone overwritten

CVE-2013-2892

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: stable@kernel.org
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-pl.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/hid-pl.c b/drivers/hid/hid-pl.c
index d29112fa5cd5..2dcd7d98dbd6 100644
--- a/drivers/hid/hid-pl.c
+++ b/drivers/hid/hid-pl.c
@@ -132,8 +132,14 @@ static int plff_init(struct hid_device *hid)
 			strong = &report->field[0]->value[2];
 			weak = &report->field[0]->value[3];
 			debug("detected single-field device");
-		} else if (report->maxfield >= 4 && report->field[0]->maxusage == 1 &&
-				report->field[0]->usage[0].hid == (HID_UP_LED | 0x43)) {
+		} else if (report->field[0]->maxusage == 1 &&
+			   report->field[0]->usage[0].hid ==
+				(HID_UP_LED | 0x43) &&
+			   report->maxfield >= 4 &&
+			   report->field[0]->report_count >= 1 &&
+			   report->field[1]->report_count >= 1 &&
+			   report->field[2]->report_count >= 1 &&
+			   report->field[3]->report_count >= 1) {
 			report->field[0]->value[0] = 0x00;
 			report->field[1]->value[0] = 0x00;
 			strong = &report->field[2]->value[0];

From d87aff426add54f65131d450a06db618742a7dfe Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 11 Sep 2013 21:56:54 +0200
Subject: [PATCH 0017/1185] HID: LG: validate HID output report details

A HID device could send a malicious output report that would cause the
lg, lg3, and lg4 HID drivers to write beyond the output report allocation
during an event, causing a heap overflow:

[  325.245240] usb 1-1: New USB device found, idVendor=046d, idProduct=c287
...
[  414.518960] BUG kmalloc-4096 (Not tainted): Redzone overwritten

Additionally, while lg2 did correctly validate the report details, it was
cleaned up and shortened.

CVE-2013-2893

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-lg2ff.c | 19 +++----------------
 drivers/hid/hid-lg3ff.c | 29 ++++++-----------------------
 drivers/hid/hid-lg4ff.c | 20 +-------------------
 drivers/hid/hid-lgff.c  | 17 ++---------------
 4 files changed, 12 insertions(+), 73 deletions(-)

diff --git a/drivers/hid/hid-lg2ff.c b/drivers/hid/hid-lg2ff.c
index b3cd1507dda2..1a42eaa6ca02 100644
--- a/drivers/hid/hid-lg2ff.c
+++ b/drivers/hid/hid-lg2ff.c
@@ -64,26 +64,13 @@ int lg2ff_init(struct hid_device *hid)
 	struct hid_report *report;
 	struct hid_input *hidinput = list_entry(hid->inputs.next,
 						struct hid_input, list);
-	struct list_head *report_list =
-			&hid->report_enum[HID_OUTPUT_REPORT].report_list;
 	struct input_dev *dev = hidinput->input;
 	int error;
 
-	if (list_empty(report_list)) {
-		hid_err(hid, "no output report found\n");
+	/* Check that the report looks ok */
+	report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7);
+	if (!report)
 		return -ENODEV;
-	}
-
-	report = list_entry(report_list->next, struct hid_report, list);
-
-	if (report->maxfield < 1) {
-		hid_err(hid, "output report is empty\n");
-		return -ENODEV;
-	}
-	if (report->field[0]->report_count < 7) {
-		hid_err(hid, "not enough values in the field\n");
-		return -ENODEV;
-	}
 
 	lg2ff = kmalloc(sizeof(struct lg2ff_device), GFP_KERNEL);
 	if (!lg2ff)
diff --git a/drivers/hid/hid-lg3ff.c b/drivers/hid/hid-lg3ff.c
index e52f181f6aa1..8c2da183d3bc 100644
--- a/drivers/hid/hid-lg3ff.c
+++ b/drivers/hid/hid-lg3ff.c
@@ -66,10 +66,11 @@ static int hid_lg3ff_play(struct input_dev *dev, void *data,
 	int x, y;
 
 /*
- * Maxusage should always be 63 (maximum fields)
- * likely a better way to ensure this data is clean
+ * Available values in the field should always be 63, but we only use up to
+ * 35. Instead, clear the entire area, however big it is.
  */
-	memset(report->field[0]->value, 0, sizeof(__s32)*report->field[0]->maxusage);
+	memset(report->field[0]->value, 0,
+	       sizeof(__s32) * report->field[0]->report_count);
 
 	switch (effect->type) {
 	case FF_CONSTANT:
@@ -129,32 +130,14 @@ static const signed short ff3_joystick_ac[] = {
 int lg3ff_init(struct hid_device *hid)
 {
 	struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
-	struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
 	struct input_dev *dev = hidinput->input;
-	struct hid_report *report;
-	struct hid_field *field;
 	const signed short *ff_bits = ff3_joystick_ac;
 	int error;
 	int i;
 
-	/* Find the report to use */
-	if (list_empty(report_list)) {
-		hid_err(hid, "No output report found\n");
-		return -1;
-	}
-
 	/* Check that the report looks ok */
-	report = list_entry(report_list->next, struct hid_report, list);
-	if (!report) {
-		hid_err(hid, "NULL output report\n");
-		return -1;
-	}
-
-	field = report->field[0];
-	if (!field) {
-		hid_err(hid, "NULL field\n");
-		return -1;
-	}
+	if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 35))
+		return -ENODEV;
 
 	/* Assume single fixed device G940 */
 	for (i = 0; ff_bits[i] >= 0; i++)
diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c
index 0ddae2a00d59..8782fe1aaa07 100644
--- a/drivers/hid/hid-lg4ff.c
+++ b/drivers/hid/hid-lg4ff.c
@@ -484,34 +484,16 @@ static enum led_brightness lg4ff_led_get_brightness(struct led_classdev *led_cde
 int lg4ff_init(struct hid_device *hid)
 {
 	struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
-	struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
 	struct input_dev *dev = hidinput->input;
-	struct hid_report *report;
-	struct hid_field *field;
 	struct lg4ff_device_entry *entry;
 	struct lg_drv_data *drv_data;
 	struct usb_device_descriptor *udesc;
 	int error, i, j;
 	__u16 bcdDevice, rev_maj, rev_min;
 
-	/* Find the report to use */
-	if (list_empty(report_list)) {
-		hid_err(hid, "No output report found\n");
-		return -1;
-	}
-
 	/* Check that the report looks ok */
-	report = list_entry(report_list->next, struct hid_report, list);
-	if (!report) {
-		hid_err(hid, "NULL output report\n");
+	if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7))
 		return -1;
-	}
-
-	field = report->field[0];
-	if (!field) {
-		hid_err(hid, "NULL field\n");
-		return -1;
-	}
 
 	/* Check what wheel has been connected */
 	for (i = 0; i < ARRAY_SIZE(lg4ff_devices); i++) {
diff --git a/drivers/hid/hid-lgff.c b/drivers/hid/hid-lgff.c
index d7ea8c845b40..e1394af0ae7b 100644
--- a/drivers/hid/hid-lgff.c
+++ b/drivers/hid/hid-lgff.c
@@ -128,27 +128,14 @@ static void hid_lgff_set_autocenter(struct input_dev *dev, u16 magnitude)
 int lgff_init(struct hid_device* hid)
 {
 	struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
-	struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
 	struct input_dev *dev = hidinput->input;
-	struct hid_report *report;
-	struct hid_field *field;
 	const signed short *ff_bits = ff_joystick;
 	int error;
 	int i;
 
-	/* Find the report to use */
-	if (list_empty(report_list)) {
-		hid_err(hid, "No output report found\n");
-		return -1;
-	}
-
 	/* Check that the report looks ok */
-	report = list_entry(report_list->next, struct hid_report, list);
-	field = report->field[0];
-	if (!field) {
-		hid_err(hid, "NULL field\n");
-		return -1;
-	}
+	if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7))
+		return -ENODEV;
 
 	for (i = 0; i < ARRAY_SIZE(devices); i++) {
 		if (dev->id.vendor == devices[i].idVendor &&

From 255b0df27b1f2b590226f29a78169cc529e74cf0 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 11 Sep 2013 21:56:55 +0200
Subject: [PATCH 0018/1185] HID: lenovo-tpkbd: validate output report details

A HID device could send a malicious output report that would cause the
lenovo-tpkbd HID driver to write just beyond the output report allocation
during initialization, causing a heap overflow:

[   76.109807] usb 1-1: New USB device found, idVendor=17ef, idProduct=6009
...
[   80.462540] BUG kmalloc-192 (Tainted: G        W   ): Redzone overwritten

CVE-2013-2894

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-lenovo-tpkbd.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-lenovo-tpkbd.c b/drivers/hid/hid-lenovo-tpkbd.c
index 07837f5a4eb8..762d988548a2 100644
--- a/drivers/hid/hid-lenovo-tpkbd.c
+++ b/drivers/hid/hid-lenovo-tpkbd.c
@@ -339,7 +339,15 @@ static int tpkbd_probe_tp(struct hid_device *hdev)
 	struct tpkbd_data_pointer *data_pointer;
 	size_t name_sz = strlen(dev_name(dev)) + 16;
 	char *name_mute, *name_micmute;
-	int ret;
+	int i, ret;
+
+	/* Validate required reports. */
+	for (i = 0; i < 4; i++) {
+		if (!hid_validate_values(hdev, HID_FEATURE_REPORT, 4, i, 1))
+			return -ENODEV;
+	}
+	if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, 3, 0, 2))
+		return -ENODEV;
 
 	if (sysfs_create_group(&hdev->dev.kobj,
 				&tpkbd_attr_group_pointer)) {

From 142ae08aba82c8226779d6cc8cddc95a83923d77 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 11 Sep 2013 21:56:56 +0200
Subject: [PATCH 0019/1185] HID: logitech-dj: validate output report details

A HID device could send a malicious output report that would cause the
logitech-dj HID driver to leak kernel memory contents to the device, or
trigger a NULL dereference during initialization:

[  304.424553] usb 1-1: New USB device found, idVendor=046d, idProduct=c52b
...
[  304.780467] BUG: unable to handle kernel NULL pointer dereference at 0000000000000028
[  304.781409] IP: [<ffffffff815d50aa>] logi_dj_recv_send_report.isra.11+0x1a/0x90

CVE-2013-2895

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-logitech-dj.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index 5207591a598c..0522b80eab5a 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -421,7 +421,7 @@ static int logi_dj_recv_send_report(struct dj_receiver_dev *djrcv_dev,
 	struct hid_report *report;
 	struct hid_report_enum *output_report_enum;
 	u8 *data = (u8 *)(&dj_report->device_index);
-	int i;
+	unsigned int i;
 
 	output_report_enum = &hdev->report_enum[HID_OUTPUT_REPORT];
 	report = output_report_enum->report_id_hash[REPORT_ID_DJ_SHORT];
@@ -431,7 +431,7 @@ static int logi_dj_recv_send_report(struct dj_receiver_dev *djrcv_dev,
 		return -ENODEV;
 	}
 
-	for (i = 0; i < report->field[0]->report_count; i++)
+	for (i = 0; i < DJREPORT_SHORT_LENGTH - 1; i++)
 		report->field[0]->value[i] = data[i];
 
 	hid_hw_request(hdev, report, HID_REQ_SET_REPORT);
@@ -738,6 +738,12 @@ static int logi_dj_probe(struct hid_device *hdev,
 		goto hid_parse_fail;
 	}
 
+	if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, REPORT_ID_DJ_SHORT,
+				 0, DJREPORT_SHORT_LENGTH - 1)) {
+		retval = -ENODEV;
+		goto hid_parse_fail;
+	}
+
 	/* Starts the usb device and connects to upper interfaces hiddev and
 	 * hidraw */
 	retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT);

From 03507a663cef349083e8d1abc4b9e9b1970a16e8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 28 Aug 2013 22:31:28 +0200
Subject: [PATCH 0020/1185] HID: ntrig: validate feature report details

A HID device could send a malicious feature report that would cause the
ntrig HID driver to trigger a NULL dereference during initialization:

[57383.031190] usb 3-1: New USB device found, idVendor=1b96, idProduct=0001
...
[57383.315193] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
[57383.315308] IP: [<ffffffffa08102de>] ntrig_probe+0x25e/0x420 [hid_ntrig]

CVE-2013-2896

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: stable@kernel.org
Signed-off-by: Rafi Rubin <rafi@seas.upenn.edu>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ntrig.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-ntrig.c b/drivers/hid/hid-ntrig.c
index ef95102515e4..5482156ab4de 100644
--- a/drivers/hid/hid-ntrig.c
+++ b/drivers/hid/hid-ntrig.c
@@ -115,7 +115,8 @@ static inline int ntrig_get_mode(struct hid_device *hdev)
 	struct hid_report *report = hdev->report_enum[HID_FEATURE_REPORT].
 				    report_id_hash[0x0d];
 
-	if (!report)
+	if (!report || report->maxfield < 1 ||
+	    report->field[0]->report_count < 1)
 		return -EINVAL;
 
 	hid_hw_request(hdev, report, HID_REQ_GET_REPORT);

From 9b24c9c51653d4400d9a4aea2dc28124dce2893c Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Wed, 11 Sep 2013 21:56:57 +0200
Subject: [PATCH 0021/1185] HID: validate feature and input report details

When dealing with usage_index, be sure to properly use unsigned instead of
int to avoid overflows.

When working on report fields, always validate that their report_counts are
in bounds.
Without this, a HID device could report a malicious feature report that
could trick the driver into a heap overflow:

[  634.885003] usb 1-1: New USB device found, idVendor=0596, idProduct=0500
...
[  676.469629] BUG kmalloc-192 (Tainted: G        W   ): Redzone overwritten

CVE-2013-2897

Cc: stable@vger.kernel.org
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c  | 16 +++++++---------
 drivers/hid/hid-input.c | 11 ++++++++++-
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index c6d42deb9163..eadcc85e5a62 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -94,7 +94,6 @@ EXPORT_SYMBOL_GPL(hid_register_report);
 static struct hid_field *hid_register_field(struct hid_report *report, unsigned usages, unsigned values)
 {
 	struct hid_field *field;
-	int i;
 
 	if (report->maxfield == HID_MAX_FIELDS) {
 		hid_err(report->device, "too many fields in report\n");
@@ -113,9 +112,6 @@ static struct hid_field *hid_register_field(struct hid_report *report, unsigned
 	field->value = (s32 *)(field->usage + usages);
 	field->report = report;
 
-	for (i = 0; i < usages; i++)
-		field->usage[i].usage_index = i;
-
 	return field;
 }
 
@@ -226,9 +222,9 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
 {
 	struct hid_report *report;
 	struct hid_field *field;
-	int usages;
+	unsigned usages;
 	unsigned offset;
-	int i;
+	unsigned i;
 
 	report = hid_register_report(parser->device, report_type, parser->global.report_id);
 	if (!report) {
@@ -255,7 +251,8 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
 	if (!parser->local.usage_index) /* Ignore padding fields */
 		return 0;
 
-	usages = max_t(int, parser->local.usage_index, parser->global.report_count);
+	usages = max_t(unsigned, parser->local.usage_index,
+				 parser->global.report_count);
 
 	field = hid_register_field(report, usages, parser->global.report_count);
 	if (!field)
@@ -266,13 +263,14 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
 	field->application = hid_lookup_collection(parser, HID_COLLECTION_APPLICATION);
 
 	for (i = 0; i < usages; i++) {
-		int j = i;
+		unsigned j = i;
 		/* Duplicate the last usage we parsed if we have excess values */
 		if (i >= parser->local.usage_index)
 			j = parser->local.usage_index - 1;
 		field->usage[i].hid = parser->local.usage[j];
 		field->usage[i].collection_index =
 			parser->local.collection_index[j];
+		field->usage[i].usage_index = i;
 	}
 
 	field->maxusage = usages;
@@ -1290,7 +1288,7 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size,
 			goto out;
 	}
 
-	if (hid->claimed != HID_CLAIMED_HIDRAW) {
+	if (hid->claimed != HID_CLAIMED_HIDRAW && report->maxfield) {
 		for (a = 0; a < report->maxfield; a++)
 			hid_input_field(hid, report->field[a], cdata, interrupt);
 		hdrv = hid->driver;
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 2df9cdcba0de..762b2cff72b1 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -477,6 +477,10 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 	if (field->flags & HID_MAIN_ITEM_CONSTANT)
 		goto ignore;
 
+	/* Ignore if report count is out of bounds. */
+	if (field->report_count < 1)
+		goto ignore;
+
 	/* only LED usages are supported in output fields */
 	if (field->report_type == HID_OUTPUT_REPORT &&
 			(usage->hid & HID_USAGE_PAGE) != HID_UP_LED) {
@@ -1172,7 +1176,11 @@ static void report_features(struct hid_device *hid)
 
 	rep_enum = &hid->report_enum[HID_FEATURE_REPORT];
 	list_for_each_entry(rep, &rep_enum->report_list, list)
-		for (i = 0; i < rep->maxfield; i++)
+		for (i = 0; i < rep->maxfield; i++) {
+			/* Ignore if report count is out of bounds. */
+			if (rep->field[i]->report_count < 1)
+				continue;
+
 			for (j = 0; j < rep->field[i]->maxusage; j++) {
 				/* Verify if Battery Strength feature is available */
 				hidinput_setup_battery(hid, HID_FEATURE_REPORT, rep->field[i]);
@@ -1181,6 +1189,7 @@ static void report_features(struct hid_device *hid)
 					drv->feature_mapping(hid, rep->field[i],
 							     rep->field[i]->usage + j);
 			}
+		}
 }
 
 static struct hid_input *hidinput_allocate(struct hid_device *hid)

From 8d6fa24bc8f6f4441c4bb29f6b7b44393a5c9319 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Wed, 11 Sep 2013 21:56:58 +0200
Subject: [PATCH 0022/1185] HID: multitouch: validate indexes details

When working on report indexes, always validate that they are in bounds.
Without this, a HID device could report a malicious feature report that
could trick the driver into a heap overflow:

[  634.885003] usb 1-1: New USB device found, idVendor=0596, idProduct=0500
...
[  676.469629] BUG kmalloc-192 (Tainted: G        W   ): Redzone overwritten

Note that we need to change the indexes from s8 to s16 as they can
be between -1 and 255.

CVE-2013-2897

Cc: stable@vger.kernel.org
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-multitouch.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index f4b77f4d5fb8..bb6fe3ee0030 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -101,9 +101,9 @@ struct mt_device {
 	unsigned last_slot_field;	/* the last field of a slot */
 	unsigned mt_report_id;	/* the report ID of the multitouch device */
 	unsigned pen_report_id;	/* the report ID of the pen device */
-	__s8 inputmode;		/* InputMode HID feature, -1 if non-existent */
-	__s8 inputmode_index;	/* InputMode HID feature index in the report */
-	__s8 maxcontact_report_id;	/* Maximum Contact Number HID feature,
+	__s16 inputmode;	/* InputMode HID feature, -1 if non-existent */
+	__s16 inputmode_index;	/* InputMode HID feature index in the report */
+	__s16 maxcontact_report_id;	/* Maximum Contact Number HID feature,
 				   -1 if non-existent */
 	__u8 num_received;	/* how many contacts we received */
 	__u8 num_expected;	/* expected last contact index */
@@ -317,20 +317,18 @@ static void mt_feature_mapping(struct hid_device *hdev,
 		struct hid_field *field, struct hid_usage *usage)
 {
 	struct mt_device *td = hid_get_drvdata(hdev);
-	int i;
 
 	switch (usage->hid) {
 	case HID_DG_INPUTMODE:
-		td->inputmode = field->report->id;
-		td->inputmode_index = 0; /* has to be updated below */
-
-		for (i=0; i < field->maxusage; i++) {
-			if (field->usage[i].hid == usage->hid) {
-				td->inputmode_index = i;
-				break;
-			}
+		/* Ignore if value index is out of bounds. */
+		if (usage->usage_index >= field->report_count) {
+			dev_err(&hdev->dev, "HID_DG_INPUTMODE out of range\n");
+			break;
 		}
 
+		td->inputmode = field->report->id;
+		td->inputmode_index = usage->usage_index;
+
 		break;
 	case HID_DG_CONTACTMAX:
 		td->maxcontact_report_id = field->report->id;
@@ -546,6 +544,10 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 			mt_store_field(usage, td, hi);
 			return 1;
 		case HID_DG_CONTACTCOUNT:
+			/* Ignore if indexes are out of bounds. */
+			if (field->index >= field->report->maxfield ||
+			    usage->usage_index >= field->report_count)
+				return 1;
 			td->cc_index = field->index;
 			td->cc_value_index = usage->usage_index;
 			return 1;

From b73eb9e6927df1072e7420dedec4fb917d505fa5 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 28 Aug 2013 22:31:44 +0200
Subject: [PATCH 0023/1185] HID: sensor-hub: validate feature report details

A HID device could send a malicious feature report that would cause the
sensor-hub HID driver to read past the end of heap allocation, leaking
kernel memory contents to the caller.

CVE-2013-2898

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: stable@kernel.org
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-sensor-hub.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c
index ca7498107327..aa34755ca205 100644
--- a/drivers/hid/hid-sensor-hub.c
+++ b/drivers/hid/hid-sensor-hub.c
@@ -221,7 +221,8 @@ int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
 
 	mutex_lock(&data->mutex);
 	report = sensor_hub_report(report_id, hsdev->hdev, HID_FEATURE_REPORT);
-	if (!report || (field_index >=  report->maxfield)) {
+	if (!report || (field_index >=  report->maxfield) ||
+	    report->field[field_index]->report_count < 1) {
 		ret = -EINVAL;
 		goto done_proc;
 	}

From 5e1624f591cc2acbc02d88ab6f1008a57cfb129f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 28 Aug 2013 22:31:52 +0200
Subject: [PATCH 0024/1185] HID: picolcd_core: validate output report details
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A HID device could send a malicious output report that would cause the
picolcd HID driver to trigger a NULL dereference during attr file writing.

[jkosina@suse.cz: changed

	report->maxfield < 1

to

	report->maxfield != 1

as suggested by Bruno].

CVE-2013-2899

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: stable@kernel.org
Reviewed-by: Bruno Prémont <bonbons@linux-vserver.org>
Acked-by: Bruno Prémont <bonbons@linux-vserver.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-picolcd_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/hid-picolcd_core.c b/drivers/hid/hid-picolcd_core.c
index b48092d0e139..acbb021065ec 100644
--- a/drivers/hid/hid-picolcd_core.c
+++ b/drivers/hid/hid-picolcd_core.c
@@ -290,7 +290,7 @@ static ssize_t picolcd_operation_mode_store(struct device *dev,
 		buf += 10;
 		cnt -= 10;
 	}
-	if (!report)
+	if (!report || report->maxfield != 1)
 		return -EINVAL;
 
 	while (cnt > 0 && (buf[cnt-1] == '\n' || buf[cnt-1] == '\r'))

From 58c62d4474fbeda42851553a546e5cb860a036ea Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 28 Aug 2013 22:32:01 +0200
Subject: [PATCH 0025/1185] HID: check for NULL field when setting values

Defensively check that the field to be worked on is not NULL.

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: stable@kernel.org
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index eadcc85e5a62..5b3e4cf7ca6b 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1212,7 +1212,12 @@ EXPORT_SYMBOL_GPL(hid_output_report);
 
 int hid_set_field(struct hid_field *field, unsigned offset, __s32 value)
 {
-	unsigned size = field->report_size;
+	unsigned size;
+
+	if (!field)
+		return -1;
+
+	size = field->report_size;
 
 	hid_dump_input(field->report->device, field->usage + offset, value);
 

From e947bc2827cf608bd42ec9480e759766e5dfe09c Mon Sep 17 00:00:00 2001
From: Jonathan Hamilton <jonathan.hamilton@imgtec.com>
Date: Thu, 17 Jul 2014 15:54:44 -0700
Subject: [PATCH 0026/1185] video: adf: Cleanup sw_sync timeline at
 adf_device_destroy

If a sw_sync timeline was created by ADF (for drivers that do not implement
ops->complete_fence) we should clean it up when the ADF device is
destroyed.

Change-Id: Idd90180fcae56a87111f7d12bdd80190756a6b80
Signed-off-by: Jonathan Hamilton <jonathan.hamilton@imgtec.com>
---
 drivers/video/adf/adf.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/video/adf/adf.c b/drivers/video/adf/adf.c
index 231881c2b355..42c30c05826a 100644
--- a/drivers/video/adf/adf.c
+++ b/drivers/video/adf/adf.c
@@ -613,6 +613,10 @@ void adf_device_destroy(struct adf_device *dev)
 	}
 	mutex_destroy(&dev->post_lock);
 	mutex_destroy(&dev->client_lock);
+
+	if (dev->timeline)
+		sync_timeline_destroy(&dev->timeline->obj);
+
 	adf_obj_destroy(&dev->base, &adf_devices);
 }
 EXPORT_SYMBOL(adf_device_destroy);

From 30dc17b47c2f5b12279f1db8935ae2e6d35af8ea Mon Sep 17 00:00:00 2001
From: Colin Cross <ccross@android.com>
Date: Tue, 4 Feb 2014 02:15:32 +0000
Subject: [PATCH 0027/1185] security: select correct default LSM_MMAP_MIN_ADDR
 on arm on arm64

Binaries compiled for arm may run on arm64 if CONFIG_COMPAT is
selected.  Set LSM_MMAP_MIN_ADDR to 32768 if ARM64 && COMPAT to
prevent selinux failures launching 32-bit static executables that
are mapped at 0x8000.

Signed-off-by: Colin Cross <ccross@android.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Eric Paris <eparis@redhat.com>
Acked-by: James Morris <james.l.morris@oracle.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from upstream 3.14 commit 530b099dfe8499d639e7fbcad28c4199e2a720c7)
Change-Id: I05d092d3539380e08e7daf0b9d2faae76147b72b
Signed-off-by: Dan Willemsen <dwillemsen@nvidia.com>
Reviewed-on: http://git-master/r/367837
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 security/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/Kconfig b/security/Kconfig
index e9c6ac724fef..beb86b500adf 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -103,7 +103,7 @@ config INTEL_TXT
 config LSM_MMAP_MIN_ADDR
 	int "Low address space for LSM to protect from user allocation"
 	depends on SECURITY && SECURITY_SELINUX
-	default 32768 if ARM
+	default 32768 if ARM || (ARM64 && COMPAT)
 	default 65536
 	help
 	  This is the portion of low virtual memory which should be protected

From 5f4a7e5e43293f27e0b5ff935b32ba303c89de59 Mon Sep 17 00:00:00 2001
From: Anson Jacob <ansonkuzhumbil@gmail.com>
Date: Tue, 1 Jul 2014 18:17:20 +0800
Subject: [PATCH 0028/1185] usb: gadget: f_audio_source: change max ISO packet
 size

Re-applying from
https://gitorious.org/shr/linux/commit/eb4c9d2db894c3492c0a848581bd4f6790f93d5f

Most USB-AUDIO devices are limited to 256 byte for max iso buffer size.
If a IN_EP_MAX_PACKET_SIZE is bigger than a USB-AUDIO device's max iso
buffer size, it will cause noise. This patch will prevent this case as far as
possible by reducing packet size. When using 44.1khz, 2ch, 16bit audio
data, if max packet size is bigger than 176 bytes, it's no problem.

Credits to: Iliyan Malchev <malchev@google.com>

Change-Id: Ic2a1c19ea65d5fb42bf12926b51b255b465d7215
Signed-off-by: Anson Jacob <ansonkuzhumbil@gmail.com>
---
 drivers/usb/gadget/f_audio_source.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/f_audio_source.c b/drivers/usb/gadget/f_audio_source.c
index 56dcf217cfe5..65760c42d422 100644
--- a/drivers/usb/gadget/f_audio_source.c
+++ b/drivers/usb/gadget/f_audio_source.c
@@ -24,7 +24,7 @@
 #define SAMPLE_RATE 44100
 #define FRAMES_PER_MSEC (SAMPLE_RATE / 1000)
 
-#define IN_EP_MAX_PACKET_SIZE 384
+#define IN_EP_MAX_PACKET_SIZE 256
 
 /* Number of requests to allocate */
 #define IN_EP_REQ_COUNT 4

From dc0cf1216b3db4617610ecee67185de0dade9552 Mon Sep 17 00:00:00 2001
From: Anson Jacob <ansonkuzhumbil@gmail.com>
Date: Mon, 23 Jun 2014 19:14:01 +0800
Subject: [PATCH 0029/1185] usb: gadget: f_audio_source: Fixed USB Audio Class
 Interface Descriptor

Fixed Android Issue #56549.

When both Vendor Class and Audio Class are activated for AOA 2.0,
the baInterfaceNr of the AudioControl Interface Descriptor points
to wrong interface numbers. They should be pointing to
Audio Control Device and Audio Streaming interfaces.

Replaced baInterfaceNr with the correct value.

Change-Id: Iaa083f3d97c1f0fc9481bf87852b2b51278a6351
Signed-off-by: Anson Jacob <ansonkuzhumbil@gmail.com>
---
 drivers/usb/gadget/f_audio_source.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/gadget/f_audio_source.c b/drivers/usb/gadget/f_audio_source.c
index 65760c42d422..21ced13c83d8 100644
--- a/drivers/usb/gadget/f_audio_source.c
+++ b/drivers/usb/gadget/f_audio_source.c
@@ -580,12 +580,18 @@ audio_bind(struct usb_configuration *c, struct usb_function *f)
 		goto fail;
 	ac_interface_desc.bInterfaceNumber = status;
 
+	/* AUDIO_AC_INTERFACE */
+	ac_header_desc.baInterfaceNr[0] = status;
+
 	status = usb_interface_id(c, f);
 	if (status < 0)
 		goto fail;
 	as_interface_alt_0_desc.bInterfaceNumber = status;
 	as_interface_alt_1_desc.bInterfaceNumber = status;
 
+	/* AUDIO_AS_INTERFACE */
+	ac_header_desc.baInterfaceNr[1] = status;
+
 	status = -ENODEV;
 
 	/* allocate our endpoint */

From 42cfa82f8201f91cdb7539a6e15c6ee15eee7f73 Mon Sep 17 00:00:00 2001
From: Gavin Guo <gavin.guo@canonical.com>
Date: Fri, 18 Jul 2014 01:12:13 +0800
Subject: [PATCH 0030/1185] usb: Check if port status is equal to RxDetect

commit bb86cf569bbd7ad4dce581a37c7fbd748057e9dc upstream.

When using USB 3.0 pen drive with the [AMD] FCH USB XHCI Controller
[1022:7814], the second hotplugging will experience the USB 3.0 pen
drive is recognized as high-speed device. After bisecting the kernel,
I found the commit number 41e7e056cdc662f704fa9262e5c6e213b4ab45dd
(USB: Allow USB 3.0 ports to be disabled.) causes the bug. After doing
some experiments, the bug can be fixed by avoiding executing the function
hub_usb3_port_disable(). Because the port status with [AMD] FCH USB
XHCI Controller [1022:7814] is already in RxDetect
(I tried printing out the port status before setting to Disabled state),
it's reasonable to check the port status before really executing
hub_usb3_port_disable().

Fixes: 41e7e056cdc6 (USB: Allow USB 3.0 ports to be disabled.)
Signed-off-by: Gavin Guo <gavin.guo@canonical.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 46efdca96952..63c217053668 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -887,6 +887,25 @@ static int hub_usb3_port_disable(struct usb_hub *hub, int port1)
 	if (!hub_is_superspeed(hub->hdev))
 		return -EINVAL;
 
+	ret = hub_port_status(hub, port1, &portstatus, &portchange);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * USB controller Advanced Micro Devices, Inc. [AMD] FCH USB XHCI
+	 * Controller [1022:7814] will have spurious result making the following
+	 * usb 3.0 device hotplugging route to the 2.0 root hub and recognized
+	 * as high-speed device if we set the usb 3.0 port link state to
+	 * Disabled. Since it's already in USB_SS_PORT_LS_RX_DETECT state, we
+	 * check the state here to avoid the bug.
+	 */
+	if ((portstatus & USB_PORT_STAT_LINK_STATE) ==
+				USB_SS_PORT_LS_RX_DETECT) {
+		dev_dbg(&hub->ports[port1 - 1]->dev,
+			 "Not disabling port; link state is RxDetect\n");
+		return ret;
+	}
+
 	ret = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_SS_DISABLED);
 	if (ret)
 		return ret;

From 87f7b77e5fe55f27fd705b858eae9858ebdb2327 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 9 Jul 2014 06:20:44 -0300
Subject: [PATCH 0031/1185] media: gspca_pac7302: Add new usb-id for Genius
 i-Look 317

commit 242841d3d71191348f98310e2d2001e1001d8630 upstream.

Tested-and-reported-by: yullaw <yullaw@mageia.cz>

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/gspca/pac7302.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/media/usb/gspca/pac7302.c b/drivers/media/usb/gspca/pac7302.c
index 6008c8d546a3..20d9c15a305d 100644
--- a/drivers/media/usb/gspca/pac7302.c
+++ b/drivers/media/usb/gspca/pac7302.c
@@ -945,6 +945,7 @@ static const struct usb_device_id device_table[] = {
 	{USB_DEVICE(0x093a, 0x2620)},
 	{USB_DEVICE(0x093a, 0x2621)},
 	{USB_DEVICE(0x093a, 0x2622), .driver_info = FL_VFLIP},
+	{USB_DEVICE(0x093a, 0x2623), .driver_info = FL_VFLIP},
 	{USB_DEVICE(0x093a, 0x2624), .driver_info = FL_VFLIP},
 	{USB_DEVICE(0x093a, 0x2625)},
 	{USB_DEVICE(0x093a, 0x2626)},

From 9ac5d53ca3d2c5b849a57227cc5b0c958692cfdb Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Mon, 7 Jul 2014 16:34:25 -0700
Subject: [PATCH 0032/1185] Drivers: hv: util: Fix a bug in the KVP code

commit 9bd2d0dfe4714dd5d7c09a93a5c9ea9e14ceb3fc upstream.

Add code to poll the channel since we process only one message
at a time and the host may not interrupt us. Also increase the
receive buffer size since some KVP messages are close to 8K bytes in size.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/hv_kvp.c  | 14 ++++++++++++--
 drivers/hv/hv_util.c |  2 +-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index ed50e9e83c61..0e8c1ea4dd53 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -111,6 +111,15 @@ kvp_work_func(struct work_struct *dummy)
 	kvp_respond_to_host(NULL, HV_E_FAIL);
 }
 
+static void poll_channel(struct vmbus_channel *channel)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&channel->inbound_lock, flags);
+	hv_kvp_onchannelcallback(channel);
+	spin_unlock_irqrestore(&channel->inbound_lock, flags);
+}
+
 static int kvp_handle_handshake(struct hv_kvp_msg *msg)
 {
 	int ret = 1;
@@ -139,7 +148,7 @@ static int kvp_handle_handshake(struct hv_kvp_msg *msg)
 		kvp_register(dm_reg_value);
 		kvp_transaction.active = false;
 		if (kvp_transaction.kvp_context)
-			hv_kvp_onchannelcallback(kvp_transaction.kvp_context);
+			poll_channel(kvp_transaction.kvp_context);
 	}
 	return ret;
 }
@@ -552,6 +561,7 @@ kvp_respond_to_host(struct hv_kvp_msg *msg_to_host, int error)
 
 	vmbus_sendpacket(channel, recv_buffer, buf_len, req_id,
 				VM_PKT_DATA_INBAND, 0);
+	poll_channel(channel);
 
 }
 
@@ -585,7 +595,7 @@ void hv_kvp_onchannelcallback(void *context)
 		return;
 	}
 
-	vmbus_recvpacket(channel, recv_buffer, PAGE_SIZE * 2, &recvlen,
+	vmbus_recvpacket(channel, recv_buffer, PAGE_SIZE * 4, &recvlen,
 			 &requestid);
 
 	if (recvlen > 0) {
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index 2f561c5dfe24..64c778f7756f 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -279,7 +279,7 @@ static int util_probe(struct hv_device *dev,
 		(struct hv_util_service *)dev_id->driver_data;
 	int ret;
 
-	srv->recv_buffer = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
+	srv->recv_buffer = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
 	if (!srv->recv_buffer)
 		return -ENOMEM;
 	if (srv->util_init) {

From 18d8867933b9df6b51afa3b5694d82dd88bb46e2 Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@intel.com>
Date: Mon, 23 Jun 2014 17:42:44 +0200
Subject: [PATCH 0033/1185] Bluetooth: Ignore H5 non-link packets in non-active
 state

commit 48439d501e3d9e8634bdc0c418e066870039599d upstream.

When detecting a non-link packet, h5_reset_rx() frees the Rx skb.
Not returning after that will cause the upcoming h5_rx_payload()
call to dereference a now NULL Rx skb and trigger a kernel oops.

Signed-off-by: Loic Poulain <loic.poulain@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/hci_h5.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c
index b6154d5a07a5..db0be2fb05fe 100644
--- a/drivers/bluetooth/hci_h5.c
+++ b/drivers/bluetooth/hci_h5.c
@@ -406,6 +406,7 @@ static int h5_rx_3wire_hdr(struct hci_uart *hu, unsigned char c)
 	    H5_HDR_PKT_TYPE(hdr) != HCI_3WIRE_LINK_PKT) {
 		BT_ERR("Non-link packet received in non-active state");
 		h5_reset_rx(h5);
+		return 0;
 	}
 
 	h5->rx_func = h5_rx_payload;

From d609df085cf9333cfda02a398d74694e7f2a644c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Mon, 7 Jul 2014 15:28:51 +0200
Subject: [PATCH 0034/1185] fuse: handle large user and group ID

commit 233a01fa9c4c7c41238537e8db8434667ff28a2f upstream.

If the number in "user_id=N" or "group_id=N" mount options was larger than
INT_MAX then fuse returned EINVAL.

Fix this to handle all valid uid/gid values.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/inode.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index b5718516825b..39a986e1da9e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -461,6 +461,17 @@ static const match_table_t tokens = {
 	{OPT_ERR,			NULL}
 };
 
+static int fuse_match_uint(substring_t *s, unsigned int *res)
+{
+	int err = -ENOMEM;
+	char *buf = match_strdup(s);
+	if (buf) {
+		err = kstrtouint(buf, 10, res);
+		kfree(buf);
+	}
+	return err;
+}
+
 static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
 {
 	char *p;
@@ -471,6 +482,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
 	while ((p = strsep(&opt, ",")) != NULL) {
 		int token;
 		int value;
+		unsigned uv;
 		substring_t args[MAX_OPT_ARGS];
 		if (!*p)
 			continue;
@@ -494,18 +506,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
 			break;
 
 		case OPT_USER_ID:
-			if (match_int(&args[0], &value))
+			if (fuse_match_uint(&args[0], &uv))
 				return 0;
-			d->user_id = make_kuid(current_user_ns(), value);
+			d->user_id = make_kuid(current_user_ns(), uv);
 			if (!uid_valid(d->user_id))
 				return 0;
 			d->user_id_present = 1;
 			break;
 
 		case OPT_GROUP_ID:
-			if (match_int(&args[0], &value))
+			if (fuse_match_uint(&args[0], &uv))
 				return 0;
-			d->group_id = make_kgid(current_user_ns(), value);
+			d->group_id = make_kgid(current_user_ns(), uv);
 			if (!gid_valid(d->group_id))
 				return 0;
 			d->group_id_present = 1;

From 9b87c4e58f2143ba9bc05ffff22d86d172e4f4ac Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 15 Jul 2014 11:05:12 -0400
Subject: [PATCH 0035/1185] tracing: Fix graph tracer with stack tracer on
 other archs

commit 5f8bf2d263a20b986225ae1ed7d6759dc4b93af9 upstream.

Running my ftrace tests on PowerPC, it failed the test that checks
if function_graph tracer is affected by the stack tracer. It was.
Looking into this, I found that the update_function_graph_func()
must be called even if the trampoline function is not changed.
This is because archs like PowerPC do not support ftrace_ops being
passed by assembly and instead uses a helper function (what the
trampoline function points to). Since this function is not changed
even when multiple ftrace_ops are added to the code, the test that
falls out before calling update_function_graph_func() will miss that
the update must still be done.

Call update_function_graph_func() for all calls to
update_ftrace_function()

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/ftrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 797d3b91a30b..401d9bd1fe42 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -331,12 +331,12 @@ static void update_ftrace_function(void)
 		func = ftrace_ops_list_func;
 	}
 
+	update_function_graph_func();
+
 	/* If there's no change, then do nothing more here */
 	if (ftrace_trace_function == func)
 		return;
 
-	update_function_graph_func();
-
 	/*
 	 * If we are using the list function, it doesn't care
 	 * about the function_trace_ops.

From e250100beddf49178bd36886eea77b376a1e39bd Mon Sep 17 00:00:00 2001
From: "zhangwei(Jovi)" <jovi.zhangwei@huawei.com>
Date: Thu, 18 Jul 2013 16:31:05 +0800
Subject: [PATCH 0036/1185] tracing: Add ftrace_trace_stack into
 __trace_puts/__trace_bputs

commit 8abfb8727f4a724d31f9ccfd8013fbd16d539445 upstream.

Currently trace option stacktrace is not applicable for
trace_printk with constant string argument, the reason is
in __trace_puts/__trace_bputs ftrace_trace_stack is missing.

In contrast, when using trace_printk with non constant string
argument(will call into __trace_printk/__trace_bprintk), then
trace option stacktrace is workable, this inconsistent result
will confuse users a lot.

Link: http://lkml.kernel.org/p/51E7A7C9.9040401@huawei.com

Signed-off-by: zhangwei(Jovi) <jovi.zhangwei@huawei.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8fe92ce43f39..98a830d079b9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -423,6 +423,9 @@ int __trace_puts(unsigned long ip, const char *str, int size)
 	struct print_entry *entry;
 	unsigned long irq_flags;
 	int alloc;
+	int pc;
+
+	pc = preempt_count();
 
 	if (unlikely(tracing_selftest_running || tracing_disabled))
 		return 0;
@@ -432,7 +435,7 @@ int __trace_puts(unsigned long ip, const char *str, int size)
 	local_save_flags(irq_flags);
 	buffer = global_trace.trace_buffer.buffer;
 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
-					  irq_flags, preempt_count());
+					  irq_flags, pc);
 	if (!event)
 		return 0;
 
@@ -449,6 +452,7 @@ int __trace_puts(unsigned long ip, const char *str, int size)
 		entry->buf[size] = '\0';
 
 	__buffer_unlock_commit(buffer, event);
+	ftrace_trace_stack(buffer, irq_flags, 4, pc);
 
 	return size;
 }
@@ -466,6 +470,9 @@ int __trace_bputs(unsigned long ip, const char *str)
 	struct bputs_entry *entry;
 	unsigned long irq_flags;
 	int size = sizeof(struct bputs_entry);
+	int pc;
+
+	pc = preempt_count();
 
 	if (unlikely(tracing_selftest_running || tracing_disabled))
 		return 0;
@@ -473,7 +480,7 @@ int __trace_bputs(unsigned long ip, const char *str)
 	local_save_flags(irq_flags);
 	buffer = global_trace.trace_buffer.buffer;
 	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
-					  irq_flags, preempt_count());
+					  irq_flags, pc);
 	if (!event)
 		return 0;
 
@@ -482,6 +489,7 @@ int __trace_bputs(unsigned long ip, const char *str)
 	entry->str			= str;
 
 	__buffer_unlock_commit(buffer, event);
+	ftrace_trace_stack(buffer, irq_flags, 4, pc);
 
 	return 1;
 }

From 48050b8a692fdf5eb71e4c6e445ca506ec6f1359 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Wed, 9 Jul 2014 09:22:54 +0800
Subject: [PATCH 0037/1185] hwmon: (da9055) Don't use dash in the name
 attribute

commit 6b00f440dd678d786389a7100a2e03fe44478431 upstream.

Dashes are not allowed in hwmon name attributes.
Use "da9055" instead of "da9055-hwmon".

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/da9055-hwmon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/da9055-hwmon.c b/drivers/hwmon/da9055-hwmon.c
index 029ecabc4380..1b275a2881d6 100644
--- a/drivers/hwmon/da9055-hwmon.c
+++ b/drivers/hwmon/da9055-hwmon.c
@@ -204,7 +204,7 @@ static ssize_t da9055_hwmon_show_name(struct device *dev,
 				      struct device_attribute *devattr,
 				      char *buf)
 {
-	return sprintf(buf, "da9055-hwmon\n");
+	return sprintf(buf, "da9055\n");
 }
 
 static ssize_t show_label(struct device *dev,

From 4c6d5fb84ff7ad51e7bf91dbe60ed6fe670f252d Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Wed, 9 Jul 2014 09:18:59 +0800
Subject: [PATCH 0038/1185] hwmon: (da9052) Don't use dash in the name
 attribute

commit ee14b644daaa58afe1e91bb9ebd9cf1b18d1f5fa upstream.

Dashes are not allowed in hwmon name attributes.
Use "da9052" instead of "da9052-hwmon".

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/da9052-hwmon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/da9052-hwmon.c b/drivers/hwmon/da9052-hwmon.c
index 960fac3fb166..48044b044b7a 100644
--- a/drivers/hwmon/da9052-hwmon.c
+++ b/drivers/hwmon/da9052-hwmon.c
@@ -194,7 +194,7 @@ static ssize_t da9052_hwmon_show_name(struct device *dev,
 				      struct device_attribute *devattr,
 				      char *buf)
 {
-	return sprintf(buf, "da9052-hwmon\n");
+	return sprintf(buf, "da9052\n");
 }
 
 static ssize_t show_label(struct device *dev,

From 0979b7169679cc91edcadd98ffbe389400bbc088 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 16 Jul 2014 17:40:31 -0700
Subject: [PATCH 0039/1185] hwmon: (adt7470) Fix writes to temperature limit
 registers

commit de12d6f4b10b21854441f5242dcb29ea96181e58 upstream.

Temperature limit registers are signed. Limits therefore need
to be clamped to (-128, 127) degrees C and not to (0, 255)
degrees C.

Without this fix, writing a limit of 128 degrees C sets the
actual limit to -128 degrees C.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/adt7470.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c
index 58637355c1f6..79610bdf1d35 100644
--- a/drivers/hwmon/adt7470.c
+++ b/drivers/hwmon/adt7470.c
@@ -515,7 +515,7 @@ static ssize_t set_temp_min(struct device *dev,
 		return -EINVAL;
 
 	temp = DIV_ROUND_CLOSEST(temp, 1000);
-	temp = clamp_val(temp, 0, 255);
+	temp = clamp_val(temp, -128, 127);
 
 	mutex_lock(&data->lock);
 	data->temp_min[attr->index] = temp;
@@ -549,7 +549,7 @@ static ssize_t set_temp_max(struct device *dev,
 		return -EINVAL;
 
 	temp = DIV_ROUND_CLOSEST(temp, 1000);
-	temp = clamp_val(temp, 0, 255);
+	temp = clamp_val(temp, -128, 127);
 
 	mutex_lock(&data->lock);
 	data->temp_max[attr->index] = temp;
@@ -826,7 +826,7 @@ static ssize_t set_pwm_tmin(struct device *dev,
 		return -EINVAL;
 
 	temp = DIV_ROUND_CLOSEST(temp, 1000);
-	temp = clamp_val(temp, 0, 255);
+	temp = clamp_val(temp, -128, 127);
 
 	mutex_lock(&data->lock);
 	data->pwm_tmin[attr->index] = temp;

From 125a0039d6eaee7b9e65be765f704d9278e7ccff Mon Sep 17 00:00:00 2001
From: Stefan Assmann <sassmann@kpanic.de>
Date: Thu, 10 Jul 2014 03:29:39 -0700
Subject: [PATCH 0040/1185] igb: do a reset on SR-IOV re-init if device is down

commit 76252723e88681628a3dbb9c09c963e095476f73 upstream.

To properly re-initialize SR-IOV it is necessary to reset the device
even if it is already down. Not doing this may result in Tx unit hangs.

Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 64cbe0dfe043..4d3c8122e2aa 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -7229,6 +7229,8 @@ static int igb_sriov_reinit(struct pci_dev *dev)
 
 	if (netif_running(netdev))
 		igb_close(netdev);
+	else
+		igb_reset(adapter);
 
 	igb_clear_interrupt_scheme(adapter);
 

From 44a5342ff8de29043144129e27abce1f201fc774 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Wed, 25 Jun 2014 09:12:30 +0300
Subject: [PATCH 0041/1185] iwlwifi: dvm: don't enable CTS to self

commit 43d826ca5979927131685cc2092c7ce862cb91cd upstream.

We should always prefer to use full RTS protection. Using
CTS to self gives a meaningless improvement, but this flow
is much harder for the firmware which is likely to have
issues with it.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/iwlwifi/dvm/rxon.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/net/wireless/iwlwifi/dvm/rxon.c b/drivers/net/wireless/iwlwifi/dvm/rxon.c
index cd1ad0019185..ca17e4c9eca2 100644
--- a/drivers/net/wireless/iwlwifi/dvm/rxon.c
+++ b/drivers/net/wireless/iwlwifi/dvm/rxon.c
@@ -1072,13 +1072,6 @@ int iwlagn_commit_rxon(struct iwl_priv *priv, struct iwl_rxon_context *ctx)
 	/* recalculate basic rates */
 	iwl_calc_basic_rates(priv, ctx);
 
-	/*
-	 * force CTS-to-self frames protection if RTS-CTS is not preferred
-	 * one aggregation protection method
-	 */
-	if (!priv->hw_params.use_rts_for_aggregation)
-		ctx->staging.flags |= RXON_FLG_SELF_CTS_EN;
-
 	if ((ctx->vif && ctx->vif->bss_conf.use_short_slot) ||
 	    !(ctx->staging.flags & RXON_FLG_BAND_24G_MSK))
 		ctx->staging.flags |= RXON_FLG_SHORT_SLOT_MSK;
@@ -1484,11 +1477,6 @@ void iwlagn_bss_info_changed(struct ieee80211_hw *hw,
 	else
 		ctx->staging.flags &= ~RXON_FLG_TGG_PROTECT_MSK;
 
-	if (bss_conf->use_cts_prot)
-		ctx->staging.flags |= RXON_FLG_SELF_CTS_EN;
-	else
-		ctx->staging.flags &= ~RXON_FLG_SELF_CTS_EN;
-
 	memcpy(ctx->staging.bssid_addr, bss_conf->bssid, ETH_ALEN);
 
 	if (vif->type == NL80211_IFTYPE_AP ||

From 1ccc3ffad12489d90994243be03017ff6e78ef51 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Mon, 23 Jun 2014 13:22:06 -0700
Subject: [PATCH 0042/1185] shmem: fix faulting into a hole while it's punched

commit f00cdc6df7d7cfcabb5b740911e6788cb0802bdb upstream.

Trinity finds that mmap access to a hole while it's punched from shmem
can prevent the madvise(MADV_REMOVE) or fallocate(FALLOC_FL_PUNCH_HOLE)
from completing, until the reader chooses to stop; with the puncher's
hold on i_mutex locking out all other writers until it can complete.

It appears that the tmpfs fault path is too light in comparison with its
hole-punching path, lacking an i_data_sem to obstruct it; but we don't
want to slow down the common case.

Extend shmem_fallocate()'s existing range notification mechanism, so
shmem_fault() can refrain from faulting pages into the hole while it's
punched, waiting instead on i_mutex (when safe to sleep; or repeatedly
faulting when not).

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Hugh Dickins <hughd@google.com>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Dave Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/shmem.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 52 insertions(+), 4 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 509b393eceeb..61cf45c343e6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -80,11 +80,12 @@ static struct vfsmount *shm_mnt;
 #define SHORT_SYMLINK_LEN 128
 
 /*
- * shmem_fallocate and shmem_writepage communicate via inode->i_private
- * (with i_mutex making sure that it has only one user at a time):
- * we would prefer not to enlarge the shmem inode just for that.
+ * shmem_fallocate communicates with shmem_fault or shmem_writepage via
+ * inode->i_private (with i_mutex making sure that it has only one user at
+ * a time): we would prefer not to enlarge the shmem inode just for that.
  */
 struct shmem_falloc {
+	int	mode;		/* FALLOC_FL mode currently operating */
 	pgoff_t start;		/* start of range currently being fallocated */
 	pgoff_t next;		/* the next page offset to be fallocated */
 	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
@@ -826,6 +827,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 			spin_lock(&inode->i_lock);
 			shmem_falloc = inode->i_private;
 			if (shmem_falloc &&
+			    !shmem_falloc->mode &&
 			    index >= shmem_falloc->start &&
 			    index < shmem_falloc->next)
 				shmem_falloc->nr_unswapped++;
@@ -1300,6 +1302,44 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	int error;
 	int ret = VM_FAULT_LOCKED;
 
+	/*
+	 * Trinity finds that probing a hole which tmpfs is punching can
+	 * prevent the hole-punch from ever completing: which in turn
+	 * locks writers out with its hold on i_mutex.  So refrain from
+	 * faulting pages into the hole while it's being punched, and
+	 * wait on i_mutex to be released if vmf->flags permits.
+	 */
+	if (unlikely(inode->i_private)) {
+		struct shmem_falloc *shmem_falloc;
+
+		spin_lock(&inode->i_lock);
+		shmem_falloc = inode->i_private;
+		if (!shmem_falloc ||
+		    shmem_falloc->mode != FALLOC_FL_PUNCH_HOLE ||
+		    vmf->pgoff < shmem_falloc->start ||
+		    vmf->pgoff >= shmem_falloc->next)
+			shmem_falloc = NULL;
+		spin_unlock(&inode->i_lock);
+		/*
+		 * i_lock has protected us from taking shmem_falloc seriously
+		 * once return from shmem_fallocate() went back up that stack.
+		 * i_lock does not serialize with i_mutex at all, but it does
+		 * not matter if sometimes we wait unnecessarily, or sometimes
+		 * miss out on waiting: we just need to make those cases rare.
+		 */
+		if (shmem_falloc) {
+			if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
+			   !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
+				up_read(&vma->vm_mm->mmap_sem);
+				mutex_lock(&inode->i_mutex);
+				mutex_unlock(&inode->i_mutex);
+				return VM_FAULT_RETRY;
+			}
+			/* cond_resched? Leave that to GUP or return to user */
+			return VM_FAULT_NOPAGE;
+		}
+	}
+
 	error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
 	if (error)
 		return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
@@ -1817,18 +1857,26 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 
 	mutex_lock(&inode->i_mutex);
 
+	shmem_falloc.mode = mode & ~FALLOC_FL_KEEP_SIZE;
+
 	if (mode & FALLOC_FL_PUNCH_HOLE) {
 		struct address_space *mapping = file->f_mapping;
 		loff_t unmap_start = round_up(offset, PAGE_SIZE);
 		loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
 
+		shmem_falloc.start = unmap_start >> PAGE_SHIFT;
+		shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
+		spin_lock(&inode->i_lock);
+		inode->i_private = &shmem_falloc;
+		spin_unlock(&inode->i_lock);
+
 		if ((u64)unmap_end > (u64)unmap_start)
 			unmap_mapping_range(mapping, unmap_start,
 					    1 + unmap_end - unmap_start, 0);
 		shmem_truncate_range(inode, offset, offset + len - 1);
 		/* No need to unmap again: hole-punching leaves COWed pages */
 		error = 0;
-		goto out;
+		goto undone;
 	}
 
 	/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */

From 887675c981bcefc567bd1f18352238d7ce1cf47a Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Wed, 23 Jul 2014 14:00:10 -0700
Subject: [PATCH 0043/1185] shmem: fix faulting into a hole, not taking i_mutex

commit 8e205f779d1443a94b5ae81aa359cb535dd3021e upstream.

Commit f00cdc6df7d7 ("shmem: fix faulting into a hole while it's
punched") was buggy: Sasha sent a lockdep report to remind us that
grabbing i_mutex in the fault path is a no-no (write syscall may already
hold i_mutex while faulting user buffer).

We tried a completely different approach (see following patch) but that
proved inadequate: good enough for a rational workload, but not good
enough against trinity - which forks off so many mappings of the object
that contention on i_mmap_mutex while hole-puncher holds i_mutex builds
into serious starvation when concurrent faults force the puncher to fall
back to single-page unmap_mapping_range() searches of the i_mmap tree.

So return to the original umbrella approach, but keep away from i_mutex
this time.  We really don't want to bloat every shmem inode with a new
mutex or completion, just to protect this unlikely case from trinity.
So extend the original with wait_queue_head on stack at the hole-punch
end, and wait_queue item on the stack at the fault end.

This involves further use of i_lock to guard against the races: lockdep
has been happy so far, and I see fs/inode.c:unlock_new_inode() holds
i_lock around wake_up_bit(), which is comparable to what we do here.
i_lock is more convenient, but we could switch to shmem's info->lock.

This issue has been tagged with CVE-2014-4171, which will require commit
f00cdc6df7d7 and this and the following patch to be backported: we
suggest to 3.1+, though in fact the trinity forkbomb effect might go
back as far as 2.6.16, when madvise(,,MADV_REMOVE) came in - or might
not, since much has changed, with i_mmap_mutex a spinlock before 3.0.
Anyone running trinity on 3.0 and earlier? I don't think we need care.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Lukas Czerner <lczerner@redhat.com>
Cc: Dave Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/shmem.c | 78 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 52 insertions(+), 26 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 61cf45c343e6..3d26fedbd20e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -85,7 +85,7 @@ static struct vfsmount *shm_mnt;
  * a time): we would prefer not to enlarge the shmem inode just for that.
  */
 struct shmem_falloc {
-	int	mode;		/* FALLOC_FL mode currently operating */
+	wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
 	pgoff_t start;		/* start of range currently being fallocated */
 	pgoff_t next;		/* the next page offset to be fallocated */
 	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
@@ -827,7 +827,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 			spin_lock(&inode->i_lock);
 			shmem_falloc = inode->i_private;
 			if (shmem_falloc &&
-			    !shmem_falloc->mode &&
+			    !shmem_falloc->waitq &&
 			    index >= shmem_falloc->start &&
 			    index < shmem_falloc->next)
 				shmem_falloc->nr_unswapped++;
@@ -1306,38 +1306,58 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 * Trinity finds that probing a hole which tmpfs is punching can
 	 * prevent the hole-punch from ever completing: which in turn
 	 * locks writers out with its hold on i_mutex.  So refrain from
-	 * faulting pages into the hole while it's being punched, and
-	 * wait on i_mutex to be released if vmf->flags permits.
+	 * faulting pages into the hole while it's being punched.  Although
+	 * shmem_undo_range() does remove the additions, it may be unable to
+	 * keep up, as each new page needs its own unmap_mapping_range() call,
+	 * and the i_mmap tree grows ever slower to scan if new vmas are added.
+	 *
+	 * It does not matter if we sometimes reach this check just before the
+	 * hole-punch begins, so that one fault then races with the punch:
+	 * we just need to make racing faults a rare case.
+	 *
+	 * The implementation below would be much simpler if we just used a
+	 * standard mutex or completion: but we cannot take i_mutex in fault,
+	 * and bloating every shmem inode for this unlikely case would be sad.
 	 */
 	if (unlikely(inode->i_private)) {
 		struct shmem_falloc *shmem_falloc;
 
 		spin_lock(&inode->i_lock);
 		shmem_falloc = inode->i_private;
-		if (!shmem_falloc ||
-		    shmem_falloc->mode != FALLOC_FL_PUNCH_HOLE ||
-		    vmf->pgoff < shmem_falloc->start ||
-		    vmf->pgoff >= shmem_falloc->next)
-			shmem_falloc = NULL;
-		spin_unlock(&inode->i_lock);
-		/*
-		 * i_lock has protected us from taking shmem_falloc seriously
-		 * once return from shmem_fallocate() went back up that stack.
-		 * i_lock does not serialize with i_mutex at all, but it does
-		 * not matter if sometimes we wait unnecessarily, or sometimes
-		 * miss out on waiting: we just need to make those cases rare.
-		 */
-		if (shmem_falloc) {
+		if (shmem_falloc &&
+		    shmem_falloc->waitq &&
+		    vmf->pgoff >= shmem_falloc->start &&
+		    vmf->pgoff < shmem_falloc->next) {
+			wait_queue_head_t *shmem_falloc_waitq;
+			DEFINE_WAIT(shmem_fault_wait);
+
+			ret = VM_FAULT_NOPAGE;
 			if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
 			   !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
+				/* It's polite to up mmap_sem if we can */
 				up_read(&vma->vm_mm->mmap_sem);
-				mutex_lock(&inode->i_mutex);
-				mutex_unlock(&inode->i_mutex);
-				return VM_FAULT_RETRY;
+				ret = VM_FAULT_RETRY;
 			}
-			/* cond_resched? Leave that to GUP or return to user */
-			return VM_FAULT_NOPAGE;
+
+			shmem_falloc_waitq = shmem_falloc->waitq;
+			prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
+					TASK_UNINTERRUPTIBLE);
+			spin_unlock(&inode->i_lock);
+			schedule();
+
+			/*
+			 * shmem_falloc_waitq points into the shmem_fallocate()
+			 * stack of the hole-punching task: shmem_falloc_waitq
+			 * is usually invalid by the time we reach here, but
+			 * finish_wait() does not dereference it in that case;
+			 * though i_lock needed lest racing with wake_up_all().
+			 */
+			spin_lock(&inode->i_lock);
+			finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
+			spin_unlock(&inode->i_lock);
+			return ret;
 		}
+		spin_unlock(&inode->i_lock);
 	}
 
 	error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
@@ -1857,13 +1877,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 
 	mutex_lock(&inode->i_mutex);
 
-	shmem_falloc.mode = mode & ~FALLOC_FL_KEEP_SIZE;
-
 	if (mode & FALLOC_FL_PUNCH_HOLE) {
 		struct address_space *mapping = file->f_mapping;
 		loff_t unmap_start = round_up(offset, PAGE_SIZE);
 		loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
+		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
 
+		shmem_falloc.waitq = &shmem_falloc_waitq;
 		shmem_falloc.start = unmap_start >> PAGE_SHIFT;
 		shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
 		spin_lock(&inode->i_lock);
@@ -1875,8 +1895,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 					    1 + unmap_end - unmap_start, 0);
 		shmem_truncate_range(inode, offset, offset + len - 1);
 		/* No need to unmap again: hole-punching leaves COWed pages */
+
+		spin_lock(&inode->i_lock);
+		inode->i_private = NULL;
+		wake_up_all(&shmem_falloc_waitq);
+		spin_unlock(&inode->i_lock);
 		error = 0;
-		goto undone;
+		goto out;
 	}
 
 	/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
@@ -1892,6 +1917,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		goto out;
 	}
 
+	shmem_falloc.waitq = NULL;
 	shmem_falloc.start = start;
 	shmem_falloc.next  = start;
 	shmem_falloc.nr_falloced = 0;

From 7dc7fb432bc92a988afb49e948218de575b7eb3f Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Wed, 23 Jul 2014 14:00:13 -0700
Subject: [PATCH 0044/1185] shmem: fix splicing from a hole while it's punched

commit b1a366500bd537b50c3aad26dc7df083ec03a448 upstream.

shmem_fault() is the actual culprit in trinity's hole-punch starvation,
and the most significant cause of such problems: since a page faulted is
one that then appears page_mapped(), needing unmap_mapping_range() and
i_mmap_mutex to be unmapped again.

But it is not the only way in which a page can be brought into a hole in
the radix_tree while that hole is being punched; and Vlastimil's testing
implies that if enough other processors are busy filling in the hole,
then shmem_undo_range() can be kept from completing indefinitely.

shmem_file_splice_read() is the main other user of SGP_CACHE, which can
instantiate shmem pagecache pages in the read-only case (without holding
i_mutex, so perhaps concurrently with a hole-punch).  Probably it's
silly not to use SGP_READ already (using the ZERO_PAGE for holes): which
ought to be safe, but might bring surprises - not a change to be rushed.

shmem_read_mapping_page_gfp() is an internal interface used by
drivers/gpu/drm GEM (and next by uprobes): it should be okay.  And
shmem_file_read_iter() uses the SGP_DIRTY variant of SGP_CACHE, when
called internally by the kernel (perhaps for a stacking filesystem,
which might rely on holes to be reserved): it's unclear whether it could
be provoked to keep hole-punch busy or not.

We could apply the same umbrella as now used in shmem_fault() to
shmem_file_splice_read() and the others; but it looks ugly, and use over
a range raises questions - should it actually be per page? can these get
starved themselves?

The origin of this part of the problem is my v3.1 commit d0823576bf4b
("mm: pincer in truncate_inode_pages_range"), once it was duplicated
into shmem.c.  It seemed like a nice idea at the time, to ensure
(barring RCU lookup fuzziness) that there's an instant when the entire
hole is empty; but the indefinitely repeated scans to ensure that make
it vulnerable.

Revert that "enhancement" to hole-punch from shmem_undo_range(), but
retain the unproblematic rescanning when it's truncating; add a couple
of comments there.

Remove the "indices[0] >= end" test: that is now handled satisfactorily
by the inner loop, and mem_cgroup_uncharge_start()/end() are too light
to be worth avoiding here.

But if we do not always loop indefinitely, we do need to handle the case
of swap swizzled back to page before shmem_free_swap() gets it: add a
retry for that case, as suggested by Konstantin Khlebnikov; and for the
case of page swizzled back to swap, as suggested by Johannes Weiner.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Lukas Czerner <lczerner@redhat.com>
Cc: Dave Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/shmem.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 3d26fedbd20e..16cc1d77f70a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -534,22 +534,19 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 		return;
 
 	index = start;
-	for ( ; ; ) {
+	while (index < end) {
 		cond_resched();
 		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
 				min(end - index, (pgoff_t)PAGEVEC_SIZE),
 							pvec.pages, indices);
 		if (!pvec.nr) {
-			if (index == start || unfalloc)
+			/* If all gone or hole-punch or unfalloc, we're done */
+			if (index == start || end != -1)
 				break;
+			/* But if truncating, restart to make sure all gone */
 			index = start;
 			continue;
 		}
-		if ((index == start || unfalloc) && indices[0] >= end) {
-			shmem_deswap_pagevec(&pvec);
-			pagevec_release(&pvec);
-			break;
-		}
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
@@ -561,8 +558,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			if (radix_tree_exceptional_entry(page)) {
 				if (unfalloc)
 					continue;
-				nr_swaps_freed += !shmem_free_swap(mapping,
-								index, page);
+				if (shmem_free_swap(mapping, index, page)) {
+					/* Swap was replaced by page: retry */
+					index--;
+					break;
+				}
+				nr_swaps_freed++;
 				continue;
 			}
 
@@ -571,6 +572,11 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 				if (page->mapping == mapping) {
 					VM_BUG_ON(PageWriteback(page));
 					truncate_inode_page(mapping, page);
+				} else {
+					/* Page was replaced by swap: retry */
+					unlock_page(page);
+					index--;
+					break;
 				}
 			}
 			unlock_page(page);

From 296692cab2e13d7bae70dd9ebfaf32eb36ec2793 Mon Sep 17 00:00:00 2001
From: Dmitry Popov <ixaphire@qrator.net>
Date: Sat, 5 Jul 2014 02:26:37 +0400
Subject: [PATCH 0045/1185] ip_tunnel: fix ip_tunnel_lookup

[ Upstream commit e0056593b61253f1a8a9941dacda22e73b963cdc ]

This patch fixes 3 similar bugs where incoming packets might be routed into
wrong non-wildcard tunnels:

1) Consider the following setup:
    ip address add 1.1.1.1/24 dev eth0
    ip address add 1.1.1.2/24 dev eth0
    ip tunnel add ipip1 remote 2.2.2.2 local 1.1.1.1 mode ipip dev eth0
    ip link set ipip1 up

Incoming ipip packets from 2.2.2.2 were routed into ipip1 even if they had
dst = 1.1.1.2. Moreover, even if there was a wildcard tunnel like
   ip tunnel add ipip0 remote 2.2.2.2 local any mode ipip dev eth0
that was created before the explicit one (with local 1.1.1.1), incoming ipip
packets with src = 2.2.2.2 and dst = 1.1.1.2 were still routed into ipip1.

Same issue existed with all tunnels that use ip_tunnel_lookup (gre, vti)

2)  ip address add 1.1.1.1/24 dev eth0
    ip tunnel add ipip1 remote 2.2.146.85 local 1.1.1.1 mode ipip dev eth0
    ip link set ipip1 up

Incoming ipip packets with dst = 1.1.1.1 were routed into ipip1, no matter what
the src address was. Any remote ip address which has ip_tunnel_hash = 0 raised
this issue; 2.2.146.85 is just an example, there are more than 4 million of
them. And again, a wildcard tunnel like
   ip tunnel add ipip0 remote any local 1.1.1.1 mode ipip dev eth0
would never be matched if it was created before an explicit tunnel like above.

Gre & vti tunnels had the same issue.

3)  ip address add 1.1.1.1/24 dev eth0
    ip tunnel add gre1 remote 2.2.146.84 local 1.1.1.1 key 1 mode gre dev eth0
    ip link set gre1 up

All incoming gre packets with key = 1 were routed into gre1, no matter what
the src/dst addresses were. Any remote ip address which has ip_tunnel_hash = 0
raised the issue; 2.2.146.84 is just an example, there are more than 4 million
of them. A wildcard tunnel like
   ip tunnel add gre2 remote any local any key 1 mode gre dev eth0
would never be matched if it was created before an explicit tunnel like above.

All of this happened because, while looking for a wildcard tunnel, we didn't
check that the matched tunnel was a wildcard one. Fixed.

Signed-off-by: Dmitry Popov <ixaphire@qrator.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_tunnel.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index fa6573264c8a..5642374cb751 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -166,6 +166,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
 
 	hlist_for_each_entry_rcu(t, head, hash_node) {
 		if (remote != t->parms.iph.daddr ||
+		    t->parms.iph.saddr != 0 ||
 		    !(t->dev->flags & IFF_UP))
 			continue;
 
@@ -182,10 +183,11 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
 	head = &itn->tunnels[hash];
 
 	hlist_for_each_entry_rcu(t, head, hash_node) {
-		if ((local != t->parms.iph.saddr &&
-		     (local != t->parms.iph.daddr ||
-		      !ipv4_is_multicast(local))) ||
-		    !(t->dev->flags & IFF_UP))
+		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
+		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
+			continue;
+
+		if (!(t->dev->flags & IFF_UP))
 			continue;
 
 		if (!ip_tunnel_key_match(&t->parms, flags, key))
@@ -202,6 +204,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
 
 	hlist_for_each_entry_rcu(t, head, hash_node) {
 		if (t->parms.i_key != key ||
+		    t->parms.iph.saddr != 0 ||
+		    t->parms.iph.daddr != 0 ||
 		    !(t->dev->flags & IFF_UP))
 			continue;
 

From 856443cb555a75b9700d3fabf5965b46337de199 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Wed, 18 Jun 2014 21:15:03 -0400
Subject: [PATCH 0046/1185] tcp: fix tcp_match_skb_to_sack() for unaligned SACK
 at end of an skb

[ Upstream commit 2cd0d743b05e87445c54ca124a9916f22f16742e ]

If there is an MSS change (or misbehaving receiver) that causes a SACK
to arrive that covers the end of an skb but is less than one MSS, then
tcp_match_skb_to_sack() was rounding up pkt_len to the full length of
the skb ("Round if necessary..."), then chopping all bytes off the skb
and creating a zero-byte skb in the write queue.

This was visible now because the recently simplified TLP logic in
bef1909ee3ed1c ("tcp: fixing TLP's FIN recovery") could find that 0-byte
skb at the end of the write queue, and now that we do not check that
skb's length we could send it as a TLP probe.

Consider the following example scenario:

 mss: 1000
 skb: seq: 0 end_seq: 4000  len: 4000
 SACK: start_seq: 3999 end_seq: 4000

The tcp_match_skb_to_sack() code will compute:

 in_sack = false
 pkt_len = start_seq - TCP_SKB_CB(skb)->seq = 3999 - 0 = 3999
 new_len = (pkt_len / mss) * mss = (3999/1000)*1000 = 3000
 new_len += mss = 4000

Previously we would find the new_len > skb->len check failing, so we
would fall through and set pkt_len = new_len = 4000 and chop off
pkt_len of 4000 from the 4000-byte skb, leaving a 0-byte segment
afterward in the write queue.

With this new commit, we notice that the new new_len >= skb->len check
succeeds, so that we return without trying to fragment.

Fixes: adb92db857ee ("tcp: Make SACK code to split only at mss boundaries")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Ilpo Jarvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ba7d2b7ad9f9..19104e321029 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1130,7 +1130,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 			unsigned int new_len = (pkt_len / mss) * mss;
 			if (!in_sack && new_len < pkt_len) {
 				new_len += mss;
-				if (new_len > skb->len)
+				if (new_len >= skb->len)
 					return 0;
 			}
 			pkt_len = new_len;

From e9013d0f0faef78f90f7bb30e722965fe992dc1e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Wed, 18 Jun 2014 23:46:31 +0200
Subject: [PATCH 0047/1185] net: sctp: check proc_dointvec result in
 proc_sctp_do_auth

[ Upstream commit 24599e61b7552673dd85971cf5a35369cd8c119e ]

When writing to the sysctl field net.sctp.auth_enable, it can well
be that the user buffer we handed over to proc_dointvec() via
proc_sctp_do_auth() handler contains something other than integers.

In that case, we would set an uninitialized 4-byte value from the
stack to net->sctp.auth_enable that can be leaked back when reading
the sysctl variable, and it can unintentionally turn auth_enable
on/off based on the stack content since auth_enable is interpreted
as a boolean.

Fix it up by making sure proc_dointvec() returned successfully.

Fixes: b14878ccb7fa ("net: sctp: cache auth_enable per endpoint")
Reported-by: Florian Westphal <fwestpha@redhat.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sctp/sysctl.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index fe0ba7488bdf..29299dcabfbb 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -368,8 +368,7 @@ static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
 		tbl.data = &net->sctp.auth_enable;
 
 	ret = proc_dointvec(&tbl, write, buffer, lenp, ppos);
-
-	if (write) {
+	if (write && ret == 0) {
 		struct sock *sk = net->sctp.ctl_sock;
 
 		net->sctp.auth_enable = new_value;

From 1b56220b0df8f0963bacbf35637545b550484a64 Mon Sep 17 00:00:00 2001
From: Li RongQing <roy.qing.li@gmail.com>
Date: Wed, 18 Jun 2014 13:46:02 +0800
Subject: [PATCH 0048/1185] 8021q: fix a potential memory leak

[ Upstream commit 916c1689a09bc1ca81f2d7a34876f8d35aadd11b ]

skb_cow(), called in vlan_reorder_header(), does not free the skb when it
fails, and vlan_reorder_header() then returns NULL, which resets the original
skb pointer in its caller vlan_untag() and leads to a memory leak.

Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/8021q/vlan_core.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 4a78c4de9f20..42ef36a85e69 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -103,8 +103,11 @@ EXPORT_SYMBOL(vlan_dev_vlan_id);
 
 static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
 {
-	if (skb_cow(skb, skb_headroom(skb)) < 0)
+	if (skb_cow(skb, skb_headroom(skb)) < 0) {
+		kfree_skb(skb);
 		return NULL;
+	}
+
 	memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
 	skb->mac_header += VLAN_HLEN;
 	return skb;

From 86e48c03d774e01ccd71ecba4fc4b5c2bc0b5b41 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 24 Jun 2014 10:05:11 -0700
Subject: [PATCH 0049/1185] ipv4: fix dst race in sk_dst_get()

[ Upstream commit f88649721268999bdff09777847080a52004f691 ]

When IP route cache had been removed in linux-3.6, we broke assumption
that dst entries were all freed after rcu grace period. DST_NOCACHE
dst were supposed to be freed from dst_release(). But it appears
we want to keep such dst around, either in UDP sockets or tunnels.

In sk_dst_get() we need to make sure dst refcount is not 0
before incrementing it, or else we might end up freeing a dst
twice.

DST_NOCACHE set on a dst does not mean this dst can not be attached
to a socket or a tunnel.

Then, before actual freeing, we need to observe a rcu grace period
to make sure all other cpus can catch the fact the dst is no longer
usable.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dormando <dormando@rydia.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/sock.h |  4 ++--
 net/core/dst.c     | 16 +++++++++++-----
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 72f710d2f75a..ff57aff205cd 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1727,8 +1727,8 @@ sk_dst_get(struct sock *sk)
 
 	rcu_read_lock();
 	dst = rcu_dereference(sk->sk_dst_cache);
-	if (dst)
-		dst_hold(dst);
+	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+		dst = NULL;
 	rcu_read_unlock();
 	return dst;
 }
diff --git a/net/core/dst.c b/net/core/dst.c
index df9cc810ec8e..c0e021871df8 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -267,6 +267,15 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
 }
 EXPORT_SYMBOL(dst_destroy);
 
+static void dst_destroy_rcu(struct rcu_head *head)
+{
+	struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
+
+	dst = dst_destroy(dst);
+	if (dst)
+		__dst_free(dst);
+}
+
 void dst_release(struct dst_entry *dst)
 {
 	if (dst) {
@@ -274,11 +283,8 @@ void dst_release(struct dst_entry *dst)
 
 		newrefcnt = atomic_dec_return(&dst->__refcnt);
 		WARN_ON(newrefcnt < 0);
-		if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
-			dst = dst_destroy(dst);
-			if (dst)
-				__dst_free(dst);
-		}
+		if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
+			call_rcu(&dst->rcu_head, dst_destroy_rcu);
 	}
 }
 EXPORT_SYMBOL(dst_release);

From f1e1b06f19e1ddcebcee56ba33845ded7bf719ac Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 30 Jun 2014 01:26:23 -0700
Subject: [PATCH 0050/1185] ipv4: irq safe sk_dst_[re]set() and
 ipv4_sk_update_pmtu() fix

[ Upstream commit 7f502361531e9eecb396cf99bdc9e9a59f7ebd7f ]

We have two different ways to handle changes to sk->sk_dst

First way (used by TCP) assumes socket lock is owned by caller, and use
no extra lock : __sk_dst_set() & __sk_dst_reset()

Another way (used by UDP) uses sk_dst_lock because socket lock is not
always taken. Note that sk_dst_lock is not softirq safe.

These ways are not interchangeable for a given socket type.

ipv4_sk_update_pmtu(), added in linux-3.8, added a race, as it used
the socket lock as synchronization, but users might be UDP sockets.

Instead of converting sk_dst_lock to a softirq safe version, use xchg()
as we did for sk_rx_dst in commit e47eb5dfb296b ("udp: ipv4: do not use
sk_dst_lock from softirq context")

In a follow up patch, we probably can remove sk_dst_lock, as it is
only used in IPv6.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Fixes: 9cb3a50c5f63e ("ipv4: Invalidate the socket cached route on pmtu events if possible")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/sock.h | 12 ++++++------
 net/ipv4/route.c   | 15 ++++++++-------
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index ff57aff205cd..4d2358113da2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1767,9 +1767,11 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst)
 static inline void
 sk_dst_set(struct sock *sk, struct dst_entry *dst)
 {
-	spin_lock(&sk->sk_dst_lock);
-	__sk_dst_set(sk, dst);
-	spin_unlock(&sk->sk_dst_lock);
+	struct dst_entry *old_dst;
+
+	sk_tx_queue_clear(sk);
+	old_dst = xchg(&sk->sk_dst_cache, dst);
+	dst_release(old_dst);
 }
 
 static inline void
@@ -1781,9 +1783,7 @@ __sk_dst_reset(struct sock *sk)
 static inline void
 sk_dst_reset(struct sock *sk)
 {
-	spin_lock(&sk->sk_dst_lock);
-	__sk_dst_reset(sk);
-	spin_unlock(&sk->sk_dst_lock);
+	sk_dst_set(sk, NULL);
 }
 
 extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 7256eef088b2..2b9887becb5c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -985,20 +985,21 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 	const struct iphdr *iph = (const struct iphdr *) skb->data;
 	struct flowi4 fl4;
 	struct rtable *rt;
-	struct dst_entry *dst;
+	struct dst_entry *odst = NULL;
 	bool new = false;
 
 	bh_lock_sock(sk);
-	rt = (struct rtable *) __sk_dst_get(sk);
+	odst = sk_dst_get(sk);
 
-	if (sock_owned_by_user(sk) || !rt) {
+	if (sock_owned_by_user(sk) || !odst) {
 		__ipv4_sk_update_pmtu(skb, sk, mtu);
 		goto out;
 	}
 
 	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
 
-	if (!__sk_dst_check(sk, 0)) {
+	rt = (struct rtable *)odst;
+	if (odst->obsolete && odst->ops->check(odst, 0) == NULL) {
 		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
 		if (IS_ERR(rt))
 			goto out;
@@ -1008,8 +1009,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 
 	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
 
-	dst = dst_check(&rt->dst, 0);
-	if (!dst) {
+	if (!dst_check(&rt->dst, 0)) {
 		if (new)
 			dst_release(&rt->dst);
 
@@ -1021,10 +1021,11 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 	}
 
 	if (new)
-		__sk_dst_set(sk, &rt->dst);
+		sk_dst_set(sk, &rt->dst);
 
 out:
 	bh_unlock_sock(sk);
+	dst_release(odst);
 }
 EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
 

From 4d8eb541f3bed4daf65dc188e7aa0824b1ac0d75 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 2 Jul 2014 02:39:38 -0700
Subject: [PATCH 0051/1185] net: fix sparse warning in sk_dst_set()

[ Upstream commit 5925a0555bdaf0b396a84318cbc21ba085f6c0d3 ]

sk_dst_cache has __rcu annotation, so we need a cast to avoid
following sparse error :

include/net/sock.h:1774:19: warning: incorrect type in initializer (different address spaces)
include/net/sock.h:1774:19:    expected struct dst_entry [noderef] <asn:4>*__ret
include/net/sock.h:1774:19:    got struct dst_entry *dst

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Fixes: 7f502361531e ("ipv4: irq safe sk_dst_[re]set() and ipv4_sk_update_pmtu() fix")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 4d2358113da2..26b15c0780be 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1770,7 +1770,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst)
 	struct dst_entry *old_dst;
 
 	sk_tx_queue_clear(sk);
-	old_dst = xchg(&sk->sk_dst_cache, dst);
+	old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst);
 	dst_release(old_dst);
 }
 

From eb7e73eafa8fc5168bc734431f831c9f6aef134a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 26 Jun 2014 00:44:02 -0700
Subject: [PATCH 0052/1185] bnx2x: fix possible panic under memory stress

[ Upstream commit 07b0f00964def8af9321cfd6c4a7e84f6362f728 ]

While it is legal to kfree(NULL), it is not wise to use :
put_page(virt_to_head_page(NULL))

 BUG: unable to handle kernel paging request at ffffeba400000000
 IP: [<ffffffffc01f5928>] virt_to_head_page+0x36/0x44 [bnx2x]

Reported-by: Michel Lespinasse <walken@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ariel Elior <ariel.elior@qlogic.com>
Fixes: d46d132cc021 ("bnx2x: use netdev_alloc_frag()")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 70be100feeb4..b04f7f128f49 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -745,7 +745,8 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
 
 		return;
 	}
-	bnx2x_frag_free(fp, new_data);
+	if (new_data)
+		bnx2x_frag_free(fp, new_data);
 drop:
 	/* drop the packet and keep the buffer in the bin */
 	DP(NETIF_MSG_RX_STATUS,

From 00be00119aa3e62aa234a9ccc03010b2504c1096 Mon Sep 17 00:00:00 2001
From: Christoph Paasch <christoph.paasch@uclouvain.be>
Date: Sat, 28 Jun 2014 18:26:37 +0200
Subject: [PATCH 0053/1185] tcp: Fix divide by zero when pushing during
 tcp-repair

[ Upstream commit 5924f17a8a30c2ae18d034a86ee7581b34accef6 ]

When in repair-mode and TCP_RECV_QUEUE is set, we end up calling
tcp_push with mss_now being 0. If data is in the send-queue and
tcp_set_skb_tso_segs gets called, we crash because it will divide by
mss_now:

[  347.151939] divide error: 0000 [#1] SMP
[  347.152907] Modules linked in:
[  347.152907] CPU: 1 PID: 1123 Comm: packetdrill Not tainted 3.16.0-rc2 #4
[  347.152907] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
[  347.152907] task: f5b88540 ti: f3c82000 task.ti: f3c82000
[  347.152907] EIP: 0060:[<c1601359>] EFLAGS: 00210246 CPU: 1
[  347.152907] EIP is at tcp_set_skb_tso_segs+0x49/0xa0
[  347.152907] EAX: 00000b67 EBX: f5acd080 ECX: 00000000 EDX: 00000000
[  347.152907] ESI: f5a28f40 EDI: f3c88f00 EBP: f3c83d10 ESP: f3c83d00
[  347.152907]  DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
[  347.152907] CR0: 80050033 CR2: 083158b0 CR3: 35146000 CR4: 000006b0
[  347.152907] Stack:
[  347.152907]  c167f9d9 f5acd080 000005b4 00000002 f3c83d20 c16013e6 f3c88f00 f5acd080
[  347.152907]  f3c83da0 c1603b5a f3c83d38 c10a0188 00000000 00000000 f3c83d84 c10acc85
[  347.152907]  c1ad5ec0 00000000 00000000 c1ad679c 010003e0 00000000 00000000 f3c88fc8
[  347.152907] Call Trace:
[  347.152907]  [<c167f9d9>] ? apic_timer_interrupt+0x2d/0x34
[  347.152907]  [<c16013e6>] tcp_init_tso_segs+0x36/0x50
[  347.152907]  [<c1603b5a>] tcp_write_xmit+0x7a/0xbf0
[  347.152907]  [<c10a0188>] ? up+0x28/0x40
[  347.152907]  [<c10acc85>] ? console_unlock+0x295/0x480
[  347.152907]  [<c10ad24f>] ? vprintk_emit+0x1ef/0x4b0
[  347.152907]  [<c1605716>] __tcp_push_pending_frames+0x36/0xd0
[  347.152907]  [<c15f4860>] tcp_push+0xf0/0x120
[  347.152907]  [<c15f7641>] tcp_sendmsg+0xf1/0xbf0
[  347.152907]  [<c116d920>] ? kmem_cache_free+0xf0/0x120
[  347.152907]  [<c106a682>] ? __sigqueue_free+0x32/0x40
[  347.152907]  [<c106a682>] ? __sigqueue_free+0x32/0x40
[  347.152907]  [<c114f0f0>] ? do_wp_page+0x3e0/0x850
[  347.152907]  [<c161c36a>] inet_sendmsg+0x4a/0xb0
[  347.152907]  [<c1150269>] ? handle_mm_fault+0x709/0xfb0
[  347.152907]  [<c15a006b>] sock_aio_write+0xbb/0xd0
[  347.152907]  [<c1180b79>] do_sync_write+0x69/0xa0
[  347.152907]  [<c1181023>] vfs_write+0x123/0x160
[  347.152907]  [<c1181d55>] SyS_write+0x55/0xb0
[  347.152907]  [<c167f0d8>] sysenter_do_call+0x12/0x28

This can easily be reproduced with the following packetdrill-script (the
"magic" with netem, sk_pacing and limit_output_bytes is done to prevent
the kernel from pushing all segments, because hitting the limit without
doing this is not so easy with packetdrill):

0   socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0  setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0

+0  bind(3, ..., ...) = 0
+0  listen(3, 1) = 0

+0  < S 0:0(0) win 32792 <mss 1460>
+0  > S. 0:0(0) ack 1 <mss 1460>
+0.1  < . 1:1(0) ack 1 win 65000

+0  accept(3, ..., ...) = 4

// This forces that not all segments of the snd-queue will be pushed
+0 `tc qdisc add dev tun0 root netem delay 10ms`
+0 `sysctl -w net.ipv4.tcp_limit_output_bytes=2`
+0 setsockopt(4, SOL_SOCKET, 47, [2], 4) = 0

+0 write(4,...,10000) = 10000
+0 write(4,...,10000) = 10000

// Set tcp-repair stuff, particularly TCP_RECV_QUEUE
+0 setsockopt(4, SOL_TCP, 19, [1], 4) = 0
+0 setsockopt(4, SOL_TCP, 20, [1], 4) = 0

// This now will make the write push the remaining segments
+0 setsockopt(4, SOL_SOCKET, 47, [20000], 4) = 0
+0 `sysctl -w net.ipv4.tcp_limit_output_bytes=130000`

// Now we will crash
+0 write(4,...,1000) = 1000

This happens since ec3423257508 (tcp: fix retransmission in repair
mode). Prior to that, the call to tcp_push was prevented by a check for
tp->repair.

The patch fixes it, by adding the new goto-label out_nopush. When exiting
tcp_sendmsg and a push is not required, which is the case for tp->repair,
we go to this label.

When repairing and calling send() with TCP_RECV_QUEUE, the data is
actually put in the receive-queue. So, no push is required because no
data has been added to the send-queue.

Cc: Andrew Vagin <avagin@openvz.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Fixes: ec3423257508 (tcp: fix retransmission in repair mode)
Signed-off-by: Christoph Paasch <christoph.paasch@uclouvain.be>
Acked-by: Andrew Vagin <avagin@openvz.org>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 39bdb14b3214..5d4bd6ca3ab1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1065,7 +1065,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (unlikely(tp->repair)) {
 		if (tp->repair_queue == TCP_RECV_QUEUE) {
 			copied = tcp_send_rcvq(sk, msg, size);
-			goto out;
+			goto out_nopush;
 		}
 
 		err = -EINVAL;
@@ -1238,6 +1238,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 out:
 	if (copied)
 		tcp_push(sk, flags, mss_now, tp->nonagle);
+out_nopush:
 	release_sock(sk);
 	return copied + copied_syn;
 

From 08d9137a5e01f7c977b81feefc480da6ce9d7a4b Mon Sep 17 00:00:00 2001
From: Edward Allcutt <edward.allcutt@openmarket.com>
Date: Mon, 30 Jun 2014 16:16:02 +0100
Subject: [PATCH 0054/1185] ipv4: icmp: Fix pMTU handling for rare case

[ Upstream commit 68b7107b62983f2cff0948292429d5f5999df096 ]

Some older router implementations still send Fragmentation Needed
errors with the Next-Hop MTU field set to zero. This is explicitly
described as an eventuality that hosts must deal with by the
standard (RFC 1191) since older standards specified that those
bits must be zero.

Linux had a generic (for all of IPv4) implementation of the algorithm
described in the RFC for searching a list of MTU plateaus for a good
value. Commit 46517008e116 ("ipv4: Kill ip_rt_frag_needed().")
removed this as part of the changes to remove the routing cache.
Subsequently any Fragmentation Needed packet with a zero Next-Hop
MTU has been discarded without being passed to the per-protocol
handlers or notifying userspace for raw sockets.

When there is a router which does not implement RFC 1191 on an
MTU limited path then this results in stalled connections since
large packets are discarded and the local protocols are not
notified so they never attempt to lower the pMTU.

One example I have seen is an OpenBSD router terminating IPSec
tunnels. It's worth pointing out that this case is distinct from
the BSD 4.2 bug which incorrectly calculated the Next-Hop MTU
since the commit in question dismissed that as a valid concern.

All of the per-protocols handlers implement the simple approach from
RFC 1191 of immediately falling back to the minimum value. Although
this is sub-optimal it is vastly preferable to connections hanging
indefinitely.

Remove the Next-Hop MTU != 0 check and allow such packets
to follow the normal path.

Fixes: 46517008e116 ("ipv4: Kill ip_rt_frag_needed().")
Signed-off-by: Edward Allcutt <edward.allcutt@openmarket.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/icmp.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 76e10b47e053..ea78ef5ac352 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -697,8 +697,6 @@ static void icmp_unreach(struct sk_buff *skb)
 					       &iph->daddr);
 			} else {
 				info = ntohs(icmph->un.frag.mtu);
-				if (!info)
-					goto out;
 			}
 			break;
 		case ICMP_SR_FAILED:

From c86572ab06fe9e97e3867e0d0b1af4f1aa763adb Mon Sep 17 00:00:00 2001
From: Bernd Wachter <bernd.wachter@jolla.com>
Date: Tue, 1 Jul 2014 22:01:09 +0300
Subject: [PATCH 0055/1185] net: qmi_wwan: Add ID for Telewell TW-LTE 4G v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 8dcb4b1526747d8431f9895e153dd478c9d16186 ]

There's a new version of the Telewell 4G modem working with, but not
recognized by this driver.

Signed-off-by: Bernd Wachter <bernd.wachter@jolla.com>
Acked-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 6fb0082b3308..bf2e5c19b9be 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -721,6 +721,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x19d2, 0x1424, 2)},
 	{QMI_FIXED_INTF(0x19d2, 0x1425, 2)},
 	{QMI_FIXED_INTF(0x19d2, 0x1426, 2)},	/* ZTE MF91 */
+	{QMI_FIXED_INTF(0x19d2, 0x1428, 2)},	/* Telewell TW-LTE 4G v2 */
 	{QMI_FIXED_INTF(0x19d2, 0x2002, 4)},	/* ZTE (Vodafone) K3765-Z */
 	{QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)},    /* Sierra Wireless MC7700 */
 	{QMI_FIXED_INTF(0x114f, 0x68a2, 8)},    /* Sierra Wireless MC7750 */

From 2de8b0c1e08a88325380538d9dfdf9c42b281ead Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Thu, 17 Jul 2014 13:33:51 +0200
Subject: [PATCH 0056/1185] net: qmi_wwan: add two Sierra Wireless/Netgear
 devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 5343330010a892b76a97fd93ad3c455a4a32a7fb ]

Add two device IDs found in an out-of-tree driver downloadable
from Netgear.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/qmi_wwan.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index bf2e5c19b9be..6c584f8a2268 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -647,6 +647,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x05c6, 0x9084, 4)},
 	{QMI_FIXED_INTF(0x05c6, 0x920d, 0)},
 	{QMI_FIXED_INTF(0x05c6, 0x920d, 5)},
+	{QMI_FIXED_INTF(0x0846, 0x68a2, 8)},
 	{QMI_FIXED_INTF(0x12d1, 0x140c, 1)},	/* Huawei E173 */
 	{QMI_FIXED_INTF(0x12d1, 0x14ac, 1)},	/* Huawei E1820 */
 	{QMI_FIXED_INTF(0x16d8, 0x6003, 0)},	/* CMOTech 6003 */
@@ -734,6 +735,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x1199, 0x901f, 8)},    /* Sierra Wireless EM7355 */
 	{QMI_FIXED_INTF(0x1199, 0x9041, 8)},	/* Sierra Wireless MC7305/MC7355 */
 	{QMI_FIXED_INTF(0x1199, 0x9051, 8)},	/* Netgear AirCard 340U */
+	{QMI_FIXED_INTF(0x1199, 0x9057, 8)},
 	{QMI_FIXED_INTF(0x1bbb, 0x011e, 4)},	/* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
 	{QMI_FIXED_INTF(0x1bbb, 0x0203, 2)},	/* Alcatel L800MA */
 	{QMI_FIXED_INTF(0x2357, 0x0201, 4)},	/* TP-LINK HSUPA Modem MA180 */

From 48b60bb7b53285622a808193334a0d558ebbea87 Mon Sep 17 00:00:00 2001
From: dingtianhong <dingtianhong@huawei.com>
Date: Wed, 2 Jul 2014 13:50:48 +0800
Subject: [PATCH 0057/1185] igmp: fix the problem when mc leave group

[ Upstream commit 52ad353a5344f1f700c5b777175bdfa41d3cd65a ]

The problem was triggered by these steps:

1) create socket, bind and then setsockopt for add mc group.
   mreq.imr_multiaddr.s_addr = inet_addr("255.0.0.37");
   mreq.imr_interface.s_addr = inet_addr("192.168.1.2");
   setsockopt(sockfd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq));

2) drop the mc group for this socket.
   mreq.imr_multiaddr.s_addr = inet_addr("255.0.0.37");
   mreq.imr_interface.s_addr = inet_addr("0.0.0.0");
   setsockopt(sockfd, IPPROTO_IP, IP_DROP_MEMBERSHIP, &mreq, sizeof(mreq));

3) and then drop the socket, I found the mc group was still used by the dev:

   netstat -g

   Interface       RefCnt Group
   --------------- ------ ---------------------
   eth2		   1	  255.0.0.37

Normally, even if IP_DROP_MEMBERSHIP returns an error, the mc group still needs
to be released for the netdev when the socket is dropped, but this process was broken
when the default route is NULL. The reason is that:

The ip_mc_leave_group() will choose the in_dev by the imr_interface.s_addr, if input addr
is NULL, the default route dev will be chosen, then the ifindex is got from the dev,
then polling the inet->mc_list and return -ENODEV, but if the default route dev is NULL,
the in_dev and ifIndex is both NULL, when polling the inet->mc_list, the mc group will be
released from the mc_list, but the dev didn't dec the refcnt for this mc group, so
when dropping the socket, the mc_list is NULL and the dev still keep this group.

v1->v2: According to Hideaki's suggestion, we should align with IPv6 (RFC3493) and BSDs,
	so I add a check for the in_dev before polling the mc_list, to make sure that when
	we remove the mc group, we dec the refcnt of the real dev which was using the mc address.
	The problem should never happen again.

Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/igmp.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 089b4af4fecc..38d63ca8a6b5 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1874,6 +1874,10 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 
 	rtnl_lock();
 	in_dev = ip_mc_find_dev(net, imr);
+	if (!in_dev) {
+		ret = -ENODEV;
+		goto out;
+	}
 	ifindex = imr->imr_ifindex;
 	for (imlp = &inet->mc_list;
 	     (iml = rtnl_dereference(*imlp)) != NULL;
@@ -1891,16 +1895,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 
 		*imlp = iml->next_rcu;
 
-		if (in_dev)
-			ip_mc_dec_group(in_dev, group);
+		ip_mc_dec_group(in_dev, group);
 		rtnl_unlock();
 		/* decrease mem now to avoid the memleak warning */
 		atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
 		kfree_rcu(iml, rcu);
 		return 0;
 	}
-	if (!in_dev)
-		ret = -ENODEV;
+out:
 	rtnl_unlock();
 	return ret;
 }

From 76fd3c89bb35027abbd483929d92267479f7346a Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 2 Jul 2014 12:07:16 -0700
Subject: [PATCH 0058/1185] tcp: fix false undo corner cases

[ Upstream commit 6e08d5e3c8236e7484229e46fdf92006e1dd4c49 ]

The undo code assumes that, upon entering loss recovery, TCP
1) always retransmit something
2) the retransmission never fails locally (e.g., qdisc drop)

so undo_marker is set in tcp_enter_recovery() and undo_retrans is
incremented only when tcp_retransmit_skb() is successful.

When the assumption is broken because TCP's cwnd is too small to
retransmit or the retransmit fails locally, the next (DUP)ACK
would incorrectly revert the cwnd and the congestion state in
tcp_try_undo_dsack() or tcp_may_undo(). Subsequent (DUP)ACKs
may enter the recovery state. The sender repeatedly enters and
(incorrectly) exits recovery states if the retransmits continue to
fail locally while receiving (DUP)ACKs.

The fix is to initialize undo_retrans to -1 and start counting on
the first retransmission. Always increment undo_retrans even if the
retransmissions fail locally because they couldn't cause DSACKs to
undo the cwnd reduction.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c  | 8 ++++----
 net/ipv4/tcp_output.c | 6 ++++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 19104e321029..ea7f52f3062d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1075,7 +1075,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 	}
 
 	/* D-SACK for already forgotten data... Do dumb counting. */
-	if (dup_sack && tp->undo_marker && tp->undo_retrans &&
+	if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
 	    !after(end_seq_0, prior_snd_una) &&
 	    after(end_seq_0, tp->undo_marker))
 		tp->undo_retrans--;
@@ -1154,7 +1154,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
 
 	/* Account D-SACK for retransmitted packet. */
 	if (dup_sack && (sacked & TCPCB_RETRANS)) {
-		if (tp->undo_marker && tp->undo_retrans &&
+		if (tp->undo_marker && tp->undo_retrans > 0 &&
 		    after(end_seq, tp->undo_marker))
 			tp->undo_retrans--;
 		if (sacked & TCPCB_SACKED_ACKED)
@@ -1850,7 +1850,7 @@ static void tcp_clear_retrans_partial(struct tcp_sock *tp)
 	tp->lost_out = 0;
 
 	tp->undo_marker = 0;
-	tp->undo_retrans = 0;
+	tp->undo_retrans = -1;
 }
 
 void tcp_clear_retrans(struct tcp_sock *tp)
@@ -2700,7 +2700,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 
 	tp->prior_ssthresh = 0;
 	tp->undo_marker = tp->snd_una;
-	tp->undo_retrans = tp->retrans_out;
+	tp->undo_retrans = tp->retrans_out ? : -1;
 
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		if (!ece_ack)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4a4e8746d1b2..56e29f0e230e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2428,13 +2428,15 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		if (!tp->retrans_stamp)
 			tp->retrans_stamp = TCP_SKB_CB(skb)->when;
 
-		tp->undo_retrans += tcp_skb_pcount(skb);
-
 		/* snd_nxt is stored to detect loss of retransmitted segment,
 		 * see tcp_input.c tcp_sacktag_write_queue().
 		 */
 		TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
 	}
+
+	if (tp->undo_retrans < 0)
+		tp->undo_retrans = 0;
+	tp->undo_retrans += tcp_skb_pcount(skb);
 	return err;
 }
 

From d5f758a35b50da3c1629c18b149d23ef99c91f70 Mon Sep 17 00:00:00 2001
From: Andrey Utkin <andrey.krieger.utkin@gmail.com>
Date: Mon, 7 Jul 2014 23:22:50 +0300
Subject: [PATCH 0059/1185] appletalk: Fix socket referencing in skb

[ Upstream commit 36beddc272c111689f3042bf3d10a64d8a805f93 ]

Setting just skb->sk without taking its reference and setting a
destructor is invalid. However, in the places where this was done, skb
is used in a way not requiring skb->sk setting. So dropping the setting
of skb->sk.
Thanks to Eric Dumazet <eric.dumazet@gmail.com> for correct solution.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=79441
Reported-by: Ed Martin <edman007@edman007.com>
Signed-off-by: Andrey Utkin <andrey.krieger.utkin@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/appletalk/ddp.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 0018daccdea9..8799e171addf 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1489,8 +1489,6 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		goto drop;
 
 	/* Queue packet (standard) */
-	skb->sk = sock;
-
 	if (sock_queue_rcv_skb(sock, skb) < 0)
 		goto drop;
 
@@ -1644,7 +1642,6 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
 	if (!skb)
 		goto out;
 
-	skb->sk = sk;
 	skb_reserve(skb, ddp_dl->header_length);
 	skb_reserve(skb, dev->hard_header_len);
 	skb->dev = dev;

From 8a8d269dd25aa12c24c76c99efea9f63edb88a90 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 8 Jul 2014 10:49:43 +0200
Subject: [PATCH 0060/1185] net: mvneta: fix operation in 10 Mbit/s mode

[ Upstream commit 4d12bc63ab5e48c1d78fa13883cf6fefcea3afb1 ]

As reported by Maggie Mae Roxas, the mvneta driver doesn't behave
properly in 10 Mbit/s mode. This is due to a misconfiguration of the
MVNETA_GMAC_AUTONEG_CONFIG register: bit MVNETA_GMAC_CONFIG_MII_SPEED
must be set for a 100 Mbit/s speed, but cleared for a 10 Mbit/s speed,
which the driver was not properly doing. This commit adjusts that by
setting the MVNETA_GMAC_CONFIG_MII_SPEED bit only in 100 Mbit/s mode,
and relying on the fact that all the speed related bits of this
register are cleared at the beginning of the mvneta_adjust_link()
function.

This problem exists since c5aff18204da0 ("net: mvneta: driver for
Marvell Armada 370/XP network unit") which is the commit that
introduced the mvneta driver in the kernel.

Cc: <stable@vger.kernel.org> # v3.8+
Fixes: c5aff18204da0 ("net: mvneta: driver for Marvell Armada 370/XP network unit")
Reported-by: Maggie Mae Roxas <maggie.mae.roxas@gmail.com>
Cc: Maggie Mae Roxas <maggie.mae.roxas@gmail.com>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/marvell/mvneta.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index a602aeeb3acb..dd33a112f474 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2306,7 +2306,7 @@ static void mvneta_adjust_link(struct net_device *ndev)
 
 			if (phydev->speed == SPEED_1000)
 				val |= MVNETA_GMAC_CONFIG_GMII_SPEED;
-			else
+			else if (phydev->speed == SPEED_100)
 				val |= MVNETA_GMAC_CONFIG_MII_SPEED;
 
 			mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);

From ba502e1e236f4043c849cd11c159ca783643a4de Mon Sep 17 00:00:00 2001
From: Thomas Fitzsimmons <fitzsim@fitzsim.org>
Date: Tue, 8 Jul 2014 19:44:07 -0400
Subject: [PATCH 0061/1185] net: mvneta: Fix big endian issue in
 mvneta_txq_desc_csum()

[ Upstream commit 0a1985879437d14bda8c90d0dae3455c467d7642 ]

This commit fixes the command value generated for CSUM calculation
when running in big endian mode.  The Ethernet protocol ID for IP was
being unconditionally byte-swapped in the layer 3 protocol check (with
swab16), which caused the mvneta driver to not function correctly in
big endian mode.  This patch byte-swaps the ID conditionally with
htons.

Cc: <stable@vger.kernel.org> # v3.13+
Signed-off-by: Thomas Fitzsimmons <fitzsim@fitzsim.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/marvell/mvneta.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index dd33a112f474..658613021919 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1145,7 +1145,7 @@ static u32 mvneta_txq_desc_csum(int l3_offs, int l3_proto,
 	command =  l3_offs    << MVNETA_TX_L3_OFF_SHIFT;
 	command |= ip_hdr_len << MVNETA_TX_IP_HLEN_SHIFT;
 
-	if (l3_proto == swab16(ETH_P_IP))
+	if (l3_proto == htons(ETH_P_IP))
 		command |= MVNETA_TXD_IP_CSUM;
 	else
 		command |= MVNETA_TX_L3_IP6;

From c29503e9c04e142bdc14f25df4569486c9e67bd1 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 9 Jul 2014 10:31:22 -0700
Subject: [PATCH 0062/1185] netlink: Fix handling of error from netlink_dump().

[ Upstream commit ac30ef832e6af0505b6f0251a6659adcfa74975e ]

netlink_dump() returns a negative errno value on error.  Until now,
netlink_recvmsg() directly recorded that negative value in sk->sk_err, but
that's wrong since sk_err takes positive errno values.  (This manifests as
userspace receiving a positive return value from the recv() system call,
falsely indicating success.) This bug was introduced in the commit that
started checking the netlink_dump() return value, commit b44d211 (netlink:
handle errors from netlink_dump()).

Multithreaded Netlink dumps are one way to trigger this behavior in
practice, as described in the commit message for the userspace workaround
posted here:
    http://openvswitch.org/pipermail/dev/2014-June/042339.html

This commit also fixes the same bug in netlink_poll(), introduced in commit
cd1df525d (netlink: add flow control for memory mapped I/O).

Signed-off-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netlink/af_netlink.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index be34adde692f..5ed562dfe743 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -500,7 +500,7 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock,
 		while (nlk->cb != NULL && netlink_dump_space(nlk)) {
 			err = netlink_dump(sk);
 			if (err < 0) {
-				sk->sk_err = err;
+				sk->sk_err = -err;
 				sk->sk_error_report(sk);
 				break;
 			}
@@ -2272,7 +2272,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
 		ret = netlink_dump(sk);
 		if (ret) {
-			sk->sk_err = ret;
+			sk->sk_err = -ret;
 			sk->sk_error_report(sk);
 		}
 	}

From c771cc33f93bac30415cdb6d4f9619261fbd1a9c Mon Sep 17 00:00:00 2001
From: Suresh Reddy <Suresh.Reddy@emulex.com>
Date: Fri, 11 Jul 2014 14:03:01 +0530
Subject: [PATCH 0063/1185] be2net: set EQ DB clear-intr bit in be_open()

[ Upstream commit 4cad9f3b61c7268fa89ab8096e23202300399b5d ]

On BE3, if the clear-interrupt bit of the EQ doorbell is not set the first
time it is armed, occasionally we have observed that the EQ doesn't raise
any more interrupts even if it is in armed state.
This patch fixes this by setting the clear-interrupt bit when EQs are
armed for the first time in be_open().

Signed-off-by: Suresh Reddy <Suresh.Reddy@emulex.com>
Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/emulex/benet/be_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 7371626c56a1..d81a7dbfeef6 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2663,7 +2663,7 @@ static int be_open(struct net_device *netdev)
 
 	for_all_evt_queues(adapter, eqo, i) {
 		napi_enable(&eqo->napi);
-		be_eq_notify(adapter, eqo->q.id, true, false, 0);
+		be_eq_notify(adapter, eqo->q.id, true, true, 0);
 	}
 	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
 

From 60008435941d4ad1a55763a9cb6d4e9c0e20f374 Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Fri, 11 Jul 2014 08:45:27 -0400
Subject: [PATCH 0064/1185] tipc: clear 'next'-pointer of message fragments
 before reassembly

[ Upstream commit 999417549c16dd0e3a382aa9f6ae61688db03181 ]

If the 'next' pointer of the last fragment buffer in a message is not
zeroed before reassembly, we risk ending up with a corrupt message,
since the reassembly function itself isn't doing this.

Currently, when a buffer is retrieved from the deferred queue of the
broadcast link, the next pointer is not cleared, with the result as
described above.

This commit corrects this, and thereby fixes a bug that may occur when
long broadcast messages are transmitted across dual interfaces. The bug
has been present since 40ba3cdf542a469aaa9083fa041656e59b109b90 ("tipc:
message reassembly using fragment chain")

This commit should be applied to both net and net-next.

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/tipc/bcast.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index e5f3da507823..bf2755419ec6 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -531,6 +531,7 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
 
 		buf = node->bclink.deferred_head;
 		node->bclink.deferred_head = buf->next;
+		buf->next = NULL;
 		node->bclink.deferred_size--;
 		goto receive;
 	}

From b3b3ba5714ee9f77748223a0dbcf40b90e8e0773 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Sat, 12 Jul 2014 20:30:35 +0200
Subject: [PATCH 0065/1185] net: sctp: fix information leaks in ulpevent layer

[ Upstream commit 8f2e5ae40ec193bc0a0ed99e95315c3eebca84ea ]

While working on some other SCTP code, I noticed that some
structures shared with user space are leaking uninitialized
stack or heap buffer. In particular, struct sctp_sndrcvinfo
has a 2 bytes hole between .sinfo_flags and .sinfo_ppid that
remains unfilled by us in sctp_ulpevent_read_sndrcvinfo() when
putting this into cmsg. But also struct sctp_remote_error
contains a 2 bytes hole that we don't fill but place into a skb
through skb_copy_expand() via sctp_ulpevent_make_remote_error().

Both structures are defined by the IETF in RFC6458:

* Section 5.3.2. SCTP Header Information Structure:

  The sctp_sndrcvinfo structure is defined below:

  struct sctp_sndrcvinfo {
    uint16_t sinfo_stream;
    uint16_t sinfo_ssn;
    uint16_t sinfo_flags;
    <-- 2 bytes hole  -->
    uint32_t sinfo_ppid;
    uint32_t sinfo_context;
    uint32_t sinfo_timetolive;
    uint32_t sinfo_tsn;
    uint32_t sinfo_cumtsn;
    sctp_assoc_t sinfo_assoc_id;
  };

* 6.1.3. SCTP_REMOTE_ERROR:

  A remote peer may send an Operation Error message to its peer.
  This message indicates a variety of error conditions on an
  association. The entire ERROR chunk as it appears on the wire
  is included in an SCTP_REMOTE_ERROR event. Please refer to the
  SCTP specification [RFC4960] and any extensions for a list of
  possible error formats. An SCTP error notification has the
  following format:

  struct sctp_remote_error {
    uint16_t sre_type;
    uint16_t sre_flags;
    uint32_t sre_length;
    uint16_t sre_error;
    <-- 2 bytes hole  -->
    sctp_assoc_t sre_assoc_id;
    uint8_t  sre_data[];
  };

Fix this by setting both to 0 before filling them out. We also
have other structures shared between user and kernel space in
SCTP that contain holes (e.g. struct sctp_paddrthlds), but we
copy that buffer over from user space first and thus don't need
to care about it in those cases.

While at it, we can also remove lengthy comments copied from
the draft, instead, we update the comment with the correct RFC
number where one can look it up.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sctp/ulpevent.c | 122 ++++++--------------------------------------
 1 file changed, 15 insertions(+), 107 deletions(-)

diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 10c018a5b9fe..ca907f2f5e5a 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -373,9 +373,10 @@ struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change(
  * specification [SCTP] and any extensions for a list of possible
  * error formats.
  */
-struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
-	const struct sctp_association *asoc, struct sctp_chunk *chunk,
-	__u16 flags, gfp_t gfp)
+struct sctp_ulpevent *
+sctp_ulpevent_make_remote_error(const struct sctp_association *asoc,
+				struct sctp_chunk *chunk, __u16 flags,
+				gfp_t gfp)
 {
 	struct sctp_ulpevent *event;
 	struct sctp_remote_error *sre;
@@ -394,8 +395,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
 	/* Copy the skb to a new skb with room for us to prepend
 	 * notification with.
 	 */
-	skb = skb_copy_expand(chunk->skb, sizeof(struct sctp_remote_error),
-			      0, gfp);
+	skb = skb_copy_expand(chunk->skb, sizeof(*sre), 0, gfp);
 
 	/* Pull off the rest of the cause TLV from the chunk.  */
 	skb_pull(chunk->skb, elen);
@@ -406,62 +406,21 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
 	event = sctp_skb2event(skb);
 	sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize);
 
-	sre = (struct sctp_remote_error *)
-		skb_push(skb, sizeof(struct sctp_remote_error));
+	sre = (struct sctp_remote_error *) skb_push(skb, sizeof(*sre));
 
 	/* Trim the buffer to the right length.  */
-	skb_trim(skb, sizeof(struct sctp_remote_error) + elen);
+	skb_trim(skb, sizeof(*sre) + elen);
 
-	/* Socket Extensions for SCTP
-	 * 5.3.1.3 SCTP_REMOTE_ERROR
-	 *
-	 * sre_type:
-	 *   It should be SCTP_REMOTE_ERROR.
-	 */
+	/* RFC6458, Section 6.1.3. SCTP_REMOTE_ERROR */
+	memset(sre, 0, sizeof(*sre));
 	sre->sre_type = SCTP_REMOTE_ERROR;
-
-	/*
-	 * Socket Extensions for SCTP
-	 * 5.3.1.3 SCTP_REMOTE_ERROR
-	 *
-	 * sre_flags: 16 bits (unsigned integer)
-	 *   Currently unused.
-	 */
 	sre->sre_flags = 0;
-
-	/* Socket Extensions for SCTP
-	 * 5.3.1.3 SCTP_REMOTE_ERROR
-	 *
-	 * sre_length: sizeof (__u32)
-	 *
-	 * This field is the total length of the notification data,
-	 * including the notification header.
-	 */
 	sre->sre_length = skb->len;
-
-	/* Socket Extensions for SCTP
-	 * 5.3.1.3 SCTP_REMOTE_ERROR
-	 *
-	 * sre_error: 16 bits (unsigned integer)
-	 * This value represents one of the Operational Error causes defined in
-	 * the SCTP specification, in network byte order.
-	 */
 	sre->sre_error = cause;
-
-	/* Socket Extensions for SCTP
-	 * 5.3.1.3 SCTP_REMOTE_ERROR
-	 *
-	 * sre_assoc_id: sizeof (sctp_assoc_t)
-	 *
-	 * The association id field, holds the identifier for the association.
-	 * All notifications for a given association have the same association
-	 * identifier.  For TCP style socket, this field is ignored.
-	 */
 	sctp_ulpevent_set_owner(event, asoc);
 	sre->sre_assoc_id = sctp_assoc2id(asoc);
 
 	return event;
-
 fail:
 	return NULL;
 }
@@ -906,7 +865,9 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event)
 	return notification->sn_header.sn_type;
 }
 
-/* Copy out the sndrcvinfo into a msghdr.  */
+/* RFC6458, Section 5.3.2. SCTP Header Information Structure
+ * (SCTP_SNDRCV, DEPRECATED)
+ */
 void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
 				   struct msghdr *msghdr)
 {
@@ -915,74 +876,21 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
 	if (sctp_ulpevent_is_notification(event))
 		return;
 
-	/* Sockets API Extensions for SCTP
-	 * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV)
-	 *
-	 * sinfo_stream: 16 bits (unsigned integer)
-	 *
-	 * For recvmsg() the SCTP stack places the message's stream number in
-	 * this value.
-	*/
+	memset(&sinfo, 0, sizeof(sinfo));
 	sinfo.sinfo_stream = event->stream;
-	/* sinfo_ssn: 16 bits (unsigned integer)
-	 *
-	 * For recvmsg() this value contains the stream sequence number that
-	 * the remote endpoint placed in the DATA chunk.  For fragmented
-	 * messages this is the same number for all deliveries of the message
-	 * (if more than one recvmsg() is needed to read the message).
-	 */
 	sinfo.sinfo_ssn = event->ssn;
-	/* sinfo_ppid: 32 bits (unsigned integer)
-	 *
-	 * In recvmsg() this value is
-	 * the same information that was passed by the upper layer in the peer
-	 * application.  Please note that byte order issues are NOT accounted
-	 * for and this information is passed opaquely by the SCTP stack from
-	 * one end to the other.
-	 */
 	sinfo.sinfo_ppid = event->ppid;
-	/* sinfo_flags: 16 bits (unsigned integer)
-	 *
-	 * This field may contain any of the following flags and is composed of
-	 * a bitwise OR of these values.
-	 *
-	 * recvmsg() flags:
-	 *
-	 * SCTP_UNORDERED - This flag is present when the message was sent
-	 *                 non-ordered.
-	 */
 	sinfo.sinfo_flags = event->flags;
-	/* sinfo_tsn: 32 bit (unsigned integer)
-	 *
-	 * For the receiving side, this field holds a TSN that was
-	 * assigned to one of the SCTP Data Chunks.
-	 */
 	sinfo.sinfo_tsn = event->tsn;
-	/* sinfo_cumtsn: 32 bit (unsigned integer)
-	 *
-	 * This field will hold the current cumulative TSN as
-	 * known by the underlying SCTP layer.  Note this field is
-	 * ignored when sending and only valid for a receive
-	 * operation when sinfo_flags are set to SCTP_UNORDERED.
-	 */
 	sinfo.sinfo_cumtsn = event->cumtsn;
-	/* sinfo_assoc_id: sizeof (sctp_assoc_t)
-	 *
-	 * The association handle field, sinfo_assoc_id, holds the identifier
-	 * for the association announced in the COMMUNICATION_UP notification.
-	 * All notifications for a given association have the same identifier.
-	 * Ignored for one-to-one style sockets.
-	 */
 	sinfo.sinfo_assoc_id = sctp_assoc2id(event->asoc);
-
-	/* context value that is set via SCTP_CONTEXT socket option. */
+	/* Context value that is set via SCTP_CONTEXT socket option. */
 	sinfo.sinfo_context = event->asoc->default_rcv_context;
-
 	/* These fields are not used while receiving. */
 	sinfo.sinfo_timetolive = 0;
 
 	put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV,
-		 sizeof(struct sctp_sndrcvinfo), (void *)&sinfo);
+		 sizeof(sinfo), &sinfo);
 }
 
 /* Do accounting for bytes received and hold a reference to the association

From dc1a6f415ed2405f4589b238d9b08a7d6613c8af Mon Sep 17 00:00:00 2001
From: Christoph Schulz <develop@kristov.de>
Date: Sun, 13 Jul 2014 00:53:15 +0200
Subject: [PATCH 0066/1185] net: pppoe: use correct channel MTU when using
 Multilink PPP

[ Upstream commit a8a3e41c67d24eb12f9ab9680cbb85e24fcd9711 ]

The PPP channel MTU is used with Multilink PPP when ppp_mp_explode() (see
ppp_generic module) tries to determine how big a fragment might be. According
to RFC 1661, the MTU excludes the 2-byte PPP protocol field, see the
corresponding comment and code in ppp_mp_explode():

		/*
		 * hdrlen includes the 2-byte PPP protocol field, but the
		 * MTU counts only the payload excluding the protocol field.
		 * (RFC1661 Section 2)
		 */
		mtu = pch->chan->mtu - (hdrlen - 2);

However, the pppoe module *does* include the PPP protocol field in the channel
MTU, which is wrong as it causes the PPP payload to be 1-2 bytes too big under
certain circumstances (one byte if PPP protocol compression is used, two
otherwise), causing the generated Ethernet packets to be dropped. So the pppoe
module has to subtract two bytes from the channel MTU. This error only
manifests itself when using Multilink PPP, as otherwise the channel MTU is not
used anywhere.

In the following, I will describe how to reproduce this bug. We configure two
pppd instances for multilink PPP over two PPPoE links, say eth2 and eth3, with
a MTU of 1492 bytes for each link and a MRRU of 2976 bytes. (This MRRU is
computed by adding the two link MTUs and subtracting the MP header twice, which
is 4 bytes long.) The necessary pppd statements on both sides are "multilink
mtu 1492 mru 1492 mrru 2976". On the client side, we additionally need "plugin
rp-pppoe.so eth2" and "plugin rp-pppoe.so eth3", respectively; on the server
side, we additionally need to start two pppoe-server instances to be able to
establish two PPPoE sessions, one over eth2 and one over eth3. We set the MTU
of the PPP network interface to the MRRU (2976) on both sides of the connection
in order to make use of the higher bandwidth. (If we didn't do that, IP
fragmentation would kick in, which we want to avoid.)

Now we send a ICMPv4 echo request with a payload of 2948 bytes from client to
server over the PPP link. This results in the following network packet:

   2948 (echo payload)
 +    8 (ICMPv4 header)
 +   20 (IPv4 header)
---------------------
   2976 (PPP payload)

These 2976 bytes do not exceed the MTU of the PPP network interface, so the
IP packet is not fragmented. Now the multilink PPP code in ppp_mp_explode()
prepends one protocol byte (0x21 for IPv4), making the packet one byte bigger
than the negotiated MRRU. So this packet would have to be divided in three
fragments. But this does not happen as each link MTU is assumed to be two bytes
larger. So this packet is divided into two fragments only, one of size 1489 and
one of size 1488. Now we have for that bigger fragment:

   1489 (PPP payload)
 +    4 (MP header)
 +    2 (PPP protocol field for the MP payload (0x3d))
 +    6 (PPPoE header)
--------------------------
   1501 (Ethernet payload)

This packet exceeds the link MTU and is discarded.

If one configures the link MTU on the client side to 1501, one can see the
discarded Ethernet frames with tcpdump running on the client. A

ping -s 2948 -c 1 192.168.15.254

leads to the smaller fragment that is correctly received on the server side:

(tcpdump -vvvne -i eth3 pppoes and ppp proto 0x3d)
52:54:00:ad:87:fd > 52:54:00:79:5c:d0, ethertype PPPoE S (0x8864),
  length 1514: PPPoE  [ses 0x3] MLPPP (0x003d), length 1494: seq 0x000,
  Flags [end], length 1492

and to the bigger fragment that is not received on the server side:

(tcpdump -vvvne -i eth2 pppoes and ppp proto 0x3d)
52:54:00:70:9e:89 > 52:54:00:5d:6f:b0, ethertype PPPoE S (0x8864),
  length 1515: PPPoE  [ses 0x5] MLPPP (0x003d), length 1495: seq 0x000,
  Flags [begin], length 1493

With the patch below, we correctly obtain three fragments:

52:54:00:ad:87:fd > 52:54:00:79:5c:d0, ethertype PPPoE S (0x8864),
  length 1514: PPPoE  [ses 0x1] MLPPP (0x003d), length 1494: seq 0x000,
  Flags [begin], length 1492
52:54:00:70:9e:89 > 52:54:00:5d:6f:b0, ethertype PPPoE S (0x8864),
  length 1514: PPPoE  [ses 0x1] MLPPP (0x003d), length 1494: seq 0x000,
  Flags [none], length 1492
52:54:00:ad:87:fd > 52:54:00:79:5c:d0, ethertype PPPoE S (0x8864),
  length 27: PPPoE  [ses 0x1] MLPPP (0x003d), length 7: seq 0x000,
  Flags [end], length 5

And the ICMPv4 echo request is successfully received at the server side:

IP (tos 0x0, ttl 64, id 21925, offset 0, flags [DF], proto ICMP (1),
  length 2976)
    192.168.222.2 > 192.168.15.254: ICMP echo request, id 30530, seq 0,
      length 2956

The bug was introduced in commit c9aa6895371b2a257401f59d3393c9f7ac5a8698
("[PPPOE]: Advertise PPPoE MTU") from the very beginning. This patch applies
to 3.10 upwards but the fix can be applied (with minor modifications) to
kernels as old as 2.6.32.

Signed-off-by: Christoph Schulz <develop@kristov.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ppp/pppoe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 6839fb07a4c9..becfa3ef7fdc 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -675,7 +675,7 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
 		po->chan.hdrlen = (sizeof(struct pppoe_hdr) +
 				   dev->hard_header_len);
 
-		po->chan.mtu = dev->mtu - sizeof(struct pppoe_hdr);
+		po->chan.mtu = dev->mtu - sizeof(struct pppoe_hdr) - 2;
 		po->chan.private = sk;
 		po->chan.ops = &pppoe_chan_ops;
 

From 1c81dac91e065e39413f8ff5d22b444087b0ed11 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Wed, 16 Jul 2014 10:02:26 -0400
Subject: [PATCH 0067/1185] sunvnet: clean up objects created in vnet_new() on
 vnet_exit()

[ Upstream commit a4b70a07ed12a71131cab7adce2ce91c71b37060 ]

Nothing cleans up the objects created by
vnet_new(), they are completely leaked.

vnet_exit(), after doing the vio_unregister_driver() to clean
up ports, should call a helper function that iterates over vnet_list
and cleans up those objects. This includes unregister_netdevice()
as well as free_netdev().

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Reviewed-by: Karl Volz <karl.volz@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/sun/sunvnet.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index 3df56840a3b9..398faff8be7a 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -1083,6 +1083,24 @@ static struct vnet *vnet_find_or_create(const u64 *local_mac)
 	return vp;
 }
 
+static void vnet_cleanup(void)
+{
+	struct vnet *vp;
+	struct net_device *dev;
+
+	mutex_lock(&vnet_list_mutex);
+	while (!list_empty(&vnet_list)) {
+		vp = list_first_entry(&vnet_list, struct vnet, list);
+		list_del(&vp->list);
+		dev = vp->dev;
+		/* vio_unregister_driver() should have cleaned up port_list */
+		BUG_ON(!list_empty(&vp->port_list));
+		unregister_netdev(dev);
+		free_netdev(dev);
+	}
+	mutex_unlock(&vnet_list_mutex);
+}
+
 static const char *local_mac_prop = "local-mac-address";
 
 static struct vnet *vnet_find_parent(struct mdesc_handle *hp,
@@ -1240,7 +1258,6 @@ static int vnet_port_remove(struct vio_dev *vdev)
 
 		kfree(port);
 
-		unregister_netdev(vp->dev);
 	}
 	return 0;
 }
@@ -1268,6 +1285,7 @@ static int __init vnet_init(void)
 static void __exit vnet_exit(void)
 {
 	vio_unregister_driver(&vnet_port_driver);
+	vnet_cleanup();
 }
 
 module_init(vnet_init);

From 443ba0f457a6c5f2eeec64fe3f80efc7cbb10133 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20Sch=C3=B6lling?= <manuel.schoelling@gmx.de>
Date: Sat, 7 Jun 2014 23:57:25 +0200
Subject: [PATCH 0068/1185] dns_resolver: assure that dns_query() result is
 null-terminated
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 84a7c0b1db1c17d5ded8d3800228a608e1070b40 ]

dns_query() credulously assumes that keys are null-terminated and
returns a copy of a memory block that is off by one.

Signed-off-by: Manuel Schölling <manuel.schoelling@gmx.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/dns_resolver/dns_query.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index c32be292c7e3..ede0e2d7412e 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -150,7 +150,9 @@ int dns_query(const char *type, const char *name, size_t namelen,
 	if (!*_result)
 		goto put;
 
-	memcpy(*_result, upayload->data, len + 1);
+	memcpy(*_result, upayload->data, len);
+	*_result[len] = '\0';
+
 	if (_expiry)
 		*_expiry = rkey->expiry;
 

From 72a0a659b5b2aa6a8d3ade5a7fbd0578bd1dd749 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 21 Jul 2014 00:06:48 +0100
Subject: [PATCH 0069/1185] dns_resolver: Null-terminate the right string

[ Upstream commit 640d7efe4c08f06c4ae5d31b79bd8740e7f6790a ]

*_result[len] is parsed as *(_result[len]) which is not at all what we
want to touch here.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Fixes: 84a7c0b1db1c ("dns_resolver: assure that dns_query() result is null-terminated")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/dns_resolver/dns_query.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index ede0e2d7412e..2022b46ab38f 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -151,7 +151,7 @@ int dns_query(const char *type, const char *name, size_t namelen,
 		goto put;
 
 	memcpy(*_result, upayload->data, len);
-	*_result[len] = '\0';
+	(*_result)[len] = '\0';
 
 	if (_expiry)
 		*_expiry = rkey->expiry;

From 36b526620dcc8e01330964ef88c1ec5217027781 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 21 Jul 2014 07:17:42 +0200
Subject: [PATCH 0070/1185] ipv4: fix buffer overflow in ip_options_compile()

[ Upstream commit 10ec9472f05b45c94db3c854d22581a20b97db41 ]

There is a benign buffer overflow in ip_options_compile spotted by
AddressSanitizer[1] :

It's benign because we always can access one extra byte in skb->head
(because header is followed by struct skb_shared_info), and in this case
this byte is not even used.

[28504.910798] ==================================================================
[28504.912046] AddressSanitizer: heap-buffer-overflow in ip_options_compile
[28504.913170] Read of size 1 by thread T15843:
[28504.914026]  [<ffffffff81802f91>] ip_options_compile+0x121/0x9c0
[28504.915394]  [<ffffffff81804a0d>] ip_options_get_from_user+0xad/0x120
[28504.916843]  [<ffffffff8180dedf>] do_ip_setsockopt.isra.15+0x8df/0x1630
[28504.918175]  [<ffffffff8180ec60>] ip_setsockopt+0x30/0xa0
[28504.919490]  [<ffffffff8181e59b>] tcp_setsockopt+0x5b/0x90
[28504.920835]  [<ffffffff8177462f>] sock_common_setsockopt+0x5f/0x70
[28504.922208]  [<ffffffff817729c2>] SyS_setsockopt+0xa2/0x140
[28504.923459]  [<ffffffff818cfb69>] system_call_fastpath+0x16/0x1b
[28504.924722]
[28504.925106] Allocated by thread T15843:
[28504.925815]  [<ffffffff81804995>] ip_options_get_from_user+0x35/0x120
[28504.926884]  [<ffffffff8180dedf>] do_ip_setsockopt.isra.15+0x8df/0x1630
[28504.927975]  [<ffffffff8180ec60>] ip_setsockopt+0x30/0xa0
[28504.929175]  [<ffffffff8181e59b>] tcp_setsockopt+0x5b/0x90
[28504.930400]  [<ffffffff8177462f>] sock_common_setsockopt+0x5f/0x70
[28504.931677]  [<ffffffff817729c2>] SyS_setsockopt+0xa2/0x140
[28504.932851]  [<ffffffff818cfb69>] system_call_fastpath+0x16/0x1b
[28504.934018]
[28504.934377] The buggy address ffff880026382828 is located 0 bytes to the right
[28504.934377]  of 40-byte region [ffff880026382800, ffff880026382828)
[28504.937144]
[28504.937474] Memory state around the buggy address:
[28504.938430]  ffff880026382300: ........ rrrrrrrr rrrrrrrr rrrrrrrr
[28504.939884]  ffff880026382400: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr
[28504.941294]  ffff880026382500: .....rrr rrrrrrrr rrrrrrrr rrrrrrrr
[28504.942504]  ffff880026382600: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr
[28504.943483]  ffff880026382700: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr
[28504.944511] >ffff880026382800: .....rrr rrrrrrrr rrrrrrrr rrrrrrrr
[28504.945573]                         ^
[28504.946277]  ffff880026382900: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr
[28505.094949]  ffff880026382a00: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr
[28505.096114]  ffff880026382b00: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr
[28505.097116]  ffff880026382c00: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr
[28505.098472]  ffff880026382d00: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr
[28505.099804] Legend:
[28505.100269]  f - 8 freed bytes
[28505.100884]  r - 8 redzone bytes
[28505.101649]  . - 8 allocated bytes
[28505.102406]  x=1..7 - x allocated bytes + (8-x) redzone bytes
[28505.103637] ==================================================================

[1] https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_options.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ec7264514a82..089ed81d1878 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -288,6 +288,10 @@ int ip_options_compile(struct net *net,
 			optptr++;
 			continue;
 		}
+		if (unlikely(l < 2)) {
+			pp_ptr = optptr;
+			goto error;
+		}
 		optlen = optptr[1];
 		if (optlen<2 || optlen>l) {
 			pp_ptr = optptr;

From 6b3f0da3d2555bb9a2c03865f4af3324f2b08f44 Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Date: Wed, 25 Jun 2014 10:09:07 +0900
Subject: [PATCH 0071/1185] perf/x86/intel: ignore CondChgd bit to avoid false
 NMI handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit b292d7a10487aee6e74b1c18b8d95b92f40d4a4f upstream.

Currently, any NMI is falsely handled by a NMI handler of NMI watchdog
if CondChgd bit in MSR_CORE_PERF_GLOBAL_STATUS MSR is set.

For example, we use external NMI to make system panic to get crash
dump, but in this case, the external NMI is falsely handled due to the
issue.

This commit deals with the issue simply by ignoring CondChgd bit.

Here is explanation in detail.

On x86 NMI watchdog uses performance monitoring feature to
periodically signal NMI each time performance counter gets overflowed.

intel_pmu_handle_irq() is called as a NMI_LOCAL handler from a NMI
handler of NMI watchdog, perf_event_nmi_handler(). It identifies an
owner of a given NMI by looking at overflow status bits in
MSR_CORE_PERF_GLOBAL_STATUS MSR. If some of the bits are set, then it
handles the given NMI as its own NMI.

The problem is that the intel_pmu_handle_irq() doesn't distinguish
CondChgd bit from other bits. Unlike the other status bits, CondChgd
bit doesn't represent overflow status for performance counters. Thus,
CondChgd bit cannot be thought of as a mark indicating a given NMI is
NMI watchdog's.

As a result, if CondChgd bit is set, any NMI is falsely handled by the
NMI handler of NMI watchdog. Also, if type of the falsely handled NMI
is either NMI_UNKNOWN, NMI_SERR or NMI_IO_CHECK, the corresponding
action is never performed until CondChgd bit is cleared.

I noticed this behavior on systems with Ivy Bridge processors: Intel
Xeon CPU E5-2630 v2 and Intel Xeon CPU E7-8890 v2. On both systems,
CondChgd bit in MSR_CORE_PERF_GLOBAL_STATUS MSR has already been set
in the beginning at boot. Then the CondChgd bit is immediately cleared
by next wrmsr to MSR_CORE_PERF_GLOBAL_CTRL MSR and appears to remain
0.

On the other hand, on older processors such as Nehalem, Xeon E7540,
CondChgd bit is not set in the beginning at boot.

I'm not sure about exact behavior of CondChgd bit, in particular when
this bit is set. Although I read Intel System Programmer's Manual to
figure out that, the descriptions I found are:

  In 18.9.1:

  "The MSR_PERF_GLOBAL_STATUS MSR also provides a 'sticky bit' to
   indicate changes to the state of performance monitoring hardware"

  In Table 35-2 IA-32 Architectural MSRs

  63 CondChg: status bits of this register has changed.

These are different from the behaviour I see on the actual system as I
explained above.

At least, I think ignoring CondChgd bit should be enough for NMI
watchdog perspective.

Signed-off-by: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Acked-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/20140625.103503.409316067.d.hatayama@jp.fujitsu.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a9e22073bd56..b45ac6affa9c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1198,6 +1198,15 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 
 	intel_pmu_lbr_read();
 
+	/*
+	 * CondChgd bit 63 doesn't mean any overflow status. Ignore
+	 * and clear the bit.
+	 */
+	if (__test_and_clear_bit(63, (unsigned long *)&status)) {
+		if (!status)
+			goto done;
+	}
+
 	/*
 	 * PEBS overflow sets bit 62 in the global status register
 	 */

From 8503df8d0c177e9e1c5468663b8954205ac069c9 Mon Sep 17 00:00:00 2001
From: Amitkumar Karwar <akarwar@marvell.com>
Date: Fri, 20 Jun 2014 11:45:25 -0700
Subject: [PATCH 0072/1185] mwifiex: fix Tx timeout issue

commit d76744a93246eccdca1106037e8ee29debf48277 upstream.

https://bugzilla.kernel.org/show_bug.cgi?id=70191
https://bugzilla.kernel.org/show_bug.cgi?id=77581

It is observed that sometimes Tx packet is downloaded without
adding driver's txpd header. This results in firmware parsing
garbage data as packet length. Sometimes firmware is unable
to read the packet if length comes out as invalid. This stops
further traffic and timeout occurs.

The root cause is uninitialized fields in tx_info(skb->cb) of
packet used to get garbage values. In this case if
MWIFIEX_BUF_FLAG_REQUEUED_PKT flag is mistakenly set, txpd
header was skipped. This patch makes sure that tx_info is
correctly initialized to fix the problem.

Reported-by: Andrew Wiley <wiley.andrew.j@gmail.com>
Reported-by: Linus Gasser <list@markas-al-nour.org>
Reported-by: Michael Hirsch <hirsch@teufel.de>
Tested-by: Xinming Hu <huxm@marvell.com>
Signed-off-by: Amitkumar Karwar <akarwar@marvell.com>
Signed-off-by: Maithili Hinge <maithili@marvell.com>
Signed-off-by: Avinash Patil <patila@marvell.com>
Signed-off-by: Bing Zhao <bzhao@marvell.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/mwifiex/main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c
index fc3fe8ddcf62..83c61964d082 100644
--- a/drivers/net/wireless/mwifiex/main.c
+++ b/drivers/net/wireless/mwifiex/main.c
@@ -501,6 +501,7 @@ mwifiex_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	tx_info = MWIFIEX_SKB_TXCB(skb);
+	memset(tx_info, 0, sizeof(*tx_info));
 	tx_info->bss_num = priv->bss_num;
 	tx_info->bss_type = priv->bss_type;
 

From 16de9ea386e182600a473a57edde7579a24d4664 Mon Sep 17 00:00:00 2001
From: Martin Lau <kafai@fb.com>
Date: Mon, 9 Jun 2014 23:06:42 -0700
Subject: [PATCH 0073/1185] ring-buffer: Fix polling on trace_pipe

commit 97b8ee845393701edc06e27ccec2876ff9596019 upstream.

ring_buffer_poll_wait() should always put the poll_table to its wait_queue
even if there is immediate data available.  Otherwise, the following epoll and
read sequence will eventually hang forever:

1. Put some data to make the trace_pipe ring_buffer read ready first
2. epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee)
3. epoll_wait()
4. read(trace_pipe_fd) till EAGAIN
5. Add some more data to the trace_pipe ring_buffer
6. epoll_wait() -> this epoll_wait() will block forever

~ During the epoll_ctl(efd, EPOLL_CTL_ADD,...) call in step 2,
  ring_buffer_poll_wait() returns immediately without adding poll_table,
  which has poll_table->_qproc pointing to ep_poll_callback(), to its
  wait_queue.
~ During the epoll_wait() call in step 3 and step 6,
  ring_buffer_poll_wait() cannot add ep_poll_callback() to its wait_queue
  because the poll_table->_qproc is NULL and it is how epoll works.
~ When there is new data available in step 6, ring_buffer does not know
  it has to call ep_poll_callback() because it is not in its wait queue.
  Hence, block forever.

Other poll implementations seem to call poll_wait() unconditionally as the very
first thing to do.  For example, tcp_poll() in tcp.c.

Link: http://lkml.kernel.org/p/20140610060637.GA14045@devbig242.prn2.facebook.com

Fixes: 2a2cc8f7c4d0 "ftrace: allow the event pipe to be polled"
Reviewed-by: Chris Mason <clm@fb.com>
Signed-off-by: Martin Lau <kafai@fb.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/ring_buffer.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8e94c1102636..4063d5fe5e44 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -616,10 +616,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct rb_irq_work *work;
 
-	if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
-	    (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
-		return POLLIN | POLLRDNORM;
-
 	if (cpu == RING_BUFFER_ALL_CPUS)
 		work = &buffer->irq_work;
 	else {

From db9e4bf382abcd9b57a283084072131f6569a802 Mon Sep 17 00:00:00 2001
From: Matthias Brugger <matthias.bgg@gmail.com>
Date: Thu, 3 Jul 2014 13:58:52 +0200
Subject: [PATCH 0074/1185] irqchip: gic: Add support for cortex a7 compatible
 string

commit a97e8027b1d28eafe6bafe062556c1ec926a49c6 upstream.

Patch 0a68214b "ARM: DT: Add binding for GIC virtualization extentions (VGIC)" added
the "arm,cortex-a7-gic" compatible string, but the corresponding IRQCHIP_DECLARE
was never added to the gic driver.

To let real Cortex-A7 SoCs use it, add the necessary declaration to the device driver.

Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
Link: https://lkml.kernel.org/r/1404388732-28890-1-git-send-email-matthias.bgg@gmail.com
Fixes: 0a68214b76ca ("ARM: DT: Add binding for GIC virtualization extentions (VGIC)")
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/irqchip/irq-gic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 4e11218d644e..493ca430f093 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -858,6 +858,7 @@ int __init gic_of_init(struct device_node *node, struct device_node *parent)
 }
 IRQCHIP_DECLARE(cortex_a15_gic, "arm,cortex-a15-gic", gic_of_init);
 IRQCHIP_DECLARE(cortex_a9_gic, "arm,cortex-a9-gic", gic_of_init);
+IRQCHIP_DECLARE(cortex_a7_gic, "arm,cortex-a7-gic", gic_of_init);
 IRQCHIP_DECLARE(msm_8660_qgic, "qcom,msm-8660-qgic", gic_of_init);
 IRQCHIP_DECLARE(msm_qgic2, "qcom,msm-qgic2", gic_of_init);
 

From 4003b69e90b58f35a60061e6f78a4d98a6ff2727 Mon Sep 17 00:00:00 2001
From: Tomasz Figa <t.figa@samsung.com>
Date: Thu, 17 Jul 2014 17:23:44 +0200
Subject: [PATCH 0075/1185] irqchip: gic: Fix core ID calculation when topology
 is read from DT

commit 29e697b11853d3f83b1864ae385abdad4aa2c361 upstream.

Certain GIC implementations, namely those found on earlier, single
cluster, Exynos SoCs, have registers mapped without per-CPU banking,
which means that the driver needs to use different offset for each CPU.

Currently the driver calculates the offset by multiplying value returned
by cpu_logical_map() by CPU offset parsed from DT. This is correct when
CPU topology is not specified in DT and aforementioned function returns
core ID alone. However when DT contains CPU topology, the function
changes to return cluster ID as well, which is non-zero on mentioned
SoCs and so breaks the calculation in GIC driver.

This patch fixes this by masking out cluster ID in CPU offset
calculation so that only core ID is considered. Multi-cluster Exynos
SoCs already have banked GIC implementations, so this simple fix should
be enough.

Reported-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reported-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Tomasz Figa <t.figa@samsung.com>
Fixes: db0d4db22a78d ("ARM: gic: allow GIC to support non-banked setups")
Link: https://lkml.kernel.org/r/1405610624-18722-1-git-send-email-t.figa@samsung.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/irqchip/irq-gic.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 493ca430f093..c8ee1cb023b8 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -42,6 +42,7 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/irqchip/arm-gic.h>
 
+#include <asm/cputype.h>
 #include <asm/irq.h>
 #include <asm/exception.h>
 #include <asm/smp_plat.h>
@@ -754,7 +755,9 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
 		}
 
 		for_each_possible_cpu(cpu) {
-			unsigned long offset = percpu_offset * cpu_logical_map(cpu);
+			u32 mpidr = cpu_logical_map(cpu);
+			u32 core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+			unsigned long offset = percpu_offset * core_id;
 			*per_cpu_ptr(gic->dist_base.percpu_base, cpu) = dist_base + offset;
 			*per_cpu_ptr(gic->cpu_base.percpu_base, cpu) = cpu_base + offset;
 		}

From e1bb259863012b0498678e0c319a9420d2108215 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 15 Jul 2014 09:48:53 -0400
Subject: [PATCH 0076/1185] drm/radeon: set default bl level to something
 reasonable

commit 201bb62402e0227375c655446ea04fcd0acf7287 upstream.

If the value in the scratch register is 0, set it to the
max level.  This fixes an issue where the console fb blanking
code calls back into the backlight driver on unblank and then
sets the backlight level to 0 after the driver has already
set the mode and enabled the backlight.

bugs:
https://bugs.freedesktop.org/show_bug.cgi?id=81382
https://bugs.freedesktop.org/show_bug.cgi?id=70207

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Tested-by: David Heidelberger <david.heidelberger@ixit.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/atombios_encoders.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index 5802d7486354..1b564d7e4191 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -183,7 +183,6 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder,
 	struct backlight_properties props;
 	struct radeon_backlight_privdata *pdata;
 	struct radeon_encoder_atom_dig *dig;
-	u8 backlight_level;
 	char bl_name[16];
 
 	/* Mac laptops with multiple GPUs use the gmux driver for backlight
@@ -222,12 +221,17 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder,
 
 	pdata->encoder = radeon_encoder;
 
-	backlight_level = radeon_atom_get_backlight_level_from_reg(rdev);
-
 	dig = radeon_encoder->enc_priv;
 	dig->bl_dev = bd;
 
 	bd->props.brightness = radeon_atom_backlight_get_brightness(bd);
+	/* Set a reasonable default here if the level is 0 otherwise
+	 * fbdev will attempt to turn the backlight on after console
+	 * unblanking and it will try and restore 0 which turns the backlight
+	 * off again.
+	 */
+	if (bd->props.brightness == 0)
+		bd->props.brightness = RADEON_MAX_BL_LEVEL;
 	bd->props.power = FB_BLANK_UNBLANK;
 	backlight_update_status(bd);
 

From 1ed9cbc93c613efa69df58a1d4c8037adb105f43 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Mon, 12 May 2014 16:35:39 +0800
Subject: [PATCH 0077/1185] drm/qxl: return IRQ_NONE if it was not our irq

commit fbb60fe35ad579b511de8604b06a30b43846473b upstream.

Return IRQ_NONE if it was not our irq. This is necessary for the case
when qxl is sharing irq line with a device A in a crash kernel. If qxl
is initialized before A and A's irq was raised during this gap,
returning IRQ_HANDLED in this case will cause this irq to be raised
again after EOI since the kernel thinks it was handled but in fact it was
not.

Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/qxl/qxl_irq.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c
index 21393dc4700a..f4b6b89b98f3 100644
--- a/drivers/gpu/drm/qxl/qxl_irq.c
+++ b/drivers/gpu/drm/qxl/qxl_irq.c
@@ -33,6 +33,9 @@ irqreturn_t qxl_irq_handler(DRM_IRQ_ARGS)
 
 	pending = xchg(&qdev->ram_header->int_pending, 0);
 
+	if (!pending)
+		return IRQ_NONE;
+
 	atomic_inc(&qdev->irq_received);
 
 	if (pending & QXL_INTERRUPT_DISPLAY) {

From c08ca3d473e8500ca0128688ad311f06594430d2 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 14 Jul 2014 17:57:19 -0400
Subject: [PATCH 0078/1185] drm/radeon: avoid leaking edid data

commit 0ac66effe7fcdee55bda6d5d10d3372c95a41920 upstream.

In some cases we fetch the edid in the detect() callback
in order to determine what sort of monitor is connected.
If that happens, don't fetch the edid again in the get_modes()
callback or we will leak the edid.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/radeon_display.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 06ccfe477650..a84de32a91f5 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -688,6 +688,10 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector)
 	struct radeon_device *rdev = dev->dev_private;
 	int ret = 0;
 
+	/* don't leak the edid if we already fetched it in detect() */
+	if (radeon_connector->edid)
+		goto got_edid;
+
 	/* on hw with routers, select right port */
 	if (radeon_connector->router.ddc_valid)
 		radeon_router_select_ddc_port(radeon_connector);
@@ -727,6 +731,7 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector)
 			radeon_connector->edid = radeon_bios_get_hardcoded_edid(rdev);
 	}
 	if (radeon_connector->edid) {
+got_edid:
 		drm_mode_connector_update_edid_property(&radeon_connector->base, radeon_connector->edid);
 		ret = drm_add_edid_modes(&radeon_connector->base, radeon_connector->edid);
 		drm_edid_to_eld(&radeon_connector->base, radeon_connector->edid);

From c933192733ddf436c578183ca0687c7db5fff468 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Mon, 7 Jul 2014 14:06:11 -0700
Subject: [PATCH 0079/1185] alarmtimer: Fix bug where relative alarm timers
 were treated as absolute

commit 16927776ae757d0d132bdbfabbfe2c498342bd59 upstream.

Sharvil noticed with the posix timer_settime interface, using the
CLOCK_REALTIME_ALARM or CLOCK_BOOTTIME_ALARM clockid, if the users
tried to specify a relative time timer, it would incorrectly be
treated as absolute regardless of the state of the flags argument.

This patch corrects this, properly checking the absolute/relative flag,
as well as adds further error checking that no invalid flag bits are set.

Reported-by: Sharvil Nanavati <sharvil@google.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Sharvil Nanavati <sharvil@google.com>
Link: http://lkml.kernel.org/r/1404767171-6902-1-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/time/alarmtimer.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index a8f5084dcde7..294bf4ef1f47 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -540,9 +540,14 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
 				struct itimerspec *new_setting,
 				struct itimerspec *old_setting)
 {
+	ktime_t exp;
+
 	if (!rtcdev)
 		return -ENOTSUPP;
 
+	if (flags & ~TIMER_ABSTIME)
+		return -EINVAL;
+
 	if (old_setting)
 		alarm_timer_get(timr, old_setting);
 
@@ -552,8 +557,16 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
 
 	/* start the timer */
 	timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval);
-	alarm_start(&timr->it.alarm.alarmtimer,
-			timespec_to_ktime(new_setting->it_value));
+	exp = timespec_to_ktime(new_setting->it_value);
+	/* Convert (if necessary) to absolute time */
+	if (flags != TIMER_ABSTIME) {
+		ktime_t now;
+
+		now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime();
+		exp = ktime_add(now, exp);
+	}
+
+	alarm_start(&timr->it.alarm.alarmtimer, exp);
 	return 0;
 }
 
@@ -685,6 +698,9 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
 	if (!alarmtimer_get_rtcdev())
 		return -ENOTSUPP;
 
+	if (flags & ~TIMER_ABSTIME)
+		return -EINVAL;
+
 	if (!capable(CAP_WAKE_ALARM))
 		return -EPERM;
 

From 4bbfb80c25ba29ba4fa1d0c7ed99f3c8b32a500f Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Mon, 14 Jul 2014 16:35:54 -0400
Subject: [PATCH 0080/1185] dm thin metadata: do not allow the data block size
 to change

commit 9aec8629ec829fc9403788cd959e05dd87988bd1 upstream.

The block size for the thin-pool's data device must remain fixed for
the life of the thin-pool.  Disallow any attempt to change the
thin-pool's data block size.

It should be noted that attempting to change the data block size via
thin-pool table reload will be ignored as a side-effect of the thin-pool
handover that the thin-pool target does during thin-pool table reload.

Here is an example outcome of attempting to load a thin-pool table that
reduced the thin-pool's data block size from 1024K to 512K.

Before:
kernel: device-mapper: thin: 253:4: growing the data device from 204800 to 409600 blocks

After:
kernel: device-mapper: thin metadata: changing the data block size (from 2048 to 1024) is not supported
kernel: device-mapper: table: 253:4: thin-pool: Error creating metadata object
kernel: device-mapper: ioctl: error adding target to table

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Acked-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-thin-metadata.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 5f49d704f275..3b1503dc1f13 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -591,6 +591,15 @@ static int __open_metadata(struct dm_pool_metadata *pmd)
 
 	disk_super = dm_block_data(sblock);
 
+	/* Verify the data block size hasn't changed */
+	if (le32_to_cpu(disk_super->data_block_size) != pmd->data_block_size) {
+		DMERR("changing the data block size (from %u to %llu) is not supported",
+		      le32_to_cpu(disk_super->data_block_size),
+		      (unsigned long long)pmd->data_block_size);
+		r = -EINVAL;
+		goto bad_unlock_sblock;
+	}
+
 	r = __check_incompat_features(disk_super, pmd);
 	if (r < 0)
 		goto bad_unlock_sblock;

From 8a09a31a13d9efce4e253e4facd63d631809b89e Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Mon, 14 Jul 2014 16:59:39 -0400
Subject: [PATCH 0081/1185] dm cache metadata: do not allow the data block size
 to change

commit 048e5a07f282c57815b3901d4a68a77fa131ce0a upstream.

The block size for the dm-cache's data device must remain fixed for
the life of the cache.  Disallow any attempt to change the cache's data
block size.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Acked-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-cache-metadata.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index a33e07f4222e..de737ba1d351 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -384,6 +384,15 @@ static int __open_metadata(struct dm_cache_metadata *cmd)
 
 	disk_super = dm_block_data(sblock);
 
+	/* Verify the data block size hasn't changed */
+	if (le32_to_cpu(disk_super->data_block_size) != cmd->data_block_size) {
+		DMERR("changing the data block size (from %u to %llu) is not supported",
+		      le32_to_cpu(disk_super->data_block_size),
+		      (unsigned long long)cmd->data_block_size);
+		r = -EINVAL;
+		goto bad;
+	}
+
 	r = __check_incompat_features(disk_super, cmd);
 	if (r < 0)
 		goto bad;

From 804536e8e033d7917a1384b89d1e29a3457ec429 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 15 Jul 2014 08:51:27 +0200
Subject: [PATCH 0082/1185] PM / sleep: Fix request_firmware() error at resume

commit 4320f6b1d9db4ca912c5eb6ecb328b2e090e1586 upstream.

The commit [247bc037: PM / Sleep: Mitigate race between the freezer
and request_firmware()] introduced the finer state control, but it
also leads to a new bug; for example, a bug report regarding the
firmware loading of intel BT device at suspend/resume:
  https://bugzilla.novell.com/show_bug.cgi?id=873790

The root cause seems to be a small window between the process resume
and the clear of usermodehelper lock.  The request_firmware() function
checks the UMH lock and gives up when it's in UMH_DISABLE state.  This
is for avoiding the invalid  f/w loading during suspend/resume phase.
The problem is, however, that usermodehelper_enable() is called at the
end of thaw_processes().  Thus, a thawed process in between can kick
off the f/w loader code path (in this case, via btusb_setup_intel())
even before the call of usermodehelper_enable().  Then
usermodehelper_read_trylock() returns an error and request_firmware()
spews WARN_ON() in the end.

This oneliner patch fixes the issue just by setting to UMH_FREEZING
state again before restarting tasks, so that the call of
request_firmware() will be blocked until the end of this function
instead of returning an error.

Fixes: 247bc0374254 (PM / Sleep: Mitigate race between the freezer and request_firmware())
Link: https://bugzilla.novell.com/show_bug.cgi?id=873790
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/power/process.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/power/process.c b/kernel/power/process.c
index 98088e0e71e8..1b212bee1510 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -174,6 +174,7 @@ void thaw_processes(void)
 
 	printk("Restarting tasks ... ");
 
+	__usermodehelper_set_disable_depth(UMH_FREEZING);
 	thaw_workqueues();
 
 	read_lock(&tasklist_lock);

From e6be7d3115436b2527c60973c901ec3a7c6afe15 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 6 Jun 2014 19:53:16 +0200
Subject: [PATCH 0083/1185] locking/mutex: Disable optimistic spinning on some
 architectures

commit 4badad352a6bb202ec68afa7a574c0bb961e5ebc upstream.

The optimistic spin code assumes regular stores and cmpxchg() play nice;
this is found to not be true for at least: parisc, sparc32, tile32,
metag-lock1, arc-!llsc and hexagon.

There is further wreckage, but this in particular seemed easy to
trigger, so blacklist this.

Opt in for known good archs.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Reported-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: David Miller <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: James Bottomley <James.Bottomley@hansenpartnership.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Waiman Long <waiman.long@hp.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: John David Anglin <dave.anglin@bell.net>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Davidlohr Bueso <davidlohr@hp.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: sparclinux@vger.kernel.org
Link: http://lkml.kernel.org/r/20140606175316.GV13930@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/Kconfig     | 1 +
 arch/arm64/Kconfig   | 1 +
 arch/powerpc/Kconfig | 1 +
 arch/sparc/Kconfig   | 1 +
 arch/x86/Kconfig     | 1 +
 kernel/Kconfig.locks | 5 ++++-
 6 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 18a9f5ef643a..d41951246cd6 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -4,6 +4,7 @@ config ARM
 	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select ARCH_HAVE_CUSTOM_GPIO_H
+	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_EXTABLE_SORT if MMU
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 56b3f6d447ae..0677ff4814fa 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1,6 +1,7 @@
 config ARM64
 	def_bool y
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	select ARCH_WANT_FRAME_POINTERS
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index fe404e77246e..7f656f119ea6 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -138,6 +138,7 @@ config PPC
 	select ARCH_USE_BUILTIN_BSWAP
 	select OLD_SIGSUSPEND
 	select OLD_SIGACTION if PPC32
+	select ARCH_SUPPORTS_ATOMIC_RMW
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 2668b3142fa2..03a1bc3c3dde 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -77,6 +77,7 @@ config SPARC64
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select HAVE_C_RECORDMCOUNT
 	select NO_BOOTMEM
+	select ARCH_SUPPORTS_ATOMIC_RMW
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fe120da25625..af88b27ce313 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -121,6 +121,7 @@ config X86
 	select OLD_SIGACTION if X86_32
 	select COMPAT_OLD_SIGACTION if IA32_EMULATION
 	select RTC_LIB
+	select ARCH_SUPPORTS_ATOMIC_RMW
 
 config INSTRUCTION_DECODER
 	def_bool y
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 44511d100eaa..e4d30533c562 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -220,6 +220,9 @@ config INLINE_WRITE_UNLOCK_IRQRESTORE
 
 endif
 
+config ARCH_SUPPORTS_ATOMIC_RMW
+	bool
+
 config MUTEX_SPIN_ON_OWNER
 	def_bool y
-	depends on SMP && !DEBUG_MUTEXES
+	depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW

From 4aba6e36347232a9e0cc2e9c8daf42a6bdcdad66 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mguzik@redhat.com>
Date: Sat, 14 Jun 2014 15:00:09 +0200
Subject: [PATCH 0084/1185] sched: Fix possible divide by zero in avg_atom()
 calculation

commit b0ab99e7736af88b8ac1b7ae50ea287fffa2badc upstream.

proc_sched_show_task() does:

  if (nr_switches)
	do_div(avg_atom, nr_switches);

nr_switches is unsigned long and do_div truncates it to 32 bits, which
means it can test non-zero on e.g. x86-64 and be truncated to zero for
division.

Fix the problem by using div64_ul() instead.

As a side effect calculations of avg_atom for big nr_switches are now correct.

Signed-off-by: Mateusz Guzik <mguzik@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1402750809-31991-1-git-send-email-mguzik@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/sched/debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index e745a1548367..701b6c8a4b12 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -551,7 +551,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 
 		avg_atom = p->se.sum_exec_runtime;
 		if (nr_switches)
-			do_div(avg_atom, nr_switches);
+			avg_atom = div64_ul(avg_atom, nr_switches);
 		else
 			avg_atom = -1LL;
 

From a290f3552cc7b68398df8bbca5290bad0867827b Mon Sep 17 00:00:00 2001
From: Anton Kolesov <Anton.Kolesov@synopsys.com>
Date: Fri, 20 Jun 2014 20:28:39 +0400
Subject: [PATCH 0085/1185] ARC: Implement ptrace(PTRACE_GET_THREAD_AREA)

commit a4b6cb735b25aa84a462a1985e3e43bebaf5beb4 upstream.

This patch adds implementation of GET_THREAD_AREA ptrace request type. This
is required by GDB to debug NPTL applications.

Signed-off-by: Anton Kolesov <Anton.Kolesov@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/include/uapi/asm/ptrace.h | 1 +
 arch/arc/kernel/ptrace.c           | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h
index 30333cec0fef..ef9d79a3db25 100644
--- a/arch/arc/include/uapi/asm/ptrace.h
+++ b/arch/arc/include/uapi/asm/ptrace.h
@@ -11,6 +11,7 @@
 #ifndef _UAPI__ASM_ARC_PTRACE_H
 #define _UAPI__ASM_ARC_PTRACE_H
 
+#define PTRACE_GET_THREAD_AREA	25
 
 #ifndef __ASSEMBLY__
 /*
diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c
index 0851604bb9cd..f8a36ed9e0d5 100644
--- a/arch/arc/kernel/ptrace.c
+++ b/arch/arc/kernel/ptrace.c
@@ -136,6 +136,10 @@ long arch_ptrace(struct task_struct *child, long request,
 	pr_debug("REQ=%ld: ADDR =0x%lx, DATA=0x%lx)\n", request, addr, data);
 
 	switch (request) {
+	case PTRACE_GET_THREAD_AREA:
+		ret = put_user(task_thread_info(child)->thr_ptr,
+			       (unsigned long __user *)data);
+		break;
 	default:
 		ret = ptrace_request(child, request, addr, data);
 		break;

From 92488f4c9f687cc0e274be561f7b168743f59f20 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 28 Jul 2014 08:00:59 -0700
Subject: [PATCH 0086/1185] Linux 3.10.50

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index b8b8d33eab55..8d891c66803c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 10
-SUBLEVEL = 49
+SUBLEVEL = 50
 EXTRAVERSION =
 NAME = TOSSUG Baby Fish
 

From 687f999e1fbd3b553bccbd7f52996ae56c5e327e Mon Sep 17 00:00:00 2001
From: JP Abgrall <jpa@google.com>
Date: Wed, 23 Jul 2014 16:55:07 -0700
Subject: [PATCH 0087/1185] ext4: Add support for FIDTRIM, a best-effort ioctl
 for deep discard trim

* What
This provides an interface for issuing an FITRIM which uses the
secure discard instead of just a discard.
Only the eMMC command is "secure", and not how the FS uses it:
due to the fact that the FS might reassign a region somewhere else,
the original deleted data will not be affected by the "trim" which only
handles un-used regions.
So we'll just call it "deep discard", and note that this is a
"best effort" cleanup.

* Why
Once in a while, we want to be able to clean up most of the unused blocks
after erasing a bunch of files.
We don't want to constantly secure-discard via a mount option.

From an eMMC spec perspective, it tells the device to really get rid of
all the data for the specified blocks and not just put them back into the
pool of free ones (unlike the normal TRIM). The eMMC spec says the
secure trim handling must make sure the data (and metadata) is not available
anymore. A simple TRIM doesn't clear the data, it just puts blocks in the
free pool.
JEDEC Standard No. 84-A441
  7.6.9 Secure Erase
  7.6.10 Secure Trim

From an FS perspective, it is acceptable to leave some data behind.
 - directory entries related to deleted files
 - databases entries related to deleted files
 - small-file data stored in inode extents
 - blocks held by the FS waiting to be re-used (mitigated by sync).
 - blocks reassigned by the FS prior to FIDTRIM.

Change-Id: I676a1404a80130d93930c84898360f2e6fb2f81e
Signed-off-by: Geremy Condra <gcondra@google.com>
Signed-off-by: JP Abgrall <jpa@google.com>
---
 fs/ext4/ext4.h          |  3 ++-
 fs/ext4/ioctl.c         |  6 +++++-
 fs/ext4/mballoc.c       | 28 ++++++++++++++++++----------
 include/uapi/linux/fs.h |  2 ++
 4 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5aae3d12d400..09ace38b451a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2050,7 +2050,8 @@ extern int ext4_mb_add_groupinfo(struct super_block *sb,
 		ext4_group_t i, struct ext4_group_desc *desc);
 extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 				ext4_fsblk_t block, unsigned long count);
-extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
+extern int ext4_trim_fs(struct super_block *, struct fstrim_range *,
+				unsigned long blkdev_flags);
 
 /* inode.c */
 struct buffer_head *ext4_getblk(handle_t *, struct inode *,
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 9491ac0590f7..d01a05593be5 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -594,11 +594,13 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return err;
 	}
 
+	case FIDTRIM:
 	case FITRIM:
 	{
 		struct request_queue *q = bdev_get_queue(sb->s_bdev);
 		struct fstrim_range range;
 		int ret = 0;
+		int flags  = cmd == FIDTRIM ? BLKDEV_DISCARD_SECURE : 0;
 
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
@@ -606,13 +608,15 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		if (!blk_queue_discard(q))
 			return -EOPNOTSUPP;
 
+		if ((flags & BLKDEV_DISCARD_SECURE) && !blk_queue_secdiscard(q))
+			return -EOPNOTSUPP;
 		if (copy_from_user(&range, (struct fstrim_range __user *)arg,
 		    sizeof(range)))
 			return -EFAULT;
 
 		range.minlen = max((unsigned int)range.minlen,
 				   q->limits.discard_granularity);
-		ret = ext4_trim_fs(sb, &range);
+		ret = ext4_trim_fs(sb, &range, flags);
 		if (ret < 0)
 			return ret;
 
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index def84082a9a9..50375b0f6022 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2705,7 +2705,8 @@ int ext4_mb_release(struct super_block *sb)
 }
 
 static inline int ext4_issue_discard(struct super_block *sb,
-		ext4_group_t block_group, ext4_grpblk_t cluster, int count)
+		ext4_group_t block_group, ext4_grpblk_t cluster, int count,
+		unsigned long flags)
 {
 	ext4_fsblk_t discard_block;
 
@@ -2714,7 +2715,7 @@ static inline int ext4_issue_discard(struct super_block *sb,
 	count = EXT4_C2B(EXT4_SB(sb), count);
 	trace_ext4_discard_blocks(sb,
 			(unsigned long long) discard_block, count);
-	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
+	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags);
 }
 
 /*
@@ -2736,7 +2737,7 @@ static void ext4_free_data_callback(struct super_block *sb,
 	if (test_opt(sb, DISCARD)) {
 		err = ext4_issue_discard(sb, entry->efd_group,
 					 entry->efd_start_cluster,
-					 entry->efd_count);
+					 entry->efd_count, 0);
 		if (err && err != -EOPNOTSUPP)
 			ext4_msg(sb, KERN_WARNING, "discard request in"
 				 " group:%d block:%d count:%d failed"
@@ -4755,7 +4756,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 		 * them with group lock_held
 		 */
 		if (test_opt(sb, DISCARD)) {
-			err = ext4_issue_discard(sb, block_group, bit, count);
+			err = ext4_issue_discard(sb, block_group, bit, count,
+						 0);
 			if (err && err != -EOPNOTSUPP)
 				ext4_msg(sb, KERN_WARNING, "discard request in"
 					 " group:%d block:%d count:%lu failed"
@@ -4950,13 +4952,15 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
  * @count:	number of blocks to TRIM
  * @group:	alloc. group we are working with
  * @e4b:	ext4 buddy for the group
+ * @blkdev_flags: flags for the block device
  *
  * Trim "count" blocks starting at "start" in the "group". To assure that no
  * one will allocate those blocks, mark it as used in buddy bitmap. This must
  * be called with under the group lock.
  */
 static int ext4_trim_extent(struct super_block *sb, int start, int count,
-			     ext4_group_t group, struct ext4_buddy *e4b)
+			    ext4_group_t group, struct ext4_buddy *e4b,
+			    unsigned long blkdev_flags)
 {
 	struct ext4_free_extent ex;
 	int ret = 0;
@@ -4975,7 +4979,7 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
 	 */
 	mb_mark_used(e4b, &ex);
 	ext4_unlock_group(sb, group);
-	ret = ext4_issue_discard(sb, group, start, count);
+	ret = ext4_issue_discard(sb, group, start, count, blkdev_flags);
 	ext4_lock_group(sb, group);
 	mb_free_blocks(NULL, e4b, start, ex.fe_len);
 	return ret;
@@ -4988,6 +4992,7 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
  * @start:		first group block to examine
  * @max:		last group block to examine
  * @minblocks:		minimum extent block count
+ * @blkdev_flags:	flags for the block device
  *
  * ext4_trim_all_free walks through group's buddy bitmap searching for free
  * extents. When the free block is found, ext4_trim_extent is called to TRIM
@@ -5002,7 +5007,7 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
 static ext4_grpblk_t
 ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
 		   ext4_grpblk_t start, ext4_grpblk_t max,
-		   ext4_grpblk_t minblocks)
+		   ext4_grpblk_t minblocks, unsigned long blkdev_flags)
 {
 	void *bitmap;
 	ext4_grpblk_t next, count = 0, free_count = 0;
@@ -5035,7 +5040,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
 
 		if ((next - start) >= minblocks) {
 			ret = ext4_trim_extent(sb, start,
-					       next - start, group, &e4b);
+					       next - start, group, &e4b,
+					       blkdev_flags);
 			if (ret && ret != -EOPNOTSUPP)
 				break;
 			ret = 0;
@@ -5077,6 +5083,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
  * ext4_trim_fs() -- trim ioctl handle function
  * @sb:			superblock for filesystem
  * @range:		fstrim_range structure
+ * @blkdev_flags:	flags for the block device
  *
  * start:	First Byte to trim
  * len:		number of Bytes to trim from start
@@ -5085,7 +5092,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
  * start to start+len. For each such a group ext4_trim_all_free function
  * is invoked to trim all free space.
  */
-int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range,
+			unsigned long blkdev_flags)
 {
 	struct ext4_group_info *grp;
 	ext4_group_t group, first_group, last_group;
@@ -5141,7 +5149,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 
 		if (grp->bb_free >= minlen) {
 			cnt = ext4_trim_all_free(sb, group, first_cluster,
-						end, minlen);
+						end, minlen, blkdev_flags);
 			if (cnt < 0) {
 				ret = cnt;
 				break;
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index a4ed56cf0eac..5014a5c472ed 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -154,6 +154,8 @@ struct inodes_stat_t {
 #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
 #define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
 
+#define FIDTRIM	_IOWR('f', 128, struct fstrim_range)	/* Deep discard trim */
+
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
 #define	FS_IOC_GETVERSION		_IOR('v', 1, long)

From 926a693a348963a08b5710552e4e7209d2dc126c Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Mon, 16 Jun 2014 09:08:29 -0300
Subject: [PATCH 0088/1185] media: hdpvr: fix two audio bugs

commit 3445857b22eafb70a6ac258979e955b116bfd2c6 upstream.

When the audio encoding is changed the driver calls hdpvr_set_audio
with the current opt->audio_input value. However, that should have
been opt->audio_input + 1. So changing the audio encoding inadvertently
changes the input as well. This bug has always been there.

The second bug was introduced in kernel 3.10 and that broke the
default_audio_input module option handling: the audio encoding was
never switched to AC3 if default_audio_input was set to 2 (SPDIF input).

In addition, since the audio encoding is always set at the start as of
3.10, the first bug now always happens when the driver is loaded.
In the past this bug would only surface if the user would change the
audio encoding after the driver was loaded.

Also fixes a small trivial typo (bufffer -> buffer).

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Reported-by: Scott Doty <scott@corp.sonic.net>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/hdpvr/hdpvr-video.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/usb/hdpvr/hdpvr-video.c b/drivers/media/usb/hdpvr/hdpvr-video.c
index 774ba0e820be..eed70a4d24e6 100644
--- a/drivers/media/usb/hdpvr/hdpvr-video.c
+++ b/drivers/media/usb/hdpvr/hdpvr-video.c
@@ -81,7 +81,7 @@ static void hdpvr_read_bulk_callback(struct urb *urb)
 }
 
 /*=========================================================================*/
-/* bufffer bits */
+/* buffer bits */
 
 /* function expects dev->io_mutex to be hold by caller */
 int hdpvr_cancel_queue(struct hdpvr_device *dev)
@@ -921,7 +921,7 @@ static int hdpvr_s_ctrl(struct v4l2_ctrl *ctrl)
 	case V4L2_CID_MPEG_AUDIO_ENCODING:
 		if (dev->flags & HDPVR_FLAG_AC3_CAP) {
 			opt->audio_codec = ctrl->val;
-			return hdpvr_set_audio(dev, opt->audio_input,
+			return hdpvr_set_audio(dev, opt->audio_input + 1,
 					      opt->audio_codec);
 		}
 		return 0;
@@ -1191,7 +1191,7 @@ int hdpvr_register_videodev(struct hdpvr_device *dev, struct device *parent,
 	v4l2_ctrl_new_std_menu(hdl, &hdpvr_ctrl_ops,
 		V4L2_CID_MPEG_AUDIO_ENCODING,
 		ac3 ? V4L2_MPEG_AUDIO_ENCODING_AC3 : V4L2_MPEG_AUDIO_ENCODING_AAC,
-		0x7, V4L2_MPEG_AUDIO_ENCODING_AAC);
+		0x7, ac3 ? dev->options.audio_codec : V4L2_MPEG_AUDIO_ENCODING_AAC);
 	v4l2_ctrl_new_std_menu(hdl, &hdpvr_ctrl_ops,
 		V4L2_CID_MPEG_VIDEO_ENCODING,
 		V4L2_MPEG_VIDEO_ENCODING_MPEG_4_AVC, 0x3,

From 18bfdaeaa42e36d49bf82443e890db9448d4bebf Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Fri, 4 Jul 2014 05:44:39 -0300
Subject: [PATCH 0089/1185] media: tda10071: force modulation to QPSK on DVB-S

commit db4175ae2095634dbecd4c847da439f9c83e1b3b upstream.

The only supported modulation for DVB-S is QPSK. The modulation parameter
contains an invalid value for DVB-S in some cases, which leads to the
driver refusing the tuning attempt. Due to that, hard code modulation to
QPSK in case of DVB-S.

Signed-off-by: Antti Palosaari <crope@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/dvb-frontends/tda10071.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/media/dvb-frontends/tda10071.c b/drivers/media/dvb-frontends/tda10071.c
index 36eb27d3fdf1..def7812d7b22 100644
--- a/drivers/media/dvb-frontends/tda10071.c
+++ b/drivers/media/dvb-frontends/tda10071.c
@@ -667,6 +667,7 @@ static int tda10071_set_frontend(struct dvb_frontend *fe)
 	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
 	int ret, i;
 	u8 mode, rolloff, pilot, inversion, div;
+	fe_modulation_t modulation;
 
 	dev_dbg(&priv->i2c->dev, "%s: delivery_system=%d modulation=%d " \
 		"frequency=%d symbol_rate=%d inversion=%d pilot=%d " \
@@ -701,10 +702,13 @@ static int tda10071_set_frontend(struct dvb_frontend *fe)
 
 	switch (c->delivery_system) {
 	case SYS_DVBS:
+		modulation = QPSK;
 		rolloff = 0;
 		pilot = 2;
 		break;
 	case SYS_DVBS2:
+		modulation = c->modulation;
+
 		switch (c->rolloff) {
 		case ROLLOFF_20:
 			rolloff = 2;
@@ -749,7 +753,7 @@ static int tda10071_set_frontend(struct dvb_frontend *fe)
 
 	for (i = 0, mode = 0xff; i < ARRAY_SIZE(TDA10071_MODCOD); i++) {
 		if (c->delivery_system == TDA10071_MODCOD[i].delivery_system &&
-			c->modulation == TDA10071_MODCOD[i].modulation &&
+			modulation == TDA10071_MODCOD[i].modulation &&
 			c->fec_inner == TDA10071_MODCOD[i].fec) {
 			mode = TDA10071_MODCOD[i].val;
 			dev_dbg(&priv->i2c->dev, "%s: mode found=%02x\n",

From 668b7a05f2fa59c60ca9820269e3d8dfaa218693 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 2 Jul 2014 12:46:23 -0400
Subject: [PATCH 0090/1185] block: provide compat ioctl for BLKZEROOUT

commit 3b3a1814d1703027f9867d0f5cbbfaf6c7482474 upstream.

This patch provides the compat BLKZEROOUT ioctl. The argument is a pointer
to two uint64_t values, so there is no need to translate it.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 block/compat_ioctl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 7c668c8a6f95..21ad6869a5ce 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -689,6 +689,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	case BLKROSET:
 	case BLKDISCARD:
 	case BLKSECDISCARD:
+	case BLKZEROOUT:
 	/*
 	 * the ones below are implemented in blkdev_locked_ioctl,
 	 * but we call blkdev_ioctl, which gets the lock for us

From cb454b6d31756674d2e0ceaa336ec87019728d9b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Jul 2014 12:25:28 +0200
Subject: [PATCH 0091/1185] block: don't assume last put of shared tags is for
 the host

commit d45b3279a5a2252cafcd665bbf2db8c9b31ef783 upstream.

There is no inherent reason why the last put of a tag structure must be
the one for the Scsi_Host, as device model objects can be held for
arbitrary periods.  Merge blk_free_tags and __blk_free_tags into a single
function that just releases a reference and get rid of the BUG() when the
host reference wasn't the last.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 block/blk-tag.c | 33 +++++++--------------------------
 1 file changed, 7 insertions(+), 26 deletions(-)

diff --git a/block/blk-tag.c b/block/blk-tag.c
index cc345e1d8d4e..0c51b4b34f47 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -27,18 +27,15 @@ struct request *blk_queue_find_tag(struct request_queue *q, int tag)
 EXPORT_SYMBOL(blk_queue_find_tag);
 
 /**
- * __blk_free_tags - release a given set of tag maintenance info
+ * blk_free_tags - release a given set of tag maintenance info
  * @bqt:	the tag map to free
  *
- * Tries to free the specified @bqt.  Returns true if it was
- * actually freed and false if there are still references using it
+ * Drop the reference count on @bqt and frees it when the last reference
+ * is dropped.
  */
-static int __blk_free_tags(struct blk_queue_tag *bqt)
+void blk_free_tags(struct blk_queue_tag *bqt)
 {
-	int retval;
-
-	retval = atomic_dec_and_test(&bqt->refcnt);
-	if (retval) {
+	if (atomic_dec_and_test(&bqt->refcnt)) {
 		BUG_ON(find_first_bit(bqt->tag_map, bqt->max_depth) <
 							bqt->max_depth);
 
@@ -50,9 +47,8 @@ static int __blk_free_tags(struct blk_queue_tag *bqt)
 
 		kfree(bqt);
 	}
-
-	return retval;
 }
+EXPORT_SYMBOL(blk_free_tags);
 
 /**
  * __blk_queue_free_tags - release tag maintenance info
@@ -69,27 +65,12 @@ void __blk_queue_free_tags(struct request_queue *q)
 	if (!bqt)
 		return;
 
-	__blk_free_tags(bqt);
+	blk_free_tags(bqt);
 
 	q->queue_tags = NULL;
 	queue_flag_clear_unlocked(QUEUE_FLAG_QUEUED, q);
 }
 
-/**
- * blk_free_tags - release a given set of tag maintenance info
- * @bqt:	the tag map to free
- *
- * For externally managed @bqt frees the map.  Callers of this
- * function must guarantee to have released all the queues that
- * might have been using this tag map.
- */
-void blk_free_tags(struct blk_queue_tag *bqt)
-{
-	if (unlikely(!__blk_free_tags(bqt)))
-		BUG();
-}
-EXPORT_SYMBOL(blk_free_tags);
-
 /**
  * blk_queue_free_tags - release tag maintenance info
  * @q:  the request queue for the device

From 97a230703c9f6cbed3bdd4d4a627863c14a8a2de Mon Sep 17 00:00:00 2001
From: Kevin Hao <haokexin@gmail.com>
Date: Sat, 12 Jul 2014 12:08:24 +0800
Subject: [PATCH 0092/1185] libata: support the ata host which implements a
 queue depth less than 32

commit 1871ee134b73fb4cadab75752a7152ed2813c751 upstream.

The sata on fsl mpc8315e is broken after the commit 8a4aeec8d2d6
("libata/ahci: accommodate tag ordered controllers"). The reason is
that the ata controller on this SoC only implements a queue depth of
16. When issuing the commands in tag order, all the commands in tag
16 ~ 31 are mapped to tag 0 unconditionally and then cause the sata
to malfunction. It makes no sense to use a 32 queue in software while
the hardware has less queue depth. So consider the queue depth
implemented by the hardware when requesting a command tag.

Fixes: 8a4aeec8d2d6 ("libata/ahci: accommodate tag ordered controllers")
Signed-off-by: Kevin Hao <haokexin@gmail.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/libata-core.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index bf00fbcde8ad..d7fb8269cb73 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4758,6 +4758,10 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words)
  *	ata_qc_new - Request an available ATA command, for queueing
  *	@ap: target port
  *
+ *	Some ATA host controllers may implement a queue depth which is less
+ *	than ATA_MAX_QUEUE. So we shouldn't allocate a tag which is beyond
+ *	the hardware limitation.
+ *
  *	LOCKING:
  *	None.
  */
@@ -4765,14 +4769,16 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words)
 static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
 {
 	struct ata_queued_cmd *qc = NULL;
-	unsigned int i, tag;
+	unsigned int i, tag, max_queue;
+
+	max_queue = ap->scsi_host->can_queue;
 
 	/* no command while frozen */
 	if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
 		return NULL;
 
-	for (i = 0; i < ATA_MAX_QUEUE; i++) {
-		tag = (i + ap->last_tag + 1) % ATA_MAX_QUEUE;
+	for (i = 0, tag = ap->last_tag + 1; i < max_queue; i++, tag++) {
+		tag = tag < max_queue ? tag : 0;
 
 		/* the last tag is reserved for internal command. */
 		if (tag == ATA_TAG_INTERNAL)
@@ -6154,6 +6160,16 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht)
 {
 	int i, rc;
 
+	/*
+	 * The max queue supported by hardware must not be greater than
+	 * ATA_MAX_QUEUE.
+	 */
+	if (sht->can_queue > ATA_MAX_QUEUE) {
+		dev_err(host->dev, "BUG: the hardware max queue is too large\n");
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
 	/* host must have been started */
 	if (!(host->flags & ATA_HOST_STARTED)) {
 		dev_err(host->dev, "BUG: trying to register unstarted host\n");

From 03cccb9c9ec674407285d3a000b9380c946e21c1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Jul 2014 09:05:27 -0400
Subject: [PATCH 0093/1185] libata: introduce ata_host->n_tags to avoid oops on
 SAS controllers

commit 1a112d10f03e83fb3a2fdc4c9165865dec8a3ca6 upstream.

1871ee134b73 ("libata: support the ata host which implements a queue
depth less than 32") directly used ata_port->scsi_host->can_queue from
ata_qc_new() to determine the number of tags supported by the host;
unfortunately, SAS controllers doing SATA don't initialize ->scsi_host
leading to the following oops.

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000058
 IP: [<ffffffff814e0618>] ata_qc_new_init+0x188/0x1b0
 PGD 0
 Oops: 0002 [#1] SMP
 Modules linked in: isci libsas scsi_transport_sas mgag200 drm_kms_helper ttm
 CPU: 1 PID: 518 Comm: udevd Not tainted 3.16.0-rc6+ #62
 Hardware name: Intel Corporation S2600CO/S2600CO, BIOS SE5C600.86B.02.02.0002.122320131210 12/23/2013
 task: ffff880c1a00b280 ti: ffff88061a000000 task.ti: ffff88061a000000
 RIP: 0010:[<ffffffff814e0618>]  [<ffffffff814e0618>] ata_qc_new_init+0x188/0x1b0
 RSP: 0018:ffff88061a003ae8  EFLAGS: 00010012
 RAX: 0000000000000001 RBX: ffff88000241ca80 RCX: 00000000000000fa
 RDX: 0000000000000020 RSI: 0000000000000020 RDI: ffff8806194aa298
 RBP: ffff88061a003ae8 R08: ffff8806194a8000 R09: 0000000000000000
 R10: 0000000000000000 R11: ffff88000241ca80 R12: ffff88061ad58200
 R13: ffff8806194aa298 R14: ffffffff814e67a0 R15: ffff8806194a8000
 FS:  00007f3ad7fe3840(0000) GS:ffff880627620000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000058 CR3: 000000061a118000 CR4: 00000000001407e0
 Stack:
  ffff88061a003b20 ffffffff814e96e1 ffff88000241ca80 ffff88061ad58200
  ffff8800b6bf6000 ffff880c1c988000 ffff880619903850 ffff88061a003b68
  ffffffffa0056ce1 ffff88061a003b48 0000000013d6e6f8 ffff88000241ca80
 Call Trace:
  [<ffffffff814e96e1>] ata_sas_queuecmd+0xa1/0x430
  [<ffffffffa0056ce1>] sas_queuecommand+0x191/0x220 [libsas]
  [<ffffffff8149afee>] scsi_dispatch_cmd+0x10e/0x300 [<ffffffff814a3bc5>] scsi_request_fn+0x2f5/0x550
  [<ffffffff81317613>] __blk_run_queue+0x33/0x40
  [<ffffffff8131781a>] queue_unplugged+0x2a/0x90
  [<ffffffff8131ceb4>] blk_flush_plug_list+0x1b4/0x210
  [<ffffffff8131d274>] blk_finish_plug+0x14/0x50
  [<ffffffff8117eaa8>] __do_page_cache_readahead+0x198/0x1f0
  [<ffffffff8117ee21>] force_page_cache_readahead+0x31/0x50
  [<ffffffff8117ee7e>] page_cache_sync_readahead+0x3e/0x50
  [<ffffffff81172ac6>] generic_file_read_iter+0x496/0x5a0
  [<ffffffff81219897>] blkdev_read_iter+0x37/0x40
  [<ffffffff811e307e>] new_sync_read+0x7e/0xb0
  [<ffffffff811e3734>] vfs_read+0x94/0x170
  [<ffffffff811e43c6>] SyS_read+0x46/0xb0
  [<ffffffff811e33d1>] ? SyS_lseek+0x91/0xb0
  [<ffffffff8171ee29>] system_call_fastpath+0x16/0x1b
 Code: 00 00 00 88 50 29 83 7f 08 01 19 d2 83 e2 f0 83 ea 50 88 50 34 c6 81 1d 02 00 00 40 c6 81 17 02 00 00 00 5d c3 66 0f 1f 44 00 00 <89> 14 25 58 00 00 00

Fix it by introducing ata_host->n_tags which is initialized to
ATA_MAX_QUEUE - 1 in ata_host_init() for SAS controllers and set to
scsi_host_template->can_queue in ata_host_register() for !SAS ones.
As SAS hosts are never registered, this will give them the same
ATA_MAX_QUEUE - 1 as before.  Note that we can't use
scsi_host->can_queue directly for SAS hosts anyway as they can go
higher than the libata maximum.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Mike Qiu <qiudayu@linux.vnet.ibm.com>
Reported-by: Jesse Brandeburg <jesse.brandeburg@gmail.com>
Reported-by: Peter Hurley <peter@hurleysoftware.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Fixes: 1871ee134b73 ("libata: support the ata host which implements a queue depth less than 32")
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/libata-core.c | 16 ++++------------
 include/linux/libata.h    |  1 +
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index d7fb8269cb73..ca7c23d58a03 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4769,9 +4769,8 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words)
 static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
 {
 	struct ata_queued_cmd *qc = NULL;
-	unsigned int i, tag, max_queue;
-
-	max_queue = ap->scsi_host->can_queue;
+	unsigned int max_queue = ap->host->n_tags;
+	unsigned int i, tag;
 
 	/* no command while frozen */
 	if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
@@ -6079,6 +6078,7 @@ void ata_host_init(struct ata_host *host, struct device *dev,
 {
 	spin_lock_init(&host->lock);
 	mutex_init(&host->eh_mutex);
+	host->n_tags = ATA_MAX_QUEUE - 1;
 	host->dev = dev;
 	host->ops = ops;
 }
@@ -6160,15 +6160,7 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht)
 {
 	int i, rc;
 
-	/*
-	 * The max queue supported by hardware must not be greater than
-	 * ATA_MAX_QUEUE.
-	 */
-	if (sht->can_queue > ATA_MAX_QUEUE) {
-		dev_err(host->dev, "BUG: the hardware max queue is too large\n");
-		WARN_ON(1);
-		return -EINVAL;
-	}
+	host->n_tags = clamp(sht->can_queue, 1, ATA_MAX_QUEUE - 1);
 
 	/* host must have been started */
 	if (!(host->flags & ATA_HOST_STARTED)) {
diff --git a/include/linux/libata.h b/include/linux/libata.h
index eec130af2dfa..cc82cfb66259 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -547,6 +547,7 @@ struct ata_host {
 	struct device 		*dev;
 	void __iomem * const	*iomap;
 	unsigned int		n_ports;
+	unsigned int		n_tags;			/* nr of NCQ tags */
 	void			*private_data;
 	struct ata_port_operations *ops;
 	unsigned long		flags;

From d35acb6ef571d995a03dd03b03f69f051bf018e0 Mon Sep 17 00:00:00 2001
From: Romain Degez <romain.degez@gmail.com>
Date: Fri, 11 Jul 2014 18:08:13 +0200
Subject: [PATCH 0094/1185] ahci: add support for the Promise FastTrak TX8660
 SATA HBA (ahci mode)

commit b32bfc06aefab61acc872dec3222624e6cd867ed upstream.

Add support of the Promise FastTrak TX8660 SATA HBA in ahci mode by
registering the board in the ahci_pci_tbl[].

Note: this HBA also provide a hardware RAID mode when activated in
BIOS but specific drivers from the manufacturer are required in this
case.

Signed-off-by: Romain Degez <romain.degez@gmail.com>
Tested-by: Romain Degez <romain.degez@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/ahci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index b0d33d9533aa..3b39687c6336 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -455,6 +455,7 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 
 	/* Promise */
 	{ PCI_VDEVICE(PROMISE, 0x3f20), board_ahci },	/* PDC42819 */
+	{ PCI_VDEVICE(PROMISE, 0x3781), board_ahci },   /* FastTrak TX8660 ahci-mode */
 
 	/* Asmedia */
 	{ PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci },	/* ASM1060 */

From cebdb6fa24dfd48af3bba1af6ba485b45430fb1c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sat, 5 Jul 2014 18:43:21 -0400
Subject: [PATCH 0095/1185] blkcg: don't call into policy draining if root_blkg
 is already gone

commit 0b462c89e31f7eb6789713437eb551833ee16ff3 upstream.

While a queue is being destroyed, all the blkgs are destroyed and its
->root_blkg pointer is set to NULL.  If someone else starts to drain
while the queue is in this state, the following oops happens.

  NULL pointer dereference at 0000000000000028
  IP: [<ffffffff8144e944>] blk_throtl_drain+0x84/0x230
  PGD e4a1067 PUD b773067 PMD 0
  Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
  Modules linked in: cfq_iosched(-) [last unloaded: cfq_iosched]
  CPU: 1 PID: 537 Comm: bash Not tainted 3.16.0-rc3-work+ #2
  Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
  task: ffff88000e222250 ti: ffff88000efd4000 task.ti: ffff88000efd4000
  RIP: 0010:[<ffffffff8144e944>]  [<ffffffff8144e944>] blk_throtl_drain+0x84/0x230
  RSP: 0018:ffff88000efd7bf0  EFLAGS: 00010046
  RAX: 0000000000000000 RBX: ffff880015091450 RCX: 0000000000000001
  RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
  RBP: ffff88000efd7c10 R08: 0000000000000000 R09: 0000000000000001
  R10: ffff88000e222250 R11: 0000000000000000 R12: ffff880015091450
  R13: ffff880015092e00 R14: ffff880015091d70 R15: ffff88001508fc28
  FS:  00007f1332650740(0000) GS:ffff88001fa80000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
  CR2: 0000000000000028 CR3: 0000000009446000 CR4: 00000000000006e0
  Stack:
   ffffffff8144e8f6 ffff880015091450 0000000000000000 ffff880015091d80
   ffff88000efd7c28 ffffffff8144ae2f ffff880015091450 ffff88000efd7c58
   ffffffff81427641 ffff880015091450 ffffffff82401f00 ffff880015091450
  Call Trace:
   [<ffffffff8144ae2f>] blkcg_drain_queue+0x1f/0x60
   [<ffffffff81427641>] __blk_drain_queue+0x71/0x180
   [<ffffffff81429b3e>] blk_queue_bypass_start+0x6e/0xb0
   [<ffffffff814498b8>] blkcg_deactivate_policy+0x38/0x120
   [<ffffffff8144ec44>] blk_throtl_exit+0x34/0x50
   [<ffffffff8144aea5>] blkcg_exit_queue+0x35/0x40
   [<ffffffff8142d476>] blk_release_queue+0x26/0xd0
   [<ffffffff81454968>] kobject_cleanup+0x38/0x70
   [<ffffffff81454848>] kobject_put+0x28/0x60
   [<ffffffff81427505>] blk_put_queue+0x15/0x20
   [<ffffffff817d07bb>] scsi_device_dev_release_usercontext+0x16b/0x1c0
   [<ffffffff810bc339>] execute_in_process_context+0x89/0xa0
   [<ffffffff817d064c>] scsi_device_dev_release+0x1c/0x20
   [<ffffffff817930e2>] device_release+0x32/0xa0
   [<ffffffff81454968>] kobject_cleanup+0x38/0x70
   [<ffffffff81454848>] kobject_put+0x28/0x60
   [<ffffffff817934d7>] put_device+0x17/0x20
   [<ffffffff817d11b9>] __scsi_remove_device+0xa9/0xe0
   [<ffffffff817d121b>] scsi_remove_device+0x2b/0x40
   [<ffffffff817d1257>] sdev_store_delete+0x27/0x30
   [<ffffffff81792ca8>] dev_attr_store+0x18/0x30
   [<ffffffff8126f75e>] sysfs_kf_write+0x3e/0x50
   [<ffffffff8126ea87>] kernfs_fop_write+0xe7/0x170
   [<ffffffff811f5e9f>] vfs_write+0xaf/0x1d0
   [<ffffffff811f69bd>] SyS_write+0x4d/0xc0
   [<ffffffff81d24692>] system_call_fastpath+0x16/0x1b

776687bce42b ("block, blk-mq: draining can't be skipped even if
bypass_depth was non-zero") made it easier to trigger this bug by
making blk_queue_bypass_start() drain even when it loses the first
bypass test to blk_cleanup_queue(); however, the bug has always been
there even before the commit as blk_queue_bypass_start() could race
against queue destruction, win the initial bypass test but perform the
actual draining after blk_cleanup_queue() already destroyed all blkgs.

Fix it by skipping calling into policy draining if all the blkgs are
already gone.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Shirish Pargaonkar <spargaonkar@suse.com>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Reported-by: Jet Chen <jet.chen@intel.com>
Tested-by: Shirish Pargaonkar <spargaonkar@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 block/blk-cgroup.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e8918ffaf96d..b95219d2168d 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -876,6 +876,13 @@ void blkcg_drain_queue(struct request_queue *q)
 {
 	lockdep_assert_held(q->queue_lock);
 
+	/*
+	 * @q could be exiting and already have destroyed all blkgs as
+	 * indicated by NULL root_blkg.  If so, don't confuse policies.
+	 */
+	if (!q->root_blkg)
+		return;
+
 	blk_throtl_drain(q);
 }
 

From efd39f7786aa4e7a847d5f8c9f5506e3b9ad6b38 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 18 Jul 2014 11:43:01 -0700
Subject: [PATCH 0096/1185] tracing: Fix wraparound problems in "uptime" trace
 clock

commit 58d4e21e50ff3cc57910a8abc20d7e14375d2f61 upstream.

The "uptime" trace clock added in:

    commit 8aacf017b065a805d27467843490c976835eb4a5
    tracing: Add "uptime" trace clock that uses jiffies

has wraparound problems when the system has been up more
than 1 hour 11 minutes and 34 seconds. It converts jiffies
to nanoseconds using:
        (u64)jiffies_to_usecs(jiffy) * 1000ULL
but since jiffies_to_usecs() only returns a 32-bit value, it
truncates at 2^32 microseconds.  An additional problem on 32-bit
systems is that the argument is "unsigned long", so fixing the
return value only helps until 2^32 jiffies (49.7 days on a HZ=1000
system).

Avoid these problems by using jiffies_64 as our basis, and
not converting to nanoseconds (we do convert to clock_t because
user facing API must not be dependent on internal kernel
HZ values).

Link: http://lkml.kernel.org/p/99d63c5bfe9b320a3b428d773825a37095bf6a51.1405708254.git.tony.luck@intel.com

Fixes: 8aacf017b065 "tracing: Add "uptime" trace clock that uses jiffies"
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace.c       | 2 +-
 kernel/trace/trace_clock.c | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 98a830d079b9..18cdf91b2f85 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -741,7 +741,7 @@ static struct {
 	{ trace_clock_local,	"local",	1 },
 	{ trace_clock_global,	"global",	1 },
 	{ trace_clock_counter,	"counter",	0 },
-	{ trace_clock_jiffies,	"uptime",	1 },
+	{ trace_clock_jiffies,	"uptime",	0 },
 	{ trace_clock,		"perf",		1 },
 	ARCH_TRACE_CLOCKS
 };
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 26dc348332b7..57b67b1f24d1 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -59,13 +59,14 @@ u64 notrace trace_clock(void)
 
 /*
  * trace_jiffy_clock(): Simply use jiffies as a clock counter.
+ * Note that this use of jiffies_64 is not completely safe on
+ * 32-bit systems. But the window is tiny, and the effect if
+ * we are affected is that we will have an obviously bogus
+ * timestamp on a trace event - i.e. not life threatening.
  */
 u64 notrace trace_clock_jiffies(void)
 {
-	u64 jiffy = jiffies - INITIAL_JIFFIES;
-
-	/* Return nsecs */
-	return (u64)jiffies_to_usecs(jiffy) * 1000ULL;
+	return jiffies_64_to_clock_t(jiffies_64 - INITIAL_JIFFIES);
 }
 
 /*

From a654d23f04adca40c74c4e820a6bf67fe9a187a1 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Sat, 21 Sep 2013 21:56:34 +0000
Subject: [PATCH 0097/1185] slab_common: Do not check for duplicate slab names

commit 3e374919b314f20e2a04f641ebc1093d758f66a4 upstream.

SLUB can alias multiple slab kmem_create_requests to one slab cache to save
memory and increase the cache hotness. As a result the name of the slab can be
stale. Only check the name for duplicates if we are in debug mode where we do
not merge multiple caches.

This fixes the following problem reported by Jonathan Brassow:

  The problem with kmem_cache* is this:

  *) Assume CONFIG_SLUB is set
  1) kmem_cache_create(name="foo-a")
  - creates new kmem_cache structure
  2) kmem_cache_create(name="foo-b")
  - If identical cache characteristics, it will be merged with the previously
    created cache associated with "foo-a".  The cache's refcount will be
    incremented and an alias will be created via sysfs_slab_alias().
  3) kmem_cache_destroy(<ptr>)
  - Attempting to destroy cache associated with "foo-a", but instead the
    refcount is simply decremented.  I don't even think the sysfs aliases are
    ever removed...
  4) kmem_cache_create(name="foo-a")
  - This FAILS because kmem_cache_sanity_check collides with the existing
    name ("foo-a") associated with the non-removed cache.

  This is a problem for RAID (specifically dm-raid) because the name used
  for the kmem_cache_create is ("raid%d-%p", level, mddev).  If the cache
  persists for long enough, the memory address of an old mddev will be
  reused for a new mddev - causing an identical formulation of the cache
  name.  Even though kmem_cache_destroy had long ago been used to delete
  the old cache, the merging of caches has caused the name and cache of that
  old instance to be preserved and causes a collision (and thus failure) in
  kmem_cache_create().  I see this regularly in my testing.

Reported-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/slab_common.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2d414508e9ec..8b05120dfc09 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -55,6 +55,7 @@ static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
 			continue;
 		}
 
+#if !defined(CONFIG_SLUB) || !defined(CONFIG_SLUB_DEBUG_ON)
 		/*
 		 * For simplicity, we won't check this in the list of memcg
 		 * caches. We have control over memcg naming, and if there
@@ -68,6 +69,7 @@ static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
 			s = NULL;
 			return -EINVAL;
 		}
+#endif
 	}
 
 	WARN_ON(strchr(name, ' '));	/* It confuses parsers */

From 6264198b34d26aa752f89fe9c5fcfdf4290c7fb5 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 4 Mar 2014 17:13:47 -0500
Subject: [PATCH 0098/1185] slab_common: fix the check for duplicate slab names

commit 694617474e33b8603fc76e090ed7d09376514b1a upstream.

The patch 3e374919b314f20e2a04f641ebc1093d758f66a4 is supposed to fix the
problem where kmem_cache_create incorrectly reports duplicate cache name
and fails. The problem is described in the header of that patch.

However, the patch doesn't really fix the problem because of these
reasons:

* the logic to test for debugging is reversed. It was intended to perform
  the check only if slub debugging is enabled (which implies that caches
  with the same parameters are not merged). Therefore, there should be
  #if !defined(CONFIG_SLUB) || defined(CONFIG_SLUB_DEBUG_ON)
  The current code has the condition reversed and performs the test if
  debugging is disabled.

* slub debugging may be enabled or disabled based on kernel command line,
  CONFIG_SLUB_DEBUG_ON is just the default settings. Therefore the test
  based on definition of CONFIG_SLUB_DEBUG_ON is unreliable.

This patch fixes the problem by removing the test
"!defined(CONFIG_SLUB_DEBUG_ON)". Therefore, duplicate names are never
checked if the SLUB allocator is used.

Note to stable kernel maintainers: when backporting this patch, please
backport also the patch 3e374919b314f20e2a04f641ebc1093d758f66a4.

Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/slab_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slab_common.c b/mm/slab_common.c
index 8b05120dfc09..7d21d3fddbf0 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -55,7 +55,7 @@ static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
 			continue;
 		}
 
-#if !defined(CONFIG_SLUB) || !defined(CONFIG_SLUB_DEBUG_ON)
+#if !defined(CONFIG_SLUB)
 		/*
 		 * For simplicity, we won't check this in the list of memcg
 		 * caches. We have control over memcg naming, and if there

From 6d53522cde3626ce08e280dc979bb0bb08d6d08b Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor@chromium.org>
Date: Sat, 19 Jul 2014 16:30:31 -0700
Subject: [PATCH 0099/1185] Input: fix defuzzing logic

commit 50c5d36dab930b1f1b1e3348b8608aa8b9ee7610 upstream.

We attempt to remove noise from coordinates reported by devices in
input_handle_abs_event(), unfortunately, unless we were dropping the
event altogether, we were ignoring the adjusted value and were passing
on the original value instead.

Reviewed-by: Andrew de los Reyes <adlr@chromium.org>
Reviewed-by: Benson Leung <bleung@chromium.org>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Reviewed-by: Henrik Rydberg <rydberg@euromail.se>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/input.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/input/input.c b/drivers/input/input.c
index 66984e272c45..a161021c4526 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -257,9 +257,10 @@ static int input_handle_abs_event(struct input_dev *dev,
 }
 
 static int input_get_disposition(struct input_dev *dev,
-			  unsigned int type, unsigned int code, int value)
+			  unsigned int type, unsigned int code, int *pval)
 {
 	int disposition = INPUT_IGNORE_EVENT;
+	int value = *pval;
 
 	switch (type) {
 
@@ -357,6 +358,7 @@ static int input_get_disposition(struct input_dev *dev,
 		break;
 	}
 
+	*pval = value;
 	return disposition;
 }
 
@@ -365,7 +367,7 @@ static void input_handle_event(struct input_dev *dev,
 {
 	int disposition;
 
-	disposition = input_get_disposition(dev, type, code, value);
+	disposition = input_get_disposition(dev, type, code, &value);
 
 	if ((disposition & INPUT_PASS_TO_DEVICE) && dev->event)
 		dev->event(dev, type, code, value);

From d1cc001905146d58c17ac8452eb96f226767819d Mon Sep 17 00:00:00 2001
From: Silesh C V <svellattu@mvista.com>
Date: Wed, 23 Jul 2014 13:59:59 -0700
Subject: [PATCH 0100/1185] coredump: fix the setting of PF_DUMPCORE

commit aed8adb7688d5744cb484226820163af31d2499a upstream.

Commit 079148b919d0 ("coredump: factor out the setting of PF_DUMPCORE")
cleaned up the setting of PF_DUMPCORE by removing it from all the
linux_binfmt->core_dump() and moving it to zap_threads().  But this ended
up clearing all the previously set flags.  This causes issues during
core generation when tsk->flags is checked again (eg.  for PF_USED_MATH
to dump floating point registers).  Fix this.

Signed-off-by: Silesh C V <svellattu@mvista.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: Mandeep Singh Baines <msb@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/coredump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index dafafbafa731..1d402ce5b72f 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -299,7 +299,7 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 	if (unlikely(nr < 0))
 		return nr;
 
-	tsk->flags = PF_DUMPCORE;
+	tsk->flags |= PF_DUMPCORE;
 	if (atomic_read(&mm->mm_users) == nr + 1)
 		goto done;
 	/*

From c423ba6f8e807b1d18baa729a58e52f625406d1e Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin@bell.net>
Date: Wed, 23 Jul 2014 19:44:12 -0400
Subject: [PATCH 0101/1185] parisc: Remove SA_RESTORER define

commit 20dbea494543aefaace874cc3ec93a39b94b1ec4 upstream.

The sa_restorer field in struct sigaction is obsolete and no longer in
the parisc implementation.  However, the core code assumes the field is
present if SA_RESTORER is defined. So, the define needs to be removed.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/parisc/include/uapi/asm/signal.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/parisc/include/uapi/asm/signal.h b/arch/parisc/include/uapi/asm/signal.h
index a2fa297196bc..f5645d6a89f2 100644
--- a/arch/parisc/include/uapi/asm/signal.h
+++ b/arch/parisc/include/uapi/asm/signal.h
@@ -69,8 +69,6 @@
 #define SA_NOMASK	SA_NODEFER
 #define SA_ONESHOT	SA_RESETHAND
 
-#define SA_RESTORER	0x04000000 /* obsolete -- ignored */
-
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 

From 69d15f41f7ee1ae8694511a1f134174ec1ffa337 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 18 Jul 2014 07:31:18 -0700
Subject: [PATCH 0102/1185] hwmon: (smsc47m192) Fix temperature limit and vrm
 write operations

commit 043572d5444116b9d9ad8ae763cf069e7accbc30 upstream.

Temperature limit clamps are applied after converting the temperature
from milli-degrees C to degrees C, so either the clamp limit needs
to be specified in degrees C, not milli-degrees C, or clamping must
happen before converting to degrees C. Use the latter method to avoid
overflows.

vrm is an u8, so the written value needs to be limited to [0, 255].

Cc: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/smsc47m192.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/smsc47m192.c b/drivers/hwmon/smsc47m192.c
index efee4c59239f..34b9a601ad07 100644
--- a/drivers/hwmon/smsc47m192.c
+++ b/drivers/hwmon/smsc47m192.c
@@ -86,7 +86,7 @@ static inline u8 IN_TO_REG(unsigned long val, int n)
  */
 static inline s8 TEMP_TO_REG(int val)
 {
-	return clamp_val(SCALE(val, 1, 1000), -128000, 127000);
+	return SCALE(clamp_val(val, -128000, 127000), 1, 1000);
 }
 
 static inline int TEMP_FROM_REG(s8 val)
@@ -384,6 +384,8 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
 	err = kstrtoul(buf, 10, &val);
 	if (err)
 		return err;
+	if (val > 255)
+		return -EINVAL;
 
 	data->vrm = val;
 	return count;

From b0c6b604a01b85e4f0ce86272e8e49ae9e52a8d5 Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Tue, 22 Jul 2014 10:26:06 +0200
Subject: [PATCH 0103/1185] x86_32, entry: Store badsys error code in %eax

commit 8142b215501f8b291a108a202b3a053a265b03dd upstream.

Commit 554086d ("x86_32, entry: Do syscall exit work on badsys
(CVE-2014-4508)") introduced a regression in the x86_32 syscall entry
code, resulting in syscall() not returning proper errors for undefined
syscalls on CPUs supporting the sysenter feature.

The following code:

> int result = syscall(666);
> printf("result=%d errno=%d error=%s\n", result, errno, strerror(errno));

results in:

> result=666 errno=0 error=Success

Obviously, the syscall return value is the called syscall number, but it
should have been an ENOSYS error. When run under ptrace it behaves
correctly, which makes it hard to debug in the wild:

> result=-1 errno=38 error=Function not implemented

The %eax register is the return value register. For debugging via ptrace
the syscall entry code stores the complete register context on the
stack. The badsys handlers only store the ENOSYS error code in the
ptrace register set and do not set %eax like a regular syscall handler
would. The old resume_userspace call chain contains code that clobbers
%eax and it restores %eax from the ptrace registers afterwards. The same
goes for the ptrace-enabled call chain. When ptrace is not used, the
syscall return value is the passed-in syscall number from the untouched
%eax register.

Use %eax as the return value register in syscall_badsys and
sysenter_badsys, like a real syscall handler does, and have the caller
push the value onto the stack for ptrace access.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Link: http://lkml.kernel.org/r/alpine.LNX.2.11.1407221022380.31021@titan.int.lan.stealer.net
Reviewed-and-tested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/entry_32.S | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index ac6328176097..08fa44443a01 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -436,8 +436,8 @@ sysenter_do_call:
 	cmpl $(NR_syscalls), %eax
 	jae sysenter_badsys
 	call *sys_call_table(,%eax,4)
-	movl %eax,PT_EAX(%esp)
 sysenter_after_call:
+	movl %eax,PT_EAX(%esp)
 	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
@@ -517,6 +517,7 @@ ENTRY(system_call)
 	jae syscall_badsys
 syscall_call:
 	call *sys_call_table(,%eax,4)
+syscall_after_call:
 	movl %eax,PT_EAX(%esp)		# store the return value
 syscall_exit:
 	LOCKDEP_SYS_EXIT
@@ -686,12 +687,12 @@ syscall_fault:
 END(syscall_fault)
 
 syscall_badsys:
-	movl $-ENOSYS,PT_EAX(%esp)
-	jmp syscall_exit
+	movl $-ENOSYS,%eax
+	jmp syscall_after_call
 END(syscall_badsys)
 
 sysenter_badsys:
-	movl $-ENOSYS,PT_EAX(%esp)
+	movl $-ENOSYS,%eax
 	jmp sysenter_after_call
 END(syscall_badsys)
 	CFI_ENDPROC

From 32226c206801036be8d17e183ee51391f601a6cc Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Wed, 23 Jul 2014 14:00:19 -0700
Subject: [PATCH 0104/1185] mm: hugetlb: fix copy_hugetlb_page_range()

commit 0253d634e0803a8376a0d88efee0bf523d8673f9 upstream.

Commit 4a705fef9862 ("hugetlb: fix copy_hugetlb_page_range() to handle
migration/hwpoisoned entry") changed the order of
huge_ptep_set_wrprotect() and huge_ptep_get(), which leads to breakage
in some workloads like hugepage-backed heap allocation via libhugetlbfs.
This patch fixes it.

The test program for the problem is shown below:

  $ cat heap.c
  #include <unistd.h>
  #include <stdlib.h>
  #include <string.h>

  #define HPS 0x200000

  int main() {
  	int i;
  	char *p = malloc(HPS);
  	memset(p, '1', HPS);
  	for (i = 0; i < 5; i++) {
  		if (!fork()) {
  			memset(p, '2', HPS);
  			p = malloc(HPS);
  			memset(p, '3', HPS);
  			free(p);
  			return 0;
  		}
  	}
  	sleep(1);
  	free(p);
  	return 0;
  }

  $ export HUGETLB_MORECORE=yes ; export HUGETLB_NO_PREFAULT= ; hugectl --heap ./heap

Fixes 4a705fef9862 ("hugetlb: fix copy_hugetlb_page_range() to handle
migration/hwpoisoned entry"), so is applicable to -stable kernels which
include it.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reported-by: Guillaume Morin <guillaume@morinfr.org>
Suggested-by: Guillaume Morin <guillaume@morinfr.org>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/hugetlb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index dbc949c409c7..7de4f67c81fe 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2400,6 +2400,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 		} else {
 			if (cow)
 				huge_ptep_set_wrprotect(src, addr, src_pte);
+			entry = huge_ptep_get(src_pte);
 			ptepage = pte_page(entry);
 			get_page(ptepage);
 			page_dup_rmap(ptepage);

From e1d8240bdd4d65dd1de6eda2b2351ddd2fbb9ca4 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 26 Jul 2014 14:52:01 -0700
Subject: [PATCH 0105/1185] Fix gcc-4.9.0 miscompilation of load_balance() in
 scheduler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 2062afb4f804afef61cbe62a30cac9a46e58e067 upstream.

Michel Dänzer and a couple of other people reported inexplicable random
oopses in the scheduler, and the cause turns out to be gcc mis-compiling
the load_balance() function when debugging is enabled.  The gcc bug
apparently goes back to gcc-4.5, but slight optimization changes means
that it now showed up as a problem in 4.9.0 and 4.9.1.

The instruction scheduling problem causes gcc to schedule a spill
operation to before the stack frame has been created, which in turn can
corrupt the spilled value if an interrupt comes in.  There may be other
effects of this bug too, but that's the code generation problem seen in
Michel's case.

This is fixed in current gcc HEAD, but the workaround as suggested by
Markus Trippelsdorf is pretty simple: use -fno-var-tracking-assignments
when compiling the kernel, which disables the gcc code that causes the
problem.  This can result in slightly worse debug information for
variable accesses, but that is infinitely preferable to actual code
generation problems.

Doing this unconditionally (not just for CONFIG_DEBUG_INFO) also allows
non-debug builds to verify that the debug build would be identical: we
can do

    export GCC_COMPARE_DEBUG=1

to make gcc internally verify that the result of the build is
independent of the "-g" flag (it will make the compiler build everything
twice, toggling the debug flag, and compare the results).

Without the "-fno-var-tracking-assignments" option, the build would fail
(even with 4.8.3 that didn't show the actual stack frame bug) with a gcc
compare failure.

See also gcc bugzilla:

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61801

Reported-by: Michel Dänzer <michel@daenzer.net>
Suggested-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Cc: Jakub Jelinek <jakub@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Makefile b/Makefile
index 8d891c66803c..ffd8082dceb2 100644
--- a/Makefile
+++ b/Makefile
@@ -614,6 +614,8 @@ KBUILD_CFLAGS	+= -fomit-frame-pointer
 endif
 endif
 
+KBUILD_CFLAGS   += $(call cc-option, -fno-var-tracking-assignments)
+
 ifdef CONFIG_DEBUG_INFO
 KBUILD_CFLAGS	+= -g
 KBUILD_AFLAGS	+= -gdwarf-2

From a940d7b23bc073c774f3733c79f82102ffccff4e Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 23 Jun 2014 15:29:40 +0200
Subject: [PATCH 0106/1185] s390/ptrace: fix PSW mask check

commit dab6cf55f81a6e16b8147aed9a843e1691dcd318 upstream.

The PSW mask check of the PTRACE_POKEUSR_AREA command is incorrect.
The PSW_MASK_USER define contains the PSW_MASK_ASC bits, the ptrace
interface accepts all combinations for the address-space-control
bits. To protect the kernel space the PSW mask check in ptrace needs
to reject the address-space-control bit combination for home space.

Fixes CVE-2014-3534

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/kernel/ptrace.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index a314c57f4e94..9677d935583c 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -314,7 +314,9 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
 		 * psw and gprs are stored on the stack
 		 */
 		if (addr == (addr_t) &dummy->regs.psw.mask &&
-		    ((data & ~PSW_MASK_USER) != psw_user_bits ||
+		    (((data^psw_user_bits) & ~PSW_MASK_USER) ||
+		     (((data^psw_user_bits) & PSW_MASK_ASC) &&
+		      ((data|psw_user_bits) & PSW_MASK_ASC) == PSW_MASK_ASC) ||
 		     ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA))))
 			/* Invalid psw mask. */
 			return -EINVAL;
@@ -627,7 +629,10 @@ static int __poke_user_compat(struct task_struct *child,
 		 */
 		if (addr == (addr_t) &dummy32->regs.psw.mask) {
 			/* Build a 64 bit psw mask from 31 bit mask. */
-			if ((tmp & ~PSW32_MASK_USER) != psw32_user_bits)
+			if (((tmp^psw32_user_bits) & ~PSW32_MASK_USER) ||
+			    (((tmp^psw32_user_bits) & PSW32_MASK_ASC) &&
+			     ((tmp|psw32_user_bits) & PSW32_MASK_ASC)
+			     == PSW32_MASK_ASC))
 				/* Invalid psw mask. */
 				return -EINVAL;
 			regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |

From 819ab9941c98f18b0f8c7ffb815e4f07186d2a5f Mon Sep 17 00:00:00 2001
From: Michael Brown <mbrown@fensystems.co.uk>
Date: Thu, 10 Jul 2014 12:26:20 +0100
Subject: [PATCH 0107/1185] x86/efi: Include a .bss section within the PE/COFF
 headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit c7fb93ec51d462ec3540a729ba446663c26a0505 upstream.

The PE/COFF headers currently describe only the initialised-data
portions of the image, and result in no space being allocated for the
uninitialised-data portions.  Consequently, the EFI boot stub will end
up overwriting unexpected areas of memory, with unpredictable results.

Fix by including a .bss section in the PE/COFF headers (functionally
equivalent to the init_size field in the bzImage header).

Signed-off-by: Michael Brown <mbrown@fensystems.co.uk>
Cc: Thomas Bächler <thomas@archlinux.org>
Cc: Josh Boyer <jwboyer@fedoraproject.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/boot/header.S      | 26 ++++++++++++++++++++++----
 arch/x86/boot/tools/build.c | 37 ++++++++++++++++++++++++++++++-------
 2 files changed, 52 insertions(+), 11 deletions(-)

diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 9ec06a1f6d61..425712462178 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -91,10 +91,9 @@ bs_die:
 
 	.section ".bsdata", "a"
 bugger_off_msg:
-	.ascii	"Direct floppy boot is not supported. "
-	.ascii	"Use a boot loader program instead.\r\n"
+	.ascii	"Use a boot loader.\r\n"
 	.ascii	"\n"
-	.ascii	"Remove disk and press any key to reboot ...\r\n"
+	.ascii	"Remove disk and press any key to reboot...\r\n"
 	.byte	0
 
 #ifdef CONFIG_EFI_STUB
@@ -108,7 +107,7 @@ coff_header:
 #else
 	.word	0x8664				# x86-64
 #endif
-	.word	3				# nr_sections
+	.word	4				# nr_sections
 	.long	0 				# TimeDateStamp
 	.long	0				# PointerToSymbolTable
 	.long	1				# NumberOfSymbols
@@ -250,6 +249,25 @@ section_table:
 	.word	0				# NumberOfLineNumbers
 	.long	0x60500020			# Characteristics (section flags)
 
+	#
+	# The offset & size fields are filled in by build.c.
+	#
+	.ascii	".bss"
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.long	0
+	.long	0x0
+	.long	0				# Size of initialized data
+						# on disk
+	.long	0x0
+	.long	0				# PointerToRelocations
+	.long	0				# PointerToLineNumbers
+	.word	0				# NumberOfRelocations
+	.word	0				# NumberOfLineNumbers
+	.long	0xc8000080			# Characteristics (section flags)
+
 #endif /* CONFIG_EFI_STUB */
 
 	# Kernel attributes; used by setup.  This is part 1 of the
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 94c544650020..971a0ce062aa 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -141,7 +141,7 @@ static void usage(void)
 
 #ifdef CONFIG_EFI_STUB
 
-static void update_pecoff_section_header(char *section_name, u32 offset, u32 size)
+static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset)
 {
 	unsigned int pe_header;
 	unsigned short num_sections;
@@ -162,10 +162,10 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz
 			put_unaligned_le32(size, section + 0x8);
 
 			/* section header vma field */
-			put_unaligned_le32(offset, section + 0xc);
+			put_unaligned_le32(vma, section + 0xc);
 
 			/* section header 'size of initialised data' field */
-			put_unaligned_le32(size, section + 0x10);
+			put_unaligned_le32(datasz, section + 0x10);
 
 			/* section header 'file offset' field */
 			put_unaligned_le32(offset, section + 0x14);
@@ -177,6 +177,11 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz
 	}
 }
 
+static void update_pecoff_section_header(char *section_name, u32 offset, u32 size)
+{
+	update_pecoff_section_header_fields(section_name, offset, size, size, offset);
+}
+
 static void update_pecoff_setup_and_reloc(unsigned int size)
 {
 	u32 setup_offset = 0x200;
@@ -201,9 +206,6 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz)
 
 	pe_header = get_unaligned_le32(&buf[0x3c]);
 
-	/* Size of image */
-	put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);
-
 	/*
 	 * Size of code: Subtract the size of the first sector (512 bytes)
 	 * which includes the header.
@@ -218,6 +220,22 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz)
 	update_pecoff_section_header(".text", text_start, text_sz);
 }
 
+static void update_pecoff_bss(unsigned int file_sz, unsigned int init_sz)
+{
+	unsigned int pe_header;
+	unsigned int bss_sz = init_sz - file_sz;
+
+	pe_header = get_unaligned_le32(&buf[0x3c]);
+
+	/* Size of uninitialized data */
+	put_unaligned_le32(bss_sz, &buf[pe_header + 0x24]);
+
+	/* Size of image */
+	put_unaligned_le32(init_sz, &buf[pe_header + 0x50]);
+
+	update_pecoff_section_header_fields(".bss", file_sz, bss_sz, 0, 0);
+}
+
 #endif /* CONFIG_EFI_STUB */
 
 
@@ -268,6 +286,9 @@ int main(int argc, char ** argv)
 	int fd;
 	void *kernel;
 	u32 crc = 0xffffffffUL;
+#ifdef CONFIG_EFI_STUB
+	unsigned int init_sz;
+#endif
 
 	/* Defaults for old kernel */
 #ifdef CONFIG_X86_32
@@ -338,7 +359,9 @@ int main(int argc, char ** argv)
 	put_unaligned_le32(sys_size, &buf[0x1f4]);
 
 #ifdef CONFIG_EFI_STUB
-	update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz));
+	update_pecoff_text(setup_sectors * 512, i + (sys_size * 16));
+	init_sz = get_unaligned_le32(&buf[0x260]);
+	update_pecoff_bss(i + (sys_size * 16), init_sz);
 
 #ifdef CONFIG_X86_64 /* Yes, this is really how we defined it :( */
 	efi_stub_entry -= 0x200;

From c5f0c0e7525443add533495e93ba8de6feab2396 Mon Sep 17 00:00:00 2001
From: Zoltan Kiss <zoltan.kiss@citrix.com>
Date: Wed, 26 Mar 2014 22:37:45 +0000
Subject: [PATCH 0108/1185] core, nfqueue, openvswitch: Orphan frags in
 skb_zerocopy and handle errors

commit 36d5fe6a000790f56039afe26834265db0a3ad4c upstream.

skb_zerocopy can copy elements of the frags array between skbs, but it doesn't
orphan them. Also, it doesn't handle errors, so this patch takes care of that
as well, and modify the callers accordingly. skb_tx_error() is also added to
the callers so they will signal the failed delivery towards the creator of the
skb.

Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
[bwh: Backported to 3.13: skb_zerocopy() is new in 3.14, but was moved from a
 static function in nfnetlink_queue.  We need to patch that and its caller, but
 not openvswitch.]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nfnetlink_queue_core.c | 29 ++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 5352b2d2d5bf..2b8199f68785 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -227,22 +227,23 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
 	spin_unlock_bh(&queue->lock);
 }
 
-static void
+static int
 nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
 {
 	int i, j = 0;
 	int plen = 0; /* length of skb->head fragment */
+	int ret;
 	struct page *page;
 	unsigned int offset;
 
 	/* dont bother with small payloads */
-	if (len <= skb_tailroom(to)) {
-		skb_copy_bits(from, 0, skb_put(to, len), len);
-		return;
-	}
+	if (len <= skb_tailroom(to))
+		return skb_copy_bits(from, 0, skb_put(to, len), len);
 
 	if (hlen) {
-		skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
+		ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
+		if (unlikely(ret))
+			return ret;
 		len -= hlen;
 	} else {
 		plen = min_t(int, skb_headlen(from), len);
@@ -260,6 +261,11 @@ nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
 	to->len += len + plen;
 	to->data_len += len + plen;
 
+	if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) {
+		skb_tx_error(from);
+		return -ENOMEM;
+	}
+
 	for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
 		if (!len)
 			break;
@@ -270,6 +276,8 @@ nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
 		j++;
 	}
 	skb_shinfo(to)->nr_frags = j;
+
+	return 0;
 }
 
 static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet)
@@ -355,13 +363,16 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 
 	skb = nfnetlink_alloc_skb(&init_net, size, queue->peer_portid,
 				  GFP_ATOMIC);
-	if (!skb)
+	if (!skb) {
+		skb_tx_error(entskb);
 		return NULL;
+	}
 
 	nlh = nlmsg_put(skb, 0, 0,
 			NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
 			sizeof(struct nfgenmsg), 0);
 	if (!nlh) {
+		skb_tx_error(entskb);
 		kfree_skb(skb);
 		return NULL;
 	}
@@ -481,13 +492,15 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 		nla->nla_type = NFQA_PAYLOAD;
 		nla->nla_len = nla_attr_size(data_len);
 
-		nfqnl_zcopy(skb, entskb, data_len, hlen);
+		if (nfqnl_zcopy(skb, entskb, data_len, hlen))
+			goto nla_put_failure;
 	}
 
 	nlh->nlmsg_len = skb->len;
 	return skb;
 
 nla_put_failure:
+	skb_tx_error(entskb);
 	kfree_skb(skb);
 	net_err_ratelimited("nf_queue: error creating packet message\n");
 	return NULL;

From 10a622493d7f9343e8b4118031ff0c21a27cc4e9 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 31 Jul 2014 14:55:39 -0700
Subject: [PATCH 0109/1185] Linux 3.10.51

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index ffd8082dceb2..f9f6ee59c61a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 10
-SUBLEVEL = 50
+SUBLEVEL = 51
 EXTRAVERSION =
 NAME = TOSSUG Baby Fish
 

From e31266f78058409d18d48e0afa8339e77322b17f Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@linaro.org>
Date: Mon, 4 Aug 2014 15:47:44 +0100
Subject: [PATCH 0110/1185] gator: Version 5.19

Signed-off-by: Jon Medhurst <tixy@linaro.org>
---
 drivers/gator/Makefile                     |  15 +-
 drivers/gator/gator.h                      |  16 +-
 drivers/gator/gator_backtrace.c            |   2 +-
 drivers/gator/gator_buffer.c               |   6 +-
 drivers/gator/gator_events_armv7.c         |   4 +-
 drivers/gator/gator_events_block.c         |  12 +-
 drivers/gator/gator_events_mali_4xx.c      | 144 ++++-------
 drivers/gator/gator_events_mali_common.c   |  36 +--
 drivers/gator/gator_events_mali_common.h   |  20 +-
 drivers/gator/gator_events_mali_t6xx.c     |   5 +-
 drivers/gator/gator_events_mali_t6xx_hw.c  | 185 ++++++++++++---
 drivers/gator/gator_events_mmapped.c       |  26 +-
 drivers/gator/gator_events_perf_pmu.c      |   6 +-
 drivers/gator/gator_events_scorpion.c      |   4 +-
 drivers/gator/gator_events_threads.c       | 115 +++++++++
 drivers/gator/gator_iks.c                  |   2 +-
 drivers/gator/gator_main.c                 | 126 ++++++++--
 drivers/gator/gator_marshaling.c           | 119 ++++------
 drivers/gator/gator_trace_gpu.c            | 178 ++++++++------
 drivers/gator/gator_trace_gpu.h            |  79 -------
 drivers/gator/gator_trace_power.c          |   4 +-
 drivers/gator/gator_trace_sched.c          |  85 ++++---
 drivers/gator/mali/mali_dd_gator_api.h     |  40 ++++
 drivers/gator/mali_t6xx.mk                 |   4 +
 tools/gator/daemon/Android.mk              |   6 +-
 tools/gator/daemon/Application.mk          |   1 +
 tools/gator/daemon/Buffer.cpp              |  36 ++-
 tools/gator/daemon/Buffer.h                |  11 +-
 tools/gator/daemon/CapturedXML.cpp         |  11 +-
 tools/gator/daemon/CapturedXML.h           |   2 +-
 tools/gator/daemon/Child.cpp               |  45 ++--
 tools/gator/daemon/Child.h                 |   2 +-
 tools/gator/daemon/ConfigurationXML.cpp    |  10 +-
 tools/gator/daemon/Counter.h               |   4 +
 tools/gator/daemon/DriverSource.cpp        |  53 ++++-
 tools/gator/daemon/DriverSource.h          |   5 +
 tools/gator/daemon/EventsXML.cpp           |  16 +-
 tools/gator/daemon/EventsXML.h             |   5 +-
 tools/gator/daemon/ExternalSource.cpp      | 177 +++++++++++++-
 tools/gator/daemon/ExternalSource.h        |  11 +-
 tools/gator/daemon/FSDriver.cpp            | 212 +++++++++++++++++
 tools/gator/daemon/FSDriver.h              |  44 ++++
 tools/gator/daemon/Fifo.h                  |   2 +-
 tools/gator/daemon/Hwmon.cpp               |  16 +-
 tools/gator/daemon/KMod.cpp                |  11 +-
 tools/gator/daemon/LocalCapture.h          |   2 +-
 tools/gator/daemon/Logging.h               |   2 +-
 tools/gator/daemon/Makefile                |   8 +-
 tools/gator/daemon/Makefile_aarch64        |   9 +-
 tools/gator/daemon/MaliVideoDriver.cpp     | 253 ++++++++++++++++++++
 tools/gator/daemon/MaliVideoDriver.h       |  50 ++++
 tools/gator/daemon/Monitor.cpp             |  11 +-
 tools/gator/daemon/Monitor.h               |   1 +
 tools/gator/daemon/OlySocket.cpp           |  77 ++----
 tools/gator/daemon/OlySocket.h             |  14 +-
 tools/gator/daemon/PerfDriver.cpp          |  92 ++++++--
 tools/gator/daemon/PerfDriver.h            |   6 +-
 tools/gator/daemon/PerfGroup.cpp           |  28 ++-
 tools/gator/daemon/PerfGroup.h             |   2 +
 tools/gator/daemon/PerfSource.cpp          |  14 +-
 tools/gator/daemon/Proc.cpp                | 106 ++++++---
 tools/gator/daemon/Proc.h                  |   2 +-
 tools/gator/daemon/Sender.h                |   2 +-
 tools/gator/daemon/SessionData.cpp         |  55 ++++-
 tools/gator/daemon/SessionData.h           |  17 +-
 tools/gator/daemon/SessionXML.cpp          |   8 +-
 tools/gator/daemon/StreamlineSetup.cpp     |   2 +-
 tools/gator/daemon/StreamlineSetup.h       |   4 +-
 tools/gator/daemon/UEvent.cpp              |   3 +-
 tools/gator/daemon/UserSpaceSource.cpp     |  14 +-
 tools/gator/daemon/UserSpaceSource.h       |   2 +-
 tools/gator/daemon/c++.cpp                 |  40 ++++
 tools/gator/daemon/common.mk               |  16 +-
 tools/gator/daemon/defaults.xml            |   5 +
 tools/gator/daemon/escape.c                |   2 +-
 tools/gator/daemon/events-CCI-400.xml      |  21 +-
 tools/gator/daemon/events-CCN-504.xml      |   9 -
 tools/gator/daemon/events-Cortex-A53.xml   |  84 -------
 tools/gator/daemon/events-Cortex-A57.xml   |  84 -------
 tools/gator/daemon/events-Filesystem.xml   |  11 +
 tools/gator/daemon/events-L2C-310.xml      |  30 +--
 tools/gator/daemon/events-Linux.xml        |   5 +-
 tools/gator/daemon/events-Mali-4xx.xml     | 126 +++++-----
 tools/gator/daemon/events-Mali-T6xx.xml    |  26 +-
 tools/gator/daemon/events-Mali-T6xx_hw.xml |  33 +--
 tools/gator/daemon/events-Mali-V500.xml    |  29 +++
 tools/gator/daemon/main.cpp                | 262 ++++++++++++---------
 87 files changed, 2316 insertions(+), 1164 deletions(-)
 create mode 100644 drivers/gator/gator_events_threads.c
 delete mode 100644 drivers/gator/gator_trace_gpu.h
 create mode 100644 drivers/gator/mali/mali_dd_gator_api.h
 create mode 100644 tools/gator/daemon/Application.mk
 create mode 100644 tools/gator/daemon/FSDriver.cpp
 create mode 100644 tools/gator/daemon/FSDriver.h
 create mode 100644 tools/gator/daemon/MaliVideoDriver.cpp
 create mode 100644 tools/gator/daemon/MaliVideoDriver.h
 create mode 100644 tools/gator/daemon/c++.cpp
 create mode 100644 tools/gator/daemon/events-Filesystem.xml
 create mode 100644 tools/gator/daemon/events-Mali-V500.xml

diff --git a/drivers/gator/Makefile b/drivers/gator/Makefile
index 3dc9d059a4b4..2f86823313c6 100644
--- a/drivers/gator/Makefile
+++ b/drivers/gator/Makefile
@@ -7,13 +7,14 @@ CONFIG_GATOR ?= m
 obj-$(CONFIG_GATOR) := gator.o
 
 gator-y :=	gator_main.o \
-		gator_events_irq.o \
-		gator_events_sched.o \
-		gator_events_net.o \
 		gator_events_block.o \
+		gator_events_irq.o \
 		gator_events_meminfo.o \
-		gator_events_perf_pmu.o \
 		gator_events_mmapped.o \
+		gator_events_net.o \
+		gator_events_perf_pmu.o \
+		gator_events_sched.o \
+		gator_events_threads.o \
 
 # Convert the old GATOR_WITH_MALI_SUPPORT to the new kernel flags
 ifneq ($(GATOR_WITH_MALI_SUPPORT),)
@@ -48,10 +49,14 @@ ifeq ($(CONFIG_GATOR_WITH_MALI_SUPPORT),y)
   ccflags-$(CONFIG_GATOR_MALI_T6XX) += -DMALI_SUPPORT=MALI_T6xx
 endif
 
-# GATOR_TEST controls whether to include (=1) or exclude (=0) test code. 
+# GATOR_TEST controls whether to include (=1) or exclude (=0) test code.
 GATOR_TEST ?= 0
 EXTRA_CFLAGS +=	-DGATOR_TEST=$(GATOR_TEST)
 
+# Detect which block_rq_complete API is present: 0 if the tracepoint takes nr_bytes (new API), non-zero otherwise (original API)
+OLD_BLOCK_RQ_COMPLETE := $(shell grep -A3 block_rq_complete include/trace/events/block.h | grep nr_bytes > /dev/null; echo $$?)
+EXTRA_CFLAGS += -DOLD_BLOCK_RQ_COMPLETE=$(OLD_BLOCK_RQ_COMPLETE)
+
 gator-$(CONFIG_ARM) +=	gator_events_armv6.o \
 			gator_events_armv7.o \
 			gator_events_ccn-504.o \
diff --git a/drivers/gator/gator.h b/drivers/gator/gator.h
index 586cd9e742fb..5ad0254d86a9 100644
--- a/drivers/gator/gator.h
+++ b/drivers/gator/gator.h
@@ -42,6 +42,10 @@
 #define AARCH64     0xd0f
 #define OTHER       0xfff
 
+// GPU type codes (values that MALI_SUPPORT may be set to)
+#define MALI_4xx     1
+#define MALI_T6xx    2
+
 #define MAXSIZE_CORE_NAME 32
 
 struct gator_cpu {
@@ -82,13 +86,21 @@ int gatorfs_create_ro_ulong(struct super_block *sb, struct dentry *root,
 		register_trace_##probe_name(probe_##probe_name)
 #	define GATOR_UNREGISTER_TRACE(probe_name) \
 		unregister_trace_##probe_name(probe_##probe_name)
-#else
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0)
 #	define GATOR_DEFINE_PROBE(probe_name, proto) \
 		static void probe_##probe_name(void *data, PARAMS(proto))
 #	define GATOR_REGISTER_TRACE(probe_name) \
 		register_trace_##probe_name(probe_##probe_name, NULL)
 #	define GATOR_UNREGISTER_TRACE(probe_name) \
 		unregister_trace_##probe_name(probe_##probe_name, NULL)
+#else
+#	define GATOR_DEFINE_PROBE(probe_name, proto) \
+		extern struct tracepoint *gator_tracepoint_##probe_name; \
+		static void probe_##probe_name(void *data, PARAMS(proto))
+#	define GATOR_REGISTER_TRACE(probe_name) \
+		tracepoint_probe_register(gator_tracepoint_##probe_name, probe_##probe_name, NULL)
+#	define GATOR_UNREGISTER_TRACE(probe_name) \
+		tracepoint_probe_unregister(gator_tracepoint_##probe_name, probe_##probe_name, NULL)
 #endif
 
 /******************************************************************************
@@ -115,6 +127,8 @@ u32 gator_cpuid(void);
 
 void gator_backtrace_handler(struct pt_regs *const regs);
 
+void gator_marshal_activity_switch(int core, int key, int activity, int pid);
+
 #if !GATOR_IKS_SUPPORT
 
 #define get_physical_cpu() smp_processor_id()
diff --git a/drivers/gator/gator_backtrace.c b/drivers/gator/gator_backtrace.c
index 9f305cf7242c..e03c1653c5b5 100644
--- a/drivers/gator/gator_backtrace.c
+++ b/drivers/gator/gator_backtrace.c
@@ -178,7 +178,7 @@ static void kernel_backtrace(int cpu, struct pt_regs *const regs)
 	marshal_backtrace(PC_REG & ~1, NO_COOKIE, 1);
 #endif
 }
- 
+
 static void gator_add_sample(int cpu, struct pt_regs *const regs, u64 time)
 {
 	bool in_kernel;
diff --git a/drivers/gator/gator_buffer.c b/drivers/gator/gator_buffer.c
index eba22dfe3bf2..dfbc97d80221 100644
--- a/drivers/gator/gator_buffer.c
+++ b/drivers/gator/gator_buffer.c
@@ -37,12 +37,12 @@ static void marshal_frame(int cpu, int buftype)
 	case SCHED_TRACE_BUF:
 		frame = FRAME_SCHED_TRACE;
 		break;
-	case GPU_TRACE_BUF:
-		frame = FRAME_GPU_TRACE;
-		break;
 	case IDLE_BUF:
 		frame = FRAME_IDLE;
 		break;
+	case ACTIVITY_BUF:
+		frame = FRAME_ACTIVITY;
+		break;
 	default:
 		frame = -1;
 		break;
diff --git a/drivers/gator/gator_events_armv7.c b/drivers/gator/gator_events_armv7.c
index 153119b463e6..bd8a9ba24e99 100644
--- a/drivers/gator/gator_events_armv7.c
+++ b/drivers/gator/gator_events_armv7.c
@@ -27,9 +27,9 @@
 // ccnt reg
 #define CCNT_REG	(1 << 31)
 
-#define CCNT 		0
+#define CCNT		0
 #define CNT0		1
-#define CNTMAX 		(6+1)
+#define CNTMAX		(6+1)
 
 static const char *pmnc_name;
 static int pmnc_counters;
diff --git a/drivers/gator/gator_events_block.c b/drivers/gator/gator_events_block.c
index b2bc414e462e..03eed4fb9ebb 100644
--- a/drivers/gator/gator_events_block.c
+++ b/drivers/gator/gator_events_block.c
@@ -28,15 +28,25 @@ static ulong block_rq_rd_key;
 static atomic_t blockCnt[BLOCK_TOTAL];
 static int blockGet[BLOCK_TOTAL * 4];
 
+// The block_rq_complete tracepoint gained an nr_bytes argument in 3.15, and that change was backported to some older kernels. The Makefile tries to autodetect which variant is present; if it gets it wrong, change the #if below
+#if OLD_BLOCK_RQ_COMPLETE
 GATOR_DEFINE_PROBE(block_rq_complete, TP_PROTO(struct request_queue *q, struct request *rq))
+#else
+GATOR_DEFINE_PROBE(block_rq_complete, TP_PROTO(struct request_queue *q, struct request *rq, unsigned int nr_bytes))
+#endif
 {
-	int write, size;
+	int write;
+	unsigned int size;
 
 	if (!rq)
 		return;
 
 	write = rq->cmd_flags & EVENTWRITE;
+#if OLD_BLOCK_RQ_COMPLETE
 	size = rq->resid_len;
+#else
+	size = nr_bytes;
+#endif
 
 	if (!size)
 		return;
diff --git a/drivers/gator/gator_events_mali_4xx.c b/drivers/gator/gator_events_mali_4xx.c
index 85d47645a9d9..9e1c7064bd73 100644
--- a/drivers/gator/gator_events_mali_4xx.c
+++ b/drivers/gator/gator_events_mali_4xx.c
@@ -18,17 +18,27 @@
 #include "gator_events_mali_4xx.h"
 
 /*
- * There are (currently) four different variants of the comms between gator and Mali:
- * 1 (deprecated): No software counter support
- * 2 (deprecated): Tracepoint called for each separate s/w counter value as it appears
- * 3 (default): Single tracepoint for all s/w counters in a bundle.
- * Interface style 3 is the default if no other is specified.  1 and 2 will be eliminated when
- * existing Mali DDKs are upgraded.
- * 4. As above, but for the Utgard (Mali-450) driver.
- */
+* There have been four different variants of the comms between gator and Mali depending on driver version:
+* # | DDK vsn range             | Support                                                             | Notes
+*
+* 1 | (obsolete)                | No software counter support                                         | Obsolete patches
+* 2 | (obsolete)                | Tracepoint called for each separate s/w counter value as it appears | Obsolete patches
+* 3 | r3p0-04rel0 - r3p2-01rel2 | Single tracepoint for all s/w counters in a bundle.                 |
+* 4 | r3p2-01rel3 - date        | As above but with extensions for MP devices (Mali-450)              | At least r4p0-00rel1
+*/
 
 #if !defined(GATOR_MALI_INTERFACE_STYLE)
-#define GATOR_MALI_INTERFACE_STYLE (3)
+#define GATOR_MALI_INTERFACE_STYLE (4)
+#endif
+
+#if GATOR_MALI_INTERFACE_STYLE == 1
+#error GATOR_MALI_INTERFACE_STYLE 1 is obsolete
+#elif GATOR_MALI_INTERFACE_STYLE == 2
+#error GATOR_MALI_INTERFACE_STYLE 2 is obsolete
+#elif GATOR_MALI_INTERFACE_STYLE >= 3
+// Valid GATOR_MALI_INTERFACE_STYLE
+#else
+#error Unknown GATOR_MALI_INTERFACE_STYLE option.
 #endif
 
 #if GATOR_MALI_INTERFACE_STYLE < 4
@@ -44,6 +54,8 @@
 #error MALI_SUPPORT set to an invalid device code: expecting MALI_4xx
 #endif
 
+static const char mali_name[] = "Mali-4xx";
+
 /* gatorfs variables for counter enable state,
  * the event the counter should count and the
  * 'key' (a unique id set by gatord and returned
@@ -63,6 +75,7 @@ static u32 *counter_address[NUMBER_OF_EVENTS];
  */
 static unsigned long counter_dump[NUMBER_OF_EVENTS * 2];
 static unsigned long counter_prev[NUMBER_OF_EVENTS];
+static bool prev_set[NUMBER_OF_EVENTS];
 
 /* Note whether tracepoints have been registered */
 static int trace_registered;
@@ -76,18 +89,11 @@ static unsigned int n_vp_cores = MAX_NUM_VP_CORES;
 static unsigned int n_l2_cores = MAX_NUM_L2_CACHE_CORES;
 static unsigned int n_fp_cores = MAX_NUM_FP_CORES;
 
-/**
- * Calculate the difference and handle the overflow.
- */
-static u32 get_difference(u32 start, u32 end)
-{
-	if (start - end >= 0) {
-		return start - end;
-	}
-
-	// Mali counters are unsigned 32 bit values that wrap.
-	return (4294967295u - end) + start;
-}
+extern mali_counter mali_activity[2];
+static const char* const mali_activity_names[] = {
+	"fragment",
+	"vertex",
+};
 
 /**
  * Returns non-zero if the given counter ID is an activity counter.
@@ -112,40 +118,6 @@ static inline int is_hw_counter(unsigned int event_id)
 typedef void _mali_profiling_get_mali_version_type(struct _mali_profiling_mali_version *values);
 typedef u32 _mali_profiling_get_l2_counters_type(_mali_profiling_l2_counter_values *values);
 
-#if GATOR_MALI_INTERFACE_STYLE == 2
-/**
- * Returns non-zero if the given counter ID is a software counter.
- */
-static inline int is_sw_counter(unsigned int event_id)
-{
-	return (event_id >= FIRST_SW_COUNTER && event_id <= LAST_SW_COUNTER);
-}
-#endif
-
-#if GATOR_MALI_INTERFACE_STYLE == 2
-/*
- * The Mali DDK uses s64 types to contain software counter values, but gator
- * can only use a maximum of 32 bits. This function scales a software counter
- * to an appropriate range.
- */
-static u32 scale_sw_counter_value(unsigned int event_id, signed long long value)
-{
-	u32 scaled_value;
-
-	switch (event_id) {
-	case COUNTER_GLES_UPLOAD_TEXTURE_TIME:
-	case COUNTER_GLES_UPLOAD_VBO_TIME:
-		scaled_value = (u32)div_s64(value, 1000000);
-		break;
-	default:
-		scaled_value = (u32)value;
-		break;
-	}
-
-	return scaled_value;
-}
-#endif
-
 /* Probe for continuously sampled counter */
 #if 0				//WE_DONT_CURRENTLY_USE_THIS_SO_SUPPRESS_WARNING
 GATOR_DEFINE_PROBE(mali_sample_address, TP_PROTO(unsigned int event_id, u32 *addr))
@@ -172,16 +144,6 @@ GATOR_DEFINE_PROBE(mali_hw_counter, TP_PROTO(unsigned int event_id, unsigned int
 	}
 }
 
-#if GATOR_MALI_INTERFACE_STYLE == 2
-GATOR_DEFINE_PROBE(mali_sw_counter, TP_PROTO(unsigned int event_id, signed long long value))
-{
-	if (is_sw_counter(event_id)) {
-		counter_data[event_id] = scale_sw_counter_value(event_id, value);
-	}
-}
-#endif /* GATOR_MALI_INTERFACE_STYLE == 2 */
-
-#if GATOR_MALI_INTERFACE_STYLE >= 3
 GATOR_DEFINE_PROBE(mali_sw_counters, TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters))
 {
 	u32 i;
@@ -193,7 +155,6 @@ GATOR_DEFINE_PROBE(mali_sw_counters, TP_PROTO(pid_t pid, pid_t tid, void *surfac
 		}
 	}
 }
-#endif /* GATOR_MALI_INTERFACE_STYLE >= 3 */
 
 /**
  * Create a single filesystem entry for a specified event.
@@ -254,6 +215,7 @@ static void initialise_version_info(void)
 		symbol_put(_mali_profiling_get_mali_version);
 	} else {
 		printk("gator: mali online _mali_profiling_get_mali_version symbol not found\n");
+		printk("gator:  check your Mali DDK version versus the GATOR_MALI_INTERFACE_STYLE setting\n");
 	}
 }
 #endif
@@ -261,7 +223,6 @@ static void initialise_version_info(void)
 static int create_files(struct super_block *sb, struct dentry *root)
 {
 	int event;
-	const char *mali_name = gator_mali_get_mali_name();
 
 	char buf[40];
 	int core_id;
@@ -278,6 +239,14 @@ static int create_files(struct super_block *sb, struct dentry *root)
 	initialise_version_info();
 #endif
 
+	mali_activity[0].cores = n_fp_cores;
+	mali_activity[1].cores = n_vp_cores;
+	for (event = 0; event < ARRAY_SIZE(mali_activity); event++) {
+		if (gator_mali_create_file_system(mali_name, mali_activity_names[event], sb, root, &mali_activity[event], NULL) != 0) {
+			return -1;
+		}
+	}
+
 	/* Vertex processor counters */
 	for (core_id = 0; core_id < n_vp_cores; core_id++) {
 		int activity_counter_id = ACTIVITY_VP_0;
@@ -413,7 +382,6 @@ static void init_counters(unsigned int from_counter, unsigned int to_counter)
 static void mali_counter_initialize(void)
 {
 	int i;
-	int core_id;
 
 	mali_profiling_control_type *mali_control;
 
@@ -463,15 +431,10 @@ static void mali_counter_initialize(void)
 		n_l2_cores = 0;
 	}
 
-	for (core_id = 0; core_id < n_l2_cores; core_id++) {
-		int counter_id = COUNTER_L2_0_C0 + (2 * core_id);
-		counter_prev[counter_id] = 0;
-		counter_prev[counter_id + 1] = 0;
-	}
-
 	/* Clear counters in the start */
 	for (i = 0; i < NUMBER_OF_EVENTS; i++) {
 		counter_data[i] = 0;
+		prev_set[i] = false;
 	}
 }
 
@@ -528,23 +491,11 @@ static int start(void)
 		return -1;
 	}
 
-#if GATOR_MALI_INTERFACE_STYLE == 1
-	/* None. */
-#elif GATOR_MALI_INTERFACE_STYLE == 2
-	/* For patched Mali driver. */
-	if (GATOR_REGISTER_TRACE(mali_sw_counter)) {
-		printk("gator: mali_sw_counter tracepoint failed to activate\n");
-		return -1;
-	}
-#elif GATOR_MALI_INTERFACE_STYLE >= 3
 	/* For Mali drivers with built-in support. */
 	if (GATOR_REGISTER_TRACE(mali_sw_counters)) {
 		printk("gator: mali_sw_counters tracepoint failed to activate\n");
 		return -1;
 	}
-#else
-#error Unknown GATOR_MALI_INTERFACE_STYLE option.
-#endif
 
 	trace_registered = 1;
 
@@ -561,17 +512,8 @@ static void stop(void)
 	if (trace_registered) {
 		GATOR_UNREGISTER_TRACE(mali_hw_counter);
 
-#if GATOR_MALI_INTERFACE_STYLE == 1
-		/* None. */
-#elif GATOR_MALI_INTERFACE_STYLE == 2
-		/* For patched Mali driver. */
-		GATOR_UNREGISTER_TRACE(mali_sw_counter);
-#elif GATOR_MALI_INTERFACE_STYLE >= 3
 		/* For Mali drivers with built-in support. */
 		GATOR_UNREGISTER_TRACE(mali_sw_counters);
-#else
-#error Unknown GATOR_MALI_INTERFACE_STYLE option.
-#endif
 
 		pr_debug("gator: mali timeline tracepoint deactivated\n");
 
@@ -636,21 +578,23 @@ static int read(int **buffer)
 
 			per_core = &cache_values.cores[cache_id];
 
-			if (counter_enabled[counter_id_0]) {
+			if (counter_enabled[counter_id_0] && prev_set[counter_id_0]) {
 				// Calculate and save src0's counter val0
 				counter_dump[len++] = counter_key[counter_id_0];
-				counter_dump[len++] = get_difference(per_core->value0, counter_prev[counter_id_0]);
+				counter_dump[len++] = per_core->value0 - counter_prev[counter_id_0];
 			}
 
-			if (counter_enabled[counter_id_1]) {
+			if (counter_enabled[counter_id_1] && prev_set[counter_id_1]) {
 				// Calculate and save src1's counter val1
 				counter_dump[len++] = counter_key[counter_id_1];
-				counter_dump[len++] = get_difference(per_core->value1, counter_prev[counter_id_1]);
+				counter_dump[len++] = per_core->value1 - counter_prev[counter_id_1];
 			}
 
 			// Save the previous values for the counters.
 			counter_prev[counter_id_0] = per_core->value0;
+			prev_set[counter_id_0] = true;
 			counter_prev[counter_id_1] = per_core->value1;
+			prev_set[counter_id_1] = true;
 		}
 	}
 
@@ -709,6 +653,8 @@ int gator_events_mali_init(void)
 
 	pr_debug("gator: mali init\n");
 
+	gator_mali_initialise_counters(mali_activity, ARRAY_SIZE(mali_activity));
+
 	for (cnt = 0; cnt < NUMBER_OF_EVENTS; cnt++) {
 		counter_enabled[cnt] = 0;
 		counter_event[cnt] = 0;
diff --git a/drivers/gator/gator_events_mali_common.c b/drivers/gator/gator_events_mali_common.c
index dc58dcf0c662..4f2cce4ce67b 100644
--- a/drivers/gator/gator_events_mali_common.c
+++ b/drivers/gator/gator_events_mali_common.c
@@ -8,26 +8,6 @@
  */
 #include "gator_events_mali_common.h"
 
-static u32 gator_mali_get_id(void)
-{
-	return MALI_SUPPORT;
-}
-
-extern const char *gator_mali_get_mali_name(void)
-{
-	u32 id = gator_mali_get_id();
-
-	switch (id) {
-	case MALI_T6xx:
-		return "Mali-T6xx";
-	case MALI_4xx:
-		return "Mali-4xx";
-	default:
-		pr_debug("gator: Mali-T6xx: unknown Mali ID (%d)\n", id);
-		return "Mali-Unknown";
-	}
-}
-
 extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, mali_counter *counter, unsigned long *event)
 {
 	int err;
@@ -42,24 +22,31 @@ extern int gator_mali_create_file_system(const char *mali_name, const char *even
 		dir = gatorfs_mkdir(sb, root, buf);
 
 		if (dir == NULL) {
-			pr_debug("gator: Mali-T6xx: error creating file system for: %s (%s)", event_name, buf);
+			pr_debug("gator: %s: error creating file system for: %s (%s)", mali_name, event_name, buf);
 			return -1;
 		}
 
 		err = gatorfs_create_ulong(sb, dir, "enabled", &counter->enabled);
 		if (err != 0) {
-			pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ulong for: %s (%s)", event_name, buf);
+			pr_debug("gator: %s: error calling gatorfs_create_ulong for: %s (%s)", mali_name, event_name, buf);
 			return -1;
 		}
 		err = gatorfs_create_ro_ulong(sb, dir, "key", &counter->key);
 		if (err != 0) {
-			pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ro_ulong for: %s (%s)", event_name, buf);
+			pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)", mali_name, event_name, buf);
 			return -1;
 		}
+		if (counter->cores != -1) {
+			err = gatorfs_create_ro_ulong(sb, dir, "cores", &counter->cores);
+			if (err != 0) {
+				pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)", mali_name, event_name, buf);
+				return -1;
+			}
+		}
 		if (event != NULL) {
 			err = gatorfs_create_ulong(sb, dir, "event", event);
 			if (err != 0) {
-				pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ro_ulong for: %s (%s)", event_name, buf);
+				pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)", mali_name, event_name, buf);
 				return -1;
 			}
 		}
@@ -77,5 +64,6 @@ extern void gator_mali_initialise_counters(mali_counter counters[], unsigned int
 
 		counter->key = gator_events_get_key();
 		counter->enabled = 0;
+		counter->cores = -1;
 	}
 }
diff --git a/drivers/gator/gator_events_mali_common.h b/drivers/gator/gator_events_mali_common.h
index 41c2a3c13fae..91d871bc915a 100644
--- a/drivers/gator/gator_events_mali_common.h
+++ b/drivers/gator/gator_events_mali_common.h
@@ -18,10 +18,6 @@
 #include <linux/slab.h>
 #include <asm/io.h>
 
-/* Device codes for each known GPU */
-#define MALI_4xx     (0x0b07)
-#define MALI_T6xx    (0x0056)
-
 /* Ensure that MALI_SUPPORT has been defined to something. */
 #ifndef MALI_SUPPORT
 #error MALI_SUPPORT not defined!
@@ -35,8 +31,12 @@
  * Runtime state information for a counter.
  */
 typedef struct {
-	unsigned long key;	/* 'key' (a unique id set by gatord and returned by gator.ko) */
-	unsigned long enabled;	/* counter enable state */
+	// 'key' (a unique id set by gatord and returned by gator.ko)
+	unsigned long key;
+	// counter enable state
+	unsigned long enabled;
+	// for activity counters, the number of cores; otherwise -1 (wraps to ULONG_MAX because the field is unsigned)
+	unsigned long cores;
 } mali_counter;
 
 /*
@@ -53,18 +53,10 @@ extern int _mali_profiling_set_event(unsigned int, int);
 extern void _mali_profiling_control(unsigned int, unsigned int);
 extern void _mali_profiling_get_counters(unsigned int *, unsigned int *, unsigned int *, unsigned int *);
 
-/**
- * Returns a name which identifies the GPU type (eg Mali-4xx, Mali-T6xx).
- *
- * @return The name as a constant string.
- */
-extern const char *gator_mali_get_mali_name(void);
-
 /**
  * Creates a filesystem entry under /dev/gator relating to the specified event name and key, and
  * associate the key/enable values with this entry point.
  *
- * @param mali_name A name related to the type of GPU, obtained from a call to gator_mali_get_mali_name()
  * @param event_name The name of the event.
  * @param sb Linux super block
  * @param root Directory under which the entry will be created.
diff --git a/drivers/gator/gator_events_mali_t6xx.c b/drivers/gator/gator_events_mali_t6xx.c
index 76f14eee7676..e56ba84aefb8 100644
--- a/drivers/gator/gator_events_mali_t6xx.c
+++ b/drivers/gator/gator_events_mali_t6xx.c
@@ -32,6 +32,8 @@
 #error MALI_SUPPORT set to an invalid device code: expecting MALI_T6xx
 #endif
 
+static const char mali_name[] = "Mali-T6xx";
+
 /* Counters for Mali-T6xx:
  *
  *  - Timeline events
@@ -292,7 +294,6 @@ static int create_files(struct super_block *sb, struct dentry *root)
 	 * Create the filesystem for all events
 	 */
 	int counter_index = 0;
-	const char *mali_name = gator_mali_get_mali_name();
 	mali_profiling_control_type *mali_control;
 
 	for (event = FIRST_TIMELINE_EVENT; event < FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS; event++) {
@@ -317,7 +318,7 @@ static int create_files(struct super_block *sb, struct dentry *root)
 	}
 
 	mali_control = symbol_get(_mali_profiling_control);
-	if (mali_control) {	
+	if (mali_control) {
 		if (gator_mali_create_file_system(mali_name, "Filmstrip_cnt0", sb, root, &counters[FILMSTRIP], &filmstrip_event) != 0) {
 			return -1;
 		}
diff --git a/drivers/gator/gator_events_mali_t6xx_hw.c b/drivers/gator/gator_events_mali_t6xx_hw.c
index dfbc91ffd765..3a072bb6ac06 100644
--- a/drivers/gator/gator_events_mali_t6xx_hw.c
+++ b/drivers/gator/gator_events_mali_t6xx_hw.c
@@ -16,7 +16,10 @@
 #include <asm/io.h>
 
 /* Mali T6xx DDK includes */
-#ifdef MALI_DIR_MIDGARD
+#if defined(MALI_SIMPLE_API)
+/* Header with wrapper functions to kbase structures and functions */
+#include "mali/mali_dd_gator_api.h"
+#elif defined(MALI_DIR_MIDGARD)
 /* New DDK Directory structure with kernel/drivers/gpu/arm/midgard*/
 #include "mali_linux_trace.h"
 #include "mali_kbase.h"
@@ -28,37 +31,49 @@
 #include "kbase/src/linux/mali_kbase_mem_linux.h"
 #endif
 
-#include "gator_events_mali_common.h"
-
 /* If API version is not specified then assume API version 1. */
 #ifndef MALI_DDK_GATOR_API_VERSION
 #define MALI_DDK_GATOR_API_VERSION 1
 #endif
 
-#if (MALI_DDK_GATOR_API_VERSION != 1) && (MALI_DDK_GATOR_API_VERSION != 2)
-#error MALI_DDK_GATOR_API_VERSION is invalid (must be 1 for r1/r2 DDK, or 2 for r3 DDK).
+#if (MALI_DDK_GATOR_API_VERSION != 1) && (MALI_DDK_GATOR_API_VERSION != 2) && (MALI_DDK_GATOR_API_VERSION != 3)
+#error MALI_DDK_GATOR_API_VERSION is invalid (must be 1 for r1/r2 DDK, or 2 for r3 DDK, or 3 for r? DDK).
 #endif
 
+#include "gator_events_mali_common.h"
+
 /*
  * Mali-T6xx
  */
+#if MALI_DDK_GATOR_API_VERSION == 3
+typedef uint32_t kbase_dd_instr_hwcnt_dump_irq_type(struct mali_dd_hwcnt_handles *);
+typedef uint32_t kbase_dd_instr_hwcnt_dump_complete_type(struct mali_dd_hwcnt_handles *, uint32_t *);
+typedef struct mali_dd_hwcnt_handles* mali_dd_hwcnt_init_type(struct mali_dd_hwcnt_info *);
+typedef void mali_dd_hwcnt_clear_type(struct mali_dd_hwcnt_info *, struct mali_dd_hwcnt_handles *);
+
+static kbase_dd_instr_hwcnt_dump_irq_type *kbase_dd_instr_hwcnt_dump_irq_symbol;
+static kbase_dd_instr_hwcnt_dump_complete_type *kbase_dd_instr_hwcnt_dump_complete_symbol;
+static mali_dd_hwcnt_init_type *mali_dd_hwcnt_init_symbol;
+static mali_dd_hwcnt_clear_type *mali_dd_hwcnt_clear_symbol;
+
+#else
 typedef struct kbase_device *kbase_find_device_type(int);
-typedef kbase_context *kbase_create_context_type(kbase_device *);
-typedef void kbase_destroy_context_type(kbase_context *);
+typedef struct kbase_context *kbase_create_context_type(struct kbase_device *);
+typedef void kbase_destroy_context_type(struct kbase_context *);
 
 #if MALI_DDK_GATOR_API_VERSION == 1
-typedef void *kbase_va_alloc_type(kbase_context *, u32);
-typedef void kbase_va_free_type(kbase_context *, void *);
+typedef void *kbase_va_alloc_type(struct kbase_context *, u32);
+typedef void kbase_va_free_type(struct kbase_context *, void *);
 #elif MALI_DDK_GATOR_API_VERSION == 2
-typedef void *kbase_va_alloc_type(kbase_context *, u32, kbase_hwc_dma_mapping * handle);
-typedef void kbase_va_free_type(kbase_context *, kbase_hwc_dma_mapping * handle);
+typedef void *kbase_va_alloc_type(struct kbase_context *, u32, kbase_hwc_dma_mapping * handle);
+typedef void kbase_va_free_type(struct kbase_context *, kbase_hwc_dma_mapping * handle);
 #endif
 
-typedef mali_error kbase_instr_hwcnt_enable_type(kbase_context *, kbase_uk_hwcnt_setup *);
-typedef mali_error kbase_instr_hwcnt_disable_type(kbase_context *);
-typedef mali_error kbase_instr_hwcnt_clear_type(kbase_context *);
-typedef mali_error kbase_instr_hwcnt_dump_irq_type(kbase_context *);
-typedef mali_bool kbase_instr_hwcnt_dump_complete_type(kbase_context *, mali_bool *);
+typedef mali_error kbase_instr_hwcnt_enable_type(struct kbase_context *, struct kbase_uk_hwcnt_setup *);
+typedef mali_error kbase_instr_hwcnt_disable_type(struct kbase_context *);
+typedef mali_error kbase_instr_hwcnt_clear_type(struct kbase_context *);
+typedef mali_error kbase_instr_hwcnt_dump_irq_type(struct kbase_context *);
+typedef mali_bool kbase_instr_hwcnt_dump_complete_type(struct kbase_context *, mali_bool *);
 
 static kbase_find_device_type *kbase_find_device_symbol;
 static kbase_create_context_type *kbase_create_context_symbol;
@@ -70,6 +85,7 @@ static kbase_instr_hwcnt_dump_complete_type *kbase_instr_hwcnt_dump_complete_sym
 static kbase_instr_hwcnt_disable_type *kbase_instr_hwcnt_disable_symbol;
 static kbase_va_free_type *kbase_va_free_symbol;
 static kbase_destroy_context_type *kbase_destroy_context_symbol;
+#endif
 
 static long shader_present_low = 0;
 
@@ -99,6 +115,8 @@ enum {
 	MMU_BLOCK
 };
 
+static const char mali_name[] = "Mali-T6xx";
+
 /* Counters for Mali-T6xx:
  *
  *  - HW counters, 4 blocks
@@ -381,6 +399,14 @@ static const char *const hardware_counter_names[] = {
 #define GET_HW_BLOCK(c) (((c) >> 6) & 0x3)
 #define GET_COUNTER_OFFSET(c) ((c) & 0x3f)
 
+#if MALI_DDK_GATOR_API_VERSION == 3
+/* Opaque handles for kbase_context and kbase_hwc_dma_mapping */
+static struct mali_dd_hwcnt_handles *handles;
+
+/* Information about hardware counters */
+static struct mali_dd_hwcnt_info *in_out_info;
+
+#else
 /* Memory to dump hardware counters into */
 static void *kernel_dump_buffer;
 
@@ -390,14 +416,9 @@ kbase_hwc_dma_mapping kernel_dump_buffer_handle;
 #endif
 
 /* kbase context and device */
-static kbase_context *kbcontext = NULL;
+static struct kbase_context *kbcontext = NULL;
 static struct kbase_device *kbdevice = NULL;
-
-/*
- * The following function has no external prototype in older DDK revisions.  When the DDK
- * is updated then this should be removed.
- */
-struct kbase_device *kbase_find_device(int minor);
+#endif
 
 static volatile bool kbase_device_busy = false;
 static unsigned int num_hardware_counters_enabled;
@@ -412,6 +433,13 @@ static mali_counter counters[NUMBER_OF_HARDWARE_COUNTERS];
  */
 static unsigned long counter_dump[NUMBER_OF_HARDWARE_COUNTERS * 2];
 
+extern mali_counter mali_activity[3];
+static const char* const mali_activity_names[] = {
+	"fragment",
+	"vertex",
+	"opencl",
+};
+
 #define SYMBOL_GET(FUNCTION, ERROR_COUNT) \
 	if(FUNCTION ## _symbol) \
 	{ \
@@ -431,8 +459,8 @@ static unsigned long counter_dump[NUMBER_OF_HARDWARE_COUNTERS * 2];
 #define SYMBOL_CLEANUP(FUNCTION) \
 	if(FUNCTION ## _symbol) \
 	{ \
-        symbol_put(FUNCTION); \
-        FUNCTION ## _symbol = NULL; \
+		symbol_put(FUNCTION); \
+		FUNCTION ## _symbol = NULL; \
 	}
 
 /**
@@ -442,6 +470,12 @@ static unsigned long counter_dump[NUMBER_OF_HARDWARE_COUNTERS * 2];
 static int init_symbols(void)
 {
 	int error_count = 0;
+#if MALI_DDK_GATOR_API_VERSION == 3
+	SYMBOL_GET(kbase_dd_instr_hwcnt_dump_irq, error_count);
+	SYMBOL_GET(kbase_dd_instr_hwcnt_dump_complete, error_count);
+	SYMBOL_GET(mali_dd_hwcnt_init, error_count);
+	SYMBOL_GET(mali_dd_hwcnt_clear, error_count);
+#else
 	SYMBOL_GET(kbase_find_device, error_count);
 	SYMBOL_GET(kbase_create_context, error_count);
 	SYMBOL_GET(kbase_va_alloc, error_count);
@@ -452,6 +486,7 @@ static int init_symbols(void)
 	SYMBOL_GET(kbase_instr_hwcnt_disable, error_count);
 	SYMBOL_GET(kbase_va_free, error_count);
 	SYMBOL_GET(kbase_destroy_context, error_count);
+#endif
 
 	return error_count;
 }
@@ -461,6 +496,12 @@ static int init_symbols(void)
  */
 static void clean_symbols(void)
 {
+#if MALI_DDK_GATOR_API_VERSION == 3
+	SYMBOL_CLEANUP(kbase_dd_instr_hwcnt_dump_irq);
+	SYMBOL_CLEANUP(kbase_dd_instr_hwcnt_dump_complete);
+	SYMBOL_CLEANUP(mali_dd_hwcnt_init);
+	SYMBOL_CLEANUP(mali_dd_hwcnt_clear);
+#else
 	SYMBOL_CLEANUP(kbase_find_device);
 	SYMBOL_CLEANUP(kbase_create_context);
 	SYMBOL_CLEANUP(kbase_va_alloc);
@@ -471,6 +512,7 @@ static void clean_symbols(void)
 	SYMBOL_CLEANUP(kbase_instr_hwcnt_disable);
 	SYMBOL_CLEANUP(kbase_va_free);
 	SYMBOL_CLEANUP(kbase_destroy_context);
+#endif
 }
 
 /**
@@ -502,11 +544,13 @@ static int is_read_scheduled(const struct timespec *current_time, u32 *prev_time
 
 static int start(void)
 {
-	kbase_uk_hwcnt_setup setup;
-	mali_error err;
-	int cnt;
-	u16 bitmask[] = { 0, 0, 0, 0 };
+#if MALI_DDK_GATOR_API_VERSION < 3
+	struct kbase_uk_hwcnt_setup setup;
 	unsigned long long shadersPresent = 0;
+	u16 bitmask[] = { 0, 0, 0, 0 };
+	mali_error err;
+#endif
+	int cnt;
 
 	/* Setup HW counters */
 	num_hardware_counters_enabled = 0;
@@ -515,18 +559,64 @@ static int start(void)
 		pr_debug("Unexpected number of hardware counters defined: expecting 256, got %d\n", NUMBER_OF_HARDWARE_COUNTERS);
 	}
 
+#if MALI_DDK_GATOR_API_VERSION == 3
+	/*
+	 * Allocate the mali_dd_hwcnt_info structure zero-filled so that every
+	 * bitmask entry starts out cleared; take the error path if that fails.
+	 */
+	in_out_info = kzalloc(sizeof(*in_out_info), GFP_KERNEL);
+	if (in_out_info == NULL) {
+		goto out;
+	}
+#endif
 	/* Calculate enable bitmasks based on counters_enabled array */
 	for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) {
 		const mali_counter *counter = &counters[cnt];
 		if (counter->enabled) {
 			int block = GET_HW_BLOCK(cnt);
 			int enable_bit = GET_COUNTER_OFFSET(cnt) / 4;
+#if MALI_DDK_GATOR_API_VERSION == 3
+			in_out_info->bitmask[block] |= (1 << enable_bit);
+#else
 			bitmask[block] |= (1 << enable_bit);
+#endif
 			pr_debug("gator: Mali-T6xx: hardware counter %s selected [%d]\n", hardware_counter_names[cnt], cnt);
 			num_hardware_counters_enabled++;
 		}
 	}
 
+#if MALI_DDK_GATOR_API_VERSION == 3
+	/* Create a kbase context for HW counters */
+	if (num_hardware_counters_enabled > 0) {
+		if (init_symbols() > 0) {
+			clean_symbols();
+			kfree(in_out_info);
+			in_out_info = NULL;
+			/* No Mali driver code entrypoints found - not a fault. */
+			return 0;
+		}
+
+		handles = mali_dd_hwcnt_init_symbol(in_out_info);
+
+		if(handles == NULL) {
+			/* stop() only frees in_out_info when handles is set */
+			kfree(in_out_info);
+			in_out_info = NULL;
+			goto out;
+		}
+
+		/* See if we can get the number of shader cores */
+		shader_present_low = (unsigned long)in_out_info->shader_present_bitmap;
+
+		kbase_device_busy = false;
+	} else {
+		/* No counters requested: stop() will not free this, so do it here */
+		kfree(in_out_info);
+		in_out_info = NULL;
+	}
+
+	return 0;
+#else
 	/* Create a kbase context for HW counters */
 	if (num_hardware_counters_enabled > 0) {
 		if (init_symbols() > 0) {
@@ -606,6 +684,7 @@ static int start(void)
 
 destroy_context:
 	kbase_destroy_context_symbol(kbcontext);
+#endif
 
 out:
 	clean_symbols();
@@ -615,7 +694,11 @@ static int start(void)
 static void stop(void)
 {
 	unsigned int cnt;
-	kbase_context *temp_kbcontext;
+#if MALI_DDK_GATOR_API_VERSION == 3
+	struct mali_dd_hwcnt_handles *temp_hand;
+#else
+	struct kbase_context *temp_kbcontext;
+#endif
 
 	pr_debug("gator: Mali-T6xx: stop\n");
 
@@ -625,6 +708,20 @@ static void stop(void)
 	}
 
 	/* Destroy the context for HW counters */
+#if MALI_DDK_GATOR_API_VERSION == 3
+	if (num_hardware_counters_enabled > 0 && handles != NULL) {
+		/*
+		 * Set the global variable to NULL before destroying it, because
+		 * other function will check this before using it.
+		 */
+		temp_hand = handles;
+		handles = NULL;
+
+		mali_dd_hwcnt_clear_symbol(in_out_info, temp_hand);
+
+		kfree(in_out_info);
+
+#else
 	if (num_hardware_counters_enabled > 0 && kbcontext != NULL) {
 		/*
 		 * Set the global variable to NULL before destroying it, because
@@ -642,6 +739,7 @@ static void stop(void)
 #endif
 
 		kbase_destroy_context_symbol(temp_kbcontext);
+#endif
 
 		pr_debug("gator: Mali-T6xx: hardware counters stopped\n");
 
@@ -654,7 +752,7 @@ static int read(int **buffer)
 	int cnt;
 	int len = 0;
 	u32 value = 0;
-	mali_bool success;
+	uint32_t success;
 
 	struct timespec current_time;
 	static u32 prev_time_s = 0;
@@ -686,12 +784,21 @@ static int read(int **buffer)
 			0x500	/* VITHAR_MEMORY_SYSTEM,   Block 3 */
 		};
 
+#if MALI_DDK_GATOR_API_VERSION == 3
+		if (!handles) {
+			return -1;
+		}
+
+		/* Mali symbols can be called safely since a kbcontext is valid */
+		if (kbase_dd_instr_hwcnt_dump_complete_symbol(handles, &success) == MALI_TRUE) {
+#else
 		if (!kbcontext) {
 			return -1;
 		}
 
 		/* Mali symbols can be called safely since a kbcontext is valid */
 		if (kbase_instr_hwcnt_dump_complete_symbol(kbcontext, &success) == MALI_TRUE) {
+#endif
 			kbase_device_busy = false;
 
 			if (success == MALI_TRUE) {
@@ -702,7 +809,11 @@ static int read(int **buffer)
 						const int block = GET_HW_BLOCK(cnt);
 						const int counter_offset = GET_COUNTER_OFFSET(cnt);
 
+#if MALI_DDK_GATOR_API_VERSION == 3
+						const char* block_base_address = (char*)in_out_info->kernel_dump_buffer + vithar_blocks[block];
+#else
 						const char* block_base_address = (char*)kernel_dump_buffer + vithar_blocks[block];
+#endif
 
 						/* If counter belongs to shader block need to take into account all cores */
 						if (block == SHADER_BLOCK) {
@@ -741,7 +852,11 @@ static int read(int **buffer)
 
 		if (!kbase_device_busy) {
 			kbase_device_busy = true;
+#if MALI_DDK_GATOR_API_VERSION == 3
+			kbase_dd_instr_hwcnt_dump_irq_symbol(handles);
+#else
 			kbase_instr_hwcnt_dump_irq_symbol(kbcontext);
+#endif
 		}
 	}
 
@@ -760,7 +875,12 @@ static int create_files(struct super_block *sb, struct dentry *root)
 	 * Create the filesystem for all events
 	 */
 	int counter_index = 0;
-	const char *mali_name = gator_mali_get_mali_name();
+
+	for (event = 0; event < ARRAY_SIZE(mali_activity); event++) {
+		if (gator_mali_create_file_system(mali_name, mali_activity_names[event], sb, root, &mali_activity[event], NULL) != 0) {
+			return -1;
+		}
+	}
 
 	for (event = 0; event < NUMBER_OF_HARDWARE_COUNTERS; event++) {
 		if (gator_mali_create_file_system(mali_name, hardware_counter_names[counter_index], sb, root, &counters[event], NULL) != 0)
@@ -786,6 +906,7 @@ int gator_events_mali_t6xx_hw_init(void)
 	test_all_is_read_scheduled();
 #endif
 
+	gator_mali_initialise_counters(mali_activity, ARRAY_SIZE(mali_activity));
 	gator_mali_initialise_counters(counters, NUMBER_OF_HARDWARE_COUNTERS);
 
 	return gator_events_install(&gator_events_mali_t6xx_interface);
diff --git a/drivers/gator/gator_events_mmapped.c b/drivers/gator/gator_events_mmapped.c
index 3b248ec24e6e..5bc01c42c3a2 100644
--- a/drivers/gator/gator_events_mmapped.c
+++ b/drivers/gator/gator_events_mmapped.c
@@ -8,21 +8,25 @@
  * published by the Free Software Foundation.
  *
  * Similar entries to those below must be present in the events.xml file.
- * To add them to the events.xml, create an events-mmap.xml with the 
+ * To add them to the events.xml, create an events-mmap.xml with the
  * following contents and rebuild gatord:
  *
- * <counter_set name="mmapped_cnt" count="3"/>
- * <category name="mmapped" counter_set="mmapped_cnt" per_cpu="no">
- *   <event event="0x0" title="Simulated1" name="Sine" display="maximum" average_selection="yes" description="Sort-of-sine"/>
- *   <event event="0x1" title="Simulated2" name="Triangle" display="maximum" average_selection="yes" description="Triangular wave"/>
- *   <event event="0x2" title="Simulated3" name="PWM" display="maximum" average_selection="yes" description="PWM Signal"/>
+ * <category name="mmapped">
+ *   <event counter="mmapped_cnt0" title="Simulated1" name="Sine" display="maximum" class="absolute" description="Sort-of-sine"/>
+ *   <event counter="mmapped_cnt1" title="Simulated2" name="Triangle" display="maximum" class="absolute" description="Triangular wave"/>
+ *   <event counter="mmapped_cnt2" title="Simulated3" name="PWM" display="maximum" class="absolute" description="PWM Signal"/>
  * </category>
  *
- * When adding custom events, be sure do the following
+ * When adding custom events, be sure to do the following:
  * - add any needed .c files to the gator driver Makefile
  * - call gator_events_install in the events init function
  * - add the init function to GATOR_EVENTS_LIST in gator_main.c
  * - add a new events-*.xml file to the gator daemon and rebuild
+ *
+ * Troubleshooting:
+ * - verify the new events are part of events.xml, which is created when building the daemon
+ * - verify the new events exist at /dev/gator/events/ once gatord is launched
+ * - verify the counter name in the XML matches the name at /dev/gator/events
  */
 
 #include <linux/init.h>
@@ -37,7 +41,6 @@ static int mmapped_global_enabled;
 
 static struct {
 	unsigned long enabled;
-	unsigned long event;
 	unsigned long key;
 } mmapped_counters[MMAPPED_COUNTERS_NUM];
 
@@ -47,7 +50,7 @@ static s64 prev_time;
 
 /* Adds mmapped_cntX directories and enabled, event, and key files to /dev/gator/events */
 static int gator_events_mmapped_create_files(struct super_block *sb,
-					    struct dentry *root)
+					     struct dentry *root)
 {
 	int i;
 
@@ -61,8 +64,6 @@ static int gator_events_mmapped_create_files(struct super_block *sb,
 			return -1;
 		gatorfs_create_ulong(sb, dir, "enabled",
 				     &mmapped_counters[i].enabled);
-		gatorfs_create_ulong(sb, dir, "event",
-				     &mmapped_counters[i].event);
 		gatorfs_create_ro_ulong(sb, dir, "key",
 					&mmapped_counters[i].key);
 	}
@@ -177,8 +178,7 @@ static int gator_events_mmapped_read(int **buffer)
 		if (mmapped_counters[i].enabled) {
 			mmapped_buffer[len++] = mmapped_counters[i].key;
 			mmapped_buffer[len++] =
-			    mmapped_simulate(mmapped_counters[i].event,
-					    delta_in_us);
+			    mmapped_simulate(i, delta_in_us);
 		}
 	}
 
diff --git a/drivers/gator/gator_events_perf_pmu.c b/drivers/gator/gator_events_perf_pmu.c
index 8b2d67a058b3..06bbad5b10c3 100644
--- a/drivers/gator/gator_events_perf_pmu.c
+++ b/drivers/gator/gator_events_perf_pmu.c
@@ -470,10 +470,10 @@ static void gator_events_perf_pmu_cci_init(const int type)
 
 	switch (probe_cci_revision()) {
 	case 0:
-		cci_name = "cci-400";
+		cci_name = "CCI_400";
 		break;
 	case 1:
-		cci_name = "cci-400-r1";
+		cci_name = "CCI_400-r1";
 		break;
 	default:
 		pr_debug("gator: unrecognized cci-400 revision\n");
@@ -549,7 +549,7 @@ int gator_events_perf_pmu_init(void)
 		}
 
 		if (pe->pmu != NULL && type == pe->pmu->type) {
-			if (strcmp("CCI", pe->pmu->name) == 0 || strcmp("CCI_400", pe->pmu->name) == 0) {
+			if (strcmp("CCI", pe->pmu->name) == 0 || strcmp("CCI_400", pe->pmu->name) == 0 || strcmp("CCI_400-r1", pe->pmu->name) == 0) {
 				gator_events_perf_pmu_cci_init(type);
 			} else if ((gator_cpu = gator_find_cpu_by_pmu_name(pe->pmu->name)) != NULL) {
 				found_cpu = true;
diff --git a/drivers/gator/gator_events_scorpion.c b/drivers/gator/gator_events_scorpion.c
index 8ca251af0e26..2e5be8d50e9d 100644
--- a/drivers/gator/gator_events_scorpion.c
+++ b/drivers/gator/gator_events_scorpion.c
@@ -26,9 +26,9 @@ static int pmnc_counters;
 // ccnt reg
 #define CCNT_REG	(1 << 31)
 
-#define CCNT 		0
+#define CCNT		0
 #define CNT0		1
-#define CNTMAX 		(4+1)
+#define CNTMAX		(4+1)
 
 static unsigned long pmnc_enabled[CNTMAX];
 static unsigned long pmnc_event[CNTMAX];
diff --git a/drivers/gator/gator_events_threads.c b/drivers/gator/gator_events_threads.c
new file mode 100644
index 000000000000..9de85862fe6c
--- /dev/null
+++ b/drivers/gator/gator_events_threads.c
@@ -0,0 +1,129 @@
+/*
+ * Sample activity provider
+ *
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * See gator_events_mmapped.c for additional directions and
+ * troubleshooting.
+ *
+ * For this sample to work these entries must be present in the
+ * events.xml file. So create an events-threads.xml in the gator
+ * daemon source directory with the following contents and rebuild
+ * gatord:
+ *
+ * <category name="threads">
+ *   <event counter="Linux_threads" title="Linux" name="Threads" class="activity" activity1="odd" activity_color1="0x000000ff" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" description="Linux syscall activity"/>
+ * </category>
+ */
+
+#include <trace/events/sched.h>
+
+#include "gator.h"
+
+// Backing value of the "enabled" file; reset to 0 by gator_events_threads_stop()
+static ulong threads_enabled;
+// Key obtained from gator_events_get_key(), sent with every activity record
+static ulong threads_key;
+// Backing value of the read-only "cores" file; set to nr_cpu_ids at init
+static ulong threads_cores;
+
+// sched_switch probe: classifies the incoming task by pid and emits one
+// activity record for the current physical cpu (pid 0 and even pids are
+// reported as activity 0, odd pids as activity 1).
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next))
+#else
+GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_struct *next))
+#endif
+{
+	int cpu = get_physical_cpu();
+	int pid = next->pid;
+	if (pid == 0) {
+		// idle
+		gator_marshal_activity_switch(cpu, threads_key, 0, 0);
+	} else if (pid & 1) {
+		// odd
+		gator_marshal_activity_switch(cpu, threads_key, 1, pid);
+	} else {
+		// even
+		//gator_marshal_activity_switch(cpu, threads_key, 2, current->pid);
+		// Multiple activities are not yet supported so emit idle
+		gator_marshal_activity_switch(cpu, threads_key, 0, 0);
+	}
+}
+
+// Adds Linux_threads directory and enabled, key, and cores files to /dev/gator/events
+// Returns 0 on success or -1 if the directory could not be created.
+static int gator_events_threads_create_files(struct super_block *sb, struct dentry *root)
+{
+	struct dentry *dir;
+
+	dir = gatorfs_mkdir(sb, root, "Linux_threads");
+	if (!dir) {
+		return -1;
+	}
+	gatorfs_create_ulong(sb, dir, "enabled", &threads_enabled);
+	gatorfs_create_ro_ulong(sb, dir, "key", &threads_key);
+	// Number of cores associated with this activity
+	gatorfs_create_ro_ulong(sb, dir, "cores", &threads_cores);
+
+	return 0;
+}
+
+// When enabled, emits an idle record (activity 0, pid 0) for every cpu id and
+// registers the sched_switch probe. Returns 0 on success or when disabled,
+// -1 if the probe could not be registered.
+static int gator_events_threads_start(void)
+{
+	int cpu;
+
+	if (threads_enabled) {
+		preempt_disable();
+		for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
+			gator_marshal_activity_switch(cpu, threads_key, 0, 0);
+		}
+		preempt_enable();
+
+		if (GATOR_REGISTER_TRACE(sched_switch)) {
+			goto fail_sched_switch;
+		}
+	}
+
+	return 0;
+
+fail_sched_switch:
+	return -1;
+}
+
+// Unregisters the sched_switch probe if the provider was enabled, then
+// clears threads_enabled.
+static void gator_events_threads_stop(void)
+{
+	if (threads_enabled) {
+		GATOR_UNREGISTER_TRACE(sched_switch);
+	}
+
+	threads_enabled = 0;
+}
+
+// Callbacks passed to gator_events_install() by gator_events_threads_init()
+static struct gator_interface gator_events_threads_interface = {
+	.create_files = gator_events_threads_create_files,
+	.start = gator_events_threads_start,
+	.stop = gator_events_threads_stop,
+};
+
+// Must not be static. Ensure that this init function is added to GATOR_EVENTS_LIST in gator_main.c
+// Resets the state above, obtains a key and returns gator_events_install()'s result.
+int __init gator_events_threads_init(void)
+{
+	threads_enabled = 0;
+	threads_key = gator_events_get_key();
+	threads_cores = nr_cpu_ids;
+
+	return gator_events_install(&gator_events_threads_interface);
+}
diff --git a/drivers/gator/gator_iks.c b/drivers/gator/gator_iks.c
index e90dfcce9381..9180b874457a 100644
--- a/drivers/gator/gator_iks.c
+++ b/drivers/gator/gator_iks.c
@@ -150,7 +150,7 @@ static void gator_send_iks_core_names(void)
 	preempt_disable();
 	for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
 		if (mpidr_cpus[cpu] != NULL) {
-			gator_send_core_name(cpu, mpidr_cpus[cpu]->cpuid, mpidr_cpus[cpu]);
+			gator_send_core_name(cpu, mpidr_cpus[cpu]->cpuid);
 		}
 	}
 	preempt_enable();
diff --git a/drivers/gator/gator_main.c b/drivers/gator/gator_main.c
index e67f7c5cc61d..0d867f22364f 100644
--- a/drivers/gator/gator_main.c
+++ b/drivers/gator/gator_main.c
@@ -8,7 +8,7 @@
  */
 
 // This version must match the gator daemon version
-#define PROTOCOL_VERSION 18
+#define PROTOCOL_VERSION 19
 static unsigned long gator_protocol_version = PROTOCOL_VERSION;
 
 #include <linux/slab.h>
@@ -71,8 +71,8 @@ static unsigned long gator_protocol_version = PROTOCOL_VERSION;
 #define BLOCK_COUNTER_BUFFER_SIZE (128*1024)
 #define ANNOTATE_BUFFER_SIZE      (128*1024)	// annotate counters have the core as part of the data and the core value in the frame header may be discarded
 #define SCHED_TRACE_BUFFER_SIZE   (128*1024)
-#define GPU_TRACE_BUFFER_SIZE     (64*1024)	// gpu trace counters have the core as part of the data and the core value in the frame header may be discarded
 #define IDLE_BUFFER_SIZE          (32*1024)	// idle counters have the core as part of the data and the core value in the frame header may be discarded
+#define ACTIVITY_BUFFER_SIZE      (128*1024)
 
 #define NO_COOKIE      0U
 #define UNRESOLVED_COOKIE ~0U
@@ -84,8 +84,8 @@ static unsigned long gator_protocol_version = PROTOCOL_VERSION;
 #define FRAME_BLOCK_COUNTER 5
 #define FRAME_ANNOTATE      6
 #define FRAME_SCHED_TRACE   7
-#define FRAME_GPU_TRACE     8
 #define FRAME_IDLE          9
+#define FRAME_ACTIVITY     13
 
 #define MESSAGE_END_BACKTRACE 1
 
@@ -94,14 +94,9 @@ static unsigned long gator_protocol_version = PROTOCOL_VERSION;
 #define MESSAGE_THREAD_NAME 2
 #define MESSAGE_LINK        4
 
-// GPU Trace Frame Messages
-#define MESSAGE_GPU_START 1
-#define MESSAGE_GPU_STOP  2
-
 // Scheduler Trace Frame Messages
 #define MESSAGE_SCHED_SWITCH 1
 #define MESSAGE_SCHED_EXIT   2
-#define MESSAGE_SCHED_START  3
 
 // Idle Frame Messages
 #define MESSAGE_IDLE_ENTER 1
@@ -111,6 +106,10 @@ static unsigned long gator_protocol_version = PROTOCOL_VERSION;
 #define MESSAGE_SUMMARY   1
 #define MESSAGE_CORE_NAME 3
 
+// Activity Frame Messages
+#define MESSAGE_SWITCH 2
+#define MESSAGE_EXIT   3
+
 #define MAXSIZE_PACK32     5
 #define MAXSIZE_PACK64    10
 
@@ -132,8 +131,8 @@ enum {
 	BLOCK_COUNTER_BUF,
 	ANNOTATE_BUF,
 	SCHED_TRACE_BUF,
-	GPU_TRACE_BUF,
 	IDLE_BUF,
+	ACTIVITY_BUF,
 	NUM_GATOR_BUFS
 };
 
@@ -175,6 +174,7 @@ static DEFINE_PER_CPU(u64, last_timestamp);
 
 static bool printed_monotonic_warning;
 
+static u32 gator_cpuids[NR_CPUS];
 static bool sent_core_name[NR_CPUS];
 
 static DEFINE_PER_CPU(bool, in_scheduler_context);
@@ -226,6 +226,7 @@ static DEFINE_PER_CPU(u64, gator_buffer_commit_time);
 	GATOR_EVENT(gator_events_perf_pmu_init) \
 	GATOR_EVENT(gator_events_sched_init) \
 	GATOR_EVENT(gator_events_scorpion_init) \
+	GATOR_EVENT(gator_events_threads_init) \
 
 #define GATOR_EVENT(EVENT_INIT) __weak int EVENT_INIT(void);
 GATOR_EVENTS_LIST
@@ -570,25 +571,41 @@ static void gator_timer_stop(void)
 	}
 }
 
-#if defined(__arm__) || defined(__aarch64__)
-static void gator_send_core_name(int cpu, const u32 cpuid, const struct gator_cpu *const gator_cpu)
+// Sends the core name for 'cpu' if none has been sent yet or cpuid differs from
+// the saved gator_cpuids[cpu]. Outside __arm__/__aarch64__ builds the body is compiled out.
+static void gator_send_core_name(const int cpu, const u32 cpuid)
 {
-	const char *core_name = NULL;
-	char core_name_buf[32];
+#if defined(__arm__) || defined(__aarch64__)
+	if (!sent_core_name[cpu] || (cpuid != gator_cpuids[cpu])) {
+		const struct gator_cpu *const gator_cpu = gator_find_cpu_by_cpuid(cpuid);
+		const char *core_name = NULL;
+		char core_name_buf[32];
 
-	if (!sent_core_name[cpu]) {
+		// Save off this cpuid
+		gator_cpuids[cpu] = cpuid;
 		if (gator_cpu != NULL) {
 			core_name = gator_cpu->core_name;
 		} else {
-			snprintf(core_name_buf, sizeof(core_name_buf), "Unknown (0x%.3x)", cpuid);
+			// All-ones is the filler gator_module_init() writes into gator_cpuids via memset()
+			if (cpuid == -1) {
+				snprintf(core_name_buf, sizeof(core_name_buf), "Unknown");
+			} else {
+				snprintf(core_name_buf, sizeof(core_name_buf), "Unknown (0x%.3x)", cpuid);
+			}
 			core_name = core_name_buf;
 		}
 
 		marshal_core_name(cpu, cpuid, core_name);
 		sent_core_name[cpu] = true;
 	}
-}
 #endif
+}
+
+// Stores the cpuid of the core this runs on; invoked on every cpu through on_each_cpu() in gator_module_init()
+static void gator_read_cpuid(void * arg)
+{
+	gator_cpuids[get_physical_cpu()] = gator_cpuid();
+}
 
 // This function runs in interrupt context and on the appropriate core
 static void gator_timer_online(void *migrate)
@@ -598,6 +611,9 @@ static void gator_timer_online(void *migrate)
 	int *buffer;
 	u64 time;
 
+	// Send what is currently running on this core
+	marshal_sched_trace_switch(current->pid, 0);
+
 	gator_trace_power_online();
 
 	// online any events and output counters
@@ -617,12 +633,7 @@ static void gator_timer_online(void *migrate)
 		gator_hrtimer_online();
 	}
 
-#if defined(__arm__) || defined(__aarch64__)
-	if (!sent_core_name[cpu]) {
-		const u32 cpuid = gator_cpuid();
-		gator_send_core_name(cpu, cpuid, gator_find_cpu_by_cpuid(cpuid));
-	}
-#endif
+	gator_send_core_name(cpu, gator_cpuid());
 }
 
 // This function runs in interrupt context and may be running on a core other than core 'cpu'
@@ -658,6 +669,13 @@ static int gator_timer_start(unsigned long sample_rate)
 	if (gator_hrtimer_init(sample_rate, gator_timer_interrupt) == -1)
 		return -1;
 
+	// Send off the previously saved cpuids
+	for_each_present_cpu(cpu) {
+		preempt_disable();
+		gator_send_core_name(cpu, gator_cpuids[cpu]);
+		preempt_enable();
+	}
+
 	gator_send_iks_core_names();
 	for_each_online_cpu(cpu) {
 		gator_timer_online_dispatch(lcpu_to_pcpu(cpu), false);
@@ -1009,12 +1027,12 @@ static int gator_op_setup(void)
 	gator_buffer_size[SCHED_TRACE_BUF] = SCHED_TRACE_BUFFER_SIZE;
 	gator_buffer_mask[SCHED_TRACE_BUF] = SCHED_TRACE_BUFFER_SIZE - 1;
 
-	gator_buffer_size[GPU_TRACE_BUF] = GPU_TRACE_BUFFER_SIZE;
-	gator_buffer_mask[GPU_TRACE_BUF] = GPU_TRACE_BUFFER_SIZE - 1;
-
 	gator_buffer_size[IDLE_BUF] = IDLE_BUFFER_SIZE;
 	gator_buffer_mask[IDLE_BUF] = IDLE_BUFFER_SIZE - 1;
 
+	gator_buffer_size[ACTIVITY_BUF] = ACTIVITY_BUFFER_SIZE;
+	gator_buffer_mask[ACTIVITY_BUF] = ACTIVITY_BUFFER_SIZE - 1;
+
 	// Initialize percpu per buffer variables
 	for (i = 0; i < NUM_GATOR_BUFS; i++) {
 		// Verify buffers are a power of 2
@@ -1349,8 +1367,68 @@ static void gator_op_create_files(struct super_block *sb, struct dentry *root)
 /******************************************************************************
  * Module
  ******************************************************************************/
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)
+
+// Tracepoints looked up by name at module init. Each entry is expanded through
+// whatever GATOR_HANDLE_TRACEPOINT is defined as at the point of use.
+#define GATOR_TRACEPOINTS \
+	GATOR_HANDLE_TRACEPOINT(block_rq_complete); \
+	GATOR_HANDLE_TRACEPOINT(cpu_frequency); \
+	GATOR_HANDLE_TRACEPOINT(cpu_idle); \
+	GATOR_HANDLE_TRACEPOINT(cpu_migrate_begin); \
+	GATOR_HANDLE_TRACEPOINT(cpu_migrate_current); \
+	GATOR_HANDLE_TRACEPOINT(cpu_migrate_finish); \
+	GATOR_HANDLE_TRACEPOINT(irq_handler_exit); \
+	GATOR_HANDLE_TRACEPOINT(mali_hw_counter); \
+	GATOR_HANDLE_TRACEPOINT(mali_job_slots_event); \
+	GATOR_HANDLE_TRACEPOINT(mali_mmu_as_in_use); \
+	GATOR_HANDLE_TRACEPOINT(mali_mmu_as_released); \
+	GATOR_HANDLE_TRACEPOINT(mali_page_fault_insert_pages); \
+	GATOR_HANDLE_TRACEPOINT(mali_pm_status); \
+	GATOR_HANDLE_TRACEPOINT(mali_sw_counter); \
+	GATOR_HANDLE_TRACEPOINT(mali_sw_counters); \
+	GATOR_HANDLE_TRACEPOINT(mali_timeline_event); \
+	GATOR_HANDLE_TRACEPOINT(mali_total_alloc_pages_change); \
+	GATOR_HANDLE_TRACEPOINT(mm_page_alloc); \
+	GATOR_HANDLE_TRACEPOINT(mm_page_free); \
+	GATOR_HANDLE_TRACEPOINT(mm_page_free_batched); \
+	GATOR_HANDLE_TRACEPOINT(sched_process_exec); \
+	GATOR_HANDLE_TRACEPOINT(sched_process_fork); \
+	GATOR_HANDLE_TRACEPOINT(sched_process_free); \
+	GATOR_HANDLE_TRACEPOINT(sched_switch); \
+	GATOR_HANDLE_TRACEPOINT(softirq_exit); \
+
+// First expansion: declare one struct tracepoint pointer per listed name
+#define GATOR_HANDLE_TRACEPOINT(probe_name) \
+	struct tracepoint *gator_tracepoint_##probe_name
+GATOR_TRACEPOINTS;
+#undef GATOR_HANDLE_TRACEPOINT
+
+// Callback for for_each_kernel_tracepoint(): if tp's name is in the list, store
+// tp in the matching gator_tracepoint_<name> pointer; otherwise do nothing.
+static void gator_fct(struct tracepoint *tp, void *priv)
+{
+#define GATOR_HANDLE_TRACEPOINT(probe_name) \
+	if (strcmp(tp->name, #probe_name) == 0) { \
+		gator_tracepoint_##probe_name = tp; \
+		return; \
+	}
+GATOR_TRACEPOINTS;
+#undef GATOR_HANDLE_TRACEPOINT
+}
+
+#else
+
+// Kernels before 3.15: the lookup in gator_module_init() expands to nothing
+#define for_each_kernel_tracepoint(fct, priv)
+
+#endif
+
 static int __init gator_module_init(void)
 {
+	for_each_kernel_tracepoint(gator_fct, NULL);
+
 	if (gatorfs_register()) {
 		return -1;
 	}
@@ -1362,6 +1434,10 @@ static int __init gator_module_init(void)
 
 	setup_timer(&gator_buffer_wake_up_timer, gator_buffer_wake_up, 0);
 
+	// Initialize the list of cpuids
+	memset(gator_cpuids, -1, sizeof(gator_cpuids));
+	on_each_cpu(gator_read_cpuid, NULL, 1);
+
 	return 0;
 }
 
diff --git a/drivers/gator/gator_marshaling.c b/drivers/gator/gator_marshaling.c
index fd413ad1331c..97b4ae6f9d4d 100644
--- a/drivers/gator/gator_marshaling.c
+++ b/drivers/gator/gator_marshaling.c
@@ -231,75 +231,31 @@ static void marshal_event_single(int core, int key, int value)
 	// Check and commit; commit is set to occur once buffer is 3/4 full
 	buffer_check(cpu, COUNTER_BUF, time);
 }
+
+// Writes one (timestamp, core, key, value) record to COUNTER_BUF of the
+// current physical cpu, with the value packed as a 64-bit integer.
+static void marshal_event_single64(int core, int key, long long value)
+{
+	unsigned long flags, cpu;
+	u64 time;
+
+	local_irq_save(flags);
+	cpu = get_physical_cpu();
+	time = gator_get_time();
+	// Two packed 64-bit fields (time, value) and two packed 32-bit fields (core, key)
+	if (buffer_check_space(cpu, COUNTER_BUF, 2 * MAXSIZE_PACK64 + 2 * MAXSIZE_PACK32)) {
+		gator_buffer_write_packed_int64(cpu, COUNTER_BUF, time);
+		gator_buffer_write_packed_int(cpu, COUNTER_BUF, core);
+		gator_buffer_write_packed_int(cpu, COUNTER_BUF, key);
+		gator_buffer_write_packed_int64(cpu, COUNTER_BUF, value);
+	}
+	local_irq_restore(flags);
+	// Check and commit; commit is set to occur once buffer is 3/4 full
+	buffer_check(cpu, COUNTER_BUF, time);
+}
 #endif
 
-static void marshal_sched_gpu_start(int unit, int core, int tgid, int pid)
-{
-	unsigned long cpu = get_physical_cpu(), flags;
-	u64 time;
-
-	if (!per_cpu(gator_buffer, cpu)[GPU_TRACE_BUF])
-		return;
-
-	local_irq_save(flags);
-	time = gator_get_time();
-	if (buffer_check_space(cpu, GPU_TRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) {
-		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, MESSAGE_GPU_START);
-		gator_buffer_write_packed_int64(cpu, GPU_TRACE_BUF, time);
-		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, unit);
-		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, core);
-		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, tgid);
-		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, pid);
-	}
-	local_irq_restore(flags);
-	// Check and commit; commit is set to occur once buffer is 3/4 full
-	buffer_check(cpu, GPU_TRACE_BUF, time);
-}
-
-static void marshal_sched_gpu_stop(int unit, int core)
-{
-	unsigned long cpu = get_physical_cpu(), flags;
-	u64 time;
-
-	if (!per_cpu(gator_buffer, cpu)[GPU_TRACE_BUF])
-		return;
-
-	local_irq_save(flags);
-	time = gator_get_time();
-	if (buffer_check_space(cpu, GPU_TRACE_BUF, MAXSIZE_PACK64 + 3 * MAXSIZE_PACK32)) {
-		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, MESSAGE_GPU_STOP);
-		gator_buffer_write_packed_int64(cpu, GPU_TRACE_BUF, time);
-		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, unit);
-		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, core);
-	}
-	local_irq_restore(flags);
-	// Check and commit; commit is set to occur once buffer is 3/4 full
-	buffer_check(cpu, GPU_TRACE_BUF, time);
-}
-
-static void marshal_sched_trace_start(int tgid, int pid, int cookie)
-{
-	unsigned long cpu = get_physical_cpu(), flags;
-	u64 time;
-
-	if (!per_cpu(gator_buffer, cpu)[SCHED_TRACE_BUF])
-		return;
-
-	local_irq_save(flags);
-	time = gator_get_time();
-	if (buffer_check_space(cpu, SCHED_TRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) {
-		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, MESSAGE_SCHED_START);
-		gator_buffer_write_packed_int64(cpu, SCHED_TRACE_BUF, time);
-		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, tgid);
-		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, pid);
-		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, cookie);
-	}
-	local_irq_restore(flags);
-	// Check and commit; commit is set to occur once buffer is 3/4 full
-	buffer_check(cpu, SCHED_TRACE_BUF, time);
-}
-
-static void marshal_sched_trace_switch(int tgid, int pid, int cookie, int state)
+static void marshal_sched_trace_switch(int pid, int state)
 {
 	unsigned long cpu = get_physical_cpu(), flags;
 	u64 time;
@@ -312,9 +265,7 @@ static void marshal_sched_trace_switch(int tgid, int pid, int cookie, int state)
 	if (buffer_check_space(cpu, SCHED_TRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) {
 		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, MESSAGE_SCHED_SWITCH);
 		gator_buffer_write_packed_int64(cpu, SCHED_TRACE_BUF, time);
-		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, tgid);
 		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, pid);
-		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, cookie);
 		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, state);
 	}
 	local_irq_restore(flags);
@@ -379,3 +330,39 @@ static void marshal_core_name(const int core, const int cpuid, const char *name)
 	gator_commit_buffer(cpu, SUMMARY_BUF, gator_get_time());
 }
 #endif
+
+// Writes one MESSAGE_SWITCH record (timestamp, core, key, activity, pid, state)
+// to ACTIVITY_BUF of the current physical cpu. Returns early if that buffer
+// does not exist; the record is skipped when buffer_check_space() reports no room.
+static void marshal_activity_switch(int core, int key, int activity, int pid, int state)
+{
+	unsigned long cpu = get_physical_cpu(), flags;
+	u64 time;
+
+	if (!per_cpu(gator_buffer, cpu)[ACTIVITY_BUF])
+		return;
+
+	local_irq_save(flags);
+	time = gator_get_time();
+	// One packed 64-bit timestamp plus six packed 32-bit fields are written below
+	if (buffer_check_space(cpu, ACTIVITY_BUF, MAXSIZE_PACK64 + 6 * MAXSIZE_PACK32)) {
+		gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, MESSAGE_SWITCH);
+		gator_buffer_write_packed_int64(cpu, ACTIVITY_BUF, time);
+		gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, core);
+		gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, key);
+		gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, activity);
+		gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, pid);
+		gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, state);
+	}
+	local_irq_restore(flags);
+	// Check and commit; commit is set to occur once buffer is 3/4 full
+	buffer_check(cpu, ACTIVITY_BUF, time);
+}
+
+// Non-static entry point used by the event providers; forwards to
+// marshal_activity_switch() and always passes state 0.
+void gator_marshal_activity_switch(int core, int key, int activity, int pid)
+{
+	// state is reserved for cpu use only
+	marshal_activity_switch(core, key, activity, pid, 0);
+}
diff --git a/drivers/gator/gator_trace_gpu.c b/drivers/gator/gator_trace_gpu.c
index 6332098e5958..a8b9e7d61ece 100644
--- a/drivers/gator/gator_trace_gpu.c
+++ b/drivers/gator/gator_trace_gpu.c
@@ -23,8 +23,6 @@
 #endif
 #endif
 
-#include "gator_trace_gpu.h"
-
 /*
  * Taken from MALI_PROFILING_EVENT_TYPE_* items in Mali DDK.
  */
@@ -37,7 +35,6 @@
 /* Note whether tracepoints have been registered */
 static int mali_timeline_trace_registered;
 static int mali_job_slots_trace_registered;
-static int gpu_trace_registered;
 
 enum {
 	GPU_UNIT_NONE = 0,
@@ -47,19 +44,19 @@ enum {
 	NUMBER_OF_GPU_UNITS
 };
 
-#define MALI_4xx     (0x0b07)
-#define MALI_T6xx    (0x0056)
+#if defined(MALI_SUPPORT)
 
-struct mali_gpu_job {
+struct mali_activity {
+	int core;
+	int key;
 	int count;
-	int last_tgid;
+	int last_activity;
 	int last_pid;
-	int last_job_id;
 };
 
 #define NUMBER_OF_GPU_CORES 16
-static struct mali_gpu_job mali_gpu_jobs[NUMBER_OF_GPU_UNITS][NUMBER_OF_GPU_CORES];
-static DEFINE_SPINLOCK(mali_gpu_jobs_lock);
+static struct mali_activity mali_activities[NUMBER_OF_GPU_UNITS*NUMBER_OF_GPU_CORES];
+static DEFINE_SPINLOCK(mali_activities_lock);
 
 /* Only one event should be running on a unit and core at a time (ie, a start
  * event can only be followed by a stop and vice versa), but because the kernel
@@ -67,53 +64,97 @@ static DEFINE_SPINLOCK(mali_gpu_jobs_lock);
  * start1, start2, stop1, stop2. Change it back into start1, stop1, start2,
  * stop2 by queueing up start2 and releasing it when stop1 is received.
  */
-static void mali_gpu_enqueue(int unit, int core, int tgid, int pid, int job_id)
+
+/* Return the slot tracking (core, key), claiming the first unused (0, 0) slot if none exists yet. */
+static int mali_activity_index(int core, int key)
 {
+	struct mali_activity *slot = mali_activities;
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(mali_activities); ++idx, ++slot) {
+		if (slot->core == core && slot->key == key)
+			break;
+		if (slot->core == 0 && slot->key == 0) {
+			slot->core = core;
+			slot->key = key;
+			break;
+		}
+	}
+	BUG_ON(idx >= ARRAY_SIZE(mali_activities)); /* no matching slot and no unused slot left */
+	return idx;
+}
+
+static void mali_activity_enqueue(int core, int key, int activity, int pid)
+{
+	int i;
 	int count;
 
-	spin_lock(&mali_gpu_jobs_lock);
-	count = mali_gpu_jobs[unit][core].count;
+	spin_lock(&mali_activities_lock);
+	i = mali_activity_index(core, key);
+
+	count = mali_activities[i].count;
 	BUG_ON(count < 0);
-	++mali_gpu_jobs[unit][core].count;
+	++mali_activities[i].count;
 	if (count) {
-		mali_gpu_jobs[unit][core].last_tgid = tgid;
-		mali_gpu_jobs[unit][core].last_pid = pid;
-		mali_gpu_jobs[unit][core].last_job_id = job_id;
+		mali_activities[i].last_activity = activity;
+		mali_activities[i].last_pid = pid;
 	}
-	spin_unlock(&mali_gpu_jobs_lock);
+	spin_unlock(&mali_activities_lock);
 
 	if (!count) {
-		marshal_sched_gpu_start(unit, core, tgid, pid/*, job_id*/);
+		gator_marshal_activity_switch(core, key, activity, pid);
 	}
 }
 
-static void mali_gpu_stop(int unit, int core)
+static void mali_activity_stop(int core, int key)
 {
+	int i;
 	int count;
-	int last_tgid = 0;
+	int last_activity = 0;
 	int last_pid = 0;
-	//int last_job_id = 0;
 
-	spin_lock(&mali_gpu_jobs_lock);
-	if (mali_gpu_jobs[unit][core].count == 0) {
-		spin_unlock(&mali_gpu_jobs_lock);
+	spin_lock(&mali_activities_lock);
+	i = mali_activity_index(core, key);
+
+	if (mali_activities[i].count == 0) {
+		spin_unlock(&mali_activities_lock);
 		return;
 	}
-	--mali_gpu_jobs[unit][core].count;
-	count = mali_gpu_jobs[unit][core].count;
+	--mali_activities[i].count;
+	count = mali_activities[i].count;
 	if (count) {
-		last_tgid = mali_gpu_jobs[unit][core].last_tgid;
-		last_pid = mali_gpu_jobs[unit][core].last_pid;
-		//last_job_id = mali_gpu_jobs[unit][core].last_job_id;
+		last_activity = mali_activities[i].last_activity;
+		last_pid = mali_activities[i].last_pid;
 	}
-	spin_unlock(&mali_gpu_jobs_lock);
+	spin_unlock(&mali_activities_lock);
 
-	marshal_sched_gpu_stop(unit, core);
+	gator_marshal_activity_switch(core, key, 0, 0);
 	if (count) {
-		marshal_sched_gpu_start(unit, core, last_tgid, last_pid/*, last_job_id*/);
+		gator_marshal_activity_switch(core, key, last_activity, last_pid);
 	}
 }
 
+/* Emit an idle switch (activity 0, pid 0) for every core of each enabled activity counter. */
+void mali_activity_clear(mali_counter mali_activity[], size_t mali_activity_size)
+{
+	size_t idx;
+	int core;
+
+	for (idx = 0; idx < mali_activity_size; ++idx) {
+		/* A negative core count falls back to a single core */
+		int cores = mali_activity[idx].cores;
+
+		if (cores < 0)
+			cores = 1;
+		if (!mali_activity[idx].enabled)
+			continue;
+		for (core = 0; core < cores; ++core)
+			gator_marshal_activity_switch(core, mali_activity[idx].key, 0, 0);
+	}
+}
+
+#endif
+
 #if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx)
 #include "gator_events_mali_4xx.h"
 
@@ -142,6 +183,8 @@ enum {
 	EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE = 1,
 };
 
+mali_counter mali_activity[2];
+
 GATOR_DEFINE_PROBE(mali_timeline_event, TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1, unsigned int d2, unsigned int d3, unsigned int d4))
 {
 	unsigned int component, state;
@@ -154,18 +197,26 @@ GATOR_DEFINE_PROBE(mali_timeline_event, TP_PROTO(unsigned int event_id, unsigned
 	case EVENT_TYPE_START:
 		if (component == EVENT_CHANNEL_VP0) {
 			/* tgid = d0; pid = d1; */
-			mali_gpu_enqueue(GPU_UNIT_VP, 0, d0, d1, 0);
+			if (mali_activity[1].enabled) {
+				mali_activity_enqueue(0, mali_activity[1].key, 1, d1);
+			}
 		} else if (component >= EVENT_CHANNEL_FP0 && component <= EVENT_CHANNEL_FP7) {
 			/* tgid = d0; pid = d1; */
-			mali_gpu_enqueue(GPU_UNIT_FP, component - EVENT_CHANNEL_FP0, d0, d1, 0);
+			if (mali_activity[0].enabled) {
+				mali_activity_enqueue(component - EVENT_CHANNEL_FP0, mali_activity[0].key, 1, d1);
+			}
 		}
 		break;
 
 	case EVENT_TYPE_STOP:
 		if (component == EVENT_CHANNEL_VP0) {
-			mali_gpu_stop(GPU_UNIT_VP, 0);
+			if (mali_activity[1].enabled) {
+				mali_activity_stop(0, mali_activity[1].key);
+			}
 		} else if (component >= EVENT_CHANNEL_FP0 && component <= EVENT_CHANNEL_FP7) {
-			mali_gpu_stop(GPU_UNIT_FP, component - EVENT_CHANNEL_FP0);
+			if (mali_activity[0].enabled) {
+				mali_activity_stop(component - EVENT_CHANNEL_FP0, mali_activity[0].key);
+			}
 		}
 		break;
 
@@ -186,6 +237,9 @@ GATOR_DEFINE_PROBE(mali_timeline_event, TP_PROTO(unsigned int event_id, unsigned
 #endif
 
 #if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_T6xx)
+
+mali_counter mali_activity[3];
+
 #if defined(MALI_JOB_SLOTS_EVENT_CHANGED)
 GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid, unsigned char job_id))
 #else
@@ -217,31 +271,21 @@ GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigne
 	if (unit != GPU_UNIT_NONE) {
 		switch (state) {
 		case EVENT_TYPE_START:
-			mali_gpu_enqueue(unit, 0, tgid, (pid != 0 ? pid : tgid), job_id);
+			if (mali_activity[component].enabled) {
+				mali_activity_enqueue(0, mali_activity[component].key, 1, (pid != 0 ? pid : tgid));
+			}
 			break;
 		case EVENT_TYPE_STOP:
-			mali_gpu_stop(unit, 0);
+		default: // Some jobs can be soft-stopped, so ensure that this terminates the activity trace.
+			if (mali_activity[component].enabled) {
+				mali_activity_stop(0, mali_activity[component].key);
+			}
 			break;
-		default:
-			/*
-			 * Some jobs can be soft-stopped, so ensure that this terminates the activity trace.
-			 */
-			mali_gpu_stop(unit, 0);
 		}
 	}
 }
 #endif
 
-GATOR_DEFINE_PROBE(gpu_activity_start, TP_PROTO(int gpu_unit, int gpu_core, struct task_struct *p))
-{
-	mali_gpu_enqueue(gpu_unit, gpu_core, (int)p->tgid, (int)p->pid, 0);
-}
-
-GATOR_DEFINE_PROBE(gpu_activity_stop, TP_PROTO(int gpu_unit, int gpu_core))
-{
-	mali_gpu_stop(gpu_unit, gpu_core);
-}
-
 static int gator_trace_gpu_start(void)
 {
 	/*
@@ -249,32 +293,25 @@ static int gator_trace_gpu_start(void)
 	 * Absence of gpu trace points is not an error
 	 */
 
-	memset(&mali_gpu_jobs, 0, sizeof(mali_gpu_jobs));
-	gpu_trace_registered = mali_timeline_trace_registered = mali_job_slots_trace_registered = 0;
+#if defined(MALI_SUPPORT)
+	memset(&mali_activities, 0, sizeof(mali_activities));
+#endif
+	mali_timeline_trace_registered = mali_job_slots_trace_registered = 0;
 
 #if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx)
+	mali_activity_clear(mali_activity, ARRAY_SIZE(mali_activity));
 	if (!GATOR_REGISTER_TRACE(mali_timeline_event)) {
 		mali_timeline_trace_registered = 1;
 	}
 #endif
 
 #if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_T6xx)
+	mali_activity_clear(mali_activity, ARRAY_SIZE(mali_activity));
 	if (!GATOR_REGISTER_TRACE(mali_job_slots_event)) {
 		mali_job_slots_trace_registered = 1;
 	}
 #endif
 
-	if (!mali_timeline_trace_registered) {
-		if (GATOR_REGISTER_TRACE(gpu_activity_start)) {
-			return 0;
-		}
-		if (GATOR_REGISTER_TRACE(gpu_activity_stop)) {
-			GATOR_UNREGISTER_TRACE(gpu_activity_start);
-			return 0;
-		}
-		gpu_trace_registered = 1;
-	}
-
 	return 0;
 }
 
@@ -292,10 +329,5 @@ static void gator_trace_gpu_stop(void)
 	}
 #endif
 
-	if (gpu_trace_registered) {
-		GATOR_UNREGISTER_TRACE(gpu_activity_stop);
-		GATOR_UNREGISTER_TRACE(gpu_activity_start);
-	}
-
-	gpu_trace_registered = mali_timeline_trace_registered = mali_job_slots_trace_registered = 0;
+	mali_timeline_trace_registered = mali_job_slots_trace_registered = 0;
 }
diff --git a/drivers/gator/gator_trace_gpu.h b/drivers/gator/gator_trace_gpu.h
deleted file mode 100644
index 5113d459e24c..000000000000
--- a/drivers/gator/gator_trace_gpu.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/**
- * Copyright (C) ARM Limited 2010-2014. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#undef TRACE_GPU
-#define TRACE_GPU gpu
-
-#if !defined(_TRACE_GPU_H)
-#define _TRACE_GPU_H
-
-#include <linux/tracepoint.h>
-
-/*
- * UNIT - the GPU processor type
- *  1 = Vertex Processor
- *  2 = Fragment Processor
- *
- * CORE - the GPU processor core number
- *  this is not the CPU core number
- */
-
-/*
- * Tracepoint for calling GPU unit start activity on core
- */
-TRACE_EVENT(gpu_activity_start,
-
-	    TP_PROTO(int gpu_unit, int gpu_core, struct task_struct *p),
-
-	    TP_ARGS(gpu_unit, gpu_core, p),
-
-	    TP_STRUCT__entry(
-			     __field(int, gpu_unit)
-			     __field(int, gpu_core)
-			     __array(char, comm, TASK_COMM_LEN)
-			     __field(pid_t, pid)
-	    ),
-
-	    TP_fast_assign(
-			   __entry->gpu_unit = gpu_unit;
-			   __entry->gpu_core = gpu_core;
-			   memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-			   __entry->pid = p->pid;
-	    ),
-
-	    TP_printk("unit=%d core=%d comm=%s pid=%d",
-		      __entry->gpu_unit, __entry->gpu_core, __entry->comm,
-		      __entry->pid)
-    );
-
-/*
- * Tracepoint for calling GPU unit stop activity on core
- */
-TRACE_EVENT(gpu_activity_stop,
-
-	    TP_PROTO(int gpu_unit, int gpu_core),
-
-	    TP_ARGS(gpu_unit, gpu_core),
-
-	    TP_STRUCT__entry(
-			     __field(int, gpu_unit)
-			     __field(int, gpu_core)
-	    ),
-
-	    TP_fast_assign(
-			   __entry->gpu_unit = gpu_unit;
-			   __entry->gpu_core = gpu_core;
-	    ),
-
-	    TP_printk("unit=%d core=%d", __entry->gpu_unit, __entry->gpu_core)
-    );
-
-#endif /* _TRACE_GPU_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/drivers/gator/gator_trace_power.c b/drivers/gator/gator_trace_power.c
index 1895bb988c9f..f2754b1c2b56 100644
--- a/drivers/gator/gator_trace_power.c
+++ b/drivers/gator/gator_trace_power.c
@@ -75,7 +75,7 @@ static int gator_trace_power_create_files(struct super_block *sb, struct dentry
 GATOR_DEFINE_PROBE(cpu_frequency, TP_PROTO(unsigned int frequency, unsigned int cpu))
 {
 	cpu = lcpu_to_pcpu(cpu);
-	marshal_event_single(cpu, power_cpu_key[POWER_CPU_FREQ], frequency * 1000);
+	marshal_event_single64(cpu, power_cpu_key[POWER_CPU_FREQ], frequency * 1000LL); /* LL: long is 32 bits on 32-bit targets */
 }
 
 GATOR_DEFINE_PROBE(cpu_idle, TP_PROTO(unsigned int state, unsigned int cpu))
@@ -109,7 +109,7 @@ static void gator_trace_power_online(void)
 	int pcpu = get_physical_cpu();
 	int lcpu = get_logical_cpu();
 	if (power_cpu_enabled[POWER_CPU_FREQ]) {
-		marshal_event_single(pcpu, power_cpu_key[POWER_CPU_FREQ], cpufreq_quick_get(lcpu) * 1000);
+		marshal_event_single64(pcpu, power_cpu_key[POWER_CPU_FREQ], cpufreq_quick_get(lcpu) * 1000LL); /* LL: long is 32 bits on 32-bit targets */
 	}
 }
 
diff --git a/drivers/gator/gator_trace_sched.c b/drivers/gator/gator_trace_sched.c
index 52990e9d4811..655008628933 100644
--- a/drivers/gator/gator_trace_sched.c
+++ b/drivers/gator/gator_trace_sched.c
@@ -114,7 +114,7 @@ static void collect_counters(u64 time, struct task_struct *task)
 
 		// Commit buffers on timeout
 		if (gator_live_rate > 0 && time >= per_cpu(gator_buffer_commit_time, cpu)) {
-			static const int buftypes[] = { NAME_BUF, COUNTER_BUF, BLOCK_COUNTER_BUF, SCHED_TRACE_BUF };
+			static const int buftypes[] = { NAME_BUF, COUNTER_BUF, BLOCK_COUNTER_BUF, SCHED_TRACE_BUF, ACTIVITY_BUF };
 			int i;
 
 			for (i = 0; i < ARRAY_SIZE(buftypes); ++i) {
@@ -137,35 +137,44 @@ static void collect_counters(u64 time, struct task_struct *task)
 // special case used during a suspend of the system
 static void trace_sched_insert_idle(void)
 {
-	marshal_sched_trace_switch(0, 0, 0, 0);
+	marshal_sched_trace_switch(0, 0);
 }
 
-GATOR_DEFINE_PROBE(sched_process_fork, TP_PROTO(struct task_struct *parent, struct task_struct *child))
+static void gator_trace_emit_link(struct task_struct *p)
 {
 	int cookie;
 	int cpu = get_physical_cpu();
 
-	cookie = get_exec_cookie(cpu, child);
-	emit_pid_name(child);
+	cookie = get_exec_cookie(cpu, p);
+	emit_pid_name(p);
 
-	marshal_sched_trace_start(child->tgid, child->pid, cookie);
+	marshal_link(cookie, p->tgid, p->pid);
 }
 
+GATOR_DEFINE_PROBE(sched_process_fork, TP_PROTO(struct task_struct *parent, struct task_struct *child))
+{
+	gator_trace_emit_link(child);
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+GATOR_DEFINE_PROBE(sched_process_exec, TP_PROTO(struct task_struct *p, pid_t old_pid, struct linux_binprm *bprm))
+{
+	gator_trace_emit_link(p);
+}
+#endif
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
 GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next))
 #else
 GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_struct *next))
 #endif
 {
-	int cookie;
 	int state;
 	int cpu = get_physical_cpu();
 
 	per_cpu(in_scheduler_context, cpu) = true;
 
 	// do as much work as possible before disabling interrupts
-	cookie = get_exec_cookie(cpu, next);
-	emit_pid_name(next);
 	if (prev->state == TASK_RUNNING) {
 		state = STATE_CONTENTION;
 	} else if (prev->in_iowait) {
@@ -178,7 +187,10 @@ GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_
 	collect_counters(gator_get_time(), prev);
 	per_cpu(collecting, cpu) = 0;
 
-	marshal_sched_trace_switch(next->tgid, next->pid, cookie, state);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
+	gator_trace_emit_link(next);
+#endif
+	marshal_sched_trace_switch(next->pid, state);
 
 	per_cpu(in_scheduler_context, cpu) = false;
 }
@@ -199,6 +211,10 @@ static int register_scheduler_tracepoints(void)
 	// register tracepoints
 	if (GATOR_REGISTER_TRACE(sched_process_fork))
 		goto fail_sched_process_fork;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	if (GATOR_REGISTER_TRACE(sched_process_exec))
+		goto fail_sched_process_exec;
+#endif
 	if (GATOR_REGISTER_TRACE(sched_switch))
 		goto fail_sched_switch;
 	if (GATOR_REGISTER_TRACE(sched_process_free))
@@ -216,15 +232,42 @@ static int register_scheduler_tracepoints(void)
 	GATOR_UNREGISTER_TRACE(sched_switch);
 fail_sched_switch:
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	GATOR_UNREGISTER_TRACE(sched_process_exec);
+fail_sched_process_exec: /* exec never registered: only the fork probe is left to undo */
+#endif
 	GATOR_UNREGISTER_TRACE(sched_process_fork);
 fail_sched_process_fork:
 	pr_err("gator: tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
 
 	return -1;
 }
 
+static void unregister_scheduler_tracepoints(void)
+{
+	GATOR_UNREGISTER_TRACE(sched_process_fork);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	GATOR_UNREGISTER_TRACE(sched_process_exec);
+#endif
+	GATOR_UNREGISTER_TRACE(sched_switch);
+	GATOR_UNREGISTER_TRACE(sched_process_free);
+	pr_debug("gator: unregistered tracepoints\n");
+}
+
+static void gator_trace_sched_stop(void)
+{
+	int cpu;
+
+	unregister_scheduler_tracepoints();
+
+	for_each_present_cpu(cpu) {
+		kfree(per_cpu(taskname_keys, cpu));
+	}
+}
+
 static int gator_trace_sched_start(void)
 {
 	int cpu, size;
+	int ret;
 
 	for_each_present_cpu(cpu) {
 		size = TASK_MAP_ENTRIES * TASK_MAX_COLLISIONS * sizeof(uint64_t);
@@ -234,7 +277,9 @@ static int gator_trace_sched_start(void)
 		memset(per_cpu(taskname_keys, cpu), 0, size);
 	}
 
-	return register_scheduler_tracepoints();
+	ret = register_scheduler_tracepoints();
+
+	return ret;
 }
 
 static void gator_trace_sched_offline(void)
@@ -242,24 +287,6 @@ static void gator_trace_sched_offline(void)
 	trace_sched_insert_idle();
 }
 
-static void unregister_scheduler_tracepoints(void)
-{
-	GATOR_UNREGISTER_TRACE(sched_process_fork);
-	GATOR_UNREGISTER_TRACE(sched_switch);
-	GATOR_UNREGISTER_TRACE(sched_process_free);
-	pr_debug("gator: unregistered tracepoints\n");
-}
-
-static void gator_trace_sched_stop(void)
-{
-	int cpu;
-	unregister_scheduler_tracepoints();
-
-	for_each_present_cpu(cpu) {
-		kfree(per_cpu(taskname_keys, cpu));
-	}
-}
-
 static void gator_trace_sched_init(void)
 {
 	int i;
diff --git a/drivers/gator/mali/mali_dd_gator_api.h b/drivers/gator/mali/mali_dd_gator_api.h
new file mode 100644
index 000000000000..104b34f2d72a
--- /dev/null
+++ b/drivers/gator/mali/mali_dd_gator_api.h
@@ -0,0 +1,45 @@
+#if !defined(MALI_DD_GATOR_API_H)
+#define MALI_DD_GATOR_API_H
+
+#if !defined(MALI_DDK_GATOR_API_VERSION)
+	#define MALI_DDK_GATOR_API_VERSION 3
+#endif
+#if !defined(MALI_TRUE)
+	#define MALI_TRUE                ((unsigned int)1)
+#endif
+
+#if !defined(MALI_FALSE)
+	#define MALI_FALSE               ((unsigned int)0)
+#endif
+
+struct mali_dd_hwcnt_info {
+
+	/* Passed from Gator to kbase */
+	//u32 in_mali_dd_hwcnt_version;
+	unsigned short int bitmask[4];
+
+	/* Passed from kbase to Gator */
+
+	/* ptr to counter dump memory */
+	void *kernel_dump_buffer;
+
+	/* size of counter dump memory */
+	unsigned int size;
+
+	unsigned int gpu_id;
+
+	unsigned int nr_cores;
+
+	unsigned int nr_core_groups;
+
+	/* The cached present bitmaps - these are the same as the corresponding hardware registers*/
+	unsigned long int shader_present_bitmap;
+};
+
+struct mali_dd_hwcnt_handles;
+extern struct mali_dd_hwcnt_handles* mali_dd_hwcnt_init(struct mali_dd_hwcnt_info *in_out_info);
+extern void mali_dd_hwcnt_clear(struct mali_dd_hwcnt_info *in_out_info, struct mali_dd_hwcnt_handles *opaque_handles);
+extern unsigned int kbase_dd_instr_hwcnt_dump_complete(struct mali_dd_hwcnt_handles *opaque_handles, unsigned int * const success);
+extern unsigned int kbase_dd_instr_hwcnt_dump_irq(struct mali_dd_hwcnt_handles *opaque_handles);
+
+#endif /* MALI_DD_GATOR_API_H */
diff --git a/drivers/gator/mali_t6xx.mk b/drivers/gator/mali_t6xx.mk
index 059d47aec910..fa7571ded17b 100644
--- a/drivers/gator/mali_t6xx.mk
+++ b/drivers/gator/mali_t6xx.mk
@@ -21,6 +21,10 @@ OSK_DIR = $(DDK_DIR)/drivers/gpu/arm/midgard/osk
 EXTRA_CFLAGS += -DMALI_DIR_MIDGARD=1
 endif
 
+ifneq ($(wildcard $(DDK_DIR)/drivers/gpu/arm/midgard/mali_dd_gator_api.h),)
+EXTRA_CFLAGS += -DMALI_SIMPLE_API=1
+endif
+
 UMP_DIR = $(DDK_DIR)/include/linux
 
 # Include directories in the DDK
diff --git a/tools/gator/daemon/Android.mk b/tools/gator/daemon/Android.mk
index 045d028fda5f..44c069cc7e24 100644
--- a/tools/gator/daemon/Android.mk
+++ b/tools/gator/daemon/Android.mk
@@ -3,7 +3,7 @@ include $(CLEAR_VARS)
 
 XML_H := $(shell cd $(LOCAL_PATH) && make events_xml.h defaults_xml.h)
 
-LOCAL_CFLAGS += -Wall -O3 -mthumb-interwork -fno-exceptions -DETCDIR=\"/etc\" -Ilibsensors
+LOCAL_CFLAGS += -Wall -O3 -mthumb-interwork -fno-exceptions -pthread -DETCDIR=\"/etc\" -Ilibsensors
 
 LOCAL_SRC_FILES := \
 	Buffer.cpp \
@@ -15,12 +15,14 @@ LOCAL_SRC_FILES := \
 	DynBuf.cpp \
 	EventsXML.cpp \
 	ExternalSource.cpp \
+	FSDriver.cpp \
 	Fifo.cpp \
 	Hwmon.cpp \
 	KMod.cpp \
 	LocalCapture.cpp \
 	Logging.cpp \
 	main.cpp \
+	MaliVideoDriver.cpp \
 	Monitor.cpp \
 	OlySocket.cpp \
 	OlyUtility.cpp \
@@ -55,7 +57,7 @@ LOCAL_SRC_FILES := \
 	mxml/mxml-set.c \
 	mxml/mxml-string.c
 
-LOCAL_C_INCLUDES := $(LOCAL_PATH) 
+LOCAL_C_INCLUDES := $(LOCAL_PATH)
 
 LOCAL_MODULE := gatord
 LOCAL_MODULE_TAGS := optional
diff --git a/tools/gator/daemon/Application.mk b/tools/gator/daemon/Application.mk
new file mode 100644
index 000000000000..631ba54148d1
--- /dev/null
+++ b/tools/gator/daemon/Application.mk
@@ -0,0 +1 @@
+APP_PLATFORM := android-8
diff --git a/tools/gator/daemon/Buffer.cpp b/tools/gator/daemon/Buffer.cpp
index 93557dabed9f..dd19f7f8be76 100644
--- a/tools/gator/daemon/Buffer.cpp
+++ b/tools/gator/daemon/Buffer.cpp
@@ -15,11 +15,12 @@
 #define mask (mSize - 1)
 
 enum {
-	CODE_PEA    = 1,
-	CODE_KEYS   = 2,
-	CODE_FORMAT = 3,
-	CODE_MAPS   = 4,
-	CODE_COMM   = 5,
+	CODE_PEA      = 1,
+	CODE_KEYS     = 2,
+	CODE_FORMAT   = 3,
+	CODE_MAPS     = 4,
+	CODE_COMM     = 5,
+	CODE_KEYS_OLD = 6,
 };
 
 // Summary Frame Messages
@@ -167,7 +168,7 @@ void Buffer::check(const uint64_t time) {
 	}
 }
 
-void Buffer::packInt(int32_t x) {
+void Buffer::packInt(char *const buf, const int size, int &writePos, int32_t x) {
 	int packedBytes = 0;
 	int more = true;
 	while (more) {
@@ -181,11 +182,15 @@ void Buffer::packInt(int32_t x) {
 			b |= 0x80;
 		}
 
-		mBuf[(mWritePos + packedBytes) & mask] = b;
+		buf[(writePos + packedBytes) & /*mask*/(size - 1)] = b;
 		packedBytes++;
 	}
 
-	mWritePos = (mWritePos + packedBytes) & mask;
+	writePos = (writePos + packedBytes) & /*mask*/(size - 1);
+}
+
+void Buffer::packInt(int32_t x) {
+	packInt(mBuf, mSize, mWritePos, x);
 }
 
 void Buffer::packInt64(int64_t x) {
@@ -320,6 +325,21 @@ void Buffer::keys(const int count, const __u64 *const ids, const int *const keys
 	check(1);
 }
 
+void Buffer::keysOld(const int keyCount, const int *const keys, const int bytes, const char *const buf) {
+	if (checkSpace((2 + keyCount) * MAXSIZE_PACK32 + bytes)) {
+		packInt(CODE_KEYS_OLD);
+		packInt(keyCount);
+		for (int i = 0; i < keyCount; ++i) {
+			packInt(keys[i]);
+		}
+		writeBytes(buf, bytes);
+	} else {
+		logg->logError(__FILE__, __LINE__, "Ran out of buffer space for perf attrs");
+		handleException();
+	}
+	check(1);
+}
+
 void Buffer::format(const int length, const char *const format) {
 	if (checkSpace(MAXSIZE_PACK32 + length + 1)) {
 		packInt(CODE_FORMAT);
diff --git a/tools/gator/daemon/Buffer.h b/tools/gator/daemon/Buffer.h
index 50237771860c..2de1b97ac091 100644
--- a/tools/gator/daemon/Buffer.h
+++ b/tools/gator/daemon/Buffer.h
@@ -54,6 +54,7 @@ class Buffer {
 	// Perf Attrs messages
 	void pea(const struct perf_event_attr *const pea, int key);
 	void keys(const int count, const __u64 *const ids, const int *const keys);
+	void keysOld(const int keyCount, const int *const keys, const int bytes, const char *const buf);
 	void format(const int length, const char *const format);
 	void maps(const int pid, const int tid, const char *const maps);
 	void comm(const int pid, const int tid, const char *const image, const char *const comm);
@@ -64,6 +65,11 @@ class Buffer {
 	// Prefer a new member to using these functions if possible
 	char *getWritePos() { return mBuf + mWritePos; }
 	void advanceWrite(int bytes) { mWritePos = (mWritePos + bytes) & /*mask*/(mSize - 1); }
+	static void packInt(char *const buf, const int size, int &writePos, int32_t x);
+	void packInt(int32_t x);
+	void packInt64(int64_t x);
+	void writeBytes(const void *const data, size_t count);
+	void writeString(const char *const str);
 
 	static void writeLEInt(unsigned char *buf, int v) {
 		buf[0] = (v >> 0) & 0xFF;
@@ -76,11 +82,6 @@ class Buffer {
 	bool commitReady() const;
 	bool checkSpace(int bytes);
 
-	void packInt(int32_t x);
-	void packInt64(int64_t x);
-	void writeBytes(const void *const data, size_t count);
-	void writeString(const char *const str);
-
 	const int32_t mCore;
 	const int32_t mBufType;
 	const int mSize;
diff --git a/tools/gator/daemon/CapturedXML.cpp b/tools/gator/daemon/CapturedXML.cpp
index cf79b72a1166..4a11415a00c9 100644
--- a/tools/gator/daemon/CapturedXML.cpp
+++ b/tools/gator/daemon/CapturedXML.cpp
@@ -33,7 +33,7 @@ mxml_node_t* CapturedXML::getTree(bool includeTime) {
 	captured = mxmlNewElement(xml, "captured");
 	mxmlElementSetAttr(captured, "version", "1");
 	if (gSessionData->perf.isSetup()) {
-	  mxmlElementSetAttr(captured, "type", "Perf");
+		mxmlElementSetAttr(captured, "type", "Perf");
 	}
 	mxmlElementSetAttrf(captured, "protocol", "%d", PROTOCOL_VERSION);
 	if (includeTime) { // Send the following only after the capture is complete
@@ -66,10 +66,15 @@ mxml_node_t* CapturedXML::getTree(bool includeTime) {
 			mxml_node_t *const node = mxmlNewElement(counters, "counter");
 			mxmlElementSetAttrf(node, "key", "0x%x", counter.getKey());
 			mxmlElementSetAttr(node, "type", counter.getType());
-			mxmlElementSetAttrf(node, "event", "0x%x", counter.getEvent());
+			if (counter.getEvent() != -1) {
+				mxmlElementSetAttrf(node, "event", "0x%x", counter.getEvent());
+			}
 			if (counter.getCount() > 0) {
 				mxmlElementSetAttrf(node, "count", "%d", counter.getCount());
 			}
+			if (counter.getCores() > 0) {
+				mxmlElementSetAttrf(node, "cores", "%d", counter.getCores());
+			}
 		}
 	}
 
@@ -89,7 +94,7 @@ void CapturedXML::write(char* path) {
 
 	// Set full path
 	snprintf(file, PATH_MAX, "%s/captured.xml", path);
-	
+
 	char* xml = getXML(true);
 	if (util->writeToDisk(file, xml) < 0) {
 		logg->logError(__FILE__, __LINE__, "Error writing %s\nPlease verify the path.", file);
diff --git a/tools/gator/daemon/CapturedXML.h b/tools/gator/daemon/CapturedXML.h
index efc1e52bdba3..ed08c44bc3ff 100644
--- a/tools/gator/daemon/CapturedXML.h
+++ b/tools/gator/daemon/CapturedXML.h
@@ -23,4 +23,4 @@ class CapturedXML {
 
 const char * mxmlWhitespaceCB(mxml_node_t *node, int where);
 
-#endif 	//__CAPTURED_XML_H__
+#endif //__CAPTURED_XML_H__
diff --git a/tools/gator/daemon/Child.cpp b/tools/gator/daemon/Child.cpp
index ca33561ffdca..1901ecc6a724 100644
--- a/tools/gator/daemon/Child.cpp
+++ b/tools/gator/daemon/Child.cpp
@@ -26,13 +26,13 @@
 #include "Driver.h"
 #include "PerfSource.h"
 #include "DriverSource.h"
-#include "UserSpaceSource.h"
 #include "ExternalSource.h"
+#include "UserSpaceSource.h"
 
 static sem_t haltPipeline, senderThreadStarted, startProfile, senderSem; // Shared by Child and spawned threads
 static Source *primarySource = NULL;
-static Source *userSpaceSource = NULL;
 static Source *externalSource = NULL;
+static Source *userSpaceSource = NULL;
 static Sender* sender = NULL;        // Shared by Child.cpp and spawned threads
 Child* child = NULL;                 // shared by Child.cpp and main.cpp
 
@@ -147,16 +147,16 @@ static void *senderThread(void *) {
 	prctl(PR_SET_NAME, (unsigned long)&"gatord-sender", 0, 0, 0);
 	sem_wait(&haltPipeline);
 
-	while (!primarySource->isDone() || (userSpaceSource != NULL && !userSpaceSource->isDone()) || (externalSource != NULL && !externalSource->isDone())) {
+	while (!primarySource->isDone() ||
+	       !externalSource->isDone() ||
+	       (userSpaceSource != NULL && !userSpaceSource->isDone())) {
 		sem_wait(&senderSem);
 
 		primarySource->write(sender);
+		externalSource->write(sender);
 		if (userSpaceSource != NULL) {
 			userSpaceSource->write(sender);
 		}
-		if (externalSource != NULL) {
-			externalSource->write(sender);
-		}
 	}
 
 	// write end-of-capture sequence
@@ -202,6 +202,10 @@ void Child::initialization() {
 void Child::endSession() {
 	gSessionData->mSessionIsActive = false;
 	primarySource->interrupt();
+	externalSource->interrupt();
+	if (userSpaceSource != NULL) {
+		userSpaceSource->interrupt();
+	}
 	sem_post(&haltPipeline);
 }
 
@@ -227,9 +231,9 @@ void Child::run() {
 
 	// Set up the driver; must be done after gSessionData->mPerfCounterType[] is populated
 	if (!gSessionData->perf.isSetup()) {
-	  primarySource = new DriverSource(&senderSem, &startProfile);
+		primarySource = new DriverSource(&senderSem, &startProfile);
 	} else {
-	  primarySource = new PerfSource(&senderSem, &startProfile);
+		primarySource = new PerfSource(&senderSem, &startProfile);
 	}
 
 	// Initialize all drivers
@@ -280,11 +284,18 @@ void Child::run() {
 		thread_creation_success = false;
 	} else if (socket && pthread_create(&stopThreadID, NULL, stopThread, NULL)) {
 		thread_creation_success = false;
-	} else if (pthread_create(&senderThreadID, NULL, senderThread, NULL)){
+	} else if (pthread_create(&senderThreadID, NULL, senderThread, NULL)) {
 		thread_creation_success = false;
 	}
 
-	if (gSessionData->hwmon.countersEnabled()) {
+	externalSource = new ExternalSource(&senderSem);
+	if (!externalSource->prepare()) {
+		logg->logError(__FILE__, __LINE__, "Unable to prepare for capture");
+		handleException();
+	}
+	externalSource->start();
+
+	if (gSessionData->hwmon.countersEnabled() || gSessionData->fsDriver.countersEnabled()) {
 		userSpaceSource = new UserSpaceSource(&senderSem);
 		if (!userSpaceSource->prepare()) {
 			logg->logError(__FILE__, __LINE__, "Unable to prepare for capture");
@@ -292,14 +303,6 @@ void Child::run() {
 		}
 		userSpaceSource->start();
 	}
-	if (access("/tmp/gator", F_OK) == 0) {
-		externalSource = new ExternalSource(&senderSem);
-		if (!externalSource->prepare()) {
-			logg->logError(__FILE__, __LINE__, "Unable to prepare for capture");
-			handleException();
-		}
-		externalSource->start();
-	}
 
 	if (!thread_creation_success) {
 		logg->logError(__FILE__, __LINE__, "Failed to create gator threads");
@@ -312,12 +315,10 @@ void Child::run() {
 	// Start profiling
 	primarySource->run();
 
-	if (externalSource != NULL) {
-		externalSource->join();
-	}
 	if (userSpaceSource != NULL) {
 		userSpaceSource->join();
 	}
+	externalSource->join();
 
 	// Wait for the other threads to exit
 	pthread_join(senderThreadID, NULL);
@@ -337,8 +338,8 @@ void Child::run() {
 
 	logg->logMessage("Profiling ended.");
 
-	delete externalSource;
 	delete userSpaceSource;
+	delete externalSource;
 	delete primarySource;
 	delete sender;
 	delete localCapture;
diff --git a/tools/gator/daemon/Child.h b/tools/gator/daemon/Child.h
index 9e206d7113b8..a306a7760819 100644
--- a/tools/gator/daemon/Child.h
+++ b/tools/gator/daemon/Child.h
@@ -30,4 +30,4 @@ class Child {
 	Child &operator=(const Child &);
 };
 
-#endif 	//__CHILD_H__
+#endif //__CHILD_H__
diff --git a/tools/gator/daemon/ConfigurationXML.cpp b/tools/gator/daemon/ConfigurationXML.cpp
index fd479f2452cd..6590dd389196 100644
--- a/tools/gator/daemon/ConfigurationXML.cpp
+++ b/tools/gator/daemon/ConfigurationXML.cpp
@@ -21,12 +21,13 @@ static const char* ATTR_COUNTER            = "counter";
 static const char* ATTR_REVISION           = "revision";
 static const char* ATTR_EVENT              = "event";
 static const char* ATTR_COUNT              = "count";
+static const char* ATTR_CORES              = "cores";
 
 ConfigurationXML::ConfigurationXML() {
 	const char * configuration_xml;
 	unsigned int configuration_xml_len;
 	getDefaultConfigurationXml(configuration_xml, configuration_xml_len);
-	
+
 	char path[PATH_MAX];
 
 	getPath(path);
@@ -53,7 +54,7 @@ ConfigurationXML::ConfigurationXML() {
 
 		break;
 	}
-	
+
 	validate();
 }
 
@@ -82,7 +83,7 @@ int ConfigurationXML::parse(const char* configurationXML) {
 	node = mxmlGetFirstChild(tree);
 	while (node && mxmlGetType(node) != MXML_ELEMENT)
 		node = mxmlWalkNext(node, tree, MXML_NO_DESCEND);
-	
+
 	ret = configurationsTag(node);
 
 	node = mxmlGetFirstChild(node);
@@ -127,7 +128,7 @@ void ConfigurationXML::validate(void) {
 #define CONFIGURATION_REVISION 3
 int ConfigurationXML::configurationsTag(mxml_node_t *node) {
 	const char* revision_string;
-	
+
 	revision_string = mxmlElementGetAttr(node, ATTR_REVISION);
 	if (!revision_string) {
 		return 1; //revision issue;
@@ -158,6 +159,7 @@ void ConfigurationXML::configurationTag(mxml_node_t *node) {
 	if (mxmlElementGetAttr(node, ATTR_COUNTER)) counter.setType(mxmlElementGetAttr(node, ATTR_COUNTER));
 	if (mxmlElementGetAttr(node, ATTR_EVENT)) counter.setEvent(strtol(mxmlElementGetAttr(node, ATTR_EVENT), NULL, 16));
 	if (mxmlElementGetAttr(node, ATTR_COUNT)) counter.setCount(strtol(mxmlElementGetAttr(node, ATTR_COUNT), NULL, 10));
+	if (mxmlElementGetAttr(node, ATTR_CORES)) counter.setCores(strtol(mxmlElementGetAttr(node, ATTR_CORES), NULL, 10));
 	if (counter.getCount() > 0) {
 		gSessionData->mIsEBS = true;
 	}
diff --git a/tools/gator/daemon/Counter.h b/tools/gator/daemon/Counter.h
index 689174573e4e..5202aa046362 100644
--- a/tools/gator/daemon/Counter.h
+++ b/tools/gator/daemon/Counter.h
@@ -27,6 +27,7 @@ class Counter {
 		mEnabled = false;
 		mEvent = -1;
 		mCount = 0;
+		mCores = -1;
 		mKey = 0;
 		mDriver = NULL;
 	}
@@ -35,6 +36,7 @@ class Counter {
 	void setEnabled(const bool enabled) { mEnabled = enabled; }
 	void setEvent(const int event) { mEvent = event; }
 	void setCount(const int count) { mCount = count; }
+	void setCores(const int cores) { mCores = cores; }
 	void setKey(const int key) { mKey = key; }
 	void setDriver(Driver *const driver) { mDriver = driver; }
 
@@ -42,6 +44,7 @@ class Counter {
 	bool isEnabled() const { return mEnabled; }
 	int getEvent() const { return mEvent; }
 	int getCount() const { return mCount; }
+	int getCores() const { return mCores; }
 	int getKey() const { return mKey; }
 	Driver *getDriver() const { return mDriver; }
 
@@ -54,6 +57,7 @@ class Counter {
 	bool mEnabled;
 	int mEvent;
 	int mCount;
+	int mCores;
 	int mKey;
 	Driver *mDriver;
 };
diff --git a/tools/gator/daemon/DriverSource.cpp b/tools/gator/daemon/DriverSource.cpp
index f78ec6b7ce41..11d3095ef6d2 100644
--- a/tools/gator/daemon/DriverSource.cpp
+++ b/tools/gator/daemon/DriverSource.cpp
@@ -12,19 +12,24 @@
 
 #include <fcntl.h>
 #include <inttypes.h>
+#include <sys/prctl.h>
 #include <unistd.h>
 
+#include "Buffer.h"
 #include "Child.h"
+#include "DynBuf.h"
 #include "Fifo.h"
 #include "Logging.h"
+#include "Proc.h"
 #include "Sender.h"
 #include "SessionData.h"
 
 extern Child *child;
 
-DriverSource::DriverSource(sem_t *senderSem, sem_t *startProfile) : mFifo(NULL), mSenderSem(senderSem), mStartProfile(startProfile), mBufferSize(0), mBufferFD(0), mLength(1) {
+DriverSource::DriverSource(sem_t *senderSem, sem_t *startProfile) : mBuffer(NULL), mFifo(NULL), mSenderSem(senderSem), mStartProfile(startProfile), mBufferSize(0), mBufferFD(0), mLength(1) {
 	int driver_version = 0;
 
+	mBuffer = new Buffer(0, FRAME_PERF_ATTRS, 4*1024*1024, senderSem);
 	if (readIntDriver("/dev/gator/version", &driver_version) == -1) {
 		logg->logError(__FILE__, __LINE__, "Error reading gator driver version");
 		handleException();
@@ -43,7 +48,7 @@ DriverSource::DriverSource(sem_t *senderSem, sem_t *startProfile) : mFifo(NULL),
 			handleException();
 		} else {
 			// Release version mismatch
-			logg->logError(__FILE__, __LINE__, 
+			logg->logError(__FILE__, __LINE__,
 				"gator driver version \"%d\" is different than gator daemon version \"%d\".\n"
 				">> Please upgrade the driver and daemon to the latest versions.", driver_version, PROTOCOL_VERSION);
 			handleException();
@@ -87,6 +92,28 @@ bool DriverSource::prepare() {
 	return true;
 }
 
+void DriverSource::bootstrapThread() {
+	prctl(PR_SET_NAME, (unsigned long)&"gatord-bootstrap", 0, 0, 0);
+
+	DynBuf printb;
+	DynBuf b1;
+	DynBuf b2;
+	DynBuf b3;
+
+	if (!readProc(mBuffer, false, &printb, &b1, &b2, &b3)) {
+		logg->logMessage("%s(%s:%i): readProc failed", __FUNCTION__, __FILE__, __LINE__);
+		handleException();
+	}
+
+	mBuffer->commit(1);
+	mBuffer->setDone();
+}
+
+void *DriverSource::bootstrapThreadStatic(void *arg) {
+	static_cast<DriverSource *>(arg)->bootstrapThread();
+	return NULL;
+}
+
 void DriverSource::run() {
 	// Get the initial pointer to the collect buffer
 	char *collectBuffer = mFifo->start();
@@ -138,6 +165,12 @@ void DriverSource::run() {
 
 	sem_post(mStartProfile);
 
+	pthread_t bootstrapThreadID;
+	if (pthread_create(&bootstrapThreadID, NULL, bootstrapThreadStatic, this) != 0) {
+		logg->logError(__FILE__, __LINE__, "Unable to start the gator_bootstrap thread");
+		handleException();
+	}
+
 	// Collect Data
 	do {
 		// This command will stall until data is received from the driver
@@ -164,6 +197,8 @@ void DriverSource::run() {
 	} while (bytesCollected > 0);
 
 	logg->logMessage("Exit collect data loop");
+
+	pthread_join(bootstrapThreadID, NULL);
 }
 
 void DriverSource::interrupt() {
@@ -174,7 +209,7 @@ void DriverSource::interrupt() {
 }
 
 bool DriverSource::isDone() {
-	return mLength <= 0;
+	return mLength <= 0 && (mBuffer == NULL || mBuffer->isDone());
 }
 
 void DriverSource::write(Sender *sender) {
@@ -182,6 +217,16 @@ void DriverSource::write(Sender *sender) {
 	if (data != NULL) {
 		sender->writeData(data, mLength, RESPONSE_APC_DATA);
 		mFifo->release();
+		// Assume the summary packet is in the first block received from the driver
+		gSessionData->mSentSummary = true;
+	}
+	if (mBuffer != NULL && !mBuffer->isDone()) {
+		mBuffer->write(sender);
+		if (mBuffer->isDone()) {
+			Buffer *buf = mBuffer;
+			mBuffer = NULL;
+			delete buf;
+		}
 	}
 }
 
@@ -227,7 +272,7 @@ int DriverSource::readInt64Driver(const char *fullpath, int64_t *value) {
 	char *endptr;
 	errno = 0;
 	*value = strtoll(data, &endptr, 10);
-	if (errno != 0 || *endptr != '\n') {
+	if (errno != 0 || (*endptr != '\n' && *endptr != '\0')) {
 		logg->logMessage("Invalid value in file %s", fullpath);
 		return -1;
 	}
diff --git a/tools/gator/daemon/DriverSource.h b/tools/gator/daemon/DriverSource.h
index dcf1078a239c..ec27b0815bbf 100644
--- a/tools/gator/daemon/DriverSource.h
+++ b/tools/gator/daemon/DriverSource.h
@@ -14,6 +14,7 @@
 
 #include "Source.h"
 
+class Buffer;
 class Fifo;
 
 class DriverSource : public Source {
@@ -37,6 +38,10 @@ class DriverSource : public Source {
 	static int writeReadDriver(const char *path, int64_t *value);
 
 private:
+	static void *bootstrapThreadStatic(void *arg);
+	void bootstrapThread();
+
+	Buffer *mBuffer;
 	Fifo *mFifo;
 	sem_t *const mSenderSem;
 	sem_t *const mStartProfile;
diff --git a/tools/gator/daemon/EventsXML.cpp b/tools/gator/daemon/EventsXML.cpp
index a07a046f3353..cf0192ef671f 100644
--- a/tools/gator/daemon/EventsXML.cpp
+++ b/tools/gator/daemon/EventsXML.cpp
@@ -13,7 +13,7 @@
 #include "OlyUtility.h"
 #include "SessionData.h"
 
-char* EventsXML::getXML() {
+mxml_node_t *EventsXML::getTree() {
 #include "events_xml.h" // defines and initializes char events_xml[] and int events_xml_len
 	char path[PATH_MAX];
 	mxml_node_t *xml;
@@ -38,6 +38,12 @@ char* EventsXML::getXML() {
 		xml = mxmlLoadString(NULL, (const char *)events_xml, MXML_NO_CALLBACK);
 	}
 
+	return xml;
+}
+
+char *EventsXML::getXML() {
+	mxml_node_t *xml = getTree();
+
 	// Add dynamic events from the drivers
 	mxml_node_t *events = mxmlFindElement(xml, xml, "events", NULL, NULL, MXML_DESCEND);
 	if (!events) {
@@ -48,19 +54,19 @@ char* EventsXML::getXML() {
 		driver->writeEvents(events);
 	}
 
-	char* string = mxmlSaveAllocString(xml, mxmlWhitespaceCB);
+	char *string = mxmlSaveAllocString(xml, mxmlWhitespaceCB);
 	mxmlDelete(xml);
 
 	return string;
 }
 
-void EventsXML::write(const char* path) {
+void EventsXML::write(const char *path) {
 	char file[PATH_MAX];
 
 	// Set full path
 	snprintf(file, PATH_MAX, "%s/events.xml", path);
-	
-	char* buf = getXML();
+
+	char *buf = getXML();
 	if (util->writeToDisk(file, buf) < 0) {
 		logg->logError(__FILE__, __LINE__, "Error writing %s\nPlease verify the path.", file);
 		handleException();
diff --git a/tools/gator/daemon/EventsXML.h b/tools/gator/daemon/EventsXML.h
index 6cd1560f7d4e..ff7a02fd3c78 100644
--- a/tools/gator/daemon/EventsXML.h
+++ b/tools/gator/daemon/EventsXML.h
@@ -9,9 +9,12 @@
 #ifndef EVENTS_XML
 #define EVENTS_XML
 
+#include "mxml/mxml.h"
+
 class EventsXML {
 public:
-	char* getXML();
+	mxml_node_t *getTree();
+	char *getXML();
 	void write(const char* path);
 };
 
diff --git a/tools/gator/daemon/ExternalSource.cpp b/tools/gator/daemon/ExternalSource.cpp
index fe5824b04812..b6ec301d0c08 100644
--- a/tools/gator/daemon/ExternalSource.cpp
+++ b/tools/gator/daemon/ExternalSource.cpp
@@ -8,41 +8,195 @@
 
 #include "ExternalSource.h"
 
+#include <fcntl.h>
 #include <sys/prctl.h>
+#include <unistd.h>
 
 #include "Logging.h"
 #include "OlySocket.h"
 #include "SessionData.h"
 
-ExternalSource::ExternalSource(sem_t *senderSem) : mBuffer(0, FRAME_EXTERNAL, 1024, senderSem), mSock("/tmp/gator") {
+static const char MALI_VIDEO[] = "\0mali-video";
+static const char MALI_VIDEO_STARTUP[] = "\0mali-video-startup";
+static const char MALI_VIDEO_V1[] = "MALI_VIDEO 1\n";
+
+static bool setNonblock(const int fd) { // Adds O_NONBLOCK to fd's status flags; returns false (after logging) if either fcntl call fails
+	int flags;
+
+	flags = fcntl(fd, F_GETFL); // fetch the current flags so the existing bits are preserved below
+	if (flags < 0) {
+		logg->logMessage("fcntl getfl failed");
+		return false;
+	}
+
+	if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) != 0) {
+		logg->logMessage("fcntl setfl failed");
+		return false;
+	}
+
+	return true;
+}
+
+ExternalSource::ExternalSource(sem_t *senderSem) : mBuffer(0, FRAME_EXTERNAL, 128*1024, senderSem), mMonitor(), mMveStartupUds(MALI_VIDEO_STARTUP, sizeof(MALI_VIDEO_STARTUP)), mInterruptFd(-1), mMveUds(-1) {
+	sem_init(&mBufferSem, 0, 0);
 }
 
 ExternalSource::~ExternalSource() {
 }
 
+void ExternalSource::waitFor(const uint64_t currTime, const int bytes) { // Blocks until mBuffer reports more than `bytes` bytes available
+	while (mBuffer.bytesAvailable() <= bytes) {
+		mBuffer.check(currTime);
+		sem_wait(&mBufferSem); // mBufferSem is posted by ExternalSource::write() after it calls mBuffer.write()
+	}
+}
+
+void ExternalSource::configureConnection(const int fd, const char *const handshake, size_t size) { // Makes fd non-blocking, adds it to mMonitor and records the handshake in mBuffer; any failure is fatal
+	if (!setNonblock(fd)) {
+		logg->logError(__FILE__, __LINE__, "Unable to set nonblock on fh");
+		handleException();
+	}
+
+	if (!mMonitor.add(fd)) {
+		logg->logError(__FILE__, __LINE__, "Unable to add fh to monitor");
+		handleException();
+	}
+
+	// Write the handshake to the circular buffer
+	waitFor(1, Buffer::MAXSIZE_PACK32 + 4 + size - 1); // room for the packed fd, the 4-byte length and the handshake bytes
+	mBuffer.packInt(fd);
+	mBuffer.writeLEInt((unsigned char *)mBuffer.getWritePos(), size - 1); // size - 1: the visible caller passes sizeof() of a string array, so the trailing NUL is excluded
+	mBuffer.advanceWrite(4);
+	mBuffer.writeBytes(handshake, size - 1);
+}
+
+bool ExternalSource::connectMve() { // Connects to the Mali video socket when Mali video counters are enabled; returns false if connecting or starting fails
+	if (!gSessionData->maliVideo.countersEnabled()) {
+		return true; // nothing to connect to; treated as success
+	}
+
+	mMveUds = OlySocket::connect(MALI_VIDEO, sizeof(MALI_VIDEO));
+	if (mMveUds < 0) {
+		return false;
+	}
+
+	if (!gSessionData->maliVideo.start(mMveUds)) {
+		return false; // NOTE(review): mMveUds is left open on this path and prepare() ignores the result - confirm whether it should be closed
+	}
+
+	configureConnection(mMveUds, MALI_VIDEO_V1, sizeof(MALI_VIDEO_V1));
+
+	return true;
+}
+
 bool ExternalSource::prepare() {
+	if (!mMonitor.init() || !setNonblock(mMveStartupUds.getFd()) || !mMonitor.add(mMveStartupUds.getFd())) {
+		return false;
+	}
+
+	connectMve();
+
 	return true;
 }
 
 void ExternalSource::run() {
-	prctl(PR_SET_NAME, (unsigned long)&"gatord-uds", 0, 0, 0);
+	int pipefd[2];
+
+	prctl(PR_SET_NAME, (unsigned long)&"gatord-external", 0, 0, 0);
+
+	if (pipe(pipefd) != 0) {
+		logg->logError(__FILE__, __LINE__, "pipe failed");
+		handleException();
+	}
+	mInterruptFd = pipefd[1];
+
+	if (!mMonitor.add(pipefd[0])) {
+		logg->logError(__FILE__, __LINE__, "Monitor::add failed");
+		handleException();
+	}
 
 	while (gSessionData->mSessionIsActive) {
-		// Will be aborted when the socket is closed at the end of the capture
-		int length = mSock.receive(mBuffer.getWritePos(), mBuffer.contiguousSpaceAvailable());
-		if (length <= 0) {
-			break;
+		struct epoll_event events[16];
+		// Clear any pending sem posts
+		while (sem_trywait(&mBufferSem) == 0);
+		int ready = mMonitor.wait(events, ARRAY_LENGTH(events), -1);
+		if (ready < 0) {
+			logg->logError(__FILE__, __LINE__, "Monitor::wait failed");
+			handleException();
 		}
 
-		mBuffer.advanceWrite(length);
-		mBuffer.check(0);
+		const uint64_t currTime = getTime();
+
+		for (int i = 0; i < ready; ++i) {
+			const int fd = events[i].data.fd;
+			if (fd == mMveStartupUds.getFd()) {
+				// Mali Video Engine says it's alive
+				int client = mMveStartupUds.acceptConnection();
+				// Don't read from this connection, establish a new connection to Mali-V500
+				close(client);
+				if (!connectMve()) {
+					logg->logError(__FILE__, __LINE__, "Unable to configure incoming Mali video connection");
+					handleException();
+				}
+			} else if (fd == pipefd[0]) {
+				// Means interrupt has been called and mSessionIsActive should be reread
+			} else {
+				while (true) {
+					waitFor(currTime, Buffer::MAXSIZE_PACK32 + 4);
+
+					mBuffer.packInt(fd);
+					char *const bytesPos = mBuffer.getWritePos();
+					mBuffer.advanceWrite(4);
+					const int contiguous = mBuffer.contiguousSpaceAvailable();
+					const int bytes = read(fd, mBuffer.getWritePos(), contiguous);
+					if (bytes < 0) {
+						if (errno == EAGAIN) {
+							// Nothing left to read, and Buffer convention dictates that writePos can't go backwards
+							mBuffer.writeLEInt((unsigned char *)bytesPos, 0);
+							break;
+						}
+						// Something else failed, close the socket
+						mBuffer.writeLEInt((unsigned char *)bytesPos, -1);
+						close(fd);
+						break;
+					} else if (bytes == 0) {
+						// The other side is closed
+						mBuffer.writeLEInt((unsigned char *)bytesPos, -1);
+						close(fd);
+						break;
+					}
+
+					mBuffer.writeLEInt((unsigned char *)bytesPos, bytes);
+					mBuffer.advanceWrite(bytes);
+
+					// Short reads also mean nothing is left to read
+					if (bytes < contiguous) {
+						break;
+					}
+				}
+			}
+		}
+
+		// Only call mBufferCheck once per iteration
+		mBuffer.check(currTime);
 	}
 
 	mBuffer.setDone();
+
+	mInterruptFd = -1;
+	close(pipefd[0]);
+	close(pipefd[1]);
 }
 
 void ExternalSource::interrupt() {
-	// Do nothing
+	if (mInterruptFd >= 0) {
+		int8_t c = 0;
+		// Write to the pipe to wake the monitor which will cause mSessionIsActive to be reread
+		if (::write(mInterruptFd, &c, sizeof(c)) != sizeof(c)) {
+			logg->logError(__FILE__, __LINE__, "write failed");
+			handleException();
+		}
+	}
 }
 
 bool ExternalSource::isDone() {
@@ -50,7 +204,12 @@ bool ExternalSource::isDone() {
 }
 
 void ExternalSource::write(Sender *sender) {
+	// Don't send external data until the summary packet is sent so that monotonic delta is available
+	if (!gSessionData->mSentSummary) {
+		return;
+	}
 	if (!mBuffer.isDone()) {
 		mBuffer.write(sender);
+		sem_post(&mBufferSem);
 	}
 }
diff --git a/tools/gator/daemon/ExternalSource.h b/tools/gator/daemon/ExternalSource.h
index 2052bdf2823e..2e7ed27df255 100644
--- a/tools/gator/daemon/ExternalSource.h
+++ b/tools/gator/daemon/ExternalSource.h
@@ -12,6 +12,7 @@
 #include <semaphore.h>
 
 #include "Buffer.h"
+#include "Monitor.h"
 #include "OlySocket.h"
 #include "Source.h"
 
@@ -29,8 +30,16 @@ class ExternalSource : public Source {
 	void write(Sender *sender);
 
 private:
+	void waitFor(const uint64_t currTime, const int bytes);
+	void configureConnection(const int fd, const char *const handshake, size_t size);
+	bool connectMve();
+
+	sem_t mBufferSem;
 	Buffer mBuffer;
-	OlySocket mSock;
+	Monitor mMonitor;
+	OlyServerSocket mMveStartupUds;
+	int mInterruptFd;
+	int mMveUds;
 
 	// Intentionally unimplemented
 	ExternalSource(const ExternalSource &);
diff --git a/tools/gator/daemon/FSDriver.cpp b/tools/gator/daemon/FSDriver.cpp
new file mode 100644
index 000000000000..40c8df1af222
--- /dev/null
+++ b/tools/gator/daemon/FSDriver.cpp
@@ -0,0 +1,212 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "FSDriver.h"
+
+#include <fcntl.h>
+#include <regex.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "Buffer.h"
+#include "Counter.h"
+#include "DriverSource.h"
+#include "Logging.h"
+#include "SessionData.h"
+
+class FSCounter { // One counter backed by a file path; the file holds an integer or is parsed with an optional regex
+public:
+	FSCounter(FSCounter *next, char *name, const char *regex); // takes ownership of name (released with free() in the destructor); regex may be NULL
+	~FSCounter();
+
+	FSCounter *getNext() const { return next; }
+	int getKey() const { return key; }
+	bool isEnabled() const { return enabled; }
+	void setEnabled(const bool enabled) { this->enabled = enabled; }
+	const char *getName() const { return name; }
+	int64_t read(); // returns the current value read from the file called name
+
+private:
+	FSCounter *const next; // next node of the singly linked list owned by FSDriver
+	regex_t reg; // only compiled, used and freed when useRegex is set
+	char *name;
+	const int key;
+	unsigned int enabled : 1, // unsigned: a signed 1-bit field cannot represent 1, so storing true was implementation-defined
+		useRegex : 1;
+
+	// Intentionally unimplemented
+	FSCounter(const FSCounter &);
+	FSCounter &operator=(const FSCounter &);
+};
+
+FSCounter::FSCounter(FSCounter *next, char *name, const char *regex) : next(next), name(name), key(getEventKey()), enabled(false), useRegex(regex != NULL) {
+	if (useRegex) {
+		int result = regcomp(&reg, regex, REG_EXTENDED);
+		if (result != 0) {
+			char buf[128];
+			regerror(result, &reg, buf, sizeof(buf));
+			logg->logError(__FILE__, __LINE__, "Invalid regex '%s': %s", regex, buf);
+			handleException();
+		}
+	}
+}
+
+FSCounter::~FSCounter() {
+	free(name);
+	if (useRegex) {
+		regfree(&reg);
+	}
+}
+
+int64_t FSCounter::read() { // Returns the counter's current value from the file called name; every failure logs an error and calls handleException()
+	int64_t value;
+	if (useRegex) {
+		char buf[4096]; // at most sizeof(buf) - 1 bytes of the file are examined
+		size_t pos = 0;
+		const int fd = open(name, O_RDONLY);
+		if (fd < 0) {
+			goto fail;
+		}
+		while (pos < sizeof(buf) - 1) {
+			const ssize_t bytes = ::read(fd, buf + pos, sizeof(buf) - pos - 1);
+			if (bytes < 0) {
+				goto fail; // NOTE(review): fd is not closed on this path - confirm handleException() terminating the process makes that moot
+			} else if (bytes == 0) {
+				break;
+			}
+			pos += bytes;
+		}
+		close(fd);
+		buf[pos] = '\0';
+
+		regmatch_t match[2]; // match[0] is the whole match, match[1] the first capture group
+		int result = regexec(&reg, buf, 2, match, 0);
+		if (result != 0) {
+			regerror(result, &reg, buf, sizeof(buf)); // buf is reused for the error text; the file contents are no longer needed
+			logg->logError(__FILE__, __LINE__, "Parsing %s failed: %s", name, buf);
+			handleException();
+		}
+
+		if (match[1].rm_so < 0) { // the regex matched but its first capture group did not take part
+			logg->logError(__FILE__, __LINE__, "Parsing %s failed", name);
+			handleException();
+		}
+		char *endptr;
+		errno = 0;
+		value = strtoll(buf + match[1].rm_so, &endptr, 0); // base 0: decimal, 0x-prefixed hex or 0-prefixed octal
+		if (errno != 0) { // NOTE(review): endptr is never inspected, so a capture without digits yields 0 - confirm that is intended
+			logg->logError(__FILE__, __LINE__, "Parsing %s failed: %s", name, strerror(errno));
+			handleException();
+		}
+	} else {
+		if (DriverSource::readInt64Driver(name, &value) != 0) {
+			goto fail;
+		}
+	}
+	return value;
+
+ fail:
+	logg->logError(__FILE__, __LINE__, "Unable to read %s", name);
+	handleException(); // declared noreturn in Logging.h, so no return statement follows
+}
+
+FSDriver::FSDriver() : counters(NULL) {
+}
+
+FSDriver::~FSDriver() {
+	while (counters != NULL) {
+		FSCounter * counter = counters;
+		counters = counter->getNext();
+		delete counter;
+	}
+}
+
+void FSDriver::setup(mxml_node_t *const xml) { // Creates an FSCounter for every <event> in xml whose "counter" attribute starts with '/'
+	// fs driver does not currently work with perf
+	if (gSessionData->perf.isSetup()) {
+		return;
+	}
+
+	mxml_node_t *node = xml;
+	while (true) {
+		node = mxmlFindElement(node, xml, "event", NULL, NULL, MXML_DESCEND); // continue the search from the previously found element
+		if (node == NULL) {
+			break;
+		}
+		const char *counter = mxmlElementGetAttr(node, "counter");
+		if ((counter != NULL) && (counter[0] == '/')) {
+			const char *regex = mxmlElementGetAttr(node, "regex"); // NULL when absent; FSCounter then reads the file as a plain integer
+			counters = new FSCounter(counters, strdup(counter), regex); // prepended to the list; FSCounter frees the strdup'd name
+		}
+	}
+}
+
+FSCounter *FSDriver::findCounter(const Counter &counter) const {
+	for (FSCounter * fsCounter = counters; fsCounter != NULL; fsCounter = fsCounter->getNext()) {
+		if (strcmp(fsCounter->getName(), counter.getType()) == 0) {
+			return fsCounter;
+		}
+	}
+
+	return NULL;
+}
+
+bool FSDriver::claimCounter(const Counter &counter) const {
+	return findCounter(counter) != NULL;
+}
+
+bool FSDriver::countersEnabled() const {
+	for (FSCounter *counter = counters; counter != NULL; counter = counter->getNext()) {
+		if (counter->isEnabled()) {
+			return true;
+		}
+	}
+	return false;
+}
+
+void FSDriver::resetCounters() {
+	for (FSCounter * counter = counters; counter != NULL; counter = counter->getNext()) {
+		counter->setEnabled(false);
+	}
+}
+
+void FSDriver::setupCounter(Counter &counter) {
+	FSCounter *const fsCounter = findCounter(counter);
+	if (fsCounter == NULL) {
+		counter.setEnabled(false);
+		return;
+	}
+	fsCounter->setEnabled(true);
+	counter.setKey(fsCounter->getKey());
+}
+
+int FSDriver::writeCounters(mxml_node_t *root) const {
+	int count = 0;
+	for (FSCounter * counter = counters; counter != NULL; counter = counter->getNext()) {
+		if (access(counter->getName(), R_OK) == 0) {
+			mxml_node_t *node = mxmlNewElement(root, "counter");
+			mxmlElementSetAttr(node, "name", counter->getName());
+			++count;
+		}
+	}
+
+	return count;
+}
+
+void FSDriver::start() {
+}
+
+void FSDriver::read(Buffer * const buffer) {
+	for (FSCounter * counter = counters; counter != NULL; counter = counter->getNext()) {
+		if (!counter->isEnabled()) {
+			continue;
+		}
+		buffer->event(counter->getKey(), counter->read());
+	}
+}
diff --git a/tools/gator/daemon/FSDriver.h b/tools/gator/daemon/FSDriver.h
new file mode 100644
index 000000000000..ef3955362331
--- /dev/null
+++ b/tools/gator/daemon/FSDriver.h
@@ -0,0 +1,44 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef FSDRIVER_H
+#define FSDRIVER_H
+
+#include "Driver.h"
+
+class Buffer;
+class FSCounter;
+
+class FSDriver : public Driver {
+public:
+	FSDriver();
+	~FSDriver();
+
+	void setup(mxml_node_t *const xml);
+
+	bool claimCounter(const Counter &counter) const;
+	bool countersEnabled() const;
+	void resetCounters();
+	void setupCounter(Counter &counter);
+
+	int writeCounters(mxml_node_t *root) const;
+
+	void start();
+	void read(Buffer * buffer);
+
+private:
+	FSCounter *findCounter(const Counter &counter) const;
+
+	FSCounter *counters;
+
+	// Intentionally unimplemented
+	FSDriver(const FSDriver &);
+	FSDriver &operator=(const FSDriver &);
+};
+
+#endif // FSDRIVER_H
diff --git a/tools/gator/daemon/Fifo.h b/tools/gator/daemon/Fifo.h
index 7dd7426132d8..bdda3f549b50 100644
--- a/tools/gator/daemon/Fifo.h
+++ b/tools/gator/daemon/Fifo.h
@@ -45,4 +45,4 @@ class Fifo {
   Fifo &operator=(const Fifo &);
 };
 
-#endif 	//__FIFO_H__
+#endif //__FIFO_H__
diff --git a/tools/gator/daemon/Hwmon.cpp b/tools/gator/daemon/Hwmon.cpp
index 778f30755dfe..e44424743ef0 100644
--- a/tools/gator/daemon/Hwmon.cpp
+++ b/tools/gator/daemon/Hwmon.cpp
@@ -28,6 +28,7 @@ public:
 	const char *getTitle() const { return title; }
 	bool isDuplicate() const { return duplicate; }
 	const char *getDisplay() const { return display; }
+	const char *getCounterClass() const { return counter_class; }
 	const char *getUnit() const { return unit; }
 	int getModifier() const { return modifier; }
 
@@ -58,6 +59,7 @@ private:
 	char *label;
 	const char *title;
 	const char *display;
+	const char *counter_class;
 	const char *unit;
 	int modifier;
 	double previous_value;
@@ -87,7 +89,8 @@ HwmonCounter::HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, co
 	case SENSORS_FEATURE_IN:
 		title = "Voltage";
 		input = SENSORS_SUBFEATURE_IN_INPUT;
-		display = "average";
+		display = "maximum";
+		counter_class = "absolute";
 		unit = "V";
 		modifier = 1000;
 		monotonic = false;
@@ -96,6 +99,7 @@ HwmonCounter::HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, co
 		title = "Fan";
 		input = SENSORS_SUBFEATURE_FAN_INPUT;
 		display = "average";
+		counter_class = "absolute";
 		unit = "RPM";
 		modifier = 1;
 		monotonic = false;
@@ -104,6 +108,7 @@ HwmonCounter::HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, co
 		title = "Temperature";
 		input = SENSORS_SUBFEATURE_TEMP_INPUT;
 		display = "maximum";
+		counter_class = "absolute";
 		unit = "°C";
 		modifier = 1000;
 		monotonic = false;
@@ -111,7 +116,8 @@ HwmonCounter::HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, co
 	case SENSORS_FEATURE_POWER:
 		title = "Power";
 		input = SENSORS_SUBFEATURE_POWER_INPUT;
-		display = "average";
+		display = "maximum";
+		counter_class = "absolute";
 		unit = "W";
 		modifier = 1000000;
 		monotonic = false;
@@ -120,6 +126,7 @@ HwmonCounter::HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, co
 		title = "Energy";
 		input = SENSORS_SUBFEATURE_ENERGY_INPUT;
 		display = "accumulate";
+		counter_class = "delta";
 		unit = "J";
 		modifier = 1000000;
 		monotonic = true;
@@ -127,7 +134,8 @@ HwmonCounter::HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, co
 	case SENSORS_FEATURE_CURR:
 		title = "Current";
 		input = SENSORS_SUBFEATURE_CURR_INPUT;
-		display = "average";
+		display = "maximum";
+		counter_class = "absolute";
 		unit = "A";
 		modifier = 1000;
 		monotonic = false;
@@ -136,6 +144,7 @@ HwmonCounter::HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, co
 		title = "Humidity";
 		input = SENSORS_SUBFEATURE_HUMIDITY_INPUT;
 		display = "average";
+		counter_class = "absolute";
 		unit = "%";
 		modifier = 1000;
 		monotonic = false;
@@ -311,6 +320,7 @@ void Hwmon::writeEvents(mxml_node_t *root) const {
 			mxmlElementSetAttr(node, "name", counter->getLabel());
 		}
 		mxmlElementSetAttr(node, "display", counter->getDisplay());
+		mxmlElementSetAttr(node, "class", counter->getCounterClass());
 		mxmlElementSetAttr(node, "units", counter->getUnit());
 		if (counter->getModifier() != 1) {
 			mxmlElementSetAttrf(node, "modifier", "%d", counter->getModifier());
diff --git a/tools/gator/daemon/KMod.cpp b/tools/gator/daemon/KMod.cpp
index 9300002f3fb2..73e123d2f14e 100644
--- a/tools/gator/daemon/KMod.cpp
+++ b/tools/gator/daemon/KMod.cpp
@@ -58,10 +58,15 @@ void KMod::setupCounter(Counter &counter) {
 		return;
 	}
 
+	int value = 0;
 	snprintf(text, sizeof(text), "%s/key", base);
-	int key = 0;
-	DriverSource::readIntDriver(text, &key);
-	counter.setKey(key);
+	DriverSource::readIntDriver(text, &value);
+	counter.setKey(value);
+
+	snprintf(text, sizeof(text), "%s/cores", base);
+	if (DriverSource::readIntDriver(text, &value) == 0) {
+		counter.setCores(value);
+	}
 
 	snprintf(text, sizeof(text), "%s/event", base);
 	DriverSource::writeDriver(text, counter.getEvent());
diff --git a/tools/gator/daemon/LocalCapture.h b/tools/gator/daemon/LocalCapture.h
index aadeccecf0cc..b1e7219795cf 100644
--- a/tools/gator/daemon/LocalCapture.h
+++ b/tools/gator/daemon/LocalCapture.h
@@ -23,4 +23,4 @@ class LocalCapture {
 	int removeDirAndAllContents(char* path);
 };
 
-#endif 	//__LOCAL_CAPTURE_H__
+#endif //__LOCAL_CAPTURE_H__
diff --git a/tools/gator/daemon/Logging.h b/tools/gator/daemon/Logging.h
index 6ae328046989..4934bb079754 100644
--- a/tools/gator/daemon/Logging.h
+++ b/tools/gator/daemon/Logging.h
@@ -33,4 +33,4 @@ extern Logging* logg;
 
 extern void handleException() __attribute__ ((noreturn));
 
-#endif 	//__LOGGING_H__
+#endif //__LOGGING_H__
diff --git a/tools/gator/daemon/Makefile b/tools/gator/daemon/Makefile
index 24ee94045470..2ed49fdb688b 100644
--- a/tools/gator/daemon/Makefile
+++ b/tools/gator/daemon/Makefile
@@ -8,14 +8,14 @@
 # targets run 'make SOFTFLOAT=1 SYSROOT=/path/to/sysroot', see
 # README_Streamline.txt for more details
 
-CPP = $(CROSS_COMPILE)g++
-GCC = $(CROSS_COMPILE)gcc
+CC = $(CROSS_COMPILE)gcc
+CXX = $(CROSS_COMPILE)g++
 
 # -mthumb-interwork is required for interworking to ARM or Thumb stdlibc
-CFLAGS += -mthumb-interwork
+CPPFLAGS += -mthumb-interwork
 
 ifeq ($(SOFTFLOAT),1)
-	CFLAGS += -marm -march=armv4t -mfloat-abi=soft
+	CPPFLAGS += -marm -march=armv4t -mfloat-abi=soft
 	LDFLAGS += -marm -march=armv4t -mfloat-abi=soft
 endif
 ifneq ($(SYSROOT),)
diff --git a/tools/gator/daemon/Makefile_aarch64 b/tools/gator/daemon/Makefile_aarch64
index 10b4b4a71ab1..efd1fa002182 100644
--- a/tools/gator/daemon/Makefile_aarch64
+++ b/tools/gator/daemon/Makefile_aarch64
@@ -4,12 +4,9 @@
 #
 
 # Uncomment and define CROSS_COMPILE if it is not already defined
-# CROSS_COMPILE=/path/to/cross-compiler/arm-linux-gnueabihf-
-# NOTE: This toolchain uses the hardfloat abi by default. For non-hardfloat
-# targets it is necessary to add options
-# '-marm -march=armv4t -mfloat-abi=soft'.
+# CROSS_COMPILE=/path/to/cross-compiler/aarch64-linux-gnu-
 
-CPP = $(CROSS_COMPILE)g++
-GCC = $(CROSS_COMPILE)gcc
+CC = $(CROSS_COMPILE)gcc
+CXX = $(CROSS_COMPILE)g++
 
 include common.mk
diff --git a/tools/gator/daemon/MaliVideoDriver.cpp b/tools/gator/daemon/MaliVideoDriver.cpp
new file mode 100644
index 000000000000..18b413b01a37
--- /dev/null
+++ b/tools/gator/daemon/MaliVideoDriver.cpp
@@ -0,0 +1,253 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "MaliVideoDriver.h"
+
+#include <unistd.h>
+
+#include "Buffer.h"
+#include "Counter.h"
+#include "Logging.h"
+#include "SessionData.h"
+
+// From instr/src/mve_instr_comm_protocol.h
+typedef enum mve_instr_configuration_type {
+	MVE_INSTR_RAW         = 1 << 0,
+	MVE_INSTR_COUNTERS    = 1 << 1,
+	MVE_INSTR_EVENTS      = 1 << 2,
+	MVE_INSTR_ACTIVITIES  = 1 << 3,
+
+	// Raw always pushed regardless
+	MVE_INSTR_PULL        = 1 << 12,
+	// Raw always unpacked regardless
+	MVE_INSTR_PACKED_COMM = 1 << 13,
+	// Don’t send ACKt response
+	MVE_INSTR_NO_AUTO_ACK   = 1 << 14,
+} mve_instr_configuration_type_t;
+
+static const char COUNTER[] = "ARM_Mali-V500_cnt";
+static const char EVENT[] = "ARM_Mali-V500_evn";
+static const char ACTIVITY[] = "ARM_Mali-V500_act";
+
+class MaliVideoCounter { // One Mali video counter/event/activity entry in the singly linked list owned by MaliVideoDriver
+public:
+	MaliVideoCounter(MaliVideoCounter *next, const char *name, const MaliVideoCounterType type, const int id) : mNext(next), mName(name), mType(type), mId(id), mKey(getEventKey()), mEnabled(false) {
+	}
+
+	~MaliVideoCounter() {
+		free(const_cast<char *>(mName)); // every creator passes a strdup()'d name, which must be released with free(), not delete
+	}
+
+	MaliVideoCounter *getNext() const { return mNext; }
+	const char *getName() const { return mName; }
+	MaliVideoCounterType getType() const { return mType; }
+	int getId() const { return mId; }
+	int getKey() const { return mKey; }
+	bool isEnabled() const { return mEnabled; }
+	void setEnabled(const bool enabled) { mEnabled = enabled; }
+
+private:
+	MaliVideoCounter *const mNext;
+	const char *const mName; // owned by this object; see the destructor
+	const MaliVideoCounterType mType;
+	// Mali Video id
+	const int mId;
+	// Streamline key
+	const int mKey;
+	bool mEnabled;
+};
+
+MaliVideoDriver::MaliVideoDriver() : mCounters(NULL), mActivityCount(0) {
+}
+
+MaliVideoDriver::~MaliVideoDriver() {
+	while (mCounters != NULL) {
+		MaliVideoCounter *counter = mCounters;
+		mCounters = counter->getNext();
+		delete counter;
+	}
+}
+
+void MaliVideoDriver::setup(mxml_node_t *const xml) { // Builds mCounters from the <event> elements of xml whose "counter" attribute names a Mali-V500 counter, event or activity
+	// The Mali video driver does not currently work with perf
+	if (gSessionData->perf.isSetup()) {
+		return;
+	}
+
+	mxml_node_t *node = xml;
+	while (true) {
+		node = mxmlFindElement(node, xml, "event", NULL, NULL, MXML_DESCEND); // continue the search from the previously found element
+		if (node == NULL) {
+			break;
+		}
+		const char *counter = mxmlElementGetAttr(node, "counter");
+		if (counter == NULL) {
+			// Ignore
+		} else if (strncmp(counter, COUNTER, sizeof(COUNTER) - 1) == 0) {
+			const int i = strtol(counter + sizeof(COUNTER) - 1, NULL, 10); // decimal suffix after the prefix becomes the Mali Video id
+			mCounters = new MaliVideoCounter(mCounters, strdup(counter), MVCT_COUNTER, i);
+		} else if (strncmp(counter, EVENT, sizeof(EVENT) - 1) == 0) {
+			const int i = strtol(counter + sizeof(EVENT) - 1, NULL, 10); // decimal suffix after the prefix becomes the Mali Video id
+			mCounters = new MaliVideoCounter(mCounters, strdup(counter), MVCT_EVENT, i);
+		} else if (strcmp(counter, ACTIVITY) == 0) { // exact match: the activity counter carries no numeric suffix
+			mCounters = new MaliVideoCounter(mCounters, strdup(ACTIVITY), MVCT_ACTIVITY, 0);
+			mActivityCount = 0;
+			while (true) { // count the consecutive activity1, activity2, ... attributes present on this element
+				char buf[32];
+				snprintf(buf, sizeof(buf), "activity%i", mActivityCount + 1);
+				if (mxmlElementGetAttr(node, buf) == NULL) {
+					break;
+				}
+				++mActivityCount;
+			}
+		}
+	}
+}
+
+MaliVideoCounter *MaliVideoDriver::findCounter(const Counter &counter) const {
+	for (MaliVideoCounter *maliVideoCounter = mCounters; maliVideoCounter != NULL; maliVideoCounter = maliVideoCounter->getNext()) {
+		if (strcmp(maliVideoCounter->getName(), counter.getType()) == 0) {
+			return maliVideoCounter;
+		}
+	}
+
+	return NULL;
+}
+
+bool MaliVideoDriver::claimCounter(const Counter &counter) const {
+	return findCounter(counter) != NULL;
+}
+
+bool MaliVideoDriver::countersEnabled() const {
+	for (MaliVideoCounter * counter = mCounters; counter != NULL; counter = counter->getNext()) {
+		if (counter->isEnabled()) {
+			return true;
+		}
+	}
+	return false;
+}
+
+void MaliVideoDriver::resetCounters() {
+	for (MaliVideoCounter * counter = mCounters; counter != NULL; counter = counter->getNext()) {
+		counter->setEnabled(false);
+	}
+}
+
+void MaliVideoDriver::setupCounter(Counter &counter) {
+	MaliVideoCounter *const maliVideoCounter = findCounter(counter);
+	if (maliVideoCounter == NULL) {
+		counter.setEnabled(false);
+		return;
+	}
+	maliVideoCounter->setEnabled(true);
+	counter.setKey(maliVideoCounter->getKey());
+}
+
+int MaliVideoDriver::writeCounters(mxml_node_t *root) const {
+	if (access("/dev/mv500", F_OK) != 0) {
+		return 0;
+	}
+
+	int count = 0;
+	for (MaliVideoCounter * counter = mCounters; counter != NULL; counter = counter->getNext()) {
+		mxml_node_t *node = mxmlNewElement(root, "counter");
+		mxmlElementSetAttr(node, "name", counter->getName());
+		++count;
+	}
+
+	return count;
+}
+
+void MaliVideoDriver::marshalEnable(const MaliVideoCounterType type, char *const buf, const size_t bufsize, int &pos) {
+	// size
+	int numEnabled = 0;
+	for (MaliVideoCounter * counter = mCounters; counter != NULL; counter = counter->getNext()) {
+		if (counter->isEnabled() && (counter->getType() == type)) {
+			++numEnabled;
+		}
+	}
+	Buffer::packInt(buf, bufsize, pos, numEnabled*sizeof(uint32_t));
+	for (MaliVideoCounter * counter = mCounters; counter != NULL; counter = counter->getNext()) {
+		if (counter->isEnabled() && (counter->getType() == type)) {
+			Buffer::packInt(buf, bufsize, pos, counter->getId());
+		}
+	}
+}
+
+bool MaliVideoDriver::start(const int mveUds) { // Marshals the startup, configure and enable messages into buf and writes them to mveUds; returns false if a write fails
+	char buf[256];
+	int pos = 0;
+
+	// code - MVE_INSTR_STARTUP
+	buf[pos++] = 'C';
+	buf[pos++] = 'L';
+	buf[pos++] = 'N';
+	buf[pos++] = 'T';
+	// size
+	Buffer::packInt(buf, sizeof(buf), pos, sizeof(uint32_t));
+	// client_version_number
+	Buffer::packInt(buf, sizeof(buf), pos, 1);
+
+	// code - MVE_INSTR_CONFIGURE
+	buf[pos++] = 'C';
+	buf[pos++] = 'N';
+	buf[pos++] = 'F';
+	buf[pos++] = 'G';
+	// size
+	Buffer::packInt(buf, sizeof(buf), pos, 5*sizeof(uint32_t));
+	// configuration
+	Buffer::packInt(buf, sizeof(buf), pos, MVE_INSTR_COUNTERS | MVE_INSTR_EVENTS | MVE_INSTR_ACTIVITIES | MVE_INSTR_PACKED_COMM);
+	// communication_protocol_version
+	Buffer::packInt(buf, sizeof(buf), pos, 1);
+	// data_protocol_version
+	Buffer::packInt(buf, sizeof(buf), pos, 1);
+	// sample_rate - convert samples/second to ms/sample
+	Buffer::packInt(buf, sizeof(buf), pos, 1000/gSessionData->mSampleRate); // NOTE(review): divides by mSampleRate - confirm it cannot be 0 here
+	// live_rate - convert ns/flush to ms/flush
+	Buffer::packInt(buf, sizeof(buf), pos, gSessionData->mLiveRate/1000000);
+
+	// code - MVE_INSTR_ENABLE_COUNTERS
+	buf[pos++] = 'C';
+	buf[pos++] = 'F';
+	buf[pos++] = 'G';
+	buf[pos++] = 'c';
+	marshalEnable(MVCT_COUNTER, buf, sizeof(buf), pos);
+
+	// code - MVE_INSTR_ENABLE_EVENTS
+	buf[pos++] = 'C';
+	buf[pos++] = 'F';
+	buf[pos++] = 'G';
+	buf[pos++] = 'e';
+	marshalEnable(MVCT_EVENT, buf, sizeof(buf), pos);
+
+	/*
+	// code - MVE_INSTR_ENABLE_ACTIVITIES
+	buf[pos++] = 'C';
+	buf[pos++] = 'F';
+	buf[pos++] = 'G';
+	buf[pos++] = 'a';
+	// size
+	Buffer::packInt(buf, sizeof(buf), pos, mActivityCount*sizeof(uint32_t));
+	for (int i = 0; i < mActivityCount; ++i) {
+		// activity_id
+		Buffer::packInt(buf, sizeof(buf), pos, i);
+	}
+	*/
+
+	int written = 0;
+	while (written < pos) { // write() may accept fewer bytes than requested, so loop until all of buf[0..pos) is sent
+		ssize_t bytes = ::write(mveUds, buf + written, pos - written); // signed, so a -1 error return is caught by the check below
+		if (bytes <= 0) {
+			logg->logMessage("%s(%s:%i): write failed", __FUNCTION__, __FILE__, __LINE__);
+			return false;
+		}
+		written += bytes;
+	}
+
+	return true;
+}
diff --git a/tools/gator/daemon/MaliVideoDriver.h b/tools/gator/daemon/MaliVideoDriver.h
new file mode 100644
index 000000000000..00cb80889a74
--- /dev/null
+++ b/tools/gator/daemon/MaliVideoDriver.h
@@ -0,0 +1,50 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MALIVIDEODRIVER_H
+#define MALIVIDEODRIVER_H
+
+#include "Driver.h"
+
+class MaliVideoCounter;
+
+enum MaliVideoCounterType { // Category tag; start() passes it to marshalEnable() to pick what gets enabled
+	MVCT_COUNTER, // used after the 'CFGc' (MVE_INSTR_ENABLE_COUNTERS) code in start()
+	MVCT_EVENT, // used after the 'CFGe' (MVE_INSTR_ENABLE_EVENTS) code in start()
+	MVCT_ACTIVITY, // the matching 'CFGa' (MVE_INSTR_ENABLE_ACTIVITIES) block in start() is currently commented out
+};
+
+class MaliVideoDriver : public Driver { // Driver subclass whose start() sends the Mali video instrumentation setup over a descriptor
+public:
+	MaliVideoDriver();
+	~MaliVideoDriver();
+
+	void setup(mxml_node_t *const xml);
+
+	bool claimCounter(const Counter &counter) const;
+	bool countersEnabled() const;
+	void resetCounters();
+	void setupCounter(Counter &counter);
+
+	int writeCounters(mxml_node_t *root) const;
+
+	bool start(const int mveUds); // packs the configuration and enable requests, writes them to mveUds; false when a write fails
+
+private:
+	MaliVideoCounter *findCounter(const Counter &counter) const;
+	void marshalEnable(const MaliVideoCounterType type, char *const buf, const size_t bufsize, int &pos); // called by start() right after each enable code; pos is the running offset into buf, presumably advanced here - TODO confirm
+
+	MaliVideoCounter *mCounters;
+	int mActivityCount; // NOTE(review): in the visible part of start() this is only referenced from the commented-out activities block
+
+	// Intentionally unimplemented: declared private so that instances cannot be copied or assigned
+	MaliVideoDriver(const MaliVideoDriver &);
+	MaliVideoDriver &operator=(const MaliVideoDriver &);
+};
+
+#endif // MALIVIDEODRIVER_H
diff --git a/tools/gator/daemon/Monitor.cpp b/tools/gator/daemon/Monitor.cpp
index 90d5c47706c7..b34a15f0eb0c 100644
--- a/tools/gator/daemon/Monitor.cpp
+++ b/tools/gator/daemon/Monitor.cpp
@@ -18,8 +18,15 @@ Monitor::Monitor() : mFd(-1) {
 }
 
 Monitor::~Monitor() {
-	if (mFd >= -1) {
-		close(mFd);
+	if (mFd >= 0) {
+		::close(mFd);
+	}
+}
+
+void Monitor::close() { // Releases the descriptor ahead of destruction; the mFd >= 0 guard makes repeated calls harmless
+	if (mFd >= 0) {
+		::close(mFd); // global-namespace close(), not a recursive call to Monitor::close()
+		mFd = -1; // so the destructor's mFd >= 0 check skips the already-closed descriptor
 	}
 }
 
diff --git a/tools/gator/daemon/Monitor.h b/tools/gator/daemon/Monitor.h
index 6e268b6e1bed..7194e0e4ca50 100644
--- a/tools/gator/daemon/Monitor.h
+++ b/tools/gator/daemon/Monitor.h
@@ -16,6 +16,7 @@ class Monitor {
 	Monitor();
 	~Monitor();
 
+	void close();
 	bool init();
 	bool add(const int fd);
 	int wait(struct epoll_event *const events, int maxevents, int timeout);
diff --git a/tools/gator/daemon/OlySocket.cpp b/tools/gator/daemon/OlySocket.cpp
index 26e4768f3934..28774e36e510 100644
--- a/tools/gator/daemon/OlySocket.cpp
+++ b/tools/gator/daemon/OlySocket.cpp
@@ -9,6 +9,7 @@
 #include "OlySocket.h"
 
 #include <stdio.h>
+#include <string.h>
 #ifdef WIN32
 #include <Winsock2.h>
 #include <ws2tcpip.h>
@@ -43,16 +44,18 @@ OlyServerSocket::OlyServerSocket(int port) {
   createServerSocket(port);
 }
 
-OlySocket::OlySocket(int port, const char* host) {
-  createClientSocket(host, port);
-}
-
 OlySocket::OlySocket(int socketID) : mSocketID(socketID) {
 }
 
 #ifndef WIN32
 
-OlyServerSocket::OlyServerSocket(const char* path) {
+#define MIN(A, B) ({ /* smaller of A and B; each argument is evaluated exactly once */ \
+  const __typeof__(A) lhs_ = (A); \
+  const __typeof__(B) rhs_ = (B); \
+  rhs_ < lhs_ ? rhs_ : lhs_; \
+})
+
+OlyServerSocket::OlyServerSocket(const char* path, const size_t pathSize) {
   // Create socket
   mFDServer = socket(PF_UNIX, SOCK_STREAM, 0);
   if (mFDServer < 0) {
@@ -60,13 +63,11 @@ OlyServerSocket::OlyServerSocket(const char* path) {
     handleException();
   }
 
-  unlink(path);
-
   // Create sockaddr_in structure, ensuring non-populated fields are zero
   struct sockaddr_un sockaddr;
   memset((void*)&sockaddr, 0, sizeof(sockaddr));
   sockaddr.sun_family = AF_UNIX;
-  strncpy(sockaddr.sun_path, path, sizeof(sockaddr.sun_path) - 1);
+  memcpy(sockaddr.sun_path, path, MIN(pathSize, sizeof(sockaddr.sun_path)));
   sockaddr.sun_path[sizeof(sockaddr.sun_path) - 1] = '\0';
 
   // Bind the socket to an address
@@ -82,24 +83,25 @@ OlyServerSocket::OlyServerSocket(const char* path) {
   }
 }
 
-OlySocket::OlySocket(const char* path) {
-  mSocketID = socket(PF_UNIX, SOCK_STREAM, 0);
-  if (mSocketID < 0) {
-    return;
+int OlySocket::connect(const char* path, const size_t pathSize) {
+  int fd = socket(PF_UNIX, SOCK_STREAM, 0);
+  if (fd < 0) {
+    return -1;
   }
 
   // Create sockaddr_in structure, ensuring non-populated fields are zero
   struct sockaddr_un sockaddr;
   memset((void*)&sockaddr, 0, sizeof(sockaddr));
   sockaddr.sun_family = AF_UNIX;
-  strncpy(sockaddr.sun_path, path, sizeof(sockaddr.sun_path) - 1);
+  memcpy(sockaddr.sun_path, path, MIN(pathSize, sizeof(sockaddr.sun_path)));
   sockaddr.sun_path[sizeof(sockaddr.sun_path) - 1] = '\0';
 
-  if (connect(mSocketID, (const struct sockaddr*)&sockaddr, sizeof(sockaddr)) < 0) {
-    close(mSocketID);
-    mSocketID = -1;
-    return;
+  if (::connect(fd, (const struct sockaddr*)&sockaddr, sizeof(sockaddr)) < 0) {
+    close(fd);
+    return -1;
   }
+
+  return fd;
 }
 
 #endif
@@ -137,47 +139,6 @@ void OlyServerSocket::closeServerSocket() {
   mFDServer = 0;
 }
 
-void OlySocket::createClientSocket(const char* hostname, int portno) {
-#ifdef WIN32
-  // TODO: Implement for Windows
-#else
-  char buf[32];
-  struct addrinfo hints, *res, *res0;
-
-  snprintf(buf, sizeof(buf), "%d", portno);
-  mSocketID = -1;
-  memset((void*)&hints, 0, sizeof(hints));
-  hints.ai_family = PF_UNSPEC;
-  hints.ai_socktype = SOCK_STREAM;
-
-  if (getaddrinfo(hostname, buf, &hints, &res0)) {
-    logg->logError(__FILE__, __LINE__, "Client socket failed to get address info for %s", hostname);
-    handleException();
-  }
-  for (res=res0; res!=NULL; res = res->ai_next) {
-    if ( res->ai_family != PF_INET || res->ai_socktype != SOCK_STREAM ) {
-      continue;
-    }
-    mSocketID = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
-    if (mSocketID < 0) {
-      continue;
-    }
-    if (connect(mSocketID, res->ai_addr, res->ai_addrlen) < 0) {
-      close(mSocketID);
-      mSocketID = -1;
-    }
-    if (mSocketID > 0) {
-      break;
-    }
-  }
-  freeaddrinfo(res0);
-  if (mSocketID <= 0) {
-    logg->logError(__FILE__, __LINE__, "Could not connect to client socket. Ensure ARM Streamline is running.");
-    handleException();
-  }
-#endif
-}
-
 void OlyServerSocket::createServerSocket(int port) {
   int family = AF_INET6;
 
diff --git a/tools/gator/daemon/OlySocket.h b/tools/gator/daemon/OlySocket.h
index eab786b304bf..20c67cc695e1 100644
--- a/tools/gator/daemon/OlySocket.h
+++ b/tools/gator/daemon/OlySocket.h
@@ -9,13 +9,15 @@
 #ifndef __OLY_SOCKET_H__
 #define __OLY_SOCKET_H__
 
+#include <stddef.h>
+
 class OlySocket {
 public:
-  OlySocket(int port, const char* hostname);
-  OlySocket(int socketID);
 #ifndef WIN32
-  OlySocket(const char* path);
+  static int connect(const char* path, const size_t pathSize);
 #endif
+
+  OlySocket(int socketID);
   ~OlySocket();
 
   void closeSocket();
@@ -29,21 +31,21 @@ class OlySocket {
 
 private:
   int mSocketID;
-
-  void createClientSocket(const char* hostname, int port);
 };
 
 class OlyServerSocket {
 public:
   OlyServerSocket(int port);
 #ifndef WIN32
-  OlyServerSocket(const char* path);
+  OlyServerSocket(const char* path, const size_t pathSize);
 #endif
   ~OlyServerSocket();
 
   int acceptConnection();
   void closeServerSocket();
 
+  int getFd() { return mFDServer; }
+
 private:
   int mFDServer;
 
diff --git a/tools/gator/daemon/PerfDriver.cpp b/tools/gator/daemon/PerfDriver.cpp
index 8e25c22f6798..ac97a077d266 100644
--- a/tools/gator/daemon/PerfDriver.cpp
+++ b/tools/gator/daemon/PerfDriver.cpp
@@ -11,6 +11,7 @@
 #include <dirent.h>
 #include <sys/utsname.h>
 #include <time.h>
+#include <unistd.h>
 
 #include "Buffer.h"
 #include "Config.h"
@@ -30,7 +31,7 @@
 struct gator_cpu {
 	const int cpuid;
 	// Human readable name
-	const char core_name[32];
+	const char *const core_name;
 	// gatorfs event and Perf PMU name
 	const char *const pmnc_name;
 	const int pmnc_counters;
@@ -62,9 +63,20 @@ static const struct gator_cpu gator_cpus[] = {
 static const char OLD_PMU_PREFIX[] = "ARMv7 Cortex-";
 static const char NEW_PMU_PREFIX[] = "ARMv7_Cortex_";
 
+struct uncore_counter { // Describes one uncore PMU that PerfDriver::setup() looks for
+	// gatorfs event and Perf PMU name; setup() compares it against each directory entry name with strcmp
+	const char *const name;
+	const int count; // handed to addUncoreCounters() as numCounters, i.e. how many "<name>_cnt<N>" counters to register
+};
+
+static const struct uncore_counter uncore_counters[] = { // scanned in full by setup() for every directory entry it visits
+	{ "CCI_400", 4 }, // { PMU name, number of event counters }
+	{ "CCI_400-r1", 4 },
+};
+
 class PerfCounter {
 public:
-	PerfCounter(PerfCounter *next, const char *name, uint32_t type, uint64_t config) : mNext(next), mName(name), mType(type), mCount(0), mKey(getEventKey()), mConfig(config), mEnabled(false) {}
+	PerfCounter(PerfCounter *next, const char *name, uint32_t type, uint64_t config, bool perCpu) : mNext(next), mName(name), mType(type), mCount(0), mKey(getEventKey()), mConfig(config), mEnabled(false), mPerCpu(perCpu) {}
 	~PerfCounter() {
 		delete [] mName;
 	}
@@ -79,6 +91,7 @@ public:
 	void setConfig(const uint64_t config) { mConfig = config; }
 	bool isEnabled() const { return mEnabled; }
 	void setEnabled(const bool enabled) { mEnabled = enabled; }
+	bool isPerCpu() const { return mPerCpu; }
 
 private:
 	PerfCounter *const mNext;
@@ -87,10 +100,11 @@ private:
 	int mCount;
 	const int mKey;
 	uint64_t mConfig;
-	bool mEnabled;
+	int mEnabled : 1,
+		mPerCpu : 1;
 };
 
-PerfDriver::PerfDriver() : mCounters(NULL), mIsSetup(false) {
+PerfDriver::PerfDriver() : mCounters(NULL), mIsSetup(false), mLegacySupport(false) {
 }
 
 PerfDriver::~PerfDriver() {
@@ -105,13 +119,27 @@ void PerfDriver::addCpuCounters(const char *const counterName, const int type, c
 	int len = snprintf(NULL, 0, "%s_ccnt", counterName) + 1;
 	char *name = new char[len];
 	snprintf(name, len, "%s_ccnt", counterName);
-	mCounters = new PerfCounter(mCounters, name, type, -1);
+	mCounters = new PerfCounter(mCounters, name, type, -1, true);
 
 	for (int j = 0; j < numCounters; ++j) {
 		len = snprintf(NULL, 0, "%s_cnt%d", counterName, j) + 1;
 		name = new char[len];
 		snprintf(name, len, "%s_cnt%d", counterName, j);
-		mCounters = new PerfCounter(mCounters, name, type, -1);
+		mCounters = new PerfCounter(mCounters, name, type, -1, true);
+	}
+}
+
+void PerfDriver::addUncoreCounters(const char *const counterName, const int type, const int numCounters) { // Registers "<counterName>_ccnt" plus numCounters "<counterName>_cnt<N>" counters, all flagged as not per-cpu
+	int len = snprintf(NULL, 0, "%s_ccnt", counterName) + 1; // sizing pass: formatted length plus one for the terminator
+	char *name = new char[len];
+	snprintf(name, len, "%s_ccnt", counterName);
+	mCounters = new PerfCounter(mCounters, name, type, -1, false); // becomes the new list head; ~PerfCounter delete[]s the name
+
+	for (int j = 0; j < numCounters; ++j) {
+		len = snprintf(NULL, 0, "%s_cnt%d", counterName, j) + 1;
+		name = new char[len]; // a fresh buffer per counter, since each PerfCounter keeps the pointer it is given
+		snprintf(name, len, "%s_cnt%d", counterName, j);
+		mCounters = new PerfCounter(mCounters, name, type, -1, false); // perCpu == false: PerfGroup::prepareCPU skips such entries when cpu != 0
 	}
 }
 
@@ -139,10 +167,16 @@ bool PerfDriver::setup() {
 		}
 	}
 
-	if (KERNEL_VERSION(release[0], release[1], release[2]) < KERNEL_VERSION(3, 12, 0)) {
+	if (KERNEL_VERSION(release[0], release[1], release[2]) < KERNEL_VERSION(3, 4, 0)) {
 		logg->logMessage("%s(%s:%i): Unsupported kernel version", __FUNCTION__, __FILE__, __LINE__);
 		return false;
 	}
+	mLegacySupport = KERNEL_VERSION(release[0], release[1], release[2]) < KERNEL_VERSION(3, 12, 0);
+
+	if (access(EVENTS_PATH, R_OK) != 0) {
+		logg->logMessage("%s(%s:%i): " EVENTS_PATH " does not exist, is CONFIG_TRACING enabled?", __FUNCTION__, __FILE__, __LINE__);
+		return false;
+	}
 
 	// Add supported PMUs
 	bool foundCpu = false;
@@ -174,6 +208,21 @@ bool PerfDriver::setup() {
 			foundCpu = true;
 			addCpuCounters(gator_cpus[i].pmnc_name, type, gator_cpus[i].pmnc_counters);
 		}
+
+		for (int i = 0; i < ARRAY_LENGTH(uncore_counters); ++i) {
+			if (strcmp(dirent->d_name, uncore_counters[i].name) != 0) {
+				continue;
+			}
+
+			int type;
+			char buf[256];
+			snprintf(buf, sizeof(buf), PERF_DEVICES "/%s/type", dirent->d_name);
+			if (DriverSource::readIntDriver(buf, &type) != 0) {
+				continue;
+			}
+
+			addUncoreCounters(uncore_counters[i].name, type, uncore_counters[i].count);
+		}
 	}
 	closedir(dir);
 
@@ -203,12 +252,12 @@ bool PerfDriver::setup() {
 
 	id = getTracepointId("irq/softirq_exit", &printb);
 	if (id >= 0) {
-		mCounters = new PerfCounter(mCounters, "Linux_irq_softirq", PERF_TYPE_TRACEPOINT, id);
+		mCounters = new PerfCounter(mCounters, "Linux_irq_softirq", PERF_TYPE_TRACEPOINT, id, true);
 	}
 
 	id = getTracepointId("irq/irq_handler_exit", &printb);
 	if (id >= 0) {
-		mCounters = new PerfCounter(mCounters, "Linux_irq_irq", PERF_TYPE_TRACEPOINT, id);
+		mCounters = new PerfCounter(mCounters, "Linux_irq_irq", PERF_TYPE_TRACEPOINT, id, true);
 	}
 
 	//Linux_block_rq_wr
@@ -218,7 +267,7 @@ bool PerfDriver::setup() {
 
 	id = getTracepointId(SCHED_SWITCH, &printb);
 	if (id >= 0) {
-		mCounters = new PerfCounter(mCounters, "Linux_sched_switch", PERF_TYPE_TRACEPOINT, id);
+		mCounters = new PerfCounter(mCounters, "Linux_sched_switch", PERF_TYPE_TRACEPOINT, id, true);
 	}
 
 	//Linux_meminfo_memused
@@ -227,7 +276,7 @@ bool PerfDriver::setup() {
 	//Linux_power_cpu_freq
 	//Linux_power_cpu_idle
 
-	mCounters = new PerfCounter(mCounters, "Linux_cpu_wait_contention", TYPE_DERIVED, -1);
+	mCounters = new PerfCounter(mCounters, "Linux_cpu_wait_contention", TYPE_DERIVED, -1, false);
 
 	//Linux_cpu_wait_io
 
@@ -252,15 +301,16 @@ bool PerfDriver::summary(Buffer *const buffer) {
 	}
 	const int64_t timestamp = (int64_t)ts.tv_sec * 1000000000L + ts.tv_nsec;
 
-	if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
-		logg->logMessage("%s(%s:%i): clock_gettime failed", __FUNCTION__, __FILE__, __LINE__);
-		return false;
-	}
-	const int64_t uptime = (int64_t)ts.tv_sec * 1000000000L + ts.tv_nsec;
+	const int64_t uptime = getTime();
 
 	buffer->summary(timestamp, uptime, 0, buf);
 
 	for (int i = 0; i < gSessionData->mCores; ++i) {
+		// Don't send information on a cpu we know nothing about
+		if (gSessionData->mCpuIds[i] == -1) {
+			continue;
+		}
+
 		int j;
 		for (j = 0; j < ARRAY_LENGTH(gator_cpus); ++j) {
 			if (gator_cpus[j].cpuid == gSessionData->mCpuIds[i]) {
@@ -270,7 +320,11 @@ bool PerfDriver::summary(Buffer *const buffer) {
 		if (gator_cpus[j].cpuid == gSessionData->mCpuIds[i]) {
 			buffer->coreName(i, gSessionData->mCpuIds[i], gator_cpus[j].core_name);
 		} else {
-			snprintf(buf, sizeof(buf), "Unknown (0x%.3x)", gSessionData->mCpuIds[i]);
+			if (gSessionData->mCpuIds[i] == -1) {
+				snprintf(buf, sizeof(buf), "Unknown");
+			} else {
+				snprintf(buf, sizeof(buf), "Unknown (0x%.3x)", gSessionData->mCpuIds[i]);
+			}
 			buffer->coreName(i, gSessionData->mCpuIds[i], buf);
 		}
 	}
@@ -326,10 +380,10 @@ int PerfDriver::writeCounters(mxml_node_t *root) const {
 	return count;
 }
 
-bool PerfDriver::enable(PerfGroup *group, Buffer *const buffer) const {
+bool PerfDriver::enable(PerfGroup *const group, Buffer *const buffer) const {
 	for (PerfCounter * counter = mCounters; counter != NULL; counter = counter->getNext()) {
 		if (counter->isEnabled() && (counter->getType() != TYPE_DERIVED)) {
-			if (!group->add(buffer, counter->getKey(), counter->getType(), counter->getConfig(), counter->getCount(), 0, 0)) {
+			if (!group->add(buffer, counter->getKey(), counter->getType(), counter->getConfig(), counter->getCount(), counter->getCount() > 0 ? PERF_SAMPLE_TID | PERF_SAMPLE_IP : 0, counter->isPerCpu() ? PERF_GROUP_PER_CPU : 0)) {
 				logg->logMessage("%s(%s:%i): PerfGroup::add failed", __FUNCTION__, __FILE__, __LINE__);
 				return false;
 			}
diff --git a/tools/gator/daemon/PerfDriver.h b/tools/gator/daemon/PerfDriver.h
index 3181b74f5570..2cae575a7059 100644
--- a/tools/gator/daemon/PerfDriver.h
+++ b/tools/gator/daemon/PerfDriver.h
@@ -27,6 +27,8 @@ class PerfDriver : public Driver {
 	PerfDriver();
 	~PerfDriver();
 
+	bool getLegacySupport() const { return mLegacySupport; }
+
 	bool setup();
 	bool summary(Buffer *const buffer);
 	bool isSetup() const { return mIsSetup; }
@@ -37,16 +39,18 @@ class PerfDriver : public Driver {
 
 	int writeCounters(mxml_node_t *root) const;
 
-	bool enable(PerfGroup *group, Buffer *const buffer) const;
+	bool enable(PerfGroup *const group, Buffer *const buffer) const;
 
 	static long long getTracepointId(const char *const name, DynBuf *const printb);
 
 private:
 	PerfCounter *findCounter(const Counter &counter) const;
 	void addCpuCounters(const char *const counterName, const int type, const int numCounters);
+	void addUncoreCounters(const char *const counterName, const int type, const int numCounters);
 
 	PerfCounter *mCounters;
 	bool mIsSetup;
+	bool mLegacySupport;
 
 	// Intentionally undefined
 	PerfDriver(const PerfDriver &);
diff --git a/tools/gator/daemon/PerfGroup.cpp b/tools/gator/daemon/PerfGroup.cpp
index faf5fcaf15e6..2a0239f7c348 100644
--- a/tools/gator/daemon/PerfGroup.cpp
+++ b/tools/gator/daemon/PerfGroup.cpp
@@ -23,7 +23,9 @@
 #define DEFAULT_PEA_ARGS(pea, additionalSampleType) \
 	pea.size = sizeof(pea); \
 	/* Emit time, read_format below, group leader id, and raw tracepoint info */ \
-	pea.sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_READ | PERF_SAMPLE_IDENTIFIER | additionalSampleType; \
+	pea.sample_type = (gSessionData->perf.getLegacySupport() \
+										 ? PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_TIME | PERF_SAMPLE_READ | PERF_SAMPLE_ID \
+										 : PERF_SAMPLE_TIME | PERF_SAMPLE_READ | PERF_SAMPLE_IDENTIFIER ) | additionalSampleType; \
 	/* Emit emit value in group format */ \
 	pea.read_format = PERF_FORMAT_ID | PERF_FORMAT_GROUP; \
 	/* start out disabled */ \
@@ -39,6 +41,7 @@ static int sys_perf_event_open(struct perf_event_attr *const attr, const pid_t p
 
 PerfGroup::PerfGroup(PerfBuffer *const pb) : mPb(pb) {
 	memset(&mAttrs, 0, sizeof(mAttrs));
+	memset(&mPerCpu, 0, sizeof(mPerCpu));
 	memset(&mKeys, -1, sizeof(mKeys));
 	memset(&mFds, -1, sizeof(mFds));
 }
@@ -75,6 +78,7 @@ bool PerfGroup::add(Buffer *const buffer, const int key, const __u32 type, const
 	mAttrs[i].freq = (flags & PERF_GROUP_FREQ ? 1 : 0);
 	mAttrs[i].task = (flags & PERF_GROUP_TASK ? 1 : 0);
 	mAttrs[i].sample_id_all = (flags & PERF_GROUP_SAMPLE_ID_ALL ? 1 : 0);
+	mPerCpu[i] = (flags & PERF_GROUP_PER_CPU);
 
 	mKeys[i] = key;
 
@@ -91,13 +95,17 @@ bool PerfGroup::prepareCPU(const int cpu) {
 			continue;
 		}
 
+		if ((cpu != 0) && !mPerCpu[i]) {
+			continue;
+		}
+
 		const int offset = i * gSessionData->mCores;
 		if (mFds[cpu + offset] >= 0) {
 			logg->logMessage("%s(%s:%i): cpu already online or not correctly cleaned up", __FUNCTION__, __FILE__, __LINE__);
 			return false;
 		}
 
-		logg->logMessage("%s(%s:%i): perf_event_open cpu: %i type: %lli config: %lli sample: %lli sample_type: %lli", __FUNCTION__, __FILE__, __LINE__, cpu, (long long)mAttrs[i].type, (long long)mAttrs[i].config, (long long)mAttrs[i].sample_period, (long long)mAttrs[i].sample_type);
+		logg->logMessage("%s(%s:%i): perf_event_open cpu: %i type: %lli config: %lli sample: %lli sample_type: 0x%llx pinned: %i mmap: %i comm: %i freq: %i task: %i sample_id_all: %i", __FUNCTION__, __FILE__, __LINE__, cpu, (long long)mAttrs[i].type, (long long)mAttrs[i].config, (long long)mAttrs[i].sample_period, (long long)mAttrs[i].sample_type, mAttrs[i].pinned, mAttrs[i].mmap, mAttrs[i].comm, mAttrs[i].freq, mAttrs[i].task, mAttrs[i].sample_id_all);
 		mFds[cpu + offset] = sys_perf_event_open(&mAttrs[i], -1, cpu, i == 0 ? -1 : mFds[cpu], i == 0 ? 0 : PERF_FLAG_FD_OUTPUT);
 		if (mFds[cpu + offset] < 0) {
 			logg->logMessage("%s(%s:%i): failed %s", __FUNCTION__, __FILE__, __LINE__, strerror(errno));
@@ -125,7 +133,9 @@ int PerfGroup::onlineCPU(const int cpu, const bool start, Buffer *const buffer,
 		}
 
 		coreKeys[idCount] = mKeys[i];
-		if (ioctl(fd, PERF_EVENT_IOC_ID, &ids[idCount]) != 0) {
+		if (!gSessionData->perf.getLegacySupport() && ioctl(fd, PERF_EVENT_IOC_ID, &ids[idCount]) != 0 &&
+				// Workaround for running 32-bit gatord on 64-bit systems, kernel patch in the works
+				ioctl(fd, (PERF_EVENT_IOC_ID & ~IOCSIZE_MASK) | (8 << _IOC_SIZESHIFT), &ids[idCount]) != 0) {
 			logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__);
 			return false;
 		}
@@ -137,7 +147,17 @@ int PerfGroup::onlineCPU(const int cpu, const bool start, Buffer *const buffer,
 		return false;
 	}
 
-	buffer->keys(idCount, ids, coreKeys);
+	if (!gSessionData->perf.getLegacySupport()) {
+		buffer->keys(idCount, ids, coreKeys);
+	} else {
+		char buf[1024];
+		ssize_t bytes = read(mFds[cpu], buf, sizeof(buf));
+		if (bytes < 0) {
+			logg->logMessage("read failed");
+			return false;
+		}
+		buffer->keysOld(idCount, coreKeys, bytes, buf);
+	}
 
 	if (start) {
 		for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
diff --git a/tools/gator/daemon/PerfGroup.h b/tools/gator/daemon/PerfGroup.h
index af496d41334c..3f1e2bb4d1c8 100644
--- a/tools/gator/daemon/PerfGroup.h
+++ b/tools/gator/daemon/PerfGroup.h
@@ -24,6 +24,7 @@ enum PerfGroupFlags {
 	PERF_GROUP_FREQ          = 1 << 2,
 	PERF_GROUP_TASK          = 1 << 3,
 	PERF_GROUP_SAMPLE_ID_ALL = 1 << 4,
+	PERF_GROUP_PER_CPU       = 1 << 5,
 };
 
 class PerfGroup {
@@ -43,6 +44,7 @@ class PerfGroup {
 private:
 	// +1 for the group leader
 	struct perf_event_attr mAttrs[MAX_PERFORMANCE_COUNTERS + 1];
+	bool mPerCpu[MAX_PERFORMANCE_COUNTERS + 1];
 	int mKeys[MAX_PERFORMANCE_COUNTERS + 1];
 	int mFds[NR_CPUS * (MAX_PERFORMANCE_COUNTERS + 1)];
 	PerfBuffer *const mPb;
diff --git a/tools/gator/daemon/PerfSource.cpp b/tools/gator/daemon/PerfSource.cpp
index 1f1cb1988f00..ecfaa66832bd 100644
--- a/tools/gator/daemon/PerfSource.cpp
+++ b/tools/gator/daemon/PerfSource.cpp
@@ -37,7 +37,7 @@ static bool sendTracepointFormat(Buffer *const buffer, const char *const name, D
 	return true;
 }
 
-PerfSource::PerfSource(sem_t *senderSem, sem_t *startProfile) : mSummary(0, FRAME_SUMMARY, 1024, senderSem), mBuffer(0, FRAME_PERF_ATTRS, 1024*1024, senderSem), mCountersBuf(), mCountersGroup(&mCountersBuf), mMonitor(), mUEvent(), mSenderSem(senderSem), mStartProfile(startProfile), mInterruptFd(-1), mIsDone(false) {
+PerfSource::PerfSource(sem_t *senderSem, sem_t *startProfile) : mSummary(0, FRAME_SUMMARY, 1024, senderSem), mBuffer(0, FRAME_PERF_ATTRS, 4*1024*1024, senderSem), mCountersBuf(), mCountersGroup(&mCountersBuf), mMonitor(), mUEvent(), mSenderSem(senderSem), mStartProfile(startProfile), mInterruptFd(-1), mIsDone(false) {
 	long l = sysconf(_SC_PAGE_SIZE);
 	if (l < 0) {
 		logg->logError(__FILE__, __LINE__, "Unable to obtain the page size");
@@ -74,6 +74,9 @@ bool PerfSource::prepare() {
 	DynBuf b3;
 	long long schedSwitchId;
 
+	// Reread cpuinfo since cores may have changed since startup
+	gSessionData->readCpuInfo();
+
 	if (0
 			|| !mMonitor.init()
 			|| !mUEvent.init()
@@ -83,14 +86,14 @@ bool PerfSource::prepare() {
 			|| !sendTracepointFormat(&mBuffer, SCHED_SWITCH, &printb, &b1)
 
 			// Only want RAW but not IP on sched_switch and don't want TID on SAMPLE_ID
-			|| !mCountersGroup.add(&mBuffer, 100/**/, PERF_TYPE_TRACEPOINT, schedSwitchId, 1, PERF_SAMPLE_RAW, PERF_GROUP_MMAP | PERF_GROUP_COMM | PERF_GROUP_TASK | PERF_GROUP_SAMPLE_ID_ALL)
+			|| !mCountersGroup.add(&mBuffer, 100/**/, PERF_TYPE_TRACEPOINT, schedSwitchId, 1, PERF_SAMPLE_RAW, PERF_GROUP_MMAP | PERF_GROUP_COMM | PERF_GROUP_TASK | PERF_GROUP_SAMPLE_ID_ALL | PERF_GROUP_PER_CPU)
 
 			// Only want TID and IP but not RAW on timer
-			|| (gSessionData->mSampleRate > 0 && !gSessionData->mIsEBS && !mCountersGroup.add(&mBuffer, 99/**/, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, 1000000000UL / gSessionData->mSampleRate, PERF_SAMPLE_TID | PERF_SAMPLE_IP, 0))
+			|| (gSessionData->mSampleRate > 0 && !gSessionData->mIsEBS && !mCountersGroup.add(&mBuffer, 99/**/, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, 1000000000UL / gSessionData->mSampleRate, PERF_SAMPLE_TID | PERF_SAMPLE_IP, PERF_GROUP_PER_CPU))
 
 			|| !gSessionData->perf.enable(&mCountersGroup, &mBuffer)
 			|| 0) {
-		logg->logMessage("%s(%s:%i): perf setup failed, are you running Linux 3.12 or later?", __FUNCTION__, __FILE__, __LINE__);
+		logg->logMessage("%s(%s:%i): perf setup failed, are you running Linux 3.4 or later?", __FUNCTION__, __FILE__, __LINE__);
 		return false;
 	}
 
@@ -134,7 +137,7 @@ bool PerfSource::prepare() {
 		return false;
 	}
 
-	if (!readProc(&mBuffer, &printb, &b1, &b2, &b3)) {
+	if (!readProc(&mBuffer, true, &printb, &b1, &b2, &b3)) {
 		logg->logMessage("%s(%s:%i): readProc failed", __FUNCTION__, __FILE__, __LINE__);
 		return false;
 	}
@@ -260,6 +263,7 @@ bool PerfSource::isDone () {
 void PerfSource::write (Sender *sender) {
 	if (!mSummary.isDone()) {
 		mSummary.write(sender);
+		gSessionData->mSentSummary = true;
 	}
 	if (!mBuffer.isDone()) {
 		mBuffer.write(sender);
diff --git a/tools/gator/daemon/Proc.cpp b/tools/gator/daemon/Proc.cpp
index e0b9e2259cf9..9f01770d6609 100644
--- a/tools/gator/daemon/Proc.cpp
+++ b/tools/gator/daemon/Proc.cpp
@@ -57,14 +57,57 @@ static bool readProcStat(ProcStat *const ps, const char *const pathname, DynBuf
 	return true;
 }
 
-static bool readProcTask(Buffer *const buffer, const int pid, const char *const image, DynBuf *const printb, DynBuf *const b) {
+static const char *readProcExe(DynBuf *const printb, const int pid, const int tid, DynBuf *const b) { // Returns the image name of pid (of its thread tid when tid != -1) or NULL on failure; the result points into b or at a string literal
+	if (tid == -1 ? !printb->printf("/proc/%i/exe", pid) // tid == -1 selects the process-level /proc entry
+			: !printb->printf("/proc/%i/task/%i/exe", pid, tid)) {
+		logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
+		return NULL;
+	}
+
+	const int err = b->readlink(printb->getBuf()); // link target lands in b; err is checked against 0 and -ENOENT below
+	const char *image;
+	if (err == 0) {
+		image = strrchr(b->getBuf(), '/'); // keep only what follows the last '/'
+		if (image == NULL) {
+			image = b->getBuf(); // no '/' at all: use the whole link target
+		} else {
+			++image; // step past the '/'
+		}
+	} else if (err == -ENOENT) {
+		// readlink /proc/[pid]/exe returns ENOENT for kernel threads, which are reported with an empty image name
+		image = "\0";
+	} else {
+		logg->logMessage("%s(%s:%i): DynBuf::readlink failed", __FUNCTION__, __FILE__, __LINE__);
+		return NULL;
+	}
+
+	// Android apps are run by app_process but the cmdline is changed to reference the actual app name
+	if (strcmp(image, "app_process") != 0) {
+		return image; // anything other than app_process is reported as-is
+	}
+
+	if (tid == -1 ? !printb->printf("/proc/%i/cmdline", pid) // same pid/tid path selection as for exe above
+			: !printb->printf("/proc/%i/task/%i/cmdline", pid, tid)) {
+		logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
+		return NULL;
+	}
+
+	if (!b->read(printb->getBuf())) { // replaces the link target held in b with the cmdline contents
+		logg->logMessage("%s(%s:%i): DynBuf::read failed, likely because the thread exited", __FUNCTION__, __FILE__, __LINE__);
+		return NULL;
+	}
+
+	return b->getBuf(); // b now holds the cmdline contents read above
+}
+
+static bool readProcTask(Buffer *const buffer, const int pid, DynBuf *const printb, DynBuf *const b1, DynBuf *const b2) {
 	bool result = false;
 
-	if (!b->printf("/proc/%i/task", pid)) {
+	if (!b1->printf("/proc/%i/task", pid)) {
 		logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
 		return result;
 	}
-	DIR *task = opendir(b->getBuf());
+	DIR *task = opendir(b1->getBuf());
 	if (task == NULL) {
 		logg->logMessage("%s(%s:%i): opendir failed", __FUNCTION__, __FILE__, __LINE__);
 		return result;
@@ -84,11 +127,17 @@ static bool readProcTask(Buffer *const buffer, const int pid, const char *const
 			goto fail;
 		}
 		ProcStat ps;
-		if (!readProcStat(&ps, printb->getBuf(), b)) {
+		if (!readProcStat(&ps, printb->getBuf(), b1)) {
 			logg->logMessage("%s(%s:%i): readProcStat failed", __FUNCTION__, __FILE__, __LINE__);
 			goto fail;
 		}
 
+		const char *const image = readProcExe(printb, pid, tid, b2);
+		if (image == NULL) {
+			logg->logMessage("%s(%s:%i): readImage failed", __FUNCTION__, __FILE__, __LINE__);
+			goto fail;
+		}
+
 		buffer->comm(pid, tid, image, ps.comm);
 	}
 
@@ -100,7 +149,7 @@ static bool readProcTask(Buffer *const buffer, const int pid, const char *const
 	return result;
 }
 
-bool readProc(Buffer *const buffer, DynBuf *const printb, DynBuf *const b1, DynBuf *const b2, DynBuf *const b3) {
+bool readProc(Buffer *const buffer, bool sendMaps, DynBuf *const printb, DynBuf *const b1, DynBuf *const b2, DynBuf *const b3) {
 	bool result = false;
 
 	DIR *proc = opendir("/proc");
@@ -128,42 +177,29 @@ bool readProc(Buffer *const buffer, DynBuf *const printb, DynBuf *const b1, DynB
 			goto fail;
 		}
 
-		if (!printb->printf("/proc/%i/exe", pid)) {
-			logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
-			goto fail;
-		}
-		const int err = b1->readlink(printb->getBuf());
-		const char *image;
-		if (err == 0) {
-			image = strrchr(b1->getBuf(), '/');
-			if (image == NULL) {
-				image = b1->getBuf();
-			} else {
-				++image;
+		if (sendMaps) {
+			if (!printb->printf("/proc/%i/maps", pid)) {
+				logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
+				goto fail;
+			}
+			if (!b2->read(printb->getBuf())) {
+				logg->logMessage("%s(%s:%i): DynBuf::read failed, likely because the process exited", __FUNCTION__, __FILE__, __LINE__);
+				// This is not a fatal error - the process just doesn't exist any more
+				continue;
 			}
-		} else if (err == -ENOENT) {
-			// readlink /proc/[pid]/exe returns ENOENT for kernel threads
-			image = "\0";
-		} else {
-			logg->logMessage("%s(%s:%i): DynBuf::readlink failed", __FUNCTION__, __FILE__, __LINE__);
-			goto fail;
-		}
 
-		if (!printb->printf("/proc/%i/maps", pid)) {
-			logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
-			goto fail;
+			buffer->maps(pid, pid, b2->getBuf());
 		}
-		if (!b2->read(printb->getBuf())) {
-			logg->logMessage("%s(%s:%i): DynBuf::read failed, likely because the process exited", __FUNCTION__, __FILE__, __LINE__);
-			// This is not a fatal error - the process just doesn't exist any more
-			continue;
-		}
-
-		buffer->maps(pid, pid, b2->getBuf());
 		if (ps.numThreads <= 1) {
+			const char *const image = readProcExe(printb, pid, -1, b1);
+			if (image == NULL) {
+				logg->logMessage("%s(%s:%i): readImage failed", __FUNCTION__, __FILE__, __LINE__);
+				goto fail;
+			}
+
 			buffer->comm(pid, pid, image, ps.comm);
 		} else {
-			if (!readProcTask(buffer, pid, image, printb, b3)) {
+			if (!readProcTask(buffer, pid, printb, b1, b3)) {
 				logg->logMessage("%s(%s:%i): readProcTask failed", __FUNCTION__, __FILE__, __LINE__);
 				goto fail;
 			}
diff --git a/tools/gator/daemon/Proc.h b/tools/gator/daemon/Proc.h
index 057b6109848a..31c2eecb7aeb 100644
--- a/tools/gator/daemon/Proc.h
+++ b/tools/gator/daemon/Proc.h
@@ -12,6 +12,6 @@
 class Buffer;
 class DynBuf;
 
-bool readProc(Buffer *const buffer, DynBuf *const printb, DynBuf *const b1, DynBuf *const b2, DynBuf *const b3);
+bool readProc(Buffer *const buffer, bool sendMaps, DynBuf *const printb, DynBuf *const b1, DynBuf *const b2, DynBuf *const b3);
 
 #endif // PROC_H
diff --git a/tools/gator/daemon/Sender.h b/tools/gator/daemon/Sender.h
index 4c359dba82f8..33b6cc3c5d8d 100644
--- a/tools/gator/daemon/Sender.h
+++ b/tools/gator/daemon/Sender.h
@@ -39,4 +39,4 @@ class Sender {
 	Sender &operator=(const Sender &);
 };
 
-#endif 	//__SENDER_H__
+#endif //__SENDER_H__
diff --git a/tools/gator/daemon/SessionData.cpp b/tools/gator/daemon/SessionData.cpp
index c169299af872..14d995fc39fa 100644
--- a/tools/gator/daemon/SessionData.cpp
+++ b/tools/gator/daemon/SessionData.cpp
@@ -9,6 +9,7 @@
 #include "SessionData.h"
 
 #include <string.h>
+#include <sys/mman.h>
 
 #include "SessionXML.h"
 #include "Logging.h"
@@ -27,6 +28,15 @@ void SessionData::initialize() {
 	mSessionIsActive = false;
 	mLocalCapture = false;
 	mOneShot = false;
+	mSentSummary = false;
+	const size_t cpuIdSize = sizeof(int)*NR_CPUS;
+	// Share mCpuIds across all instances of gatord
+	mCpuIds = (int *)mmap(NULL, cpuIdSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (mCpuIds == MAP_FAILED) {
+		logg->logError(__FILE__, __LINE__, "Unable to mmap shared memory for cpuids");
+		handleException();
+	}
+	memset(mCpuIds, -1, cpuIdSize);
 	readCpuInfo();
 	mConfigurationXMLPath = NULL;
 	mSessionXMLPath = NULL;
@@ -91,10 +101,9 @@ void SessionData::parseSessionXML(char* xmlString) {
 void SessionData::readCpuInfo() {
 	char temp[256]; // arbitrarily large amount
 	strcpy(mCoreName, "unknown");
-	memset(&mCpuIds, -1, sizeof(mCpuIds));
 	mMaxCpuId = -1;
 
-	FILE* f = fopen("/proc/cpuinfo", "r");	
+	FILE* f = fopen("/proc/cpuinfo", "r");
 	if (f == NULL) {
 		logg->logMessage("Error opening /proc/cpuinfo\n"
 			"The core name in the captured xml file will be 'unknown'.");
@@ -102,10 +111,18 @@ void SessionData::readCpuInfo() {
 	}
 
 	bool foundCoreName = false;
-	int processor = 0;
+	int processor = -1;
 	while (fgets(temp, sizeof(temp), f)) {
-		if (strlen(temp) > 0) {
-			temp[strlen(temp) - 1] = 0;	// Replace the line feed with a null
+		const size_t len = strlen(temp);
+
+		if (len == 1) {
+			// New section, clear the processor. If the pre-Linux 3.8 format of cpuinfo is encountered, Streamline will not know the cpus, but no incorrect information will be transmitted either.
+			processor = -1;
+			continue;
+		}
+
+		if (len > 0) {
+			temp[len - 1] = '\0';	// Replace the line feed with a null
 		}
 
 		const bool foundHardware = strstr(temp, "Hardware") != 0;
@@ -127,10 +144,15 @@ void SessionData::readCpuInfo() {
 			}
 
 			if (foundCPUPart) {
-				mCpuIds[processor] = strtol(position, NULL, 0);
+				const int cpuId = strtol(position, NULL, 0);
 				// If this does not have the full topology in /proc/cpuinfo, mCpuIds[0] may not have the 1 CPU part emitted - this guarantees it's in mMaxCpuId
-				if (mCpuIds[processor] > mMaxCpuId) {
-					mMaxCpuId = mCpuIds[processor];
+				if (cpuId > mMaxCpuId) {
+					mMaxCpuId = cpuId;
+				}
+				if (processor >= NR_CPUS) {
+					logg->logMessage("Too many processors, please increase NR_CPUS");
+				} else if (processor >= 0) {
+					mCpuIds[processor] = cpuId;
 				}
 			}
 
@@ -142,10 +164,23 @@ void SessionData::readCpuInfo() {
 
 	if (!foundCoreName) {
 		logg->logMessage("Could not determine core name from /proc/cpuinfo\n"
-						 "The core name in the captured xml file will be 'unknown'.");
+				 "The core name in the captured xml file will be 'unknown'.");
 	}
 	fclose(f);
- }
+}
+
+uint64_t getTime() {
+	struct timespec ts;
+#ifndef CLOCK_MONOTONIC_RAW
+	// Android doesn't have this defined but it was added in Linux 2.6.28
+#define CLOCK_MONOTONIC_RAW 4
+#endif
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) != 0) {
+		logg->logError(__FILE__, __LINE__, "Failed to get uptime");
+		handleException();
+	}
+	return (NS_PER_S*ts.tv_sec + ts.tv_nsec);
+}
 
 int getEventKey() {
 	// key 0 is reserved as a timestamp
diff --git a/tools/gator/daemon/SessionData.h b/tools/gator/daemon/SessionData.h
index ea34240e2df7..835082d86c4b 100644
--- a/tools/gator/daemon/SessionData.h
+++ b/tools/gator/daemon/SessionData.h
@@ -13,12 +13,16 @@
 
 #include "Config.h"
 #include "Counter.h"
+#include "FSDriver.h"
 #include "Hwmon.h"
+#include "MaliVideoDriver.h"
 #include "PerfDriver.h"
 
-#define PROTOCOL_VERSION	18
+#define PROTOCOL_VERSION	19
 #define PROTOCOL_DEV		1000	// Differentiates development versions (timestamp) from release versions
 
+#define NS_PER_S ((uint64_t)1000000000)
+
 struct ImageLinkList {
 	char* path;
 	struct ImageLinkList *next;
@@ -32,9 +36,12 @@ class SessionData {
 	~SessionData();
 	void initialize();
 	void parseSessionXML(char* xmlString);
+	void readCpuInfo();
 
 	Hwmon hwmon;
+	FSDriver fsDriver;
 	PerfDriver perf;
+	MaliVideoDriver maliVideo;
 
 	char mCoreName[MAX_STRING_LEN];
 	struct ImageLinkList *mImages;
@@ -49,7 +56,8 @@ class SessionData {
 	bool mLocalCapture;
 	bool mOneShot;		// halt processing of the driver data until profiling is complete or the buffer is filled
 	bool mIsEBS;
-	
+	bool mSentSummary;
+
 	int mBacktraceDepth;
 	int mTotalBufferSize;	// number of MB to use for the entire collection buffer
 	int mSampleRate;
@@ -57,7 +65,7 @@ class SessionData {
 	int mDuration;
 	int mCores;
 	int mPageSize;
-	int mCpuIds[NR_CPUS];
+	int *mCpuIds;
 	int mMaxCpuId;
 
 	// PMU Counters
@@ -65,8 +73,6 @@ class SessionData {
 	Counter mCounters[MAX_PERFORMANCE_COUNTERS];
 
 private:
-	void readCpuInfo();
-
 	// Intentionally unimplemented
 	SessionData(const SessionData &);
 	SessionData &operator=(const SessionData &);
@@ -74,6 +80,7 @@ class SessionData {
 
 extern SessionData* gSessionData;
 
+uint64_t getTime();
 int getEventKey();
 
 #endif // SESSION_DATA_H
diff --git a/tools/gator/daemon/SessionXML.cpp b/tools/gator/daemon/SessionXML.cpp
index 55b2f9280709..8cdc9409ca21 100644
--- a/tools/gator/daemon/SessionXML.cpp
+++ b/tools/gator/daemon/SessionXML.cpp
@@ -17,15 +17,15 @@
 #include "SessionData.h"
 
 static const char*	TAG_SESSION = "session";
-static const char*	TAG_IMAGE	= "image";
+static const char*	TAG_IMAGE   = "image";
 
-static const char*	ATTR_VERSION            = "version";		
+static const char*	ATTR_VERSION            = "version";
 static const char*	ATTR_CALL_STACK_UNWINDING = "call_stack_unwinding";
 static const char*	ATTR_BUFFER_MODE        = "buffer_mode";
-static const char*	ATTR_SAMPLE_RATE        = "sample_rate";	
+static const char*	ATTR_SAMPLE_RATE        = "sample_rate";
 static const char*	ATTR_DURATION           = "duration";
 static const char*	ATTR_PATH               = "path";
-static const char*	ATTR_LIVE_RATE      = "live_rate";
+static const char*	ATTR_LIVE_RATE          = "live_rate";
 
 SessionXML::SessionXML(const char *str) {
 	parameters.buffer_mode[0] = 0;
diff --git a/tools/gator/daemon/StreamlineSetup.cpp b/tools/gator/daemon/StreamlineSetup.cpp
index caa665e67193..2b61eaeb290d 100644
--- a/tools/gator/daemon/StreamlineSetup.cpp
+++ b/tools/gator/daemon/StreamlineSetup.cpp
@@ -266,7 +266,7 @@ void StreamlineSetup::writeConfiguration(char* xml) {
 	{ ConfigurationXML configuration; }
 
 	if (gSessionData->mCounterOverflow > 0) {
-		logg->logError(__FILE__, __LINE__, "Only %i performance counters counters are permitted, %i are selected", MAX_PERFORMANCE_COUNTERS, gSessionData->mCounterOverflow);
+		logg->logError(__FILE__, __LINE__, "Only %i performance counters are permitted, %i are selected", MAX_PERFORMANCE_COUNTERS, gSessionData->mCounterOverflow);
 		handleException();
 	}
 }
diff --git a/tools/gator/daemon/StreamlineSetup.h b/tools/gator/daemon/StreamlineSetup.h
index 74bb197e35ff..b380f46b98f0 100644
--- a/tools/gator/daemon/StreamlineSetup.h
+++ b/tools/gator/daemon/StreamlineSetup.h
@@ -21,7 +21,7 @@ enum {
 	COMMAND_APC_START   = 2,
 	COMMAND_APC_STOP    = 3,
 	COMMAND_DISCONNECT  = 4,
-	COMMAND_PING		= 5
+	COMMAND_PING        = 5
 };
 
 class StreamlineSetup {
@@ -47,4 +47,4 @@ class StreamlineSetup {
 	StreamlineSetup &operator=(const StreamlineSetup &);
 };
 
-#endif 	//__STREAMLINE_SETUP_H__
+#endif //__STREAMLINE_SETUP_H__
diff --git a/tools/gator/daemon/UEvent.cpp b/tools/gator/daemon/UEvent.cpp
index 282e965fa67a..54d45751e3c9 100644
--- a/tools/gator/daemon/UEvent.cpp
+++ b/tools/gator/daemon/UEvent.cpp
@@ -8,11 +8,12 @@
 
 #include "UEvent.h"
 
-#include <linux/netlink.h>
 #include <string.h>
 #include <sys/socket.h>
 #include <unistd.h>
 
+#include <linux/netlink.h>
+
 #include "Logging.h"
 
 static const char EMPTY[] = "";
diff --git a/tools/gator/daemon/UserSpaceSource.cpp b/tools/gator/daemon/UserSpaceSource.cpp
index debe69636cff..8c328e0e0fb5 100644
--- a/tools/gator/daemon/UserSpaceSource.cpp
+++ b/tools/gator/daemon/UserSpaceSource.cpp
@@ -16,7 +16,6 @@
 #include "Logging.h"
 #include "SessionData.h"
 
-#define NS_PER_S ((uint64_t)1000000000)
 #define NS_PER_US 1000
 
 extern Child *child;
@@ -35,6 +34,7 @@ void UserSpaceSource::run() {
 	prctl(PR_SET_NAME, (unsigned long)&"gatord-counters", 0, 0, 0);
 
 	gSessionData->hwmon.start();
+	gSessionData->fsDriver.start();
 
 	int64_t monotonic_started = 0;
 	while (monotonic_started <= 0) {
@@ -48,16 +48,7 @@ void UserSpaceSource::run() {
 
 	uint64_t next_time = 0;
 	while (gSessionData->mSessionIsActive) {
-		struct timespec ts;
-#ifndef CLOCK_MONOTONIC_RAW
-		// Android doesn't have this defined but it was added in Linux 2.6.28
-#define CLOCK_MONOTONIC_RAW 4
-#endif
-		if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) != 0) {
-			logg->logError(__FILE__, __LINE__, "Failed to get uptime");
-			handleException();
-		}
-		const uint64_t curr_time = (NS_PER_S*ts.tv_sec + ts.tv_nsec) - monotonic_started;
+		const uint64_t curr_time = getTime() - monotonic_started;
 		// Sample ten times a second ignoring gSessionData->mSampleRate
 		next_time += NS_PER_S/10;//gSessionData->mSampleRate;
 		if (next_time < curr_time) {
@@ -67,6 +58,7 @@ void UserSpaceSource::run() {
 
 		if (mBuffer.eventHeader(curr_time)) {
 			gSessionData->hwmon.read(&mBuffer);
+			gSessionData->fsDriver.read(&mBuffer);
 			// Only check after writing all counters so that time and corresponding counters appear in the same frame
 			mBuffer.check(curr_time);
 		}
diff --git a/tools/gator/daemon/UserSpaceSource.h b/tools/gator/daemon/UserSpaceSource.h
index fb5889d26ffb..9b3666016dc5 100644
--- a/tools/gator/daemon/UserSpaceSource.h
+++ b/tools/gator/daemon/UserSpaceSource.h
@@ -14,7 +14,7 @@
 #include "Buffer.h"
 #include "Source.h"
 
-// User space counters - currently just hwmon
+// User space counters
 class UserSpaceSource : public Source {
 public:
 	UserSpaceSource(sem_t *senderSem);
diff --git a/tools/gator/daemon/c++.cpp b/tools/gator/daemon/c++.cpp
new file mode 100644
index 000000000000..6041e5e96469
--- /dev/null
+++ b/tools/gator/daemon/c++.cpp
@@ -0,0 +1,40 @@
+/**
+ * Minimal set of C++ functions so that libstdc++ is not required
+ *
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+void operator delete(void *ptr) {
+  if (ptr != NULL) {
+    free(ptr);
+  }
+}
+
+void operator delete[](void *ptr) {
+  operator delete(ptr);
+}
+
+void *operator new(size_t size) {
+  void *ptr = malloc(size == 0 ? 1 : size);
+  if (ptr == NULL) {
+    abort();
+  }
+  return ptr;
+}
+
+void *operator new[](size_t size) {
+  return operator new(size);
+}
+
+extern "C" // Itanium C++ ABI hook: invoked when a pure virtual function is called
+void __cxa_pure_virtual() {
+  fprintf(stderr, "pure virtual method called\n"); // stderr, not stdout: abort() need not flush buffered output
+  abort();
+}
diff --git a/tools/gator/daemon/common.mk b/tools/gator/daemon/common.mk
index d9dc14606b07..769a92e51a35 100644
--- a/tools/gator/daemon/common.mk
+++ b/tools/gator/daemon/common.mk
@@ -5,16 +5,17 @@
 # -Werror treats warnings as errors
 # -std=c++0x is the planned new c++ standard
 # -std=c++98 is the 1998 c++ standard
-CFLAGS += -O3 -Wall -fno-exceptions -pthread -MMD -DETCDIR=\"/etc\" -Ilibsensors
+CPPFLAGS += -O3 -Wall -fno-exceptions -pthread -MMD -DETCDIR=\"/etc\" -Ilibsensors
 CXXFLAGS += -fno-rtti -Wextra # -Weffc++
 ifeq ($(WERROR),1)
-	CFLAGS += -Werror
+	CPPFLAGS += -Werror
 endif
 # -s strips the binary of debug info
 LDFLAGS += -s
+LDLIBS += -lrt -lm -pthread
 TARGET = gatord
 C_SRC = $(wildcard mxml/*.c) $(wildcard libsensors/*.c)
-CPP_SRC = $(wildcard *.cpp)
+CXX_SRC = $(wildcard *.cpp)
 
 all: $(TARGET)
 
@@ -35,14 +36,15 @@ libsensors/conf-parse.c: ;
 	./escape $< > $@
 
 %.o: %.c
-	$(GCC) -c $(CFLAGS) -o $@ $<
+	$(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
 
 %.o: %.cpp
-	$(CPP) -c $(CFLAGS) $(CXXFLAGS) -o $@ $<
+	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $<
 
-$(TARGET): $(CPP_SRC:%.cpp=%.o) $(C_SRC:%.c=%.o)
-	$(CPP) $(LDFLAGS) -o $@ $^ -lrt -pthread
+$(TARGET): $(CXX_SRC:%.cpp=%.o) $(C_SRC:%.c=%.o)
+	$(CC) $(LDFLAGS) $^ $(LDLIBS) -o $@
 
+# Intentionally ignore CC as a native binary is required
 escape: escape.c
 	gcc $^ -o $@
 
diff --git a/tools/gator/daemon/defaults.xml b/tools/gator/daemon/defaults.xml
index 5bf096cb2a45..39a0f656f7e6 100644
--- a/tools/gator/daemon/defaults.xml
+++ b/tools/gator/daemon/defaults.xml
@@ -58,5 +58,10 @@
   <configuration counter="Linux_meminfo_memused"/>
   <configuration counter="Linux_meminfo_memfree"/>
   <configuration counter="Linux_power_cpu_freq"/>
+  <configuration counter="ARM_Mali-4xx_fragment"/>
+  <configuration counter="ARM_Mali-4xx_vertex"/>
+  <configuration counter="ARM_Mali-T6xx_fragment" cores="1"/>
+  <configuration counter="ARM_Mali-T6xx_vertex" cores="1"/>
+  <configuration counter="ARM_Mali-T6xx_opencl" cores="1"/>
   <configuration counter="L2C-310_cnt0" event="0x1"/>
 </configurations>
diff --git a/tools/gator/daemon/escape.c b/tools/gator/daemon/escape.c
index c54aa1c3e75d..2b0863aaf425 100644
--- a/tools/gator/daemon/escape.c
+++ b/tools/gator/daemon/escape.c
@@ -6,7 +6,7 @@
  * published by the Free Software Foundation.
  */
 
-/* 
+/*
  * The Makefile in the daemon folder builds and executes 'escape'
  * 'escape' creates configuration_xml.h from configuration.xml and events_xml.h from events-*.xml
  * these genereated xml files are then #included and built as part of the gatord binary
diff --git a/tools/gator/daemon/events-CCI-400.xml b/tools/gator/daemon/events-CCI-400.xml
index 4fa77117d2d8..20002efd1543 100644
--- a/tools/gator/daemon/events-CCI-400.xml
+++ b/tools/gator/daemon/events-CCI-400.xml
@@ -1,7 +1,6 @@
-  <counter_set name="cci-400_cnt" count="4"/>
-  <category name="CCI-400" counter_set="cci-400_cnt" per_cpu="no" supports_event_based_sampling="yes">
-    <event counter="cci-400_ccnt" event="0xff" title="CCI-400 Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" description="The number of core clock cycles"/>
-
+  <counter_set name="CCI_400_cnt" count="4"/>
+  <category name="CCI-400" counter_set="CCI_400_cnt" per_cpu="no" supports_event_based_sampling="yes">
+    <event counter="CCI_400_ccnt" event="0xff" title="CCI-400 Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" description="The number of core clock cycles"/>
     <option_set name="Slave">
       <option event_delta="0x00" name="S0" description="Slave interface 0"/>
       <option event_delta="0x20" name="S1" description="Slave interface 1"/>
@@ -9,7 +8,6 @@
       <option event_delta="0x60" name="S3" description="Slave interface 3"/>
       <option event_delta="0x80" name="S4" description="Slave interface 4"/>
     </option_set>
-
     <event event="0x00" option_set="Slave" title="CCI-400" name="Read: any" description="Read request handshake: any"/>
     <event event="0x01" option_set="Slave" title="CCI-400" name="Read: transaction" description="Read request handshake: device transaction"/>
     <event event="0x02" option_set="Slave" title="CCI-400" name="Read: normal" description="Read request handshake: normal, non-shareable or system-shareable, but not barrier or cache maintenance operation"/>
@@ -30,13 +28,11 @@
     <event event="0x11" option_set="Slave" title="CCI-400" name="Write: WriteLineUnique" description="Write request handshake: WriteLineUnique"/>
     <event event="0x12" option_set="Slave" title="CCI-400" name="Write: Evict" description="Write request handshake: Evict"/>
     <event event="0x13" option_set="Slave" title="CCI-400" name="Write stall: tracker full" description="Write request stall cycle because the transaction tracker is full. Increase SIx_W_MAX to avoid this stall"/>
-
     <option_set name="Master">
       <option event_delta="0xa0" name="M0" description="Master interface 0"/>
       <option event_delta="0xc0" name="M1" description="Master interface 1"/>
       <option event_delta="0xe0" name="M2" description="Master interface 2"/>
     </option_set>
-
     <event event="0x14" option_set="Master" title="CCI-400" name="Retry fetch" description="RETRY of speculative fetch transaction"/>
     <event event="0x15" option_set="Master" title="CCI-400" name="Read stall: address hazard" description="Read request stall cycle because of an address hazard"/>
     <event event="0x16" option_set="Master" title="CCI-400" name="Read stall: ID hazard" description="Read request stall cycle because of an ID hazard"/>
@@ -45,11 +41,9 @@
     <event event="0x19" option_set="Master" title="CCI-400" name="Write stall: barrier hazard" description="Write request stall cycle because of a barrier hazard"/>
     <event event="0x1a" option_set="Master" title="CCI-400" name="Write stall: tracker full" description="Write request stall cycle because the transaction tracker is full. Increase MIx_W_MAX to avoid this stall. See the CoreLink CCI-400 Cache Coherent Interconnect Integration Manual"/>
   </category>
-
-  <counter_set name="cci-400-r1_cnt" count="4"/>
-  <category name="CCI-400" counter_set="cci-400-r1_cnt" per_cpu="no" supports_event_based_sampling="yes">
-    <event counter="cci-400-r1_ccnt" event="0xff" title="CCI-400 Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" description="The number of core clock cycles"/>
-
+  <counter_set name="CCI_400-r1_cnt" count="4"/>
+  <category name="CCI-400" counter_set="CCI_400-r1_cnt" per_cpu="no" supports_event_based_sampling="yes">
+    <event counter="CCI_400-r1_ccnt" event="0xff" title="CCI-400 Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" description="The number of core clock cycles"/>
     <option_set name="Slave">
       <option event_delta="0x00" name="S0" description="Slave interface 0"/>
       <option event_delta="0x20" name="S1" description="Slave interface 1"/>
@@ -57,7 +51,6 @@
       <option event_delta="0x60" name="S3" description="Slave interface 3"/>
       <option event_delta="0x80" name="S4" description="Slave interface 4"/>
     </option_set>
-
     <event event="0x00" option_set="Slave" title="CCI-400" name="Read: any" description="Read request handshake: any"/>
     <event event="0x01" option_set="Slave" title="CCI-400" name="Read: transaction" description="Read request handshake: device transaction"/>
     <event event="0x02" option_set="Slave" title="CCI-400" name="Read: normal" description="Read request handshake: normal, non-shareable or system-shareable, but not barrier or cache maintenance operation"/>
@@ -79,13 +72,11 @@
     <event event="0x12" option_set="Slave" title="CCI-400" name="Write: Evict" description="Write request handshake: Evict"/>
     <event event="0x13" option_set="Slave" title="CCI-400" name="Write stall: tracker full" description="Write request stall cycle because the transaction tracker is full. Increase SIx_W_MAX to avoid this stall"/>
     <event event="0x14" option_set="Slave" title="CCI-400" name="Read stall: slave hazard" description="Read request stall cycle because of a slave interface ID hazard"/>
-
     <option_set name="Master">
       <option event_delta="0xa0" name="M0" description="Master interface 0"/>
       <option event_delta="0xc0" name="M1" description="Master interface 1"/>
       <option event_delta="0xe0" name="M2" description="Master interface 2"/>
     </option_set>
-
     <event event="0x00" option_set="Master" title="CCI-400" name="Retry fetch" description="RETRY of speculative fetch transaction"/>
     <event event="0x01" option_set="Master" title="CCI-400" name="Read stall: address hazard" description="Stall cycle because of an address hazard. A read or write invalidation is stalled because of an outstanding transaction to an overlapping address"/>
     <event event="0x02" option_set="Master" title="CCI-400" name="Read stall: ID hazard" description="Read request stall cycle because of a master interface ID hazard"/>
diff --git a/tools/gator/daemon/events-CCN-504.xml b/tools/gator/daemon/events-CCN-504.xml
index cfabf65949ed..6ef3e6483717 100644
--- a/tools/gator/daemon/events-CCN-504.xml
+++ b/tools/gator/daemon/events-CCN-504.xml
@@ -1,7 +1,6 @@
   <counter_set name="CCN-504_cnt" count="4"/>
   <category name="CCN-504" counter_set="CCN-504_cnt">
     <event counter="CCN-504_ccnt" title="CCN-504 Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" description="The number of core clock cycles"/>
-
     <option_set name="XP_Region">
       <option event_delta="0x400000" name="XP 0" description="Crosspoint 0"/>
       <option event_delta="0x410000" name="XP 1" description="Crosspoint 1"/>
@@ -15,7 +14,6 @@
       <option event_delta="0x490000" name="XP 9" description="Crosspoint 9"/>
       <option event_delta="0x4A0000" name="XP 10" description="Crosspoint 10"/>
     </option_set>
-
     <event event="0x0801" option_set="XP_Region" title="CCN-504" name="Bus 0: REQ: H-bit" description="Bus 0: REQ: Set H-bit, signaled when this XP sets the H-bit."/>
     <event event="0x0802" option_set="XP_Region" title="CCN-504" name="Bus 0: REQ: S-bit" description="Bus 0: REQ: Set S-bit, signaled when this XP sets the S-bit."/>
     <event event="0x0803" option_set="XP_Region" title="CCN-504" name="Bus 0: REQ: P-Cnt" description="Bus 0: REQ: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
@@ -56,7 +54,6 @@
     <event event="0x087A" option_set="XP_Region" title="CCN-504" name="Bus 1: DATB: S-bit" description="Bus 1: DATB: Set S-bit, signaled when this XP sets the S-bit."/>
     <event event="0x087B" option_set="XP_Region" title="CCN-504" name="Bus 1: DATB: P-Cnt" description="Bus 1: DATB: Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC."/>
     <event event="0x087C" option_set="XP_Region" title="CCN-504" name="Bus 1: DATB: TknV" description="Bus 1: DATB: No TknV, signaled when this XP transmits a valid packet."/>
-
     <option_set name="HN-F_Region">
       <option event_delta="0x200000" name="HN-F 3" description="Fully-coherent Home Node 3"/>
       <option event_delta="0x210000" name="HN-F 5" description="Fully-coherent Home Node 5"/>
@@ -67,7 +64,6 @@
       <option event_delta="0x260000" name="HN-F 17" description="Fully-coherent Home Node 17"/>
       <option event_delta="0x270000" name="HN-F 18" description="Fully-coherent Home Node 18"/>
     </option_set>
-
     <event event="0x0401" option_set="HN-F_Region" title="CCN-504" name="Cache Miss" description="Counts the total cache misses. This is the first time lookup result, and is high priority."/>
     <event event="0x0402" option_set="HN-F_Region" title="CCN-504" name="L3 SF Cache Access" description="Counts the number of cache accesses. This is the first time access, and is high priority."/>
     <event event="0x0403" option_set="HN-F_Region" title="CCN-504" name="Cache Fill" description="Counts the total allocations in the HN L3 cache, and all cache line allocations to the L3 cache."/>
@@ -82,7 +78,6 @@
     <event event="0x040C" option_set="HN-F_Region" title="CCN-504" name="MC Retries" description="Counts the number of transactions retried by the memory controller."/>
     <event event="0x040D" option_set="HN-F_Region" title="CCN-504" name="MC Reqs" description="Counts the number of requests to the memory controller."/>
     <event event="0x040E" option_set="HN-F_Region" title="CCN-504" name="QOS HH Retry" description="Counts the number of times a highest-priority QoS class was retried at the HN-F."/>
-
     <option_set name="RN-I_Region">
       <option event_delta="0x800000" name="RN-I 0" description="I/O-coherent Requesting Node 0"/>
       <option event_delta="0x820000" name="RN-I 2" description="I/O-coherent Requesting Node 2"/>
@@ -91,7 +86,6 @@
       <option event_delta="0x900000" name="RN-I 16" description="I/O-coherent Requesting Node 16"/>
       <option event_delta="0x940000" name="RN-I 20" description="I/O-coherent Requesting Node 20"/>
     </option_set>
-
     <event event="0x1601" option_set="RN-I_Region" title="CCN-504" name="S0 RDataBeats" description="S0 RDataBeats."/>
     <event event="0x1602" option_set="RN-I_Region" title="CCN-504" name="S1 RDataBeats" description="S1 RDataBeats."/>
     <event event="0x1603" option_set="RN-I_Region" title="CCN-504" name="S2 RDataBeats" description="S2 RDataBeats."/>
@@ -102,14 +96,12 @@
     <event event="0x1608" option_set="RN-I_Region" title="CCN-504" name="RRT full" description="RRT full."/>
     <event event="0x1609" option_set="RN-I_Region" title="CCN-504" name="WRT full" description="WRT full."/>
     <event event="0x160A" option_set="RN-I_Region" title="CCN-504" name="Replayed TXREQ Flits" description="Replayed TXREQ Flits."/>
-
     <option_set name="SBAS_Region">
       <option event_delta="0x810000" name="SBAS 1" description="ACE master to CHI protocol bridge 1"/>
       <option event_delta="0x890000" name="SBAS 9" description="ACE master to CHI protocol bridge 9"/>
       <option event_delta="0x8B0000" name="SBAS 11" description="ACE master to CHI protocol bridge 11"/>
       <option event_delta="0x930000" name="SBAS 19" description="ACE master to CHI protocol bridge 19"/>
     </option_set>
-
     <event event="0x1001" option_set="SBAS_Region" title="CCN-504" name="S0 RDataBeats" description="S0 RDataBeats."/>
     <event event="0x1004" option_set="SBAS_Region" title="CCN-504" name="RXDAT Flits received" description="RXDAT Flits received."/>
     <event event="0x1005" option_set="SBAS_Region" title="CCN-504" name="TXDAT Flits sent" description="TXDAT Flits sent."/>
@@ -118,5 +110,4 @@
     <event event="0x1008" option_set="SBAS_Region" title="CCN-504" name="RRT full" description="RRT full."/>
     <event event="0x1009" option_set="SBAS_Region" title="CCN-504" name="WRT full" description="WRT full."/>
     <event event="0x100A" option_set="SBAS_Region" title="CCN-504" name="Replayed TXREQ Flits" description="Replayed TXREQ Flits."/>
-
   </category>
diff --git a/tools/gator/daemon/events-Cortex-A53.xml b/tools/gator/daemon/events-Cortex-A53.xml
index 577dcd94185e..5ba17907d5ab 100644
--- a/tools/gator/daemon/events-Cortex-A53.xml
+++ b/tools/gator/daemon/events-Cortex-A53.xml
@@ -1,171 +1,87 @@
   <counter_set name="ARM_Cortex-A53_cnt" count="6"/>
   <category name="Cortex-A53" counter_set="ARM_Cortex-A53_cnt" per_cpu="yes" supports_event_based_sampling="yes">
-    <!-- 0x11 CPU_CYCLES - Cycle -->
     <event counter="ARM_Cortex-A53_ccnt" event="0x11" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
-    <!-- 0x00 SW_INCR - Instruction architecturally executed (condition check pass) - Software increment -->
     <event event="0x00" title="Software" name="Increment" description="Incremented only on writes to the Software Increment Register"/>
-    <!-- 0x01 L1I_CACHE_REFILL - Level 1 instruction cache refill -->
     <event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
-    <!-- 0x02 L1I_TLB_REFILL - Level 1 instruction TLB refill -->
     <event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
-    <!-- 0x03 L1D_CACHE_REFILL - Level 1 data cache refill -->
     <event event="0x03" title="Cache" name="Data refill" description="Memory Read or Write operation that causes a refill of at least the level of data or unified cache closest to the processor"/>
-    <!-- 0x04 L1D_CACHE - Level 1 data cache access -->
     <event event="0x04" title="Cache" name="Data access" description="Memory Read or Write operation that causes a cache access to at least the level of data or unified cache closest to the processor"/>
-    <!-- 0x05 L1D_TLB_REFILL - Level 1 data TLB refill -->
     <event event="0x05" title="Cache" name="Data TLB refill" description="Memory Read or Write operation that causes a TLB refill of at least the level of TLB closest to the processor"/>
-    <!-- 0x08 INST_RETIRED - Instruction architecturally executed -->
     <event event="0x08" title="Instruction" name="Executed" description="Instruction architecturally executed"/>
-    <!-- 0x09 EXC_TAKEN - Exception taken -->
     <event event="0x09" title="Exception" name="Taken" description="Exceptions taken"/>
-    <!-- 0x0A EXC_RETURN - Instruction architecturally executed (condition check pass) - Exception return -->
     <event event="0x0a" title="Exception" name="Return" description="Exception return architecturally executed"/>
-    <!-- 0x0B CID_WRITE_RETIRED - Instruction architecturally executed (condition check pass) - Write to CONTEXTIDR -->
     <event event="0x0b" title="Instruction" name="CONTEXTIDR" description="Instruction that writes to the CONTEXTIDR architecturally executed"/>
-    <!-- 0x10 BR_MIS_PRED - Mispredicted or not predicted branch speculatively executed -->
     <event event="0x10" title="Branch" name="Mispredicted" description="Branch mispredicted or not predicted"/>
-    <!-- 0x12 BR_PRED - Predictable branch speculatively executed -->
     <event event="0x12" title="Branch" name="Potential prediction" description="Branch or other change in program flow that could have been predicted by the branch prediction resources of the processor"/>
-    <!-- 0x13 MEM_ACCESS - Data memory access -->
     <event event="0x13" title="Memory" name="Memory access" description="Data memory access"/>
-    <!-- 0x14 L1I_CACHE - Level 1 instruction cache access -->
     <event event="0x14" title="Cache" name="L1 inst access" description="Level 1 instruction cache access"/>
-    <!-- 0x15 L1D_CACHE_WB - Level 1 data cache Write-Back -->
     <event event="0x15" title="Cache" name="L1 data write" description="Level 1 data cache Write-Back"/>
-    <!-- 0x16 L2D_CACHE - Level 2 data cache access -->
     <event event="0x16" title="Cache" name="L2 data access" description="Level 2 data cache access"/>
-    <!-- 0x17 L2D_CACHE_REFILL - Level 2 data cache refill -->
     <event event="0x17" title="Cache" name="L2 data refill" description="Level 2 data cache refill"/>
-    <!-- 0x18 L2D_CACHE_WB - Level 2 data cache Write-Back -->
     <event event="0x18" title="Cache" name="L2 data write" description="Level 2 data cache Write-Back"/>
-    <!-- 0x19 BUS_ACCESS - Bus access -->
     <event event="0x19" title="Bus" name="Access" description="Bus access"/>
-    <!-- 0x1A MEMORY_ERROR - Local memory error -->
     <event event="0x1A" title="Memory" name="Error" description="Local memory error"/>
-    <!-- 0x1B INST_SPEC - Operation speculatively executed -->
     <event event="0x1B" title="Instruction" name="Speculative" description="Operation speculatively executed"/>
-    <!-- 0x1C TTBR_WRITE_RETIRED - Instruction architecturally executed (condition check pass) - Write to translation table base -->
     <event event="0x1C" title="Memory" name="Translation table" description="Instruction architecturally executed (condition check pass) - Write to translation table base"/>
-    <!-- 0x1D BUS_CYCLES - Bus cycle -->
     <event event="0x1D" title="Bus" name="Cycle" description="Bus cycle"/>
-    <!-- 0x1E CHAIN - Odd performance counter chain mode -->
     <event event="0x1E" title="Counter chain" name="Odd Performance" description="Odd performance counter chain mode"/>
-    <!-- 0x40 L1D_CACHE_LD - Level 1 data cache access - Read -->
     <event event="0x40" title="Cache" name="L1 data read" description="Level 1 data cache access - Read"/>
-    <!-- 0x41 L1D_CACHE_ST - Level 1 data cache access - Write -->
     <event event="0x41" title="Cache" name="L1 data access write" description="Level 1 data cache access - Write"/>
-    <!-- 0x42 L1D_CACHE_REFILL_LD - Level 1 data cache refill - Read -->
     <event event="0x42" title="Cache" name="L1 data refill read" description="Level 1 data cache refill - Read"/>
-    <!-- 0x43 L1D_CACHE_REFILL_ST - Level 1 data cache refill - Write -->
     <event event="0x43" title="Cache" name="L1 data refill write" description="Level 1 data cache refill - Write"/>
-    <!-- 0x46 L1D_CACHE_WB_VICTIM - Level 1 data cache Write-back - Victim -->
     <event event="0x46" title="Cache" name="L1 data victim" description="Level 1 data cache Write-back - Victim"/>
-    <!-- 0x47 L1D_CACHE_WB_CLEAN - Level 1 data cache Write-back - Cleaning and coherency -->
     <event event="0x47" title="Cache" name="L1 data clean" description="Level 1 data cache Write-back - Cleaning and coherency"/>
-    <!-- 0x48 L1D_CACHE_INVAL - Level 1 data cache invalidate -->
     <event event="0x48" title="Cache" name="L1 data invalidate" description="Level 1 data cache invalidate"/>
-    <!-- 0x4C L1D_TLB_REFILL_LD - Level 1 data TLB refill - Read -->
     <event event="0x4C" title="Cache" name="L1 data refill read" description="Level 1 data TLB refill - Read"/>
-    <!-- 0x4D L1D_TLB_REFILL_ST - Level 1 data TLB refill - Write -->
     <event event="0x4D" title="Cache" name="L1 data refill write" description="Level 1 data TLB refill - Write"/>
-    <!-- 0x50 L2D_CACHE_LD - Level 2 data cache access - Read -->
     <event event="0x50" title="Cache" name="L2 data read" description="Level 2 data cache access - Read"/>
-    <!-- 0x51 L2D_CACHE_ST - Level 2 data cache access - Write -->
     <event event="0x51" title="Cache" name="L2 data access write" description="Level 2 data cache access - Write"/>
-    <!-- 0x52 L2D_CACHE_REFILL_LD - Level 2 data cache refill - Read -->
     <event event="0x52" title="Cache" name="L2 data refill read" description="Level 2 data cache refill - Read"/>
-    <!-- 0x53 L2D_CACHE_REFILL_ST - Level 2 data cache refill - Write -->
     <event event="0x53" title="Cache" name="L2 data refill write" description="Level 2 data cache refill - Write"/>
-    <!-- 0x56 L2D_CACHE_WB_VICTIM - Level 2 data cache Write-back - Victim -->
     <event event="0x56" title="Cache" name="L2 data victim" description="Level 2 data cache Write-back - Victim"/>
-    <!-- 0x57 L2D_CACHE_WB_CLEAN - Level 2 data cache Write-back - Cleaning and coherency -->
     <event event="0x57" title="Cache" name="L2 data clean" description="Level 2 data cache Write-back - Cleaning and coherency"/>
-    <!-- 0x58 L2D_CACHE_INVAL - Level 2 data cache invalidate -->
     <event event="0x58" title="Cache" name="L2 data invalidate" description="Level 2 data cache invalidate"/>
-    <!-- 0x60 BUS_ACCESS_LD - Bus access - Read -->
     <event event="0x60" title="Bus" name="Read" description="Bus access - Read"/>
-    <!-- 0x61 BUS_ACCESS_ST - Bus access - Write -->
     <event event="0x61" title="Bus" name="Write" description="Bus access - Write"/>
-    <!-- 0x62 BUS_ACCESS_SHARED - Bus access - Normal -->
     <event event="0x62" title="Bus" name="Access shared" description="Bus access - Normal"/>
-    <!-- 0x63 BUS_ACCESS_NOT_SHARED - Bus access - Not normal -->
     <event event="0x63" title="Bus" name="Access not shared" description="Bus access - Not normal"/>
-    <!-- 0x64 BUS_ACCESS_NORMAL - Bus access - Normal -->
     <event event="0x64" title="Bus" name="Access normal" description="Bus access - Normal"/>
-    <!-- 0x65 BUS_ACCESS_PERIPH - Bus access - Peripheral -->
     <event event="0x65" title="Bus" name="Peripheral" description="Bus access - Peripheral"/>
-    <!-- 0x66 MEM_ACCESS_LD - Data memory access - Read -->
     <event event="0x66" title="Memory" name="Read" description="Data memory access - Read"/>
-    <!-- 0x67 MEM_ACCESS_ST - Data memory access - Write -->
     <event event="0x67" title="Memory" name="Write" description="Data memory access - Write"/>
-    <!-- 0x68 UNALIGNED_LD_SPEC - Unaligned access - Read -->
     <event event="0x68" title="Memory" name="Unaligned Read" description="Unaligned access - Read"/>
-    <!-- 0x69 UNALIGNED_ST_SPEC - Unaligned access - Write -->
     <event event="0x69" title="Memory" name="Unaligned Write" description="Unaligned access - Write"/>
-    <!-- 0x6A UNALIGNED_LDST_SPEC - Unaligned access -->
     <event event="0x6A" title="Memory" name="Unaligned" description="Unaligned access"/>
-    <!-- 0x6C LDREX_SPEC - Exclusive operation speculatively executed - LDREX -->
     <event event="0x6C" title="Intrinsic" name="LDREX" description="Exclusive operation speculatively executed - LDREX"/>
-    <!-- 0x6D STREX_PASS_SPEC - Exclusive instruction speculatively executed - STREX pass -->
     <event event="0x6D" title="Intrinsic" name="STREX pass" description="Exclusive instruction speculatively executed - STREX pass"/>
-    <!-- 0x6E STREX_FAIL_SPEC - Exclusive operation speculatively executed - STREX fail -->
     <event event="0x6E" title="Intrinsic" name="STREX fail" description="Exclusive operation speculatively executed - STREX fail"/>
-    <!-- 0x70 LD_SPEC - Operation speculatively executed - Load -->
     <event event="0x70" title="Instruction" name="Load" description="Operation speculatively executed - Load"/>
-    <!-- 0x71 ST_SPEC - Operation speculatively executed - Store -->
     <event event="0x71" title="Instruction" name="Store" description="Operation speculatively executed - Store"/>
-    <!-- 0x72 LDST_SPEC - Operation speculatively executed - Load or store -->
     <event event="0x72" title="Instruction" name="Load/Store" description="Operation speculatively executed - Load or store"/>
-    <!-- 0x73 DP_SPEC - Operation speculatively executed - Integer data processing -->
     <event event="0x73" title="Instruction" name="Integer" description="Operation speculatively executed - Integer data processing"/>
-    <!-- 0x74 ASE_SPEC - Operation speculatively executed - Advanced SIMD -->
     <event event="0x74" title="Instruction" name="Advanced SIMD" description="Operation speculatively executed - Advanced SIMD"/>
-    <!-- 0x75 VFP_SPEC - Operation speculatively executed - VFP -->
     <event event="0x75" title="Instruction" name="VFP" description="Operation speculatively executed - VFP"/>
-    <!-- 0x76 PC_WRITE_SPEC - Operation speculatively executed - Software change of the PC -->
     <event event="0x76" title="Instruction" name="Software change" description="Operation speculatively executed - Software change of the PC"/>
-    <!-- 0x77 CRYPTO_SPEC - Operation speculatively executed, crypto data processing -->
     <event event="0x77" title="Instruction" name="Crypto" description="Operation speculatively executed, crypto data processing"/>
-    <!-- 0x78 BR_IMMED_SPEC - Branch speculatively executed - Immediate branch -->
     <event event="0x78" title="Instruction" name="Immediate branch" description="Branch speculatively executed - Immediate branch"/>
-    <!-- 0x79 BR_RETURN_SPEC - Branch speculatively executed - Procedure return -->
     <event event="0x79" title="Instruction" name="Procedure return" description="Branch speculatively executed - Procedure return"/>
-    <!-- 0x7A BR_INDIRECT_SPEC - Branch speculatively executed - Indirect branch -->
     <event event="0x7A" title="Instruction" name="Indirect branch" description="Branch speculatively executed - Indirect branch"/>
-    <!-- 0x7C ISB_SPEC - Barrier speculatively executed - ISB -->
     <event event="0x7C" title="Instruction" name="ISB" description="Barrier speculatively executed - ISB"/>
-    <!-- 0x7D DSB_SPEC - Barrier speculatively executed - DSB -->
     <event event="0x7D" title="Instruction" name="DSB" description="Barrier speculatively executed - DSB"/>
-    <!-- 0x7E DMB_SPEC - Barrier speculatively executed - DMB -->
     <event event="0x7E" title="Instruction" name="DMB" description="Barrier speculatively executed - DMB"/>
-    <!-- 0x81 EXC_UNDEF - Exception taken, other synchronous -->
     <event event="0x81" title="Exception" name="Undefined" description="Exception taken, other synchronous"/>
-    <!-- 0x82 EXC_SVC - Exception taken, Supervisor Call -->
     <event event="0x82" title="Exception" name="Supervisor" description="Exception taken, Supervisor Call"/>
-    <!-- 0x83 EXC_PABORT - Exception taken, Instruction Abort -->
     <event event="0x83" title="Exception" name="Instruction abort" description="Exception taken, Instruction Abort"/>
-    <!-- 0x84 EXC_DABORT - Exception taken, Data Abort or SError -->
     <event event="0x84" title="Exception" name="Data abort" description="Exception taken, Data Abort or SError"/>
-    <!-- 0x86 EXC_IRQ - Exception taken, IRQ -->
     <event event="0x86" title="Interrupts" name="IRQ" description="Exception taken, IRQ"/>
-    <!-- 0x87 EXC_FIQ - Exception taken, FIQ -->
     <event event="0x87" title="Interrupts" name="FIQ" description="Exception taken, FIQ"/>
-    <!-- 0x88 EXC_SMC - Exception taken, Secure Monitor Call -->
     <event event="0x88" title="Exception" name="Secure monitor call" description="Exception taken, Secure Monitor Call"/>
-    <!-- 0x8A EXC_HVC - Exception taken, Hypervisor Call -->
     <event event="0x8A" title="Exception" name="Hypervisor call" description="Exception taken, Hypervisor Call"/>
-    <!-- 0x8B EXC_TRAP_PABORT - Exception taken, Instruction Abort not taken locally -->
     <event event="0x8B" title="Exception" name="Instruction abort non-local" description="Exception taken, Instruction Abort not taken locally"/>
-    <!-- 0x8C EXC_TRAP_DABORT - Exception taken, Data Abort or SError not taken locally -->
     <event event="0x8C" title="Exception" name="Data abort non-local" description="Exception taken, Data Abort or SError not taken locally"/>
-    <!-- 0x8D EXC_TRAP_OTHER - Exception taken - Other traps not taken locally -->
     <event event="0x8D" title="Exception" name="Other non-local" description="Exception taken - Other traps not taken locally"/>
-    <!-- 0x8E EXC_TRAP_IRQ - Exception taken, IRQ not taken locally -->
     <event event="0x8E" title="Exception" name="IRQ non-local" description="Exception taken, IRQ not taken locally"/>
-    <!-- 0x8F EXC_TRAP_FIQ - Exception taken, FIQ not taken locally -->
     <event event="0x8F" title="Exception" name="FIQ non-local" description="Exception taken, FIQ not taken locally"/>
-    <!-- 0x90 RC_LD_SPEC - Release consistency instruction speculatively executed - Load Acquire -->
     <event event="0x90" title="Release Consistency" name="Load" description="Release consistency instruction speculatively executed - Load Acquire"/>
-    <!-- 0x91 RC_ST_SPEC - Release consistency instruction speculatively executed - Store Release -->
     <event event="0x91" title="Release Consistency" name="Store" description="Release consistency instruction speculatively executed - Store Release"/>
   </category>
diff --git a/tools/gator/daemon/events-Cortex-A57.xml b/tools/gator/daemon/events-Cortex-A57.xml
index b7178c0c7427..fbe96c2d4eb2 100644
--- a/tools/gator/daemon/events-Cortex-A57.xml
+++ b/tools/gator/daemon/events-Cortex-A57.xml
@@ -1,171 +1,87 @@
   <counter_set name="ARM_Cortex-A57_cnt" count="6"/>
   <category name="Cortex-A57" counter_set="ARM_Cortex-A57_cnt" per_cpu="yes" supports_event_based_sampling="yes">
-    <!-- 0x11 CPU_CYCLES - Cycle -->
     <event counter="ARM_Cortex-A57_ccnt" event="0x11" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
-    <!-- 0x00 SW_INCR - Instruction architecturally executed (condition check pass) - Software increment -->
     <event event="0x00" title="Software" name="Increment" description="Incremented only on writes to the Software Increment Register"/>
-    <!-- 0x01 L1I_CACHE_REFILL - Level 1 instruction cache refill -->
     <event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
-    <!-- 0x02 L1I_TLB_REFILL - Level 1 instruction TLB refill -->
     <event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
-    <!-- 0x03 L1D_CACHE_REFILL - Level 1 data cache refill -->
     <event event="0x03" title="Cache" name="Data refill" description="Memory Read or Write operation that causes a refill of at least the level of data or unified cache closest to the processor"/>
-    <!-- 0x04 L1D_CACHE - Level 1 data cache access -->
     <event event="0x04" title="Cache" name="Data access" description="Memory Read or Write operation that causes a cache access to at least the level of data or unified cache closest to the processor"/>
-    <!-- 0x05 L1D_TLB_REFILL - Level 1 data TLB refill -->
     <event event="0x05" title="Cache" name="Data TLB refill" description="Memory Read or Write operation that causes a TLB refill of at least the level of TLB closest to the processor"/>
-    <!-- 0x08 INST_RETIRED - Instruction architecturally executed -->
     <event event="0x08" title="Instruction" name="Executed" description="Instruction architecturally executed"/>
-    <!-- 0x09 EXC_TAKEN - Exception taken -->
     <event event="0x09" title="Exception" name="Taken" description="Exceptions taken"/>
-    <!-- 0x0A EXC_RETURN - Instruction architecturally executed (condition check pass) - Exception return -->
     <event event="0x0a" title="Exception" name="Return" description="Exception return architecturally executed"/>
-    <!-- 0x0B CID_WRITE_RETIRED - Instruction architecturally executed (condition check pass) - Write to CONTEXTIDR -->
     <event event="0x0b" title="Instruction" name="CONTEXTIDR" description="Instruction that writes to the CONTEXTIDR architecturally executed"/>
-    <!-- 0x10 BR_MIS_PRED - Mispredicted or not predicted branch speculatively executed -->
     <event event="0x10" title="Branch" name="Mispredicted" description="Branch mispredicted or not predicted"/>
-    <!-- 0x12 BR_PRED - Predictable branch speculatively executed -->
     <event event="0x12" title="Branch" name="Potential prediction" description="Branch or other change in program flow that could have been predicted by the branch prediction resources of the processor"/>
-    <!-- 0x13 MEM_ACCESS - Data memory access -->
     <event event="0x13" title="Memory" name="Memory access" description="Data memory access"/>
-    <!-- 0x14 L1I_CACHE - Level 1 instruction cache access -->
     <event event="0x14" title="Cache" name="L1 inst access" description="Level 1 instruction cache access"/>
-    <!-- 0x15 L1D_CACHE_WB - Level 1 data cache Write-Back -->
     <event event="0x15" title="Cache" name="L1 data write" description="Level 1 data cache Write-Back"/>
-    <!-- 0x16 L2D_CACHE - Level 2 data cache access -->
     <event event="0x16" title="Cache" name="L2 data access" description="Level 2 data cache access"/>
-    <!-- 0x17 L2D_CACHE_REFILL - Level 2 data cache refill -->
     <event event="0x17" title="Cache" name="L2 data refill" description="Level 2 data cache refill"/>
-    <!-- 0x18 L2D_CACHE_WB - Level 2 data cache Write-Back -->
     <event event="0x18" title="Cache" name="L2 data write" description="Level 2 data cache Write-Back"/>
-    <!-- 0x19 BUS_ACCESS - Bus access -->
     <event event="0x19" title="Bus" name="Access" description="Bus access"/>
-    <!-- 0x1A MEMORY_ERROR - Local memory error -->
     <event event="0x1A" title="Memory" name="Error" description="Local memory error"/>
-    <!-- 0x1B INST_SPEC - Operation speculatively executed -->
     <event event="0x1B" title="Instruction" name="Speculative" description="Operation speculatively executed"/>
-    <!-- 0x1C TTBR_WRITE_RETIRED - Instruction architecturally executed (condition check pass) - Write to translation table base -->
     <event event="0x1C" title="Memory" name="Translation table" description="Instruction architecturally executed (condition check pass) - Write to translation table base"/>
-    <!-- 0x1D BUS_CYCLES - Bus cycle -->
     <event event="0x1D" title="Bus" name="Cycle" description="Bus cycle"/>
-    <!-- 0x1E CHAIN - Odd performance counter chain mode -->
     <event event="0x1E" title="Counter chain" name="Odd Performance" description="Odd performance counter chain mode"/>
-    <!-- 0x40 L1D_CACHE_LD - Level 1 data cache access - Read -->
     <event event="0x40" title="Cache" name="L1 data read" description="Level 1 data cache access - Read"/>
-    <!-- 0x41 L1D_CACHE_ST - Level 1 data cache access - Write -->
     <event event="0x41" title="Cache" name="L1 data access write" description="Level 1 data cache access - Write"/>
-    <!-- 0x42 L1D_CACHE_REFILL_LD - Level 1 data cache refill - Read -->
     <event event="0x42" title="Cache" name="L1 data refill read" description="Level 1 data cache refill - Read"/>
-    <!-- 0x43 L1D_CACHE_REFILL_ST - Level 1 data cache refill - Write -->
     <event event="0x43" title="Cache" name="L1 data refill write" description="Level 1 data cache refill - Write"/>
-    <!-- 0x46 L1D_CACHE_WB_VICTIM - Level 1 data cache Write-back - Victim -->
     <event event="0x46" title="Cache" name="L1 data victim" description="Level 1 data cache Write-back - Victim"/>
-    <!-- 0x47 L1D_CACHE_WB_CLEAN - Level 1 data cache Write-back - Cleaning and coherency -->
     <event event="0x47" title="Cache" name="L1 data clean" description="Level 1 data cache Write-back - Cleaning and coherency"/>
-    <!-- 0x48 L1D_CACHE_INVAL - Level 1 data cache invalidate -->
     <event event="0x48" title="Cache" name="L1 data invalidate" description="Level 1 data cache invalidate"/>
-    <!-- 0x4C L1D_TLB_REFILL_LD - Level 1 data TLB refill - Read -->
     <event event="0x4C" title="Cache" name="L1 data refill read" description="Level 1 data TLB refill - Read"/>
-    <!-- 0x4D L1D_TLB_REFILL_ST - Level 1 data TLB refill - Write -->
     <event event="0x4D" title="Cache" name="L1 data refill write" description="Level 1 data TLB refill - Write"/>
-    <!-- 0x50 L2D_CACHE_LD - Level 2 data cache access - Read -->
     <event event="0x50" title="Cache" name="L2 data read" description="Level 2 data cache access - Read"/>
-    <!-- 0x51 L2D_CACHE_ST - Level 2 data cache access - Write -->
     <event event="0x51" title="Cache" name="L2 data access write" description="Level 2 data cache access - Write"/>
-    <!-- 0x52 L2D_CACHE_REFILL_LD - Level 2 data cache refill - Read -->
     <event event="0x52" title="Cache" name="L2 data refill read" description="Level 2 data cache refill - Read"/>
-    <!-- 0x53 L2D_CACHE_REFILL_ST - Level 2 data cache refill - Write -->
     <event event="0x53" title="Cache" name="L2 data refill write" description="Level 2 data cache refill - Write"/>
-    <!-- 0x56 L2D_CACHE_WB_VICTIM - Level 2 data cache Write-back - Victim -->
     <event event="0x56" title="Cache" name="L2 data victim" description="Level 2 data cache Write-back - Victim"/>
-    <!-- 0x57 L2D_CACHE_WB_CLEAN - Level 2 data cache Write-back - Cleaning and coherency -->
     <event event="0x57" title="Cache" name="L2 data clean" description="Level 2 data cache Write-back - Cleaning and coherency"/>
-    <!-- 0x58 L2D_CACHE_INVAL - Level 2 data cache invalidate -->
     <event event="0x58" title="Cache" name="L2 data invalidate" description="Level 2 data cache invalidate"/>
-    <!-- 0x60 BUS_ACCESS_LD - Bus access - Read -->
     <event event="0x60" title="Bus" name="Read" description="Bus access - Read"/>
-    <!-- 0x61 BUS_ACCESS_ST - Bus access - Write -->
     <event event="0x61" title="Bus" name="Write" description="Bus access - Write"/>
-    <!-- 0x62 BUS_ACCESS_SHARED - Bus access - Normal -->
     <event event="0x62" title="Bus" name="Access shared" description="Bus access - Normal"/>
-    <!-- 0x63 BUS_ACCESS_NOT_SHARED - Bus access - Not normal -->
     <event event="0x63" title="Bus" name="Access not shared" description="Bus access - Not normal"/>
-    <!-- 0x64 BUS_ACCESS_NORMAL - Bus access - Normal -->
     <event event="0x64" title="Bus" name="Access normal" description="Bus access - Normal"/>
-    <!-- 0x65 BUS_ACCESS_PERIPH - Bus access - Peripheral -->
     <event event="0x65" title="Bus" name="Peripheral" description="Bus access - Peripheral"/>
-    <!-- 0x66 MEM_ACCESS_LD - Data memory access - Read -->
     <event event="0x66" title="Memory" name="Read" description="Data memory access - Read"/>
-    <!-- 0x67 MEM_ACCESS_ST - Data memory access - Write -->
     <event event="0x67" title="Memory" name="Write" description="Data memory access - Write"/>
-    <!-- 0x68 UNALIGNED_LD_SPEC - Unaligned access - Read -->
     <event event="0x68" title="Memory" name="Unaligned Read" description="Unaligned access - Read"/>
-    <!-- 0x69 UNALIGNED_ST_SPEC - Unaligned access - Write -->
     <event event="0x69" title="Memory" name="Unaligned Write" description="Unaligned access - Write"/>
-    <!-- 0x6A UNALIGNED_LDST_SPEC - Unaligned access -->
     <event event="0x6A" title="Memory" name="Unaligned" description="Unaligned access"/>
-    <!-- 0x6C LDREX_SPEC - Exclusive operation speculatively executed - LDREX -->
     <event event="0x6C" title="Intrinsic" name="LDREX" description="Exclusive operation speculatively executed - LDREX"/>
-    <!-- 0x6D STREX_PASS_SPEC - Exclusive instruction speculatively executed - STREX pass -->
     <event event="0x6D" title="Intrinsic" name="STREX pass" description="Exclusive instruction speculatively executed - STREX pass"/>
-    <!-- 0x6E STREX_FAIL_SPEC - Exclusive operation speculatively executed - STREX fail -->
     <event event="0x6E" title="Intrinsic" name="STREX fail" description="Exclusive operation speculatively executed - STREX fail"/>
-    <!-- 0x70 LD_SPEC - Operation speculatively executed - Load -->
     <event event="0x70" title="Instruction" name="Load" description="Operation speculatively executed - Load"/>
-    <!-- 0x71 ST_SPEC - Operation speculatively executed - Store -->
     <event event="0x71" title="Instruction" name="Store" description="Operation speculatively executed - Store"/>
-    <!-- 0x72 LDST_SPEC - Operation speculatively executed - Load or store -->
     <event event="0x72" title="Instruction" name="Load/Store" description="Operation speculatively executed - Load or store"/>
-    <!-- 0x73 DP_SPEC - Operation speculatively executed - Integer data processing -->
     <event event="0x73" title="Instruction" name="Integer" description="Operation speculatively executed - Integer data processing"/>
-    <!-- 0x74 ASE_SPEC - Operation speculatively executed - Advanced SIMD -->
     <event event="0x74" title="Instruction" name="Advanced SIMD" description="Operation speculatively executed - Advanced SIMD"/>
-    <!-- 0x75 VFP_SPEC - Operation speculatively executed - VFP -->
     <event event="0x75" title="Instruction" name="VFP" description="Operation speculatively executed - VFP"/>
-    <!-- 0x76 PC_WRITE_SPEC - Operation speculatively executed - Software change of the PC -->
     <event event="0x76" title="Instruction" name="Software change" description="Operation speculatively executed - Software change of the PC"/>
-    <!-- 0x77 CRYPTO_SPEC - Operation speculatively executed, crypto data processing -->
     <event event="0x77" title="Instruction" name="Crypto" description="Operation speculatively executed, crypto data processing"/>
-    <!-- 0x78 BR_IMMED_SPEC - Branch speculatively executed - Immediate branch -->
     <event event="0x78" title="Instruction" name="Immediate branch" description="Branch speculatively executed - Immediate branch"/>
-    <!-- 0x79 BR_RETURN_SPEC - Branch speculatively executed - Procedure return -->
     <event event="0x79" title="Instruction" name="Procedure return" description="Branch speculatively executed - Procedure return"/>
-    <!-- 0x7A BR_INDIRECT_SPEC - Branch speculatively executed - Indirect branch -->
     <event event="0x7A" title="Instruction" name="Indirect branch" description="Branch speculatively executed - Indirect branch"/>
-    <!-- 0x7C ISB_SPEC - Barrier speculatively executed - ISB -->
     <event event="0x7C" title="Instruction" name="ISB" description="Barrier speculatively executed - ISB"/>
-    <!-- 0x7D DSB_SPEC - Barrier speculatively executed - DSB -->
     <event event="0x7D" title="Instruction" name="DSB" description="Barrier speculatively executed - DSB"/>
-    <!-- 0x7E DMB_SPEC - Barrier speculatively executed - DMB -->
     <event event="0x7E" title="Instruction" name="DMB" description="Barrier speculatively executed - DMB"/>
-    <!-- 0x81 EXC_UNDEF - Exception taken, other synchronous -->
     <event event="0x81" title="Exception" name="Undefined" description="Exception taken, other synchronous"/>
-    <!-- 0x82 EXC_SVC - Exception taken, Supervisor Call -->
     <event event="0x82" title="Exception" name="Supervisor" description="Exception taken, Supervisor Call"/>
-    <!-- 0x83 EXC_PABORT - Exception taken, Instruction Abort -->
     <event event="0x83" title="Exception" name="Instruction abort" description="Exception taken, Instruction Abort"/>
-    <!-- 0x84 EXC_DABORT - Exception taken, Data Abort or SError -->
     <event event="0x84" title="Exception" name="Data abort" description="Exception taken, Data Abort or SError"/>
-    <!-- 0x86 EXC_IRQ - Exception taken, IRQ -->
     <event event="0x86" title="Interrupts" name="IRQ" description="Exception taken, IRQ"/>
-    <!-- 0x87 EXC_FIQ - Exception taken, FIQ -->
     <event event="0x87" title="Interrupts" name="FIQ" description="Exception taken, FIQ"/>
-    <!-- 0x88 EXC_SMC - Exception taken, Secure Monitor Call -->
     <event event="0x88" title="Exception" name="Secure monitor call" description="Exception taken, Secure Monitor Call"/>
-    <!-- 0x8A EXC_HVC - Exception taken, Hypervisor Call -->
     <event event="0x8A" title="Exception" name="Hypervisor call" description="Exception taken, Hypervisor Call"/>
-    <!-- 0x8B EXC_TRAP_PABORT - Exception taken, Instruction Abort not taken locally -->
     <event event="0x8B" title="Exception" name="Instruction abort non-local" description="Exception taken, Instruction Abort not taken locally"/>
-    <!-- 0x8C EXC_TRAP_DABORT - Exception taken, Data Abort or SError not taken locally -->
     <event event="0x8C" title="Exception" name="Data abort non-local" description="Exception taken, Data Abort or SError not taken locally"/>
-    <!-- 0x8D EXC_TRAP_OTHER - Exception taken - Other traps not taken locally -->
     <event event="0x8D" title="Exception" name="Other non-local" description="Exception taken - Other traps not taken locally"/>
-    <!-- 0x8E EXC_TRAP_IRQ - Exception taken, IRQ not taken locally -->
     <event event="0x8E" title="Exception" name="IRQ non-local" description="Exception taken, IRQ not taken locally"/>
-    <!-- 0x8F EXC_TRAP_FIQ - Exception taken, FIQ not taken locally -->
     <event event="0x8F" title="Exception" name="FIQ non-local" description="Exception taken, FIQ not taken locally"/>
-    <!-- 0x90 RC_LD_SPEC - Release consistency instruction speculatively executed - Load Acquire -->
     <event event="0x90" title="Release Consistency" name="Load" description="Release consistency instruction speculatively executed - Load Acquire"/>
-    <!-- 0x91 RC_ST_SPEC - Release consistency instruction speculatively executed - Store Release -->
     <event event="0x91" title="Release Consistency" name="Store" description="Release consistency instruction speculatively executed - Store Release"/>
   </category>
diff --git a/tools/gator/daemon/events-Filesystem.xml b/tools/gator/daemon/events-Filesystem.xml
new file mode 100644
index 000000000000..5feeb9014a63
--- /dev/null
+++ b/tools/gator/daemon/events-Filesystem.xml
@@ -0,0 +1,11 @@
+  <category name="Filesystem">
+    <!-- counter attributes must be unique -->
+    <!-- regex item in () is the value shown -->
+    <!-- these counters are not compatible with userspace gator, i.e. gator.ko must be loaded -->
+    <!--
+    <event counter="/sys/devices/system/cpu/cpu1/online" title="online" name="cpu 1" class="absolute" description="If cpu 1 is online"/>
+    <event counter="/proc/self/loginuid" title="loginuid" name="loginuid" class="absolute" description="loginuid"/>
+    <event counter="/proc/self/stat" title="stat" name="rss" class="absolute" regex="-?[0-9]+ \(.*\) . -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ (-?[0-9]+)" units="pages" description="resident set size"/>
+    <event counter="/proc/stat" title="proc-stat" name="processes" class="absolute" regex="processes ([0-9]+)" description="Number of processes and threads created"/>
+    -->
+  </category>
diff --git a/tools/gator/daemon/events-L2C-310.xml b/tools/gator/daemon/events-L2C-310.xml
index 4da4d1d63431..923fb90334d0 100644
--- a/tools/gator/daemon/events-L2C-310.xml
+++ b/tools/gator/daemon/events-L2C-310.xml
@@ -1,18 +1,18 @@
   <counter_set name="L2C-310_cnt" count="2"/>
   <category name="L2C-310" counter_set="L2C-310_cnt" per_cpu="no">
-    <event event="0x1" title="L2 Cache" name="CO" description="Eviction, CastOUT, of a line from the L2 cache"/>
-    <event event="0x2" title="L2 Cache" name="DRH" description="Data read hit"/>
-    <event event="0x3" title="L2 Cache" name="DRREQ" description="Data read request"/>
-    <event event="0x4" title="L2 Cache" name="DWHIT" description="Data write hit"/>
-    <event event="0x5" title="L2 Cache" name="DWREQ" description="Data write request"/>
-    <event event="0x6" title="L2 Cache" name="DWTREQ" description="Data write request with write-through attribute"/>
-    <event event="0x7" title="L2 Cache" name="IRHIT" description="Instruction read hit"/>
-    <event event="0x8" title="L2 Cache" name="IRREQ" description="Instruction read request"/>
-    <event event="0x9" title="L2 Cache" name="WA" description="Write allocate"/>
-    <event event="0xa" title="L2 Cache" name="IPFALLOC" description="Allocation of a prefetch generated by L2C-310 into the L2 cache"/>
-    <event event="0xb" title="L2 Cache" name="EPFHIT" description="Prefetch hint hits in the L2 cache"/>
-    <event event="0xc" title="L2 Cache" name="EPFALLOC" description="Prefetch hint allocated into the L2 cache"/>
-    <event event="0xd" title="L2 Cache" name="SRRCVD" description="Speculative read received"/>
-    <event event="0xe" title="L2 Cache" name="SRCONF" description="Speculative read confirmed"/>
-    <event event="0xf" title="L2 Cache" name="EPFRCVD" description="Prefetch hint received"/>
+    <event event="0x1" title="L2 Cache" name="CastOUT" description="Eviction, CastOUT, of a line from the L2 cache"/>
+    <event event="0x2" title="L2 Cache" name="Data Read Hit" description="Data read hit in the L2 cache"/>
+    <event event="0x3" title="L2 Cache" name="Data Read Request" description="Data read lookup to the L2 cache. Subsequently results in a hit or miss"/>
+    <event event="0x4" title="L2 Cache" name="Data Write Hit" description="Data write hit in the L2 cache"/>
+    <event event="0x5" title="L2 Cache" name="Data Write Request" description="Data write lookup to the L2 cache. Subsequently results in a hit or miss"/>
+    <event event="0x6" title="L2 Cache" name="Data Write-Through Request" description="Data write lookup to the L2 cache with Write-Through attribute. Subsequently results in a hit or miss"/>
+    <event event="0x7" title="L2 Cache" name="Instruction Read Hit" description="Instruction read hit in the L2 cache"/>
+    <event event="0x8" title="L2 Cache" name="Instruction Read Request" description="Instruction read lookup to the L2 cache. Subsequently results in a hit or miss"/>
+    <event event="0x9" title="L2 Cache" name="Write Allocate Miss" description="Allocation into the L2 cache caused by a write, with Write-Allocate attribute, miss"/>
+    <event event="0xa" title="L2 Cache" name="Internal Prefetch Allocate" description="Allocation of a prefetch generated by L2C-310 into the L2 cache"/>
+    <event event="0xb" title="L2 Cache" name="Prefetch Hit" description="Prefetch hint hits in the L2 cache"/>
+    <event event="0xc" title="L2 Cache" name="Prefetch Allocate" description="Prefetch hint allocated into the L2 cache"/>
+    <event event="0xd" title="L2 Cache" name="Speculative Read Received" description="Speculative read received"/>
+    <event event="0xe" title="L2 Cache" name="Speculative Read Confirmed" description="Speculative read confirmed"/>
+    <event event="0xf" title="L2 Cache" name="Prefetch Hint Received" description="Prefetch hint received"/>
   </category>
diff --git a/tools/gator/daemon/events-Linux.xml b/tools/gator/daemon/events-Linux.xml
index 4d677e15db7e..c306dd62208e 100644
--- a/tools/gator/daemon/events-Linux.xml
+++ b/tools/gator/daemon/events-Linux.xml
@@ -11,7 +11,6 @@
     <event counter="Linux_meminfo_bufferram" title="Memory" name="Buffer" class="absolute" units="B" description="Memory used by OS disk buffers"/>
     <event counter="Linux_power_cpu_freq" title="Clock" name="Frequency" per_cpu="yes" class="absolute" units="Hz" series_composition="overlay" average_cores="yes" description="Frequency setting of the CPU"/>
     <event counter="Linux_power_cpu_idle" title="Idle" name="State" per_cpu="yes" class="absolute" description="CPU Idle State + 1, set the Sample Rate to None to prevent the hrtimer from interrupting the system"/>
-    <event counter="Linux_cpu_wait_contention" title="CPU Contention" name="Wait" per_cpu="no" class="activity" derived="yes" rendering_type="bar" average_selection="yes" percentage="yes" modifier="10000" description="Thread waiting on contended resource"/>
-    <event counter="Linux_cpu_wait_io" title="CPU I/O" name="Wait" per_cpu="no" class="activity" derived="yes" rendering_type="bar" average_selection="yes" percentage="yes" modifier="10000" description="Thread waiting on I/O resource"/>
+    <event counter="Linux_cpu_wait_contention" title="CPU Contention" name="Wait" per_cpu="no" class="activity" derived="yes" rendering_type="bar" average_selection="yes" percentage="yes" modifier="10000" color="0x003c96fb" description="Thread waiting on contended resource"/>
+    <event counter="Linux_cpu_wait_io" title="CPU I/O" name="Wait" per_cpu="no" class="activity" derived="yes" rendering_type="bar" average_selection="yes" percentage="yes" modifier="10000" color="0x00b30000" description="Thread waiting on I/O resource"/>
   </category>
-
diff --git a/tools/gator/daemon/events-Mali-4xx.xml b/tools/gator/daemon/events-Mali-4xx.xml
index 5a71386830ba..0a95dfeb6485 100644
--- a/tools/gator/daemon/events-Mali-4xx.xml
+++ b/tools/gator/daemon/events-Mali-4xx.xml
@@ -1,34 +1,33 @@
   <counter_set name="ARM_Mali-4xx_VP_0_cnt" count="2"/>
   <counter_set name="ARM_Mali-4xx_SW_cnt" count="0"/>
-  <counter_set name="ARM_Mali-4xx_Filmstrip_cnt" count="1"/>
-  <category name="Mali-4xx-VP" counter_set="ARM_Mali-4xx_VP_0_cnt" per_cpu="no">
-    <event event="0x01" title="Mali GPU Vertex Processor" name="Active cycles" description="Number of cycles per frame the MaliGP2 was active."/>
-    <event event="0x02" title="Mali GPU Vertex Processor" name="Active cycles, vertex shader" description="Number of cycles per frame the vertex shader unit was active."/>
-    <event event="0x03" title="Mali GPU Vertex Processor" name="Active cycles, vertex storer" description="Number of cycles per frame the vertex storer unit was active."/>
-    <event event="0x04" title="Mali GPU Vertex Processor" name="Active cycles, vertex loader" description="Number of cycles per frame the vertex loader unit was active."/>
-    <event event="0x05" title="Mali GPU Vertex Processor" name="Cycles vertex loader waiting for vertex shader" description="Number of cycles per frame the vertex loader was idle while waiting on the vertex shader."/>
-    <event event="0x06" title="Mali GPU Vertex Processor" name="Words read, system bus" description="Total number of 64 bit words read by the GP2 from the system bus per frame."/>
-    <event event="0x07" title="Mali GPU Vertex Processor" name="Words written, system bus" description="Total number of 64 bit words written by the GP2 to the system bus per frame."/>
-    <event event="0x08" title="Mali GPU Vertex Processor" name="Read bursts, system bus" description="Number of read bursts by the GP2 from the system bus per frame."/>
-    <event event="0x09" title="Mali GPU Vertex Processor" name="Write bursts, system bus" description="Number of write bursts from the MaliGP2 to the system bus per frame."/>
-    <event event="0x0a" title="Mali GPU Vertex Processor" name="Vertices processed" description="Number of vertices processed by the MaliGP2 per frame."/>
-    <event event="0x0b" title="Mali GPU Vertex Processor" name="Vertices fetched" description="Number of vertices fetched by the MaliGP2 per frame."/>
-    <event event="0x0c" title="Mali GPU Vertex Processor" name="Primitives fetched" description="Number of graphics primitives fetched by the MaliGP2 per frame."/>
-    <event event="0x0e" title="Mali GPU Vertex Processor" name="Primitives culled" description="Number of graphics primitives discarded per frame, because they were seen from the back or were offscreen."/>
-    <event event="0x0f" title="Mali GPU Vertex Processor" name="Commands written to tiles" description="Number of commands (8 Bytes, mainly primitives) written by GP2 to the PP input data structure per frame."/>
-    <event event="0x10" title="Mali GPU Vertex Processor" name="Memory blocks allocated" description="Number of overflow data blocks needed for outputting the PP input data structure per frame ."/>
-    <event event="0x13" title="Mali GPU Vertex Processor" name="Vertex loader cache misses" description="Number of cache misses for the vertex shader's vertex input unit per frame."/>
-    <event event="0x16" title="Mali GPU Vertex Processor" name="Active cycles, vertex shader command processor" description="Number of cycles per frame the GP2 vertex shader command processor was active. This includes time waiting for semaphores."/>
-    <event event="0x17" title="Mali GPU Vertex Processor" name="Active cycles, PLBU command processor" description="Number of cycles per frame the MaliGP2 PLBU command processor was active. This includes time waiting for semaphores."/>
-    <event event="0x18" title="Mali GPU Vertex Processor" name="MaliGP2 PLBU cycles per frame" description="Number of cycles per frame the MaliGP2 PLBU output unit was active. This includes time spent waiting on the bus."/>
-    <event event="0x19" title="Mali GPU Vertex Processor" name="Active cycles, PLBU geometry processing" description="Number of cycles per frame the MaliGP2 PLBU was active, excepting final data output. In other words: active cycles through the prepare list commands. This includes time spent waiting on the bus."/>
-    <event event="0x1b" title="Mali GPU Vertex Processor" name="Active cycles, PLBU primitive assembly" description="Number of active cycles per frame spent by the MaliGP2 PLBU doing primitive assembly. This does not include scissoring or final output. This includes time spent waiting on the bus."/>
-    <event event="0x1c" title="Mali GPU Vertex Processor" name="Active cycles, PLBU vertex fetcher" description="Number of active cycles per frame spent by the MaliGP2 PLBU fetching vertex data. This includes time spent waiting on the bus."/>
-    <event event="0x1e" title="Mali GPU Vertex Processor" name="Active cycles, Bounding-box and command generator" description="Number of active cycles per frame spent by the MaliGP2 PLBU setting up bounding boxes and commands (mainly graphics primitives). This includes time spent waiting on the bus."/>
-    <event event="0x20" title="Mali GPU Vertex Processor" name="Active cycles, Scissor tile iterator" description="Number of active cycles per frame spent by the MaliGP2 PLBU iterating over tiles to perform scissoring. This includes time spent waiting on the bus."/>
-    <event event="0x21" title="Mali GPU Vertex Processor" name="Active cycles, PLBU tile iterator" description="Number of active cycles per frame spent by the MaliGP2 PLBU iterating over the tiles in the bounding box generating commands (mainly graphics primitives). This includes time spent waiting on the bus."/>
+  <category name="Mali Vertex Processor" counter_set="ARM_Mali-4xx_VP_0_cnt" per_cpu="no">
+    <event event="0x01" title="Mali-4xx VP" name="Active cycles" description="Number of cycles per frame the MaliGP2 was active."/>
+    <event event="0x02" title="Mali-4xx VP" name="Active cycles, vertex shader" description="Number of cycles per frame the vertex shader unit was active."/>
+    <event event="0x03" title="Mali-4xx VP" name="Active cycles, vertex storer" description="Number of cycles per frame the vertex storer unit was active."/>
+    <event event="0x04" title="Mali-4xx VP" name="Active cycles, vertex loader" description="Number of cycles per frame the vertex loader unit was active."/>
+    <event event="0x05" title="Mali-4xx VP" name="Cycles vertex loader waiting for vertex shader" description="Number of cycles per frame the vertex loader was idle while waiting on the vertex shader."/>
+    <event event="0x06" title="Mali-4xx VP" name="Words read, system bus" description="Total number of 64 bit words read by the GP2 from the system bus per frame."/>
+    <event event="0x07" title="Mali-4xx VP" name="Words written, system bus" description="Total number of 64 bit words written by the GP2 to the system bus per frame."/>
+    <event event="0x08" title="Mali-4xx VP" name="Read bursts, system bus" description="Number of read bursts by the GP2 from the system bus per frame."/>
+    <event event="0x09" title="Mali-4xx VP" name="Write bursts, system bus" description="Number of write bursts from the MaliGP2 to the system bus per frame."/>
+    <event event="0x0a" title="Mali-4xx VP" name="Vertices processed" description="Number of vertices processed by the MaliGP2 per frame."/>
+    <event event="0x0b" title="Mali-4xx VP" name="Vertices fetched" description="Number of vertices fetched by the MaliGP2 per frame."/>
+    <event event="0x0c" title="Mali-4xx VP" name="Primitives fetched" description="Number of graphics primitives fetched by the MaliGP2 per frame."/>
+    <event event="0x0e" title="Mali-4xx VP" name="Primitives culled" description="Number of graphics primitives discarded per frame, because they were seen from the back or were offscreen."/>
+    <event event="0x0f" title="Mali-4xx VP" name="Commands written to tiles" description="Number of commands (8 Bytes, mainly primitives) written by GP2 to the PP input data structure per frame."/>
+    <event event="0x10" title="Mali-4xx VP" name="Memory blocks allocated" description="Number of overflow data blocks needed for outputting the PP input data structure per frame."/>
+    <event event="0x13" title="Mali-4xx VP" name="Vertex loader cache misses" description="Number of cache misses for the vertex shader's vertex input unit per frame."/>
+    <event event="0x16" title="Mali-4xx VP" name="Active cycles, vertex shader command processor" description="Number of cycles per frame the GP2 vertex shader command processor was active. This includes time waiting for semaphores."/>
+    <event event="0x17" title="Mali-4xx VP" name="Active cycles, PLBU command processor" description="Number of cycles per frame the MaliGP2 PLBU command processor was active. This includes time waiting for semaphores."/>
+    <event event="0x18" title="Mali-4xx VP" name="Active Cycles, PLBU list writer" description="Number of cycles per frame the MaliGP2 PLBU output unit was active. This includes time spent waiting on the bus."/>
+    <event event="0x19" title="Mali-4xx VP" name="Active cycles, PLBU geometry processing" description="Number of cycles per frame the MaliGP2 PLBU was active, excepting final data output. In other words: active cycles through the prepare list commands. This includes time spent waiting on the bus."/>
+    <event event="0x1b" title="Mali-4xx VP" name="Active cycles, PLBU primitive assembly" description="Number of active cycles per frame spent by the MaliGP2 PLBU doing primitive assembly. This does not include scissoring or final output. This includes time spent waiting on the bus."/>
+    <event event="0x1c" title="Mali-4xx VP" name="Active cycles, PLBU vertex fetcher" description="Number of active cycles per frame spent by the MaliGP2 PLBU fetching vertex data. This includes time spent waiting on the bus."/>
+    <event event="0x1e" title="Mali-4xx VP" name="Active cycles, Bounding-box and command generator" description="Number of active cycles per frame spent by the MaliGP2 PLBU setting up bounding boxes and commands (mainly graphics primitives). This includes time spent waiting on the bus."/>
+    <event event="0x20" title="Mali-4xx VP" name="Active cycles, Scissor tile iterator" description="Number of active cycles per frame spent by the MaliGP2 PLBU iterating over tiles to perform scissoring. This includes time spent waiting on the bus."/>
+    <event event="0x21" title="Mali-4xx VP" name="Active cycles, PLBU tile iterator" description="Number of active cycles per frame spent by the MaliGP2 PLBU iterating over the tiles in the bounding box generating commands (mainly graphics primitives). This includes time spent waiting on the bus."/>
   </category>
-  <category name="Mali GPU Fragment Processor" per_cpu="no">
+  <category name="Mali Fragment Processor" per_cpu="no">
     <counter_set name="ARM_Mali-4xx_FP_0_cnt" title="Mali-4xx FP0" description="Mali GPU Fragment Processor 0" count="2"/>
     <counter_set name="ARM_Mali-4xx_FP_1_cnt" title="Mali-4xx FP1" description="Mali GPU Fragment Processor 1" count="2"/>
     <counter_set name="ARM_Mali-4xx_FP_2_cnt" title="Mali-4xx FP2" description="Mali GPU Fragment Processor 2" count="2"/>
@@ -37,7 +36,6 @@
     <counter_set name="ARM_Mali-4xx_FP_5_cnt" title="Mali-4xx FP5" description="Mali GPU Fragment Processor 5" count="2"/>
     <counter_set name="ARM_Mali-4xx_FP_6_cnt" title="Mali-4xx FP6" description="Mali GPU Fragment Processor 6" count="2"/>
     <counter_set name="ARM_Mali-4xx_FP_7_cnt" title="Mali-4xx FP7" description="Mali GPU Fragment Processor 7" count="2"/>
-
     <event event="0x00" title="Mali-4xx FP" name="Active clock cycles" description="Active clock cycles, between polygon start and IRQ."/>
     <event event="0x02" title="Mali-4xx FP" name="Total bus reads" description="Total number of 64-bit words read from the bus."/>
     <event event="0x03" title="Mali-4xx FP" name="Total bus writes" description="Total number of 64-bit words written to the bus."/>
@@ -96,11 +94,10 @@
     <event event="0x3c" title="Mali-4xx FP" name="Program cache hit count" description="Number of hits in the program cache."/>
     <event event="0x3d" title="Mali-4xx FP" name="Program cache miss count" description="Number of misses in the program cache."/>
   </category>
-  <counter_set name="ARM_Mali-4xx_L2_0_cnt" title="Mali-4xx L2 0" description="Mali GPU L2 Cache Core 0" count="2"/>
-  <category name="Mali-4xx-L2_0" counter_set="ARM_Mali-4xx_L2_0_cnt" per_cpu="no">
+  <counter_set name="ARM_Mali-4xx_L2_0_cnt" title="Mali-4xx L2" description="Mali GPU L2 Cache Core 0" count="2"/>
+  <category name="Mali-4xx L2" counter_set="ARM_Mali-4xx_L2_0_cnt" per_cpu="no">
     <event event="0x01" title="Mali L2 Cache" name="Total clock cycles" description="Total clock cycles"/>
     <event event="0x02" title="Mali L2 Cache" name="Active clock cycles" description="Active clock cycles"/>
-
     <option_set name="All">
       <option event_delta="0x08" name="Master" description="Master"/>
       <option event_delta="0x10" name="All slaves" description="All slaves"/>
@@ -110,7 +107,6 @@
       <option event_delta="0x50" name="Slave 3" description="Slave 3"/>
       <option event_delta="0x60" name="Slave 4" description="Slave 4"/>
     </option_set>
-
     <option_set name="Slaves">
       <option event_delta="0x10" name="All slaves" description="All slaves"/>
       <option event_delta="0x20" name="Slave 0" description="Slave 0"/>
@@ -119,7 +115,6 @@
       <option event_delta="0x50" name="Slave 3" description="Slave 3"/>
       <option event_delta="0x60" name="Slave 4" description="Slave 4"/>
     </option_set>
-
     <event event="0x00" option_set="All" title="Mali L2 Cache" name="Read transactions" description="Read transactions"/>
     <event event="0x01" option_set="All" title="Mali L2 Cache" name="Write transactions" description="Write transactions"/>
     <event event="0x02" option_set="All" title="Mali L2 Cache" name="Words read" description="Words read"/>
@@ -131,10 +126,9 @@
     <event event="0x08" option_set="Slaves" title="Mali L2 Cache" name="Cacheable read transactions" description="Cacheable read transactions"/>
   </category>
   <counter_set name="ARM_Mali-4xx_L2_1_cnt" title="Mali-4xx L2 1" description="Mali GPU L2 Cache Core 1" count="2"/>
-  <category name="Mali-4xx-L2_1" counter_set="ARM_Mali-4xx_L2_1_cnt" per_cpu="no">
-    <event event="0x01" title="Mali L2 Cache" name="Total clock cycles" description="Total clock cycles"/>
-    <event event="0x02" title="Mali L2 Cache" name="Active clock cycles" description="Active clock cycles"/>
-
+  <category name="Mali-4xx L2_1" counter_set="ARM_Mali-4xx_L2_1_cnt" per_cpu="no">
+    <event event="0x01" title="Mali L2 Cache 1" name="Total clock cycles" description="Total clock cycles"/>
+    <event event="0x02" title="Mali L2 Cache 1" name="Active clock cycles" description="Active clock cycles"/>
     <option_set name="All">
       <option event_delta="0x08" name="Master" description="Master"/>
       <option event_delta="0x10" name="All slaves" description="All slaves"/>
@@ -144,7 +138,6 @@
       <option event_delta="0x50" name="Slave 3" description="Slave 3"/>
       <option event_delta="0x60" name="Slave 4" description="Slave 4"/>
     </option_set>
-
     <option_set name="Slaves">
       <option event_delta="0x10" name="All slaves" description="All slaves"/>
       <option event_delta="0x20" name="Slave 0" description="Slave 0"/>
@@ -153,22 +146,20 @@
       <option event_delta="0x50" name="Slave 3" description="Slave 3"/>
       <option event_delta="0x60" name="Slave 4" description="Slave 4"/>
     </option_set>
-
-    <event event="0x00" option_set="All" title="Mali L2 Cache" name="Read transactions" description="Read transactions"/>
-    <event event="0x01" option_set="All" title="Mali L2 Cache" name="Write transactions" description="Write transactions"/>
-    <event event="0x02" option_set="All" title="Mali L2 Cache" name="Words read" description="Words read"/>
-    <event event="0x03" option_set="All" title="Mali L2 Cache" name="Words written" description="Words written"/>
-    <event event="0x04" option_set="Slaves" title="Mali L2 Cache" name="Read hits" description="Read hits"/>
-    <event event="0x05" option_set="Slaves" title="Mali L2 Cache" name="Read misses" description="Read misses"/>
-    <event event="0x06" option_set="Slaves" title="Mali L2 Cache" name="Write invalidates" description="Write invalidates"/>
-    <event event="0x07" option_set="Slaves" title="Mali L2 Cache" name="Read invalidates" description="Read invalidates"/>
-    <event event="0x08" option_set="Slaves" title="Mali L2 Cache" name="Cacheable read transactions" description="Cacheable read transactions"/>
+    <event event="0x00" option_set="All" title="Mali L2 Cache 1" name="Read transactions" description="Read transactions"/>
+    <event event="0x01" option_set="All" title="Mali L2 Cache 1" name="Write transactions" description="Write transactions"/>
+    <event event="0x02" option_set="All" title="Mali L2 Cache 1" name="Words read" description="Words read"/>
+    <event event="0x03" option_set="All" title="Mali L2 Cache 1" name="Words written" description="Words written"/>
+    <event event="0x04" option_set="Slaves" title="Mali L2 Cache 1" name="Read hits" description="Read hits"/>
+    <event event="0x05" option_set="Slaves" title="Mali L2 Cache 1" name="Read misses" description="Read misses"/>
+    <event event="0x06" option_set="Slaves" title="Mali L2 Cache 1" name="Write invalidates" description="Write invalidates"/>
+    <event event="0x07" option_set="Slaves" title="Mali L2 Cache 1" name="Read invalidates" description="Read invalidates"/>
+    <event event="0x08" option_set="Slaves" title="Mali L2 Cache 1" name="Cacheable read transactions" description="Cacheable read transactions"/>
   </category>
   <counter_set name="ARM_Mali-4xx_L2_2_cnt" title="Mali-4xx L2 2" description="Mali GPU L2 Cache Core 2" count="2"/>
-  <category name="Mali-4xx-L2_2" counter_set="ARM_Mali-4xx_L2_2_cnt" per_cpu="no">
-    <event event="0x01" title="Mali L2 Cache" name="Total clock cycles" description="Total clock cycles"/>
-    <event event="0x02" title="Mali L2 Cache" name="Active clock cycles" description="Active clock cycles"/>
-
+  <category name="Mali-4xx L2_2" counter_set="ARM_Mali-4xx_L2_2_cnt" per_cpu="no">
+    <event event="0x01" title="Mali L2 Cache 2" name="Total clock cycles" description="Total clock cycles"/>
+    <event event="0x02" title="Mali L2 Cache 2" name="Active clock cycles" description="Active clock cycles"/>
     <option_set name="All">
       <option event_delta="0x08" name="Master" description="Master"/>
       <option event_delta="0x10" name="All slaves" description="All slaves"/>
@@ -178,7 +169,6 @@
       <option event_delta="0x50" name="Slave 3" description="Slave 3"/>
       <option event_delta="0x60" name="Slave 4" description="Slave 4"/>
     </option_set>
-
     <option_set name="Slaves">
       <option event_delta="0x10" name="All slaves" description="All slaves"/>
       <option event_delta="0x20" name="Slave 0" description="Slave 0"/>
@@ -187,18 +177,18 @@
       <option event_delta="0x50" name="Slave 3" description="Slave 3"/>
       <option event_delta="0x60" name="Slave 4" description="Slave 4"/>
     </option_set>
-
-    <event event="0x00" option_set="All" title="Mali L2 Cache" name="Read transactions" description="Read transactions"/>
-    <event event="0x01" option_set="All" title="Mali L2 Cache" name="Write transactions" description="Write transactions"/>
-    <event event="0x02" option_set="All" title="Mali L2 Cache" name="Words read" description="Words read"/>
-    <event event="0x03" option_set="All" title="Mali L2 Cache" name="Words written" description="Words written"/>
-    <event event="0x04" option_set="Slaves" title="Mali L2 Cache" name="Read hits" description="Read hits"/>
-    <event event="0x05" option_set="Slaves" title="Mali L2 Cache" name="Read misses" description="Read misses"/>
-    <event event="0x06" option_set="Slaves" title="Mali L2 Cache" name="Write invalidates" description="Write invalidates"/>
-    <event event="0x07" option_set="Slaves" title="Mali L2 Cache" name="Read invalidates" description="Read invalidates"/>
-    <event event="0x08" option_set="Slaves" title="Mali L2 Cache" name="Cacheable read transactions" description="Cacheable read transactions"/>
+    <event event="0x00" option_set="All" title="Mali L2 Cache 2" name="Read transactions" description="Read transactions"/>
+    <event event="0x01" option_set="All" title="Mali L2 Cache 2" name="Write transactions" description="Write transactions"/>
+    <event event="0x02" option_set="All" title="Mali L2 Cache 2" name="Words read" description="Words read"/>
+    <event event="0x03" option_set="All" title="Mali L2 Cache 2" name="Words written" description="Words written"/>
+    <event event="0x04" option_set="Slaves" title="Mali L2 Cache 2" name="Read hits" description="Read hits"/>
+    <event event="0x05" option_set="Slaves" title="Mali L2 Cache 2" name="Read misses" description="Read misses"/>
+    <event event="0x06" option_set="Slaves" title="Mali L2 Cache 2" name="Write invalidates" description="Write invalidates"/>
+    <event event="0x07" option_set="Slaves" title="Mali L2 Cache 2" name="Read invalidates" description="Read invalidates"/>
+    <event event="0x08" option_set="Slaves" title="Mali L2 Cache 2" name="Cacheable read transactions" description="Cacheable read transactions"/>
   </category>
-  <category name="ARM Mali-4xx Filmstrip" counter_set="ARM_Mali-4xx_Filmstrip_cnt" per_cpu="no">
+  <counter_set name="ARM_Mali-4xx_Filmstrip_cnt" count="1"/>
+  <category name="Mali-4xx Filmstrip" counter_set="ARM_Mali-4xx_Filmstrip_cnt" per_cpu="no">
     <option_set name="fs">
       <option event_delta="0x3c" name="1:60" description="captures every 60th frame"/>
       <option event_delta="0x1e" name="1:30" description="captures every 30th frame"/>
@@ -212,7 +202,11 @@
   <category name="ARM_Mali-4xx_Frequency" per_cpu="no">
     <event counter="ARM_Mali-4xx_Frequency" title="Mali GPU Frequency" name="Frequency" display="average" average_selection="yes" units="MHz" description="GPU core frequency."/>
   </category>
-  <category name="Mali-4xx-SW" counter_set="ARM_Mali-4xx_SW_cnt" per_cpu="no">
+  <category name="Mali-4xx Activity" counter_set="ARM_Mali-4xx_Activity_cnt">
+    <event counter="ARM_Mali-4xx_fragment" title="GPU Fragment" name="Activity" class="activity" activity1="Activity" activity_color1="0x00006fcc" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" description="GPU Fragment Activity"/>
+    <event counter="ARM_Mali-4xx_vertex" title="GPU Vertex" name="Activity" class="activity" activity1="Activity" activity_color1="0x00eda000" rendering_type="bar" average_selection="yes" percentage="yes" description="GPU Vertex Activity"/>
+  </category>
+  <category name="Mali-4xx Software Counters" counter_set="ARM_Mali-4xx_SW_cnt" per_cpu="no">
     <!-- EGL Counters -->
     <event counter="ARM_Mali-4xx_SW_0" title="Mali EGL Software Counters" name="Blit Time" description="Time spent blitting the framebuffer from video memory to framebuffer."/>
     <!-- glDrawElements Counters -->
diff --git a/tools/gator/daemon/events-Mali-T6xx.xml b/tools/gator/daemon/events-Mali-T6xx.xml
index ec9ca006f85f..5e8979704870 100644
--- a/tools/gator/daemon/events-Mali-T6xx.xml
+++ b/tools/gator/daemon/events-Mali-T6xx.xml
@@ -1,9 +1,7 @@
-
-  <category name="Mali-T6xx-SW-counters" per_cpu="no">
+  <category name="Mali-T6xx Software Counters" per_cpu="no">
     <event counter="ARM_Mali-T6xx_TOTAL_ALLOC_PAGES" title="Mali Total Alloc Pages" name="Total number of allocated pages" description="Mali total number of allocated pages."/>
   </category>
-
-  <category name="Mali-T6xx-PMShader" per_cpu="no">
+  <category name="Mali-T6xx PM Shader" per_cpu="no">
     <event counter="ARM_Mali-T6xx_PM_SHADER_0" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 0" description="Mali PM Shader: PM Shader Core 0."/>
     <event counter="ARM_Mali-T6xx_PM_SHADER_1" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 1" description="Mali PM Shader: PM Shader Core 1."/>
     <event counter="ARM_Mali-T6xx_PM_SHADER_2" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 2" description="Mali PM Shader: PM Shader Core 2."/>
@@ -13,32 +11,27 @@
     <event counter="ARM_Mali-T6xx_PM_SHADER_6" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 6" description="Mali PM Shader: PM Shader Core 6."/>
     <event counter="ARM_Mali-T6xx_PM_SHADER_7" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 7" description="Mali PM Shader: PM Shader Core 7."/>
   </category>
-
-  <category name="Mali-T6xx-PMTiler" per_cpu="no">
+  <category name="Mali-T6xx PM Tiler" per_cpu="no">
     <event counter="ARM_Mali-T6xx_PM_TILER_0" display="average" average_selection="yes" percentage="yes" title="Mali PM Tiler" name="PM Tiler Core 0" description="Mali PM Tiler: PM Tiler Core 0."/>
   </category>
-
-  <category name="Mali-T6xx-PML2" per_cpu="no">
+  <category name="Mali-T6xx PM L2" per_cpu="no">
     <event counter="ARM_Mali-T6xx_PM_L2_0" display="average" average_selection="yes" percentage="yes" title="Mali PM L2" name="PM L2 Core 0" description="Mali PM L2: PM L2 Core 0."/>
     <event counter="ARM_Mali-T6xx_PM_L2_1" display="average" average_selection="yes" percentage="yes" title="Mali PM L2" name="PM L2 Core 1" description="Mali PM L2: PM L2 Core 1."/>
   </category>
-
-  <category name="Mali-T6xx-MMU_AS" per_cpu="no">
+  <category name="Mali-T6xx MMU Address Space" per_cpu="no">
     <event counter="ARM_Mali-T6xx_MMU_AS_0" display="average" average_selection="yes" percentage="yes" title="Mali MMU Address Space" name="MMU Address Space 0" description="Mali MMU Address Space 0 usage."/>
     <event counter="ARM_Mali-T6xx_MMU_AS_1" display="average" average_selection="yes" percentage="yes" title="Mali MMU Address Space" name="MMU Address Space 1" description="Mali MMU Address Space 1 usage."/>
     <event counter="ARM_Mali-T6xx_MMU_AS_2" display="average" average_selection="yes" percentage="yes" title="Mali MMU Address Space" name="MMU Address Space 2" description="Mali MMU Address Space 2 usage."/>
     <event counter="ARM_Mali-T6xx_MMU_AS_3" display="average" average_selection="yes" percentage="yes" title="Mali MMU Address Space" name="MMU Address Space 3" description="Mali MMU Address Space 3 usage."/>
   </category>
-
-  <category name="Mali-T6xx-MMU_page_fault" per_cpu="no">
+  <category name="Mali-T6xx MMU Page Fault" per_cpu="no">
     <event counter="ARM_Mali-T6xx_MMU_PAGE_FAULT_0" title="Mali MMU Page Fault Add. Space" name="Mali MMU Page Fault Add. Space 0" description="Reports the number of newly allocated pages after a MMU page fault in address space 0."/>
     <event counter="ARM_Mali-T6xx_MMU_PAGE_FAULT_1" title="Mali MMU Page Fault Add. Space" name="Mali MMU Page Fault Add. Space 1" description="Reports the number of newly allocated pages after a MMU page fault in address space 1."/>
     <event counter="ARM_Mali-T6xx_MMU_PAGE_FAULT_2" title="Mali MMU Page Fault Add. Space" name="Mali MMU Page Fault Add. Space 2" description="Reports the number of newly allocated pages after a MMU page fault in address space 2."/>
     <event counter="ARM_Mali-T6xx_MMU_PAGE_FAULT_3" title="Mali MMU Page Fault Add. Space" name="Mali MMU Page Fault Add. Space 3" description="Reports the number of newly allocated pages after a MMU page fault in address space 3."/>
   </category>
-
   <counter_set name="ARM_Mali-T6xx_Filmstrip_cnt" count="1"/>
-  <category name="ARM Mali-T6xx Filmstrip" counter_set="ARM_Mali-T6xx_Filmstrip_cnt" per_cpu="no">
+  <category name="Mali-T6xx Filmstrip" counter_set="ARM_Mali-T6xx_Filmstrip_cnt" per_cpu="no">
     <option_set name="fs">
       <option event_delta="0x3c" name="1:60" description="captures every 60th frame"/>
       <option event_delta="0x1e" name="1:30" description="captures every 30th frame"/>
@@ -46,3 +39,8 @@
     </option_set>
     <event event="0x0400" option_set="fs" title="ARM Mali-T6xx" name="Filmstrip" description="Scaled framebuffer"/>
   </category>
+  <category name="Mali-T6xx Activity" per_cpu="no">
+    <event counter="ARM_Mali-T6xx_fragment" title="GPU Fragment" name="Activity" class="activity" activity1="Activity" activity_color1="0x00006fcc" rendering_type="bar" average_selection="yes" percentage="yes" cores="1" description="GPU Job Slot 0 Activity"/>
+    <event counter="ARM_Mali-T6xx_vertex" title="GPU Vertex-Tiling-Compute" name="Activity" class="activity" activity1="Activity" activity_color1="0x00eda000" rendering_type="bar" average_selection="yes" percentage="yes" cores="1" description="GPU Job Slot 1 Activity"/>
+    <event counter="ARM_Mali-T6xx_opencl" title="GPU Vertex-Compute" name="Activity" class="activity" activity1="Activity" activity_color1="0x00ef022f" rendering_type="bar" average_selection="yes" percentage="yes" cores="1" description="GPU Job Slot 2 Activity"/>
+  </category>
diff --git a/tools/gator/daemon/events-Mali-T6xx_hw.xml b/tools/gator/daemon/events-Mali-T6xx_hw.xml
index 03566cbb06ab..df2796262473 100644
--- a/tools/gator/daemon/events-Mali-T6xx_hw.xml
+++ b/tools/gator/daemon/events-Mali-T6xx_hw.xml
@@ -1,35 +1,27 @@
-
-  <category name="Mali-T6xx-JobManager" per_cpu="no">
-
+  <category name="Mali-T6xx Job Manager" per_cpu="no">
     <event counter="ARM_Mali-T6xx_GPU_ACTIVE" title="Mali Job Manager Cycles" name="GPU cycles" description="Number of cycles the GPU was active"/>
     <event counter="ARM_Mali-T6xx_IRQ_ACTIVE" title="Mali Job Manager Cycles" name="IRQ cycles" description="Number of cycles the GPU had a pending interrupt"/>
     <event counter="ARM_Mali-T6xx_JS0_ACTIVE" title="Mali Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) was active"/>
     <event counter="ARM_Mali-T6xx_JS1_ACTIVE" title="Mali Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) was active"/>
     <event counter="ARM_Mali-T6xx_JS2_ACTIVE" title="Mali Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) was active"/>
-
     <event counter="ARM_Mali-T6xx_JS0_JOBS" title="Mali Job Manager Work" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0"/>
     <event counter="ARM_Mali-T6xx_JS0_TASKS" title="Mali Job Manager Work" name="JS0 tasks" description="Number of Tasks completed in JS0"/>
     <event counter="ARM_Mali-T6xx_JS1_JOBS" title="Mali Job Manager Work" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1"/>
     <event counter="ARM_Mali-T6xx_JS1_TASKS" title="Mali Job Manager Work" name="JS1 tasks" description="Number of Tasks completed in JS1"/>
     <event counter="ARM_Mali-T6xx_JS2_TASKS" title="Mali Job Manager Work" name="JS2 tasks" description="Number of Tasks completed in JS2"/>
     <event counter="ARM_Mali-T6xx_JS2_JOBS" title="Mali Job Manager Work" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2"/>
-
   </category>
-
-  <category name="Mali-T6xx-Tiler" per_cpu="no">
-
+  <category name="Mali-T6xx Tiler" per_cpu="no">
     <event counter="ARM_Mali-T6xx_POLYGONS" title="Mali Tiler Primitives" name="Polygons" description="Number of polygons processed"/>
     <event counter="ARM_Mali-T6xx_QUADS" title="Mali Tiler Primitives" name="Quads" description="Number of quads processed"/>
     <event counter="ARM_Mali-T6xx_TRIANGLES" title="Mali Tiler Primitives" name="Triangles" description="Number of triangles processed"/>
     <event counter="ARM_Mali-T6xx_LINES" title="Mali Tiler Primitives" name="Lines" description="Number of lines processed"/>
     <event counter="ARM_Mali-T6xx_POINTS" title="Mali Tiler Primitives" name="Points" description="Number of points processed"/>
-
     <event counter="ARM_Mali-T6xx_FRONT_FACING" title="Mali Tiler Culling" name="Front facing prims" description="Number of front facing primitives"/>
     <event counter="ARM_Mali-T6xx_BACK_FACING" title="Mali Tiler Culling" name="Back facing prims" description="Number of back facing primitives"/>
     <event counter="ARM_Mali-T6xx_PRIM_VISIBLE" title="Mali Tiler Culling" name="Visible prims" description="Number of visible primitives"/>
     <event counter="ARM_Mali-T6xx_PRIM_CULLED" title="Mali Tiler Culling" name="Culled prims" description="Number of culled primitives"/>
     <event counter="ARM_Mali-T6xx_PRIM_CLIPPED" title="Mali Tiler Culling" name="Clipped prims" description="Number of clipped primitives"/>
-
     <event counter="ARM_Mali-T6xx_LEVEL0" title="Mali Tiler Hierarchy" name="L0 prims" description="Number of primitives in hierarchy level 0"/>
     <event counter="ARM_Mali-T6xx_LEVEL1" title="Mali Tiler Hierarchy" name="L1 prims" description="Number of primitives in hierarchy level 1"/>
     <event counter="ARM_Mali-T6xx_LEVEL2" title="Mali Tiler Hierarchy" name="L2 prims" description="Number of primitives in hierarchy level 2"/>
@@ -38,7 +30,6 @@
     <event counter="ARM_Mali-T6xx_LEVEL5" title="Mali Tiler Hierarchy" name="L5 prims" description="Number of primitives in hierarchy level 5"/>
     <event counter="ARM_Mali-T6xx_LEVEL6" title="Mali Tiler Hierarchy" name="L6 prims" description="Number of primitives in hierarchy level 6"/>
     <event counter="ARM_Mali-T6xx_LEVEL7" title="Mali Tiler Hierarchy" name="L7 prims" description="Number of primitives in hierarchy level 7"/>
-
     <event counter="ARM_Mali-T6xx_COMMAND_1" title="Mali Tiler Commands" name="Prims in 1 command" description="Number of primitives producing 1 command"/>
     <event counter="ARM_Mali-T6xx_COMMAND_2" title="Mali Tiler Commands" name="Prims in 2 command" description="Number of primitives producing 2 commands"/>
     <event counter="ARM_Mali-T6xx_COMMAND_3" title="Mali Tiler Commands" name="Prims in 3 command" description="Number of primitives producing 3 commands"/>
@@ -48,48 +39,36 @@
     <event counter="ARM_Mali-T6xx_COMMAND_8_15" title="Mali Tiler Commands" name="Prims in 8-15 commands" description="Number of primitives producing 8-15 commands"/>
     <event counter="ARM_Mali-T6xx_COMMAND_16_63" title="Mali Tiler Commands" name="Prims in 16-63 commands" description="Number of primitives producing 16-63 commands"/>
     <event counter="ARM_Mali-T6xx_COMMAND_64" title="Mali Tiler Commands" name="Prims in &gt;= 64 commands" description="Number of primitives producing &gt;= 64 commands"/>
-
   </category>
-
-  <category name="Mali-T6xx-ShaderCore" per_cpu="no">
-
+  <category name="Mali-T6xx Shader Core" per_cpu="no">
     <event counter="ARM_Mali-T6xx_TRIPIPE_ACTIVE" title="Mali Core Cycles" name="Tripipe cycles" description="Number of cycles the Tripipe was active"/>
     <event counter="ARM_Mali-T6xx_FRAG_ACTIVE" title="Mali Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active"/>
     <event counter="ARM_Mali-T6xx_COMPUTE_ACTIVE" title="Mali Core Cycles" name="Compute cycles" description="Number of cycles vertex\compute processing was active"/>
     <event counter="ARM_Mali-T6xx_FRAG_CYCLE_NO_TILE" title="Mali Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer"/>
-
     <event counter="ARM_Mali-T6xx_FRAG_THREADS" title="Mali Core Threads" name="Fragment threads" description="Number of fragment threads started"/>
     <event counter="ARM_Mali-T6xx_FRAG_DUMMY_THREADS" title="Mali Core Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started"/>
     <event counter="ARM_Mali-T6xx_FRAG_QUADS_LZS_TEST" title="Mali Core Threads" name="Frag threads doing late ZS" description="Number of threads doing late ZS test"/>
     <event counter="ARM_Mali-T6xx_FRAG_QUADS_LZS_KILLED" title="Mali Core Threads" name="Frag threads killed late ZS" description="Number of threads killed by late ZS test"/>
     <event counter="ARM_Mali-T6xx_FRAG_THREADS_LZS_TEST" title="Mali Core Threads" name="Frag threads doing late ZS" description="Number of threads doing late ZS test"/>
     <event counter="ARM_Mali-T6xx_FRAG_THREADS_LZS_KILLED" title="Mali Core Threads" name="Frag threads killed late ZS" description="Number of threads killed by late ZS test"/>
-
     <event counter="ARM_Mali-T6xx_COMPUTE_TASKS" title="Mali Compute Threads" name="Compute tasks" description="Number of compute tasks"/>
     <event counter="ARM_Mali-T6xx_COMPUTE_THREADS" title="Mali Compute Threads" name="Compute threads started" description="Number of compute threads started"/>
     <event counter="ARM_Mali-T6xx_COMPUTE_CYCLES_DESC" title="Mali Compute Threads" name="Compute cycles awaiting descriptors" description="Number of compute cycles spent waiting for descriptors"/>
-
     <event counter="ARM_Mali-T6xx_FRAG_PRIMATIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
     <event counter="ARM_Mali-T6xx_FRAG_PRIMATIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
     <event counter="ARM_Mali-T6xx_FRAG_PRIMITIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
     <event counter="ARM_Mali-T6xx_FRAG_PRIMITIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
-
     <event counter="ARM_Mali-T6xx_FRAG_QUADS_RAST" title="Mali Fragment Quads" name="Quads rasterized" description="Number of quads rasterized"/>
     <event counter="ARM_Mali-T6xx_FRAG_QUADS_EZS_TEST" title="Mali Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test"/>
     <event counter="ARM_Mali-T6xx_FRAG_QUADS_EZS_KILLED" title="Mali Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test"/>
-
     <event counter="ARM_Mali-T6xx_FRAG_NUM_TILES" title="Mali Fragment Tasks" name="Tiles rendered" description="Number of tiles rendered"/>
     <event counter="ARM_Mali-T6xx_FRAG_TRANS_ELIM" title="Mali Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination"/>
-
     <event counter="ARM_Mali-T6xx_ARITH_WORDS" title="Mali Arithmetic Pipe" name="A instructions" description="Number of instructions completed by the the A-pipe (normalized per pipeline)"/>
-
     <event counter="ARM_Mali-T6xx_LS_WORDS" title="Mali Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe"/>
     <event counter="ARM_Mali-T6xx_LS_ISSUES" title="Mali Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts"/>
-
     <event counter="ARM_Mali-T6xx_TEX_WORDS" title="Mali Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe"/>
     <event counter="ARM_Mali-T6xx_TEX_THREADS" title="Mali Texture Pipe" name="T instruction issues" description="Number of instructions issused to the T-pipe, including restarts"/>
     <event counter="ARM_Mali-T6xx_TEX_RECIRC_FMISS" title="Mali Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe, recirculated due to cache miss"/>
-
     <event counter="ARM_Mali-T6xx_LSC_READ_HITS" title="Mali Load/Store Cache" name="Read hits" description="Number of read hits in the Load/Store cache"/>
     <event counter="ARM_Mali-T6xx_LSC_READ_MISSES" title="Mali Load/Store Cache" name="Read misses" description="Number of read misses in the Load/Store cache"/>
     <event counter="ARM_Mali-T6xx_LSC_WRITE_HITS" title="Mali Load/Store Cache" name="Write hits" description="Number of write hits in the Load/Store cache"/>
@@ -99,11 +78,8 @@
     <event counter="ARM_Mali-T6xx_LSC_LINE_FETCHES" title="Mali Load/Store Cache" name="Line fetches" description="Number of line fetches in the Load/Store cache"/>
     <event counter="ARM_Mali-T6xx_LSC_DIRTY_LINE" title="Mali Load/Store Cache" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache"/>
     <event counter="ARM_Mali-T6xx_LSC_SNOOPS" title="Mali Load/Store Cache" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache"/>
-
   </category>
-
-  <category name="Mali-T6xx-L2AndMMU" per_cpu="no">
-
+  <category name="Mali-T6xx L2 and MMU" per_cpu="no">
     <event counter="ARM_Mali-T6xx_L2_WRITE_BEATS" title="Mali L2 Cache" name="External write beats" description="Number of external bus write beats"/>
     <event counter="ARM_Mali-T6xx_L2_READ_BEATS" title="Mali L2 Cache" name="External read beats" description="Number of external bus read beats"/>
     <event counter="ARM_Mali-T6xx_L2_READ_SNOOP" title="Mali L2 Cache" name="Read snoops" description="Number of read transaction snoops"/>
@@ -112,5 +88,4 @@
     <event counter="ARM_Mali-T6xx_L2_WRITE_HIT" title="Mali L2 Cache" name="L2 write hits" description="Number of writes hitting in the L2 cache"/>
     <event counter="ARM_Mali-T6xx_L2_EXT_AR_STALL" title="Mali L2 Cache" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect"/>
     <event counter="ARM_Mali-T6xx_L2_EXT_W_STALL" title="Mali L2 Cache" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect"/>
-
   </category>
diff --git a/tools/gator/daemon/events-Mali-V500.xml b/tools/gator/daemon/events-Mali-V500.xml
new file mode 100644
index 000000000000..d2751e7239b5
--- /dev/null
+++ b/tools/gator/daemon/events-Mali-V500.xml
@@ -0,0 +1,29 @@
+  <category name="Mali-V500">
+    <event counter="ARM_Mali-V500_cnt0" title="Mali Video Engine" name="Samples" class="absolute" description="The number of times we have taken a sample"/>
+    <event counter="ARM_Mali-V500_cnt1" title="Mali Video Engine" name="Queued input-buffers" class="absolute" description="The number of input-buffers that has been queued for consumption by the MVE"/>
+    <event counter="ARM_Mali-V500_cnt2" title="Mali Video Engine" name="Consumed input-buffers" class="absolute" description="The number of input-buffers that has been consumed by the MVE and returned to the application"/>
+    <event counter="ARM_Mali-V500_cnt3" title="Mali Video Engine" name="Queued output-buffers" class="absolute" description="The number of output-buffers that has been queued for usage by the MVE"/>
+    <event counter="ARM_Mali-V500_cnt4" title="Mali Video Engine" name="Consumed output-buffers" class="absolute" description="The number of output-buffers that has been consumed by the MVE and returned to the application"/>
+    <event counter="ARM_Mali-V500_cnt5" title="Mali Video Engine" name="Created Sessions" class="absolute" description="The number of created sessions throughout the lifetime of the process"/>
+    <event counter="ARM_Mali-V500_cnt6" title="Mali Video Engine" name="Active Sessions" description="The number of currently existing sessions"/>
+    <event counter="ARM_Mali-V500_cnt7" title="Mali Video Engine" name="Processed Frames" class="absolute" description="The number of processed frames. A processed frame is one where the encode or decode is complete for that particular frame. Frames can be processed out of order so this is not the same as the number of output-buffers returned"/>
+    <event counter="ARM_Mali-V500_cnt8" title="Mali Video Engine" name="Input Flushes Requested" class="absolute" description="The number of requested flushes of the input queue"/>
+    <event counter="ARM_Mali-V500_cnt9" title="Mali Video Engine" name="Input Flushes Complete" class="absolute" description="The number of completed flushes of the input queue"/>
+    <event counter="ARM_Mali-V500_cnt10" title="Mali Video Engine" name="Output Flushes Requested" class="absolute" description="The number of requested flushes of the output queue"/>
+    <event counter="ARM_Mali-V500_cnt11" title="Mali Video Engine" name="Output Flushes Complete" class="absolute" description="The number of completed flushes of the output queue"/>
+    <event counter="ARM_Mali-V500_cnt12" title="Mali Video Engine" name="Queued Output Buffers (current)" description="The number of output-buffers that are currently queued for usage by the MVE"/>
+    <event counter="ARM_Mali-V500_cnt13" title="Mali Video Engine" name="Queued Input Buffers (current)" description="The number of input-buffers that are currently queued for consumption by the MVE"/>
+    <event counter="ARM_Mali-V500_cnt14" title="Mali Video Engine" name="Output Queue Flushes" description="The number of pending flushes for the MVE output-queue"/>
+    <event counter="ARM_Mali-V500_cnt15" title="Mali Video Engine" name="Input Queue Flushes" description="The number of pending flushes for the MVE input-queue"/>
+    <event counter="ARM_Mali-V500_cnt16" title="Mali Video Engine" name="Errors encountered" class="absolute" description="The number of errors encountered"/>
+    <event counter="ARM_Mali-V500_cnt17" title="Mali Video Engine" name="Bits consumed" class="absolute" description="The number of bits consumed during decode"/>
+    <event counter="ARM_Mali-V500_cnt18" title="Mali Video Engine" name="AFBC bandwidth" class="absolute" description="The amount of AFBC-encoded bytes read or written"/>
+    <event counter="ARM_Mali-V500_cnt19" title="Mali Video Engine" name="Bandwidth (read)" class="absolute" description="The amount of bytes read over the AXI bus"/>
+    <event counter="ARM_Mali-V500_cnt20" title="Mali Video Engine" name="Bandwidth (write)" class="absolute" description="The amount of bytes written over the AXI bus"/>
+    <event counter="ARM_Mali-V500_evn0" title="Mali Video Engine" name="Session created" description="Generated when a session has been created"/>
+    <event counter="ARM_Mali-V500_evn1" title="Mali Video Engine" name="Session destroyed" description="Generated when a session has been destroyed"/>
+    <event counter="ARM_Mali-V500_evn2" title="Mali Video Engine" name="Frame Processed" description="Generated when the MVE has finished processing a frame"/>
+    <event counter="ARM_Mali-V500_evn3" title="Mali Video Engine" name="Output buffer received" description="Generated when an output buffer is returned to us from the MVE"/>
+    <event counter="ARM_Mali-V500_evn4" title="Mali Video Engine" name="Input buffer received" description="Generated when an input buffer is returned to us from the MVE"/>
+    <!--event counter="ARM_Mali-V500_act" title="VPU" name="Activity" class="activity" activity1="Parsed" activity_color1="0x000000ff" activity2="Piped" activity_color2="0x0000ff00" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" cores="8" description="Mali-V500 Activity"/-->
+  </category>
diff --git a/tools/gator/daemon/main.cpp b/tools/gator/daemon/main.cpp
index 1275aef1cb79..2998c7012221 100644
--- a/tools/gator/daemon/main.cpp
+++ b/tools/gator/daemon/main.cpp
@@ -20,8 +20,10 @@
 #include <unistd.h>
 
 #include "Child.h"
+#include "EventsXML.h"
 #include "KMod.h"
 #include "Logging.h"
+#include "Monitor.h"
 #include "OlySocket.h"
 #include "OlyUtility.h"
 #include "SessionData.h"
@@ -31,8 +33,9 @@
 extern Child* child;
 static int shutdownFilesystem();
 static pthread_mutex_t numSessions_mutex;
-static int numSessions = 0;
 static OlyServerSocket* sock = NULL;
+static Monitor monitor;
+static int numSessions = 0;
 static bool driverRunningAtStart = false;
 static bool driverMountedAtStart = false;
 
@@ -102,42 +105,8 @@ static void child_exit(int) {
 	}
 }
 
-static int udpPort(int port) {
-	int s;
-	struct sockaddr_in6 sockaddr;
-	int on;
-	int family = AF_INET6;
-
-	s = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP);
-	if (s == -1) {
-		family = AF_INET;
-		s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
-		if (s == -1) {
-			logg->logError(__FILE__, __LINE__, "socket failed");
-			handleException();
-		}
-	}
-
-	on = 1;
-	if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (const char*)&on, sizeof(on)) != 0) {
-		logg->logError(__FILE__, __LINE__, "setsockopt failed");
-		handleException();
-	}
-
-	memset((void*)&sockaddr, 0, sizeof(sockaddr));
-	sockaddr.sin6_family = family;
-	sockaddr.sin6_port = htons(port);
-	sockaddr.sin6_addr = in6addr_any;
-	if (bind(s, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) < 0) {
-		logg->logError(__FILE__, __LINE__, "socket failed");
-		handleException();
-	}
-
-	return s;
-}
-
-#define UDP_ANS_PORT 30000
-#define UDP_REQ_PORT 30001
+static const int UDP_ANS_PORT = 30000;
+static const int UDP_REQ_PORT = 30001;
 
 typedef struct {
 	char rviHeader[8];
@@ -149,50 +118,102 @@ typedef struct {
 	uint32_t ipAddress;
 	uint32_t defaultGateway;
 	uint32_t subnetMask;
-	uint32_t activeConnections; 
+	uint32_t activeConnections;
 } RVIConfigureInfo;
 
 static const char DST_REQ[] = { 'D', 'S', 'T', '_', 'R', 'E', 'Q', ' ', 0, 0, 0, 0x64 };
 
-static void* answerThread(void* pVoid) {
-	prctl(PR_SET_NAME, (unsigned long)&"gatord-discover", 0, 0, 0);
-	const struct cmdline_t * const cmdline = (struct cmdline_t *)pVoid;
-	RVIConfigureInfo dstAns;
-	int req = udpPort(UDP_REQ_PORT);
-	int ans = udpPort(UDP_ANS_PORT);
+class UdpListener {
+public:
+	UdpListener() : mDstAns(), mReq(-1), mAns(-1) {}
 
-	// Format the answer buffer
-	memset(&dstAns, 0, sizeof(dstAns));
-	memcpy(dstAns.rviHeader, "STR_ANS ", sizeof(dstAns.rviHeader));
-	if (gethostname(dstAns.dhcpName, sizeof(dstAns.dhcpName) - 1) != 0) {
-		logg->logError(__FILE__, __LINE__, "gethostname failed");
-		handleException();
-	}
-	// Subvert the defaultGateway field for the port number
-	if (cmdline->port != DEFAULT_PORT) {
-		dstAns.defaultGateway = cmdline->port;
-	}
-	// Subvert the subnetMask field for the protocol version
-	dstAns.subnetMask = PROTOCOL_VERSION;
+	void setup(int port) {
+		mReq = udpPort(UDP_REQ_PORT);
+		mAns = udpPort(UDP_ANS_PORT);
 
-	for (;;) {
+		// Format the answer buffer
+		memset(&mDstAns, 0, sizeof(mDstAns));
+		memcpy(mDstAns.rviHeader, "STR_ANS ", sizeof(mDstAns.rviHeader));
+		if (gethostname(mDstAns.dhcpName, sizeof(mDstAns.dhcpName) - 1) != 0) {
+			logg->logError(__FILE__, __LINE__, "gethostname failed");
+			handleException();
+		}
+		// Subvert the defaultGateway field for the port number
+		if (port != DEFAULT_PORT) {
+			mDstAns.defaultGateway = port;
+		}
+		// Subvert the subnetMask field for the protocol version
+		mDstAns.subnetMask = PROTOCOL_VERSION;
+	}
+
+	int getReq() const {
+		return mReq;
+	}
+
+	void handle() {
 		char buf[128];
 		struct sockaddr_in6 sockaddr;
 		socklen_t addrlen;
 		int read;
 		addrlen = sizeof(sockaddr);
-		read = recvfrom(req, &buf, sizeof(buf), 0, (struct sockaddr *)&sockaddr, &addrlen);
+		read = recvfrom(mReq, &buf, sizeof(buf), 0, (struct sockaddr *)&sockaddr, &addrlen);
 		if (read < 0) {
 			logg->logError(__FILE__, __LINE__, "recvfrom failed");
 			handleException();
 		} else if ((read == 12) && (memcmp(buf, DST_REQ, sizeof(DST_REQ)) == 0)) {
-			if (sendto(ans, &dstAns, sizeof(dstAns), 0, (struct sockaddr *)&sockaddr, addrlen) != sizeof(dstAns)) {
+			if (sendto(mAns, &mDstAns, sizeof(mDstAns), 0, (struct sockaddr *)&sockaddr, addrlen) != sizeof(mDstAns)) {
 				logg->logError(__FILE__, __LINE__, "sendto failed");
 				handleException();
 			}
 		}
 	}
-}
+
+	void close() {
+		::close(mReq);
+		::close(mAns);
+	}
+
+private:
+	int udpPort(int port) {
+		int s;
+		struct sockaddr_in6 sockaddr;
+		int on;
+		int family = AF_INET6;
+
+		s = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP);
+		if (s == -1) {
+			family = AF_INET;
+			s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+			if (s == -1) {
+				logg->logError(__FILE__, __LINE__, "socket failed");
+				handleException();
+			}
+		}
+
+		on = 1;
+		if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (const char*)&on, sizeof(on)) != 0) {
+			logg->logError(__FILE__, __LINE__, "setsockopt failed");
+			handleException();
+		}
+
+		memset((void*)&sockaddr, 0, sizeof(sockaddr));
+		sockaddr.sin6_family = family;
+		sockaddr.sin6_port = htons(port);
+		sockaddr.sin6_addr = in6addr_any;
+		if (bind(s, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) < 0) {
+			logg->logError(__FILE__, __LINE__, "socket failed");
+			handleException();
+		}
+
+		return s;
+	}
+
+	RVIConfigureInfo mDstAns;
+	int mReq;
+	int mAns;
+};
+
+static UdpListener udpListener;
 
 // retval: -1 = failure; 0 = was already mounted; 1 = successfully mounted
 static int mountGatorFS() {
@@ -218,7 +239,7 @@ static bool init_module (const char * const location) {
 		if (fstat(fd, &st) == 0) {
 			void * const p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
 			if (p != MAP_FAILED) {
-				if (syscall(__NR_init_module, p, st.st_size, "") == 0) {
+				if (syscall(__NR_init_module, p, (unsigned long)st.st_size, "") == 0) {
 					ret = true;
 				}
 				munmap(p, st.st_size);
@@ -264,8 +285,14 @@ static bool setupFilesystem(char* module) {
 		}
 
 		if (access(location, F_OK) == -1) {
-			// The gator kernel is not already loaded and unable to locate gator.ko
-			return false;
+			if (module == NULL) {
+				// The gator kernel is not already loaded and unable to locate gator.ko in the default location
+				return false;
+			} else {
+				// gator location specified on the command line but it was not found
+				logg->logError(__FILE__, __LINE__, "gator module not found at %s", location);
+				handleException();
+			}
 		}
 
 		// Load driver
@@ -380,6 +407,45 @@ static struct cmdline_t parseCommandLine(int argc, char** argv) {
 	return cmdline;
 }
 
+void handleClient() { // Accept one connection from the server socket and fork a Child process to serve it
+	OlySocket client(sock->acceptConnection());
+
+	int pid = fork();
+	if (pid < 0) {
+		// Error: fork failed, so no child exists; the failure is only logged here
+		logg->logError(__FILE__, __LINE__, "Fork process failed. Please power cycle the target device if this error persists.");
+	} else if (pid == 0) {
+		// Child: close the server socket, UDP listener and monitor, run the session, then exit
+		sock->closeServerSocket();
+		udpListener.close();
+		monitor.close();
+		child = new Child(&client, numSessions + 1);
+		child->run();
+		delete child;
+		exit(0);
+	} else {
+		// Parent: close this process's client socket and count the new session
+		client.closeSocket();
+
+		pthread_mutex_lock(&numSessions_mutex);
+		numSessions++;
+		pthread_mutex_unlock(&numSessions_mutex);
+
+		// Maximum number of connections is 2
+		int wait = 0;
+		while (numSessions > 1) {
+			// Throttle until one of the children exits before accepting another connection (presumably numSessions is decremented elsewhere on child exit - confirm)
+			logg->logMessage("%d sessions active!", numSessions);
+			if (wait++ >= 10) { // Wait no more than 10 seconds
+				// Kill last created child (the one forked above) by sending it SIGALRM
+				kill(pid, SIGALRM);
+				break;
+			}
+			sleep(1);
+		}
+	}
+}
+
 // Gator data flow: collector -> collector fifo -> sender
 int main(int argc, char** argv) {
 	// Ensure proper signal handling by making gatord the process group leader
@@ -420,16 +486,23 @@ int main(int argc, char** argv) {
 		logg->logMessage("Unable to setup gatorfs, trying perf");
 		if (!gSessionData->perf.setup()) {
 			logg->logError(__FILE__, __LINE__,
-										 "Unable to locate gator.ko driver:\n"
-										 "  >>> gator.ko should be co-located with gatord in the same directory\n"
-										 "  >>> OR insmod gator.ko prior to launching gatord\n"
-										 "  >>> OR specify the location of gator.ko on the command line\n"
-										 "  >>> OR run Linux 3.12 or later with perf support to collect data via userspace only");
+				       "Unable to locate gator.ko driver:\n"
+				       "  >>> gator.ko should be co-located with gatord in the same directory\n"
+				       "  >>> OR insmod gator.ko prior to launching gatord\n"
+				       "  >>> OR specify the location of gator.ko on the command line\n"
+				       "  >>> OR run Linux 3.4 or later with perf (CONFIG_PERF_EVENTS and CONFIG_HW_PERF_EVENTS) and tracing (CONFIG_TRACING) support to collect data via userspace only");
 			handleException();
 		}
 	}
 
 	gSessionData->hwmon.setup();
+	{
+		EventsXML eventsXML;
+		mxml_node_t *xml = eventsXML.getTree();
+		gSessionData->fsDriver.setup(xml);
+		gSessionData->maliVideo.setup(xml);
+		mxmlDelete(xml);
+	}
 
 	// Handle child exit codes
 	signal(SIGCHLD, child_exit);
@@ -444,47 +517,26 @@ int main(int argc, char** argv) {
 		child->run();
 		delete child;
 	} else {
-		pthread_t answerThreadID;
-		if (pthread_create(&answerThreadID, NULL, answerThread, &cmdline)) {
-			logg->logError(__FILE__, __LINE__, "Failed to create answer thread");
+		sock = new OlyServerSocket(cmdline.port);
+		udpListener.setup(cmdline.port);
+		if (!monitor.init() || !monitor.add(sock->getFd()) || !monitor.add(udpListener.getReq())) {
+			logg->logError(__FILE__, __LINE__, "Monitor setup failed");
 			handleException();
 		}
-		sock = new OlyServerSocket(cmdline.port);
 		// Forever loop, can be exited via a signal or exception
 		while (1) {
+			struct epoll_event events[2];
 			logg->logMessage("Waiting on connection...");
-			OlySocket client(sock->acceptConnection());
-
-			int pid = fork();
-			if (pid < 0) {
-				// Error
-				logg->logError(__FILE__, __LINE__, "Fork process failed. Please power cycle the target device if this error persists.");
-			} else if (pid == 0) {
-				// Child
-				sock->closeServerSocket();
-				child = new Child(&client, numSessions + 1);
-				child->run();
-				delete child;
-				exit(0);
-			} else {
-				// Parent
-				client.closeSocket();
-
-				pthread_mutex_lock(&numSessions_mutex);
-				numSessions++;
-				pthread_mutex_unlock(&numSessions_mutex);
-
-				// Maximum number of connections is 2
-				int wait = 0;
-				while (numSessions > 1) {
-					// Throttle until one of the children exits before continuing to accept another socket connection
-					logg->logMessage("%d sessions active!", numSessions);
-					if (wait++ >= 10) { // Wait no more than 10 seconds
-						// Kill last created child
-						kill(pid, SIGALRM);
-						break;
-					}
-					sleep(1);
+			int ready = monitor.wait(events, ARRAY_LENGTH(events), -1);
+			if (ready < 0) {
+				logg->logError(__FILE__, __LINE__, "Monitor::wait failed");
+				handleException();
+			}
+			for (int i = 0; i < ready; ++i) {
+				if (events[i].data.fd == sock->getFd()) {
+					handleClient();
+				} else if (events[i].data.fd == udpListener.getReq()) {
+					udpListener.handle();
 				}
 			}
 		}

From 3e8c107b31cfdc780b280180443c83e3e21727d5 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@linaro.org>
Date: Thu, 10 May 2012 17:35:03 +0100
Subject: [PATCH 0111/1185] gator: Add config for building the module in-tree

Signed-off-by: Jon Medhurst <tixy@linaro.org>
---
 drivers/Kconfig  | 2 ++
 drivers/Makefile | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/Kconfig b/drivers/Kconfig
index 9953a42809ec..d27feb5460f3 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -166,4 +166,6 @@ source "drivers/ipack/Kconfig"
 
 source "drivers/reset/Kconfig"
 
+source "drivers/gator/Kconfig"
+
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 130abc1dfd65..092a62e79688 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -152,3 +152,5 @@ obj-$(CONFIG_IIO)		+= iio/
 obj-$(CONFIG_VME_BUS)		+= vme/
 obj-$(CONFIG_IPACK_BUS)		+= ipack/
 obj-$(CONFIG_NTB)		+= ntb/
+
+obj-$(CONFIG_GATOR)		+= gator/

From 4d5b67b4724e43c79ec8a12fab7abf2eb97ec267 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Tue, 5 Aug 2014 14:16:56 +0100
Subject: [PATCH 0112/1185] configs: Enable BLK_DEV_LOOP everywhere for LTP

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 linaro/configs/linaro-base.conf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/linaro/configs/linaro-base.conf b/linaro/configs/linaro-base.conf
index 1cdf0ba23e24..eb9fe266ca04 100644
--- a/linaro/configs/linaro-base.conf
+++ b/linaro/configs/linaro-base.conf
@@ -113,3 +113,4 @@ CONFIG_SECURITY_NETWORK=y
 CONFIG_LSM_MMAP_MIN_ADDR=4096
 CONFIG_SECURITY_SELINUX=y
 CONFIG_EXT4_FS_SECURITY=y
+CONFIG_BLK_DEV_LOOP=y

From d0d4907e3550caf434e2ffa862676c98d9975b7f Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 4 Feb 2014 12:29:12 +0000
Subject: [PATCH 0113/1185] arm64: atomics: fix use of acquire + release for
 full barrier semantics

Linux requires a number of atomic operations to provide full barrier
semantics, that is no memory accesses after the operation can be
observed before any accesses up to and including the operation in
program order.

On arm64, these operations have been incorrectly implemented as follows:

	// A, B, C are independent memory locations

	<Access [A]>

	// atomic_op (B)
1:	ldaxr	x0, [B]		// Exclusive load with acquire
	<op(B)>
	stlxr	w1, x0, [B]	// Exclusive store with release
	cbnz	w1, 1b

	<Access [C]>

The assumption here being that two half barriers are equivalent to a
full barrier, so the only permitted ordering would be A -> B -> C
(where B is the atomic operation involving both a load and a store).

Unfortunately, this is not the case by the letter of the architecture
and, in fact, the accesses to A and C are permitted to pass their
nearest half barrier resulting in orderings such as Bl -> A -> C -> Bs
or Bl -> C -> A -> Bs (where Bl is the load-acquire on B and Bs is the
store-release on B). This is a clear violation of the full barrier
requirement.

The simple way to fix this is to implement the same algorithm as ARMv7
using explicit barriers:

	<Access [A]>

	// atomic_op (B)
	dmb	ish		// Full barrier
1:	ldxr	x0, [B]		// Exclusive load
	<op(B)>
	stxr	w1, x0, [B]	// Exclusive store
	cbnz	w1, 1b
	dmb	ish		// Full barrier

	<Access [C]>

but this has the undesirable effect of introducing *two* full barrier
instructions. A better approach is actually the following, non-intuitive
sequence:

	<Access [A]>

	// atomic_op (B)
1:	ldxr	x0, [B]		// Exclusive load
	<op(B)>
	stlxr	w1, x0, [B]	// Exclusive store with release
	cbnz	w1, 1b
	dmb	ish		// Full barrier

	<Access [C]>

The simple observations here are:

  - The dmb ensures that no subsequent accesses (e.g. the access to C)
    can enter or pass the atomic sequence.

  - The dmb also ensures that no prior accesses (e.g. the access to A)
    can pass the atomic sequence.

  - Therefore, no prior access can pass a subsequent access, or
    vice-versa (i.e. A is strictly ordered before C).

  - The stlxr ensures that no prior access can pass the store component
    of the atomic operation.

The only tricky part remaining is the ordering between the ldxr and the
access to A, since the absence of the first dmb means that we're now
permitting re-ordering between the ldxr and any prior accesses.

From an (arbitrary) observer's point of view, there are two scenarios:

  1. We have observed the ldxr. This means that if we perform a store to
     [B], the ldxr will still return older data. If we can observe the
     ldxr, then we can potentially observe the permitted re-ordering
     with the access to A, which is clearly an issue when compared to
     the dmb variant of the code. Thankfully, the exclusive monitor will
     save us here since it will be cleared as a result of the store and
     the ldxr will retry. Notice that any use of a later memory
     observation to imply observation of the ldxr will also imply
     observation of the access to A, since the stlxr/dmb ensure strict
     ordering.

  2. We have not observed the ldxr. This means we can perform a store
     and influence the later ldxr. However, that doesn't actually tell
     us anything about the access to [A], so we've not lost anything
     here either when compared to the dmb variant.

This patch implements this solution for our barriered atomic operations,
ensuring that we satisfy the full barrier requirements where they are
needed.

Cc: <stable@vger.kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 8e86f0b409a44193f1587e87b69c5dcf8f65be67)
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 arch/arm64/include/asm/atomic.h  | 29 ++++++++++++++++++++---------
 arch/arm64/include/asm/cmpxchg.h |  9 +++++----
 arch/arm64/include/asm/futex.h   |  6 ++++--
 arch/arm64/kernel/kuser32.S      |  6 ++++--
 arch/arm64/lib/bitops.S          |  3 ++-
 5 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 736c5916d367..a049bf7f5150 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -63,7 +63,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
 	int result;
 
 	asm volatile("// atomic_add_return\n"
-"1:	ldaxr	%w0, %2\n"
+"1:	ldxr	%w0, %2\n"
 "	add	%w0, %w0, %w3\n"
 "	stlxr	%w1, %w0, %2\n"
 "	cbnz	%w1, 1b"
@@ -71,6 +71,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
 	: "Ir" (i)
 	: "memory");
 
+	smp_mb();
 	return result;
 }
 
@@ -94,7 +95,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 	int result;
 
 	asm volatile("// atomic_sub_return\n"
-"1:	ldaxr	%w0, %2\n"
+"1:	ldxr	%w0, %2\n"
 "	sub	%w0, %w0, %w3\n"
 "	stlxr	%w1, %w0, %2\n"
 "	cbnz	%w1, 1b"
@@ -102,6 +103,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 	: "Ir" (i)
 	: "memory");
 
+	smp_mb();
 	return result;
 }
 
@@ -110,17 +112,20 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 	unsigned long tmp;
 	int oldval;
 
+	smp_mb();
+
 	asm volatile("// atomic_cmpxchg\n"
-"1:	ldaxr	%w1, %2\n"
+"1:	ldxr	%w1, %2\n"
 "	cmp	%w1, %w3\n"
 "	b.ne	2f\n"
-"	stlxr	%w0, %w4, %2\n"
+"	stxr	%w0, %w4, %2\n"
 "	cbnz	%w0, 1b\n"
 "2:"
 	: "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter)
 	: "Ir" (old), "r" (new)
 	: "cc");
 
+	smp_mb();
 	return oldval;
 }
 
@@ -194,7 +199,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v)
 	unsigned long tmp;
 
 	asm volatile("// atomic64_add_return\n"
-"1:	ldaxr	%0, %2\n"
+"1:	ldxr	%0, %2\n"
 "	add	%0, %0, %3\n"
 "	stlxr	%w1, %0, %2\n"
 "	cbnz	%w1, 1b"
@@ -202,6 +207,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v)
 	: "Ir" (i)
 	: "memory");
 
+	smp_mb();
 	return result;
 }
 
@@ -225,7 +231,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v)
 	unsigned long tmp;
 
 	asm volatile("// atomic64_sub_return\n"
-"1:	ldaxr	%0, %2\n"
+"1:	ldxr	%0, %2\n"
 "	sub	%0, %0, %3\n"
 "	stlxr	%w1, %0, %2\n"
 "	cbnz	%w1, 1b"
@@ -233,6 +239,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v)
 	: "Ir" (i)
 	: "memory");
 
+	smp_mb();
 	return result;
 }
 
@@ -241,17 +248,20 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new)
 	long oldval;
 	unsigned long res;
 
+	smp_mb();
+
 	asm volatile("// atomic64_cmpxchg\n"
-"1:	ldaxr	%1, %2\n"
+"1:	ldxr	%1, %2\n"
 "	cmp	%1, %3\n"
 "	b.ne	2f\n"
-"	stlxr	%w0, %4, %2\n"
+"	stxr	%w0, %4, %2\n"
 "	cbnz	%w0, 1b\n"
 "2:"
 	: "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter)
 	: "Ir" (old), "r" (new)
 	: "cc");
 
+	smp_mb();
 	return oldval;
 }
 
@@ -263,11 +273,12 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	unsigned long tmp;
 
 	asm volatile("// atomic64_dec_if_positive\n"
-"1:	ldaxr	%0, %2\n"
+"1:	ldxr	%0, %2\n"
 "	subs	%0, %0, #1\n"
 "	b.mi	2f\n"
 "	stlxr	%w1, %0, %2\n"
 "	cbnz	%w1, 1b\n"
+"	dmb	ish\n"
 "2:"
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
 	:
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 0a234d0a41d0..d3d4089d9fc3 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -29,7 +29,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 	switch (size) {
 	case 1:
 		asm volatile("//	__xchg1\n"
-		"1:	ldaxrb	%w0, %2\n"
+		"1:	ldxrb	%w0, %2\n"
 		"	stlxrb	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
 			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)
@@ -38,7 +38,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 		break;
 	case 2:
 		asm volatile("//	__xchg2\n"
-		"1:	ldaxrh	%w0, %2\n"
+		"1:	ldxrh	%w0, %2\n"
 		"	stlxrh	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
 			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr)
@@ -47,7 +47,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 		break;
 	case 4:
 		asm volatile("//	__xchg4\n"
-		"1:	ldaxr	%w0, %2\n"
+		"1:	ldxr	%w0, %2\n"
 		"	stlxr	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
 			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr)
@@ -56,7 +56,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 		break;
 	case 8:
 		asm volatile("//	__xchg8\n"
-		"1:	ldaxr	%0, %2\n"
+		"1:	ldxr	%0, %2\n"
 		"	stlxr	%w1, %3, %2\n"
 		"	cbnz	%w1, 1b\n"
 			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr)
@@ -67,6 +67,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 		BUILD_BUG();
 	}
 
+	smp_mb();
 	return ret;
 }
 
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 6230baba7869..5f750dc96e0f 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -24,10 +24,11 @@
 
 #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg)		\
 	asm volatile(							\
-"1:	ldaxr	%w1, %2\n"						\
+"1:	ldxr	%w1, %2\n"						\
 	insn "\n"							\
 "2:	stlxr	%w3, %w0, %2\n"						\
 "	cbnz	%w3, 1b\n"						\
+"	dmb	ish\n"							\
 "3:\n"									\
 "	.pushsection .fixup,\"ax\"\n"					\
 "	.align	2\n"							\
@@ -111,11 +112,12 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		return -EFAULT;
 
 	asm volatile("// futex_atomic_cmpxchg_inatomic\n"
-"1:	ldaxr	%w1, %2\n"
+"1:	ldxr	%w1, %2\n"
 "	sub	%w3, %w1, %w4\n"
 "	cbnz	%w3, 3f\n"
 "2:	stlxr	%w3, %w5, %2\n"
 "	cbnz	%w3, 1b\n"
+"	dmb	ish\n"
 "3:\n"
 "	.pushsection .fixup,\"ax\"\n"
 "4:	mov	%w0, %w6\n"
diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S
index f2754710f5e9..87542397b3ac 100644
--- a/arch/arm64/kernel/kuser32.S
+++ b/arch/arm64/kernel/kuser32.S
@@ -35,12 +35,13 @@ __kuser_cmpxchg64:			// 0xffff0f60
 	.inst	0xe92d00f0		//	push		{r4, r5, r6, r7}
 	.inst	0xe1c040d0		//	ldrd		r4, r5, [r0]
 	.inst	0xe1c160d0		//	ldrd		r6, r7, [r1]
-	.inst	0xe1b20e9f		// 1:	ldaexd		r0, r1, [r2]
+	.inst	0xe1b20f9f		// 1:	ldrexd		r0, r1, [r2]
 	.inst	0xe0303004		//	eors		r3, r0, r4
 	.inst	0x00313005		//	eoreqs		r3, r1, r5
 	.inst	0x01a23e96		//	stlexdeq	r3, r6, [r2]
 	.inst	0x03330001		//	teqeq		r3, #1
 	.inst	0x0afffff9		//	beq		1b
+	.inst	0xf57ff05b		//	dmb		ish
 	.inst	0xe2730000		//	rsbs		r0, r3, #0
 	.inst	0xe8bd00f0		//	pop		{r4, r5, r6, r7}
 	.inst	0xe12fff1e		//	bx		lr
@@ -52,11 +53,12 @@ __kuser_memory_barrier:			// 0xffff0fa0
 
 	.align	5
 __kuser_cmpxchg:			// 0xffff0fc0
-	.inst	0xe1923e9f		// 1:	ldaex		r3, [r2]
+	.inst	0xe1923f9f		// 1:	ldrex		r3, [r2]
 	.inst	0xe0533000		//	subs		r3, r3, r0
 	.inst	0x01823e91		//	stlexeq		r3, r1, [r2]
 	.inst	0x03330001		//	teqeq		r3, #1
 	.inst	0x0afffffa		//	beq		1b
+	.inst	0xf57ff05b		//	dmb		ish
 	.inst	0xe2730000		//	rsbs		r0, r3, #0
 	.inst	0xe12fff1e		//	bx		lr
 
diff --git a/arch/arm64/lib/bitops.S b/arch/arm64/lib/bitops.S
index e5db797790d3..7dac371cc9a2 100644
--- a/arch/arm64/lib/bitops.S
+++ b/arch/arm64/lib/bitops.S
@@ -46,11 +46,12 @@ ENTRY(	\name	)
 	mov	x2, #1
 	add	x1, x1, x0, lsr #3	// Get word offset
 	lsl	x4, x2, x3		// Create mask
-1:	ldaxr	x2, [x1]
+1:	ldxr	x2, [x1]
 	lsr	x0, x2, x3		// Save old value of bit
 	\instr	x2, x2, x4		// toggle bit
 	stlxr	w5, x2, [x1]
 	cbnz	w5, 1b
+	dmb	ish
 	and	x0, x0, #1
 3:	ret
 ENDPROC(\name	)

From 4a6d0c804feb6f77953e6abe786fef49725faf8b Mon Sep 17 00:00:00 2001
From: Milan Broz <gmazyland@gmail.com>
Date: Tue, 29 Jul 2014 18:41:09 +0000
Subject: [PATCH 0114/1185] crypto: af_alg - properly label AF_ALG socket

commit 4c63f83c2c2e16a13ce274ee678e28246bd33645 upstream.

The AF_ALG socket was missing a security label (e.g. SELinux),
which means that the socket was in the "unlabeled" state.

This was recently demonstrated in the cryptsetup package
(cryptsetup v1.6.5 and later.)
See https://bugzilla.redhat.com/show_bug.cgi?id=1115120

This patch clones the sock's label from the parent sock
and resolves the issue (similar to AF_BLUETOOTH protocol family).

Signed-off-by: Milan Broz <gmazyland@gmail.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/af_alg.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index ac33d5f30778..bf948e134981 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/net.h>
 #include <linux/rwsem.h>
+#include <linux/security.h>
 
 struct alg_type_list {
 	const struct af_alg_type *type;
@@ -243,6 +244,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock)
 
 	sock_init_data(newsock, sk2);
 	sock_graft(sk2, newsock);
+	security_sk_clone(sk, sk2);
 
 	err = type->accept(ask->private, sk2);
 	if (err) {

From c3056d9f28d86c4f0091e7843d38e4584b51128b Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Date: Fri, 25 Jul 2014 09:17:12 +0100
Subject: [PATCH 0115/1185] ARM: 8115/1: LPAE: reduce damage caused by idmap to
 virtual memory layout

commit 811a2407a3cf7bbd027fbe92d73416f17485a3d8 upstream.

On LPAE, each level 1 (pgd) page table entry maps 1GiB, and the level 2
(pmd) entries map 2MiB.

When the identity mapping is created on LPAE, the pgd pointers are copied
from the swapper_pg_dir.  If we find that we need to modify the contents
of a pmd, we allocate a new empty pmd table and insert it into the
appropriate 1GB slot, before then filling it with the identity mapping.

However, if the 1GB slot covers the kernel lowmem mappings, we obliterate
those mappings.

When replacing a PMD, first copy the old PMD contents to the new PMD, so
that we preserve the existing mappings, particularly the mappings of the
kernel itself.

[rewrote commit message and added code comment -- rmk]

Fixes: ae2de101739c ("ARM: LPAE: Add identity mapping support for the 3-level page table format")
Signed-off-by: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mm/idmap.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index 83cb3ac27095..c61d2373408c 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -24,6 +24,13 @@ static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 			pr_warning("Failed to allocate identity pmd.\n");
 			return;
 		}
+		/*
+		 * Copy the original PMD to ensure that the PMD entries for
+		 * the kernel image are preserved.
+		 */
+		if (!pud_none(*pud))
+			memcpy(pmd, pmd_offset(pud, 0),
+			       PTRS_PER_PMD * sizeof(pmd_t));
 		pud_populate(&init_mm, pud, pmd);
 		pmd += pmd_index(addr);
 	} else

From 201c26e77bfc4064afc6a630fc336a825047be7b Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Thu, 17 Jul 2014 15:00:56 +0300
Subject: [PATCH 0116/1185] cfg80211: fix mic_failure tracing

commit 8c26d458394be44e135d1c6bd4557e1c4e1a0535 upstream.

tsc can be NULL (mac80211 currently always passes NULL),
resulting in NULL-dereference. check before copying it.

Signed-off-by: Eliad Peller <eliadx.peller@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/wireless/trace.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 5755bc14abbd..bc5a75b1aef8 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1972,7 +1972,8 @@ TRACE_EVENT(cfg80211_michael_mic_failure,
 		MAC_ASSIGN(addr, addr);
 		__entry->key_type = key_type;
 		__entry->key_id = key_id;
-		memcpy(__entry->tsc, tsc, 6);
+		if (tsc)
+			memcpy(__entry->tsc, tsc, 6);
 	),
 	TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT ", key type: %d, key id: %d, tsc: %pm",
 		  NETDEV_PR_ARG, MAC_PR_ARG(addr), __entry->key_type,

From 7bfa5bfd88fb4c11a78740ea53509cc38b896d84 Mon Sep 17 00:00:00 2001
From: Alexandre Bounine <alexandre.bounine@idt.com>
Date: Wed, 30 Jul 2014 16:08:26 -0700
Subject: [PATCH 0117/1185] rapidio/tsi721_dma: fix failure to obtain
 transaction descriptor

commit 0193ed8225e1a79ed64632106ec3cc81798cb13c upstream.

This is a bug fix for the situation when function tsi721_desc_get() fails
to obtain a free transaction descriptor.

The bug usually results in a memory access crash dump when the data transfer
scatter-gather list has more entries than the size of the hardware buffer
descriptor ring.  This fix ensures that an error is properly returned to the
caller instead of an invalid entry.

This patch is applicable to kernel versions starting from v3.5.

Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Andre van Herk <andre.van.herk@prodrive-technologies.com>
Cc: Stef van Os <stef.van.os@prodrive-technologies.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rapidio/devices/tsi721_dma.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/rapidio/devices/tsi721_dma.c b/drivers/rapidio/devices/tsi721_dma.c
index 91245f5dbe81..47257b6eea84 100644
--- a/drivers/rapidio/devices/tsi721_dma.c
+++ b/drivers/rapidio/devices/tsi721_dma.c
@@ -287,6 +287,12 @@ struct tsi721_tx_desc *tsi721_desc_get(struct tsi721_bdma_chan *bdma_chan)
 			"desc %p not ACKed\n", tx_desc);
 	}
 
+	if (ret == NULL) {
+		dev_dbg(bdma_chan->dchan.device->dev,
+			"%s: unable to obtain tx descriptor\n", __func__);
+		goto err_out;
+	}
+
 	i = bdma_chan->wr_count_next % bdma_chan->bd_num;
 	if (i == bdma_chan->bd_num - 1) {
 		i = 0;
@@ -297,7 +303,7 @@ struct tsi721_tx_desc *tsi721_desc_get(struct tsi721_bdma_chan *bdma_chan)
 	tx_desc->txd.phys = bdma_chan->bd_phys +
 				i * sizeof(struct tsi721_dma_desc);
 	tx_desc->hw_desc = &((struct tsi721_dma_desc *)bdma_chan->bd_base)[i];
-
+err_out:
 	spin_unlock_bh(&bdma_chan->lock);
 
 	return ret;

From 73e586351af10daf53b09a0cff5f05e955bbd110 Mon Sep 17 00:00:00 2001
From: James Bottomley <JBottomley@Parallels.com>
Date: Thu, 3 Jul 2014 19:17:34 +0200
Subject: [PATCH 0118/1185] scsi: handle flush errors properly

commit 89fb4cd1f717a871ef79fa7debbe840e3225cd54 upstream.

Flush commands don't transfer data and thus need to be special cased
in the I/O completion handler so that we can propagate errors to
the block layer and filesystem.

Signed-off-by: James Bottomley <JBottomley@Parallels.com>
Reported-by: Steven Haber <steven@qumulo.com>
Tested-by: Steven Haber <steven@qumulo.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/scsi_lib.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 86d522004a20..e5953c8018c5 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -815,6 +815,14 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			scsi_next_command(cmd);
 			return;
 		}
+	} else if (blk_rq_bytes(req) == 0 && result && !sense_deferred) {
+		/*
+		 * Certain non BLOCK_PC requests are commands that don't
+		 * actually transfer anything (FLUSH), so cannot use
+		 * good_bytes != blk_rq_bytes(req) as the signal for an error.
+		 * This sets the error explicitly for the problem case.
+		 */
+		error = __scsi_error_from_host_byte(cmd, result);
 	}
 
 	/* no bidi support for !REQ_TYPE_BLOCK_PC yet */

From 1144d70b3748745ff5d2a71cb2719c3eab4648dd Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 30 Jul 2014 16:08:24 -0700
Subject: [PATCH 0119/1185] mm, thp: do not allow thp faults to avoid cpuset
 restrictions

commit b104a35d32025ca740539db2808aa3385d0f30eb upstream.

The page allocator relies on __GFP_WAIT to determine if ALLOC_CPUSET
should be set in allocflags.  ALLOC_CPUSET controls if a page allocation
should be restricted only to the set of allowed cpuset mems.

Transparent hugepages clears __GFP_WAIT when defrag is disabled to prevent
the fault path from using memory compaction or direct reclaim.  Thus, it
is unfairly able to allocate outside of its cpuset mems restriction as a
side-effect.

This patch ensures that ALLOC_CPUSET is only cleared when the gfp mask is
truly GFP_ATOMIC by verifying it is also not a thp allocation.

Signed-off-by: David Rientjes <rientjes@google.com>
Reported-by: Alex Thorlton <athorlton@sgi.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Bob Liu <lliubbo@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hedi Berriche <hedi@sgi.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/page_alloc.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0ab02fb8e9b1..71305c6aba5b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2339,7 +2339,7 @@ static inline int
 gfp_to_alloc_flags(gfp_t gfp_mask)
 {
 	int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
-	const gfp_t wait = gfp_mask & __GFP_WAIT;
+	const bool atomic = !(gfp_mask & (__GFP_WAIT | __GFP_NO_KSWAPD));
 
 	/* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
 	BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
@@ -2348,20 +2348,20 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 	 * The caller may dip into page reserves a bit more if the caller
 	 * cannot run direct reclaim, or if the caller has realtime scheduling
 	 * policy or is asking for __GFP_HIGH memory.  GFP_ATOMIC requests will
-	 * set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH).
+	 * set both ALLOC_HARDER (atomic == true) and ALLOC_HIGH (__GFP_HIGH).
 	 */
 	alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
 
-	if (!wait) {
+	if (atomic) {
 		/*
-		 * Not worth trying to allocate harder for
-		 * __GFP_NOMEMALLOC even if it can't schedule.
+		 * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
+		 * if it can't schedule.
 		 */
-		if  (!(gfp_mask & __GFP_NOMEMALLOC))
+		if (!(gfp_mask & __GFP_NOMEMALLOC))
 			alloc_flags |= ALLOC_HARDER;
 		/*
-		 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
-		 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+		 * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
+		 * comment for __cpuset_node_allowed_softwall().
 		 */
 		alloc_flags &= ~ALLOC_CPUSET;
 	} else if (unlikely(rt_task(current)) && !in_interrupt())

From d20b1088e2ed58eeab3b440e30a20b01afc48590 Mon Sep 17 00:00:00 2001
From: Malcolm Priestley <tvboxspy@gmail.com>
Date: Wed, 23 Jul 2014 21:35:12 +0100
Subject: [PATCH 0120/1185] staging: vt6655: Fix disassociated messages every
 10 seconds

commit 4aa0abed3a2a11b7d71ad560c1a3e7631c5a31cd upstream.

byReAssocCount is incremented every second, resulting in a
disassociated message being sent every 10 seconds whether
connected or not.

byReAssocCount should only advance while eCommandState
is in WLAN_ASSOCIATE_WAIT

Change existing scope to if condition.

Signed-off-by: Malcolm Priestley <tvboxspy@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/vt6655/bssdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/vt6655/bssdb.c b/drivers/staging/vt6655/bssdb.c
index f983915168b7..3496a77612ba 100644
--- a/drivers/staging/vt6655/bssdb.c
+++ b/drivers/staging/vt6655/bssdb.c
@@ -1026,7 +1026,7 @@ BSSvSecondCallBack(
 		pDevice->byERPFlag &= ~(WLAN_SET_ERP_USE_PROTECTION(1));
 	}
 
-	{
+	if (pDevice->eCommandState == WLAN_ASSOCIATE_WAIT) {
 		pDevice->byReAssocCount++;
 		if ((pDevice->byReAssocCount > 10) && (pDevice->bLinkPass != true)) {  //10 sec timeout
 			printk("Re-association timeout!!!\n");

From 46e8c10748ae262b4c78800f73db4dd9d4ef04ee Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Thu, 17 Jul 2014 16:59:00 +0100
Subject: [PATCH 0121/1185] iio: buffer: Fix demux table creation

commit 61bd55ce1667809f022be88da77db17add90ea4e upstream.

When creating the demux table we need to iterate over the selected scan mask for
the buffer to get the samples which should be copied to the destination buffer.
Right now the code uses the mask which contains all active channels, which means
the demux table contains entries which cause it to copy all the samples from
the source to the destination buffer one by one without doing any demuxing.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/industrialio-buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index d344cf3ac9e3..e13c5f4b12cb 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -849,7 +849,7 @@ static int iio_buffer_update_demux(struct iio_dev *indio_dev,
 
 	/* Now we have the two masks, work from least sig and build up sizes */
 	for_each_set_bit(out_ind,
-			 indio_dev->active_scan_mask,
+			 buffer->scan_mask,
 			 indio_dev->masklength) {
 		in_ind = find_next_bit(indio_dev->active_scan_mask,
 				       indio_dev->masklength,

From 3984bb13c8f5f2f192aed228a88696d4a697a435 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 4 Jun 2014 16:11:40 -0700
Subject: [PATCH 0122/1185] printk: rename printk_sched to printk_deferred

commit aac74dc495456412c4130a1167ce4beb6c1f0b38 upstream.

After learning we'll need some sort of deferred printk functionality in
the timekeeping core, Peter suggested we rename the printk_sched function
so it can be reused by needed subsystems.

This only changes the function name. No logic changes.

Signed-off-by: John Stultz <john.stultz@linaro.org>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jiri Bohac <jbohac@suse.cz>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/printk.h | 6 +++---
 kernel/printk.c        | 2 +-
 kernel/sched/core.c    | 2 +-
 kernel/sched/rt.c      | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 22c7052e9372..708b8a84f6c0 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -124,9 +124,9 @@ asmlinkage __printf(1, 2) __cold
 int printk(const char *fmt, ...);
 
 /*
- * Special printk facility for scheduler use only, _DO_NOT_USE_ !
+ * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ !
  */
-__printf(1, 2) __cold int printk_sched(const char *fmt, ...);
+__printf(1, 2) __cold int printk_deferred(const char *fmt, ...);
 
 /*
  * Please don't use printk_ratelimit(), because it shares ratelimiting state
@@ -161,7 +161,7 @@ int printk(const char *s, ...)
 	return 0;
 }
 static inline __printf(1, 2) __cold
-int printk_sched(const char *s, ...)
+int printk_deferred(const char *s, ...)
 {
 	return 0;
 }
diff --git a/kernel/printk.c b/kernel/printk.c
index d37d45c90ae6..f7aff4bd5454 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -2485,7 +2485,7 @@ void wake_up_klogd(void)
 	preempt_enable();
 }
 
-int printk_sched(const char *fmt, ...)
+int printk_deferred(const char *fmt, ...)
 {
 	unsigned long flags;
 	va_list args;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2672eca82a2b..c771f2547bef 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1235,7 +1235,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 		 * leave kernel.
 		 */
 		if (p->mm && printk_ratelimit()) {
-			printk_sched("process %d (%s) no longer affine to cpu%d\n",
+			printk_deferred("process %d (%s) no longer affine to cpu%d\n",
 					task_pid_nr(p), p->comm, cpu);
 		}
 	}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 15334e6de832..2dffc7b5d469 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -892,7 +892,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 
 			if (!once) {
 				once = true;
-				printk_sched("sched: RT throttling activated\n");
+				printk_deferred("sched: RT throttling activated\n");
 			}
 		} else {
 			/*

From 562eebeb9c07101e49f6803fd018ac45a01e3f43 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 1 Aug 2014 12:20:02 +0200
Subject: [PATCH 0123/1185] timer: Fix lock inversion between
 hrtimer_bases.lock and scheduler locks

commit 504d58745c9ca28d33572e2d8a9990b43e06075d upstream.

clockevents_increase_min_delta() calls printk() from under
hrtimer_bases.lock. That causes lock inversion on scheduler locks because
printk() can call into the scheduler. Lockdep puts it as:

======================================================
[ INFO: possible circular locking dependency detected ]
3.15.0-rc8-06195-g939f04b #2 Not tainted
-------------------------------------------------------
trinity-main/74 is trying to acquire lock:
 (&port_lock_key){-.....}, at: [<811c60be>] serial8250_console_write+0x8c/0x10c

but task is already holding lock:
 (hrtimer_bases.lock){-.-...}, at: [<8103caeb>] hrtimer_try_to_cancel+0x13/0x66

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #5 (hrtimer_bases.lock){-.-...}:
       [<8104a942>] lock_acquire+0x92/0x101
       [<8142f11d>] _raw_spin_lock_irqsave+0x2e/0x3e
       [<8103c918>] __hrtimer_start_range_ns+0x1c/0x197
       [<8107ec20>] perf_swevent_start_hrtimer.part.41+0x7a/0x85
       [<81080792>] task_clock_event_start+0x3a/0x3f
       [<810807a4>] task_clock_event_add+0xd/0x14
       [<8108259a>] event_sched_in+0xb6/0x17a
       [<810826a2>] group_sched_in+0x44/0x122
       [<81082885>] ctx_sched_in.isra.67+0x105/0x11f
       [<810828e6>] perf_event_sched_in.isra.70+0x47/0x4b
       [<81082bf6>] __perf_install_in_context+0x8b/0xa3
       [<8107eb8e>] remote_function+0x12/0x2a
       [<8105f5af>] smp_call_function_single+0x2d/0x53
       [<8107e17d>] task_function_call+0x30/0x36
       [<8107fb82>] perf_install_in_context+0x87/0xbb
       [<810852c9>] SYSC_perf_event_open+0x5c6/0x701
       [<810856f9>] SyS_perf_event_open+0x17/0x19
       [<8142f8ee>] syscall_call+0x7/0xb

-> #4 (&ctx->lock){......}:
       [<8104a942>] lock_acquire+0x92/0x101
       [<8142f04c>] _raw_spin_lock+0x21/0x30
       [<81081df3>] __perf_event_task_sched_out+0x1dc/0x34f
       [<8142cacc>] __schedule+0x4c6/0x4cb
       [<8142cae0>] schedule+0xf/0x11
       [<8142f9a6>] work_resched+0x5/0x30

-> #3 (&rq->lock){-.-.-.}:
       [<8104a942>] lock_acquire+0x92/0x101
       [<8142f04c>] _raw_spin_lock+0x21/0x30
       [<81040873>] __task_rq_lock+0x33/0x3a
       [<8104184c>] wake_up_new_task+0x25/0xc2
       [<8102474b>] do_fork+0x15c/0x2a0
       [<810248a9>] kernel_thread+0x1a/0x1f
       [<814232a2>] rest_init+0x1a/0x10e
       [<817af949>] start_kernel+0x303/0x308
       [<817af2ab>] i386_start_kernel+0x79/0x7d

-> #2 (&p->pi_lock){-.-...}:
       [<8104a942>] lock_acquire+0x92/0x101
       [<8142f11d>] _raw_spin_lock_irqsave+0x2e/0x3e
       [<810413dd>] try_to_wake_up+0x1d/0xd6
       [<810414cd>] default_wake_function+0xb/0xd
       [<810461f3>] __wake_up_common+0x39/0x59
       [<81046346>] __wake_up+0x29/0x3b
       [<811b8733>] tty_wakeup+0x49/0x51
       [<811c3568>] uart_write_wakeup+0x17/0x19
       [<811c5dc1>] serial8250_tx_chars+0xbc/0xfb
       [<811c5f28>] serial8250_handle_irq+0x54/0x6a
       [<811c5f57>] serial8250_default_handle_irq+0x19/0x1c
       [<811c56d8>] serial8250_interrupt+0x38/0x9e
       [<810510e7>] handle_irq_event_percpu+0x5f/0x1e2
       [<81051296>] handle_irq_event+0x2c/0x43
       [<81052cee>] handle_level_irq+0x57/0x80
       [<81002a72>] handle_irq+0x46/0x5c
       [<810027df>] do_IRQ+0x32/0x89
       [<8143036e>] common_interrupt+0x2e/0x33
       [<8142f23c>] _raw_spin_unlock_irqrestore+0x3f/0x49
       [<811c25a4>] uart_start+0x2d/0x32
       [<811c2c04>] uart_write+0xc7/0xd6
       [<811bc6f6>] n_tty_write+0xb8/0x35e
       [<811b9beb>] tty_write+0x163/0x1e4
       [<811b9cd9>] redirected_tty_write+0x6d/0x75
       [<810b6ed6>] vfs_write+0x75/0xb0
       [<810b7265>] SyS_write+0x44/0x77
       [<8142f8ee>] syscall_call+0x7/0xb

-> #1 (&tty->write_wait){-.....}:
       [<8104a942>] lock_acquire+0x92/0x101
       [<8142f11d>] _raw_spin_lock_irqsave+0x2e/0x3e
       [<81046332>] __wake_up+0x15/0x3b
       [<811b8733>] tty_wakeup+0x49/0x51
       [<811c3568>] uart_write_wakeup+0x17/0x19
       [<811c5dc1>] serial8250_tx_chars+0xbc/0xfb
       [<811c5f28>] serial8250_handle_irq+0x54/0x6a
       [<811c5f57>] serial8250_default_handle_irq+0x19/0x1c
       [<811c56d8>] serial8250_interrupt+0x38/0x9e
       [<810510e7>] handle_irq_event_percpu+0x5f/0x1e2
       [<81051296>] handle_irq_event+0x2c/0x43
       [<81052cee>] handle_level_irq+0x57/0x80
       [<81002a72>] handle_irq+0x46/0x5c
       [<810027df>] do_IRQ+0x32/0x89
       [<8143036e>] common_interrupt+0x2e/0x33
       [<8142f23c>] _raw_spin_unlock_irqrestore+0x3f/0x49
       [<811c25a4>] uart_start+0x2d/0x32
       [<811c2c04>] uart_write+0xc7/0xd6
       [<811bc6f6>] n_tty_write+0xb8/0x35e
       [<811b9beb>] tty_write+0x163/0x1e4
       [<811b9cd9>] redirected_tty_write+0x6d/0x75
       [<810b6ed6>] vfs_write+0x75/0xb0
       [<810b7265>] SyS_write+0x44/0x77
       [<8142f8ee>] syscall_call+0x7/0xb

-> #0 (&port_lock_key){-.....}:
       [<8104a62d>] __lock_acquire+0x9ea/0xc6d
       [<8104a942>] lock_acquire+0x92/0x101
       [<8142f11d>] _raw_spin_lock_irqsave+0x2e/0x3e
       [<811c60be>] serial8250_console_write+0x8c/0x10c
       [<8104e402>] call_console_drivers.constprop.31+0x87/0x118
       [<8104f5d5>] console_unlock+0x1d7/0x398
       [<8104fb70>] vprintk_emit+0x3da/0x3e4
       [<81425f76>] printk+0x17/0x19
       [<8105bfa0>] clockevents_program_min_delta+0x104/0x116
       [<8105c548>] clockevents_program_event+0xe7/0xf3
       [<8105cc1c>] tick_program_event+0x1e/0x23
       [<8103c43c>] hrtimer_force_reprogram+0x88/0x8f
       [<8103c49e>] __remove_hrtimer+0x5b/0x79
       [<8103cb21>] hrtimer_try_to_cancel+0x49/0x66
       [<8103cb4b>] hrtimer_cancel+0xd/0x18
       [<8107f102>] perf_swevent_cancel_hrtimer.part.60+0x2b/0x30
       [<81080705>] task_clock_event_stop+0x20/0x64
       [<81080756>] task_clock_event_del+0xd/0xf
       [<81081350>] event_sched_out+0xab/0x11e
       [<810813e0>] group_sched_out+0x1d/0x66
       [<81081682>] ctx_sched_out+0xaf/0xbf
       [<81081e04>] __perf_event_task_sched_out+0x1ed/0x34f
       [<8142cacc>] __schedule+0x4c6/0x4cb
       [<8142cae0>] schedule+0xf/0x11
       [<8142f9a6>] work_resched+0x5/0x30

other info that might help us debug this:

Chain exists of:
  &port_lock_key --> &ctx->lock --> hrtimer_bases.lock

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(hrtimer_bases.lock);
                               lock(&ctx->lock);
                               lock(hrtimer_bases.lock);
  lock(&port_lock_key);

 *** DEADLOCK ***

4 locks held by trinity-main/74:
 #0:  (&rq->lock){-.-.-.}, at: [<8142c6f3>] __schedule+0xed/0x4cb
 #1:  (&ctx->lock){......}, at: [<81081df3>] __perf_event_task_sched_out+0x1dc/0x34f
 #2:  (hrtimer_bases.lock){-.-...}, at: [<8103caeb>] hrtimer_try_to_cancel+0x13/0x66
 #3:  (console_lock){+.+...}, at: [<8104fb5d>] vprintk_emit+0x3c7/0x3e4

stack backtrace:
CPU: 0 PID: 74 Comm: trinity-main Not tainted 3.15.0-rc8-06195-g939f04b #2
 00000000 81c3a310 8b995c14 81426f69 8b995c44 81425a99 8161f671 8161f570
 8161f538 8161f559 8161f538 8b995c78 8b142bb0 00000004 8b142fdc 8b142bb0
 8b995ca8 8104a62d 8b142fac 000016f2 81c3a310 00000001 00000001 00000003
Call Trace:
 [<81426f69>] dump_stack+0x16/0x18
 [<81425a99>] print_circular_bug+0x18f/0x19c
 [<8104a62d>] __lock_acquire+0x9ea/0xc6d
 [<8104a942>] lock_acquire+0x92/0x101
 [<811c60be>] ? serial8250_console_write+0x8c/0x10c
 [<811c6032>] ? wait_for_xmitr+0x76/0x76
 [<8142f11d>] _raw_spin_lock_irqsave+0x2e/0x3e
 [<811c60be>] ? serial8250_console_write+0x8c/0x10c
 [<811c60be>] serial8250_console_write+0x8c/0x10c
 [<8104af87>] ? lock_release+0x191/0x223
 [<811c6032>] ? wait_for_xmitr+0x76/0x76
 [<8104e402>] call_console_drivers.constprop.31+0x87/0x118
 [<8104f5d5>] console_unlock+0x1d7/0x398
 [<8104fb70>] vprintk_emit+0x3da/0x3e4
 [<81425f76>] printk+0x17/0x19
 [<8105bfa0>] clockevents_program_min_delta+0x104/0x116
 [<8105cc1c>] tick_program_event+0x1e/0x23
 [<8103c43c>] hrtimer_force_reprogram+0x88/0x8f
 [<8103c49e>] __remove_hrtimer+0x5b/0x79
 [<8103cb21>] hrtimer_try_to_cancel+0x49/0x66
 [<8103cb4b>] hrtimer_cancel+0xd/0x18
 [<8107f102>] perf_swevent_cancel_hrtimer.part.60+0x2b/0x30
 [<81080705>] task_clock_event_stop+0x20/0x64
 [<81080756>] task_clock_event_del+0xd/0xf
 [<81081350>] event_sched_out+0xab/0x11e
 [<810813e0>] group_sched_out+0x1d/0x66
 [<81081682>] ctx_sched_out+0xaf/0xbf
 [<81081e04>] __perf_event_task_sched_out+0x1ed/0x34f
 [<8104416d>] ? __dequeue_entity+0x23/0x27
 [<81044505>] ? pick_next_task_fair+0xb1/0x120
 [<8142cacc>] __schedule+0x4c6/0x4cb
 [<81047574>] ? trace_hardirqs_off_caller+0xd7/0x108
 [<810475b0>] ? trace_hardirqs_off+0xb/0xd
 [<81056346>] ? rcu_irq_exit+0x64/0x77

Fix the problem by using printk_deferred() which does not call into the
scheduler.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/time/clockevents.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 9df0e3b19f09..58e8430165b5 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -138,7 +138,8 @@ static int clockevents_increase_min_delta(struct clock_event_device *dev)
 {
 	/* Nothing to do if we already reached the limit */
 	if (dev->min_delta_ns >= MIN_DELTA_LIMIT) {
-		printk(KERN_WARNING "CE: Reprogramming failure. Giving up\n");
+		printk_deferred(KERN_WARNING
+				"CE: Reprogramming failure. Giving up\n");
 		dev->next_event.tv64 = KTIME_MAX;
 		return -ETIME;
 	}
@@ -151,9 +152,10 @@ static int clockevents_increase_min_delta(struct clock_event_device *dev)
 	if (dev->min_delta_ns > MIN_DELTA_LIMIT)
 		dev->min_delta_ns = MIN_DELTA_LIMIT;
 
-	printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n",
-	       dev->name ? dev->name : "?",
-	       (unsigned long long) dev->min_delta_ns);
+	printk_deferred(KERN_WARNING
+			"CE: %s increased min_delta_ns to %llu nsec\n",
+			dev->name ? dev->name : "?",
+			(unsigned long long) dev->min_delta_ns);
 	return 0;
 }
 

From d6b0ea8a938e4c05a00139793a405b764e63e37b Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 21 May 2014 10:22:59 -0700
Subject: [PATCH 0124/1185] Revert "x86-64, modify_ldt: Make support for 16-bit
 segments a runtime option"

commit 7ed6fb9b5a5510e4ef78ab27419184741169978a upstream.

This reverts commit fa81511bb0bbb2b1aace3695ce869da9762624ff in
preparation for merging in the proper fix (espfix64).

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/ldt.c        | 4 +---
 arch/x86/vdso/vdso32-setup.c | 8 --------
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index dcbbaa165bde..af1d14a9ebda 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -20,8 +20,6 @@
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
 
-int sysctl_ldt16 = 0;
-
 #ifdef CONFIG_SMP
 static void flush_ldt(void *current_mm)
 {
@@ -236,7 +234,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 	 * IRET leaking the high bits of the kernel stack address.
 	 */
 #ifdef CONFIG_X86_64
-	if (!ldt_info.seg_32bit && !sysctl_ldt16) {
+	if (!ldt_info.seg_32bit) {
 		error = -EINVAL;
 		goto out_unlock;
 	}
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 0f134c7cfc24..0faad646f5fd 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -41,7 +41,6 @@ enum {
 #ifdef CONFIG_X86_64
 #define vdso_enabled			sysctl_vsyscall32
 #define arch_setup_additional_pages	syscall32_setup_pages
-extern int sysctl_ldt16;
 #endif
 
 /*
@@ -381,13 +380,6 @@ static ctl_table abi_table2[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
-	{
-		.procname	= "ldt16",
-		.data		= &sysctl_ldt16,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
 	{}
 };
 

From a7b854c979859471402fe4fd275d3c584ff40f8a Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Tue, 29 Apr 2014 16:46:09 -0700
Subject: [PATCH 0125/1185] x86-64, espfix: Don't leak bits 31:16 of %esp
 returning to 16-bit stack

commit 3891a04aafd668686239349ea58f3314ea2af86b upstream.

The IRET instruction, when returning to a 16-bit segment, only
restores the bottom 16 bits of the user space stack pointer.  This
causes some 16-bit software to break, but it also leaks kernel state
to user space.  We have a software workaround for that ("espfix") for
the 32-bit kernel, but it relies on a nonzero stack segment base which
is not available in 64-bit mode.

In checkin:

    b3b42ac2cbae x86-64, modify_ldt: Ban 16-bit segments on 64-bit kernels

we "solved" this by forbidding 16-bit segments on 64-bit kernels, with
the logic that 16-bit support is crippled on 64-bit kernels anyway (no
V86 support), but it turns out that people are doing stuff like
running old Win16 binaries under Wine and expecting it to work.

This works around this by creating percpu "ministacks", each of which
is mapped 2^16 times 64K apart.  When we detect that the return SS is
on the LDT, we copy the IRET frame to the ministack and use the
relevant alias to return to userspace.  The ministacks are mapped
readonly, so if IRET faults we promote #GP to #DF which is an IST
vector and thus has its own stack; we then do the fixup in the #DF
handler.

(Making #GP an IST exception would make the msr_safe functions unsafe
in NMI/MC context, and quite possibly have other effects.)

Special thanks to:

- Andy Lutomirski, for the suggestion of using very small stack slots
  and copy (as opposed to map) the IRET frame there, and for the
  suggestion to mark them readonly and let the fault promote to #DF.
- Konrad Wilk for paravirt fixup and testing.
- Borislav Petkov for testing help and useful comments.

Reported-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1398816946-3351-1-git-send-email-hpa@linux.intel.com
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Andrew Lutomirski <amluto@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Dirk Hohndel <dirk@hohndel.org>
Cc: Arjan van de Ven <arjan.van.de.ven@intel.com>
Cc: comex <comexk@gmail.com>
Cc: Alexander van Heukelum <heukelum@fastmail.fm>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/x86/x86_64/mm.txt         |   2 +
 arch/x86/include/asm/pgtable_64_types.h |   2 +
 arch/x86/include/asm/setup.h            |   3 +
 arch/x86/kernel/Makefile                |   1 +
 arch/x86/kernel/entry_64.S              |  73 ++++++++-
 arch/x86/kernel/espfix_64.c             | 208 ++++++++++++++++++++++++
 arch/x86/kernel/ldt.c                   |  11 --
 arch/x86/kernel/smpboot.c               |   7 +
 arch/x86/mm/dump_pagetables.c           |  37 +++--
 init/main.c                             |   4 +
 10 files changed, 323 insertions(+), 25 deletions(-)
 create mode 100644 arch/x86/kernel/espfix_64.c

diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 881582f75c9c..bd4370487b07 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -12,6 +12,8 @@ ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space
 ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole
 ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
+ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+... unused hole ...
 ffffffff80000000 - ffffffffa0000000 (=512 MB)  kernel text mapping, from phys 0
 ffffffffa0000000 - ffffffffff5fffff (=1525 MB) module mapping space
 ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 2d883440cb9a..b1609f2c524c 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -61,6 +61,8 @@ typedef struct { pteval_t pte; } pte_t;
 #define MODULES_VADDR    _AC(0xffffffffa0000000, UL)
 #define MODULES_END      _AC(0xffffffffff000000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
+#define ESPFIX_PGD_ENTRY _AC(-2, UL)
+#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
 
 #define EARLY_DYNAMIC_PAGE_TABLES	64
 
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index b7bf3505e1ec..93797d17ef32 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -60,6 +60,9 @@ extern void x86_ce4100_early_setup(void);
 static inline void x86_ce4100_early_setup(void) { }
 #endif
 
+extern void init_espfix_bsp(void);
+extern void init_espfix_ap(void);
+
 #ifndef _SETUP
 
 /*
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 7bd3bd310106..0fde29333ca0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-y			+= syscall_$(BITS).o
 obj-$(CONFIG_X86_64)	+= vsyscall_64.o
 obj-$(CONFIG_X86_64)	+= vsyscall_emu_64.o
+obj-$(CONFIG_X86_64)	+= espfix_64.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7ac938a4bfab..b44acb51ac8b 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -58,6 +58,7 @@
 #include <asm/asm.h>
 #include <asm/context_tracking.h>
 #include <asm/smap.h>
+#include <asm/pgtable_types.h>
 #include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
@@ -1055,8 +1056,16 @@ restore_args:
 	RESTORE_ARGS 1,8,1
 
 irq_return:
+	/*
+	 * Are we returning to a stack segment from the LDT?  Note: in
+	 * 64-bit mode SS:RSP on the exception stack is always valid.
+	 */
+	testb $4,(SS-RIP)(%rsp)
+	jnz irq_return_ldt
+
+irq_return_iret:
 	INTERRUPT_RETURN
-	_ASM_EXTABLE(irq_return, bad_iret)
+	_ASM_EXTABLE(irq_return_iret, bad_iret)
 
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_iret)
@@ -1064,6 +1073,30 @@ ENTRY(native_iret)
 	_ASM_EXTABLE(native_iret, bad_iret)
 #endif
 
+irq_return_ldt:
+	pushq_cfi %rax
+	pushq_cfi %rdi
+	SWAPGS
+	movq PER_CPU_VAR(espfix_waddr),%rdi
+	movq %rax,(0*8)(%rdi)	/* RAX */
+	movq (2*8)(%rsp),%rax	/* RIP */
+	movq %rax,(1*8)(%rdi)
+	movq (3*8)(%rsp),%rax	/* CS */
+	movq %rax,(2*8)(%rdi)
+	movq (4*8)(%rsp),%rax	/* RFLAGS */
+	movq %rax,(3*8)(%rdi)
+	movq (6*8)(%rsp),%rax	/* SS */
+	movq %rax,(5*8)(%rdi)
+	movq (5*8)(%rsp),%rax	/* RSP */
+	movq %rax,(4*8)(%rdi)
+	andl $0xffff0000,%eax
+	popq_cfi %rdi
+	orq PER_CPU_VAR(espfix_stack),%rax
+	SWAPGS
+	movq %rax,%rsp
+	popq_cfi %rax
+	jmp irq_return_iret
+
 	.section .fixup,"ax"
 bad_iret:
 	/*
@@ -1127,9 +1160,41 @@ ENTRY(retint_kernel)
 	call preempt_schedule_irq
 	jmp exit_intr
 #endif
-
 	CFI_ENDPROC
 END(common_interrupt)
+
+	/*
+	 * If IRET takes a fault on the espfix stack, then we
+	 * end up promoting it to a doublefault.  In that case,
+	 * modify the stack to make it look like we just entered
+	 * the #GP handler from user space, similar to bad_iret.
+	 */
+	ALIGN
+__do_double_fault:
+	XCPT_FRAME 1 RDI+8
+	movq RSP(%rdi),%rax		/* Trap on the espfix stack? */
+	sarq $PGDIR_SHIFT,%rax
+	cmpl $ESPFIX_PGD_ENTRY,%eax
+	jne do_double_fault		/* No, just deliver the fault */
+	cmpl $__KERNEL_CS,CS(%rdi)
+	jne do_double_fault
+	movq RIP(%rdi),%rax
+	cmpq $irq_return_iret,%rax
+#ifdef CONFIG_PARAVIRT
+	je 1f
+	cmpq $native_iret,%rax
+#endif
+	jne do_double_fault		/* This shouldn't happen... */
+1:
+	movq PER_CPU_VAR(kernel_stack),%rax
+	subq $(6*8-KERNEL_STACK_OFFSET),%rax	/* Reset to original stack */
+	movq %rax,RSP(%rdi)
+	movq $0,(%rax)			/* Missing (lost) #GP error code */
+	movq $general_protection,RIP(%rdi)
+	retq
+	CFI_ENDPROC
+END(__do_double_fault)
+
 /*
  * End of kprobes section
  */
@@ -1298,7 +1363,7 @@ zeroentry overflow do_overflow
 zeroentry bounds do_bounds
 zeroentry invalid_op do_invalid_op
 zeroentry device_not_available do_device_not_available
-paranoiderrorentry double_fault do_double_fault
+paranoiderrorentry double_fault __do_double_fault
 zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
 errorentry invalid_TSS do_invalid_TSS
 errorentry segment_not_present do_segment_not_present
@@ -1585,7 +1650,7 @@ error_sti:
  */
 error_kernelspace:
 	incl %ebx
-	leaq irq_return(%rip),%rcx
+	leaq irq_return_iret(%rip),%rcx
 	cmpq %rcx,RIP+8(%rsp)
 	je error_swapgs
 	movl %ecx,%eax	/* zero extend */
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
new file mode 100644
index 000000000000..8a64da36310f
--- /dev/null
+++ b/arch/x86/kernel/espfix_64.c
@@ -0,0 +1,208 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2014 Intel Corporation; author: H. Peter Anvin
+ *
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms and conditions of the GNU General Public License,
+ *   version 2, as published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope it will be useful, but WITHOUT
+ *   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ *   more details.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * The IRET instruction, when returning to a 16-bit segment, only
+ * restores the bottom 16 bits of the user space stack pointer.  This
+ * causes some 16-bit software to break, but it also leaks kernel state
+ * to user space.
+ *
+ * This works around this by creating percpu "ministacks", each of which
+ * is mapped 2^16 times 64K apart.  When we detect that the return SS is
+ * on the LDT, we copy the IRET frame to the ministack and use the
+ * relevant alias to return to userspace.  The ministacks are mapped
+ * readonly, so if IRET faults we promote #GP to #DF which is an IST
+ * vector and thus has its own stack; we then do the fixup in the #DF
+ * handler.
+ *
+ * This file sets up the ministacks and the related page tables.  The
+ * actual ministack invocation is in entry_64.S.
+ */
+
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/gfp.h>
+#include <linux/random.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/setup.h>
+
+/*
+ * Note: we only need 6*8 = 48 bytes for the espfix stack, but round
+ * it up to a cache line to avoid unnecessary sharing.
+ */
+#define ESPFIX_STACK_SIZE	(8*8UL)
+#define ESPFIX_STACKS_PER_PAGE	(PAGE_SIZE/ESPFIX_STACK_SIZE)
+
+/* There is address space for how many espfix pages? */
+#define ESPFIX_PAGE_SPACE	(1UL << (PGDIR_SHIFT-PAGE_SHIFT-16))
+
+#define ESPFIX_MAX_CPUS		(ESPFIX_STACKS_PER_PAGE * ESPFIX_PAGE_SPACE)
+#if CONFIG_NR_CPUS > ESPFIX_MAX_CPUS
+# error "Need more than one PGD for the ESPFIX hack"
+#endif
+
+#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+
+/* This contains the *bottom* address of the espfix stack */
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
+
+/* Initialization mutex - should this be a spinlock? */
+static DEFINE_MUTEX(espfix_init_mutex);
+
+/* Page allocation bitmap - each page serves ESPFIX_STACKS_PER_PAGE CPUs */
+#define ESPFIX_MAX_PAGES  DIV_ROUND_UP(CONFIG_NR_CPUS, ESPFIX_STACKS_PER_PAGE)
+static void *espfix_pages[ESPFIX_MAX_PAGES];
+
+static __page_aligned_bss pud_t espfix_pud_page[PTRS_PER_PUD]
+	__aligned(PAGE_SIZE);
+
+static unsigned int page_random, slot_random;
+
+/*
+ * This returns the bottom address of the espfix stack for a specific CPU.
+ * The math allows for a non-power-of-two ESPFIX_STACK_SIZE, in which case
+ * we have to account for some amount of padding at the end of each page.
+ */
+static inline unsigned long espfix_base_addr(unsigned int cpu)
+{
+	unsigned long page, slot;
+	unsigned long addr;
+
+	page = (cpu / ESPFIX_STACKS_PER_PAGE) ^ page_random;
+	slot = (cpu + slot_random) % ESPFIX_STACKS_PER_PAGE;
+	addr = (page << PAGE_SHIFT) + (slot * ESPFIX_STACK_SIZE);
+	addr = (addr & 0xffffUL) | ((addr & ~0xffffUL) << 16);
+	addr += ESPFIX_BASE_ADDR;
+	return addr;
+}
+
+#define PTE_STRIDE        (65536/PAGE_SIZE)
+#define ESPFIX_PTE_CLONES (PTRS_PER_PTE/PTE_STRIDE)
+#define ESPFIX_PMD_CLONES PTRS_PER_PMD
+#define ESPFIX_PUD_CLONES (65536/(ESPFIX_PTE_CLONES*ESPFIX_PMD_CLONES))
+
+#define PGTABLE_PROT	  ((_KERNPG_TABLE & ~_PAGE_RW) | _PAGE_NX)
+
+static void init_espfix_random(void)
+{
+	unsigned long rand;
+
+	/*
+	 * This is run before the entropy pools are initialized,
+	 * but this is hopefully better than nothing.
+	 */
+	if (!arch_get_random_long(&rand)) {
+		/* The constant is an arbitrary large prime */
+		rdtscll(rand);
+		rand *= 0xc345c6b72fd16123UL;
+	}
+
+	slot_random = rand % ESPFIX_STACKS_PER_PAGE;
+	page_random = (rand / ESPFIX_STACKS_PER_PAGE)
+		& (ESPFIX_PAGE_SPACE - 1);
+}
+
+void __init init_espfix_bsp(void)
+{
+	pgd_t *pgd_p;
+	pteval_t ptemask;
+
+	ptemask = __supported_pte_mask;
+
+	/* Install the espfix pud into the kernel page directory */
+	pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
+	pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
+
+	/* Randomize the locations */
+	init_espfix_random();
+
+	/* The rest is the same as for any other processor */
+	init_espfix_ap();
+}
+
+void init_espfix_ap(void)
+{
+	unsigned int cpu, page;
+	unsigned long addr;
+	pud_t pud, *pud_p;
+	pmd_t pmd, *pmd_p;
+	pte_t pte, *pte_p;
+	int n;
+	void *stack_page;
+	pteval_t ptemask;
+
+	/* We only have to do this once... */
+	if (likely(this_cpu_read(espfix_stack)))
+		return;		/* Already initialized */
+
+	cpu = smp_processor_id();
+	addr = espfix_base_addr(cpu);
+	page = cpu/ESPFIX_STACKS_PER_PAGE;
+
+	/* Did another CPU already set this up? */
+	stack_page = ACCESS_ONCE(espfix_pages[page]);
+	if (likely(stack_page))
+		goto done;
+
+	mutex_lock(&espfix_init_mutex);
+
+	/* Did we race on the lock? */
+	stack_page = ACCESS_ONCE(espfix_pages[page]);
+	if (stack_page)
+		goto unlock_done;
+
+	ptemask = __supported_pte_mask;
+
+	pud_p = &espfix_pud_page[pud_index(addr)];
+	pud = *pud_p;
+	if (!pud_present(pud)) {
+		pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
+		pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
+		paravirt_alloc_pud(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
+		for (n = 0; n < ESPFIX_PUD_CLONES; n++)
+			set_pud(&pud_p[n], pud);
+	}
+
+	pmd_p = pmd_offset(&pud, addr);
+	pmd = *pmd_p;
+	if (!pmd_present(pmd)) {
+		pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
+		pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
+		paravirt_alloc_pmd(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
+		for (n = 0; n < ESPFIX_PMD_CLONES; n++)
+			set_pmd(&pmd_p[n], pmd);
+	}
+
+	pte_p = pte_offset_kernel(&pmd, addr);
+	stack_page = (void *)__get_free_page(GFP_KERNEL);
+	pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
+	paravirt_alloc_pte(&init_mm, __pa(stack_page) >> PAGE_SHIFT);
+	for (n = 0; n < ESPFIX_PTE_CLONES; n++)
+		set_pte(&pte_p[n*PTE_STRIDE], pte);
+
+	/* Job is done for this CPU and any CPU which shares this page */
+	ACCESS_ONCE(espfix_pages[page]) = stack_page;
+
+unlock_done:
+	mutex_unlock(&espfix_init_mutex);
+done:
+	this_cpu_write(espfix_stack, addr);
+	this_cpu_write(espfix_waddr, (unsigned long)stack_page
+		       + (addr & ~PAGE_MASK));
+}
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index af1d14a9ebda..ebc987398923 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -229,17 +229,6 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 		}
 	}
 
-	/*
-	 * On x86-64 we do not support 16-bit segments due to
-	 * IRET leaking the high bits of the kernel stack address.
-	 */
-#ifdef CONFIG_X86_64
-	if (!ldt_info.seg_32bit) {
-		error = -EINVAL;
-		goto out_unlock;
-	}
-#endif
-
 	fill_ldt(&ldt, &ldt_info);
 	if (oldmode)
 		ldt.avl = 0;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index bfd348e99369..9f009cc7fcb2 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -264,6 +264,13 @@ notrace static void __cpuinit start_secondary(void *unused)
 	 */
 	check_tsc_sync_target();
 
+	/*
+	 * Enable the espfix hack for this CPU
+	 */
+#ifdef CONFIG_X86_64
+	init_espfix_ap();
+#endif
+
 	/*
 	 * We need to hold vector_lock so there the set of online cpus
 	 * does not change while we are assigning vectors to cpus.  Holding
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 0002a3a33081..e04e67753238 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -30,11 +30,13 @@ struct pg_state {
 	unsigned long start_address;
 	unsigned long current_address;
 	const struct addr_marker *marker;
+	unsigned long lines;
 };
 
 struct addr_marker {
 	unsigned long start_address;
 	const char *name;
+	unsigned long max_lines;
 };
 
 /* indices for address_markers; keep sync'd w/ address_markers below */
@@ -45,6 +47,7 @@ enum address_markers_idx {
 	LOW_KERNEL_NR,
 	VMALLOC_START_NR,
 	VMEMMAP_START_NR,
+	ESPFIX_START_NR,
 	HIGH_KERNEL_NR,
 	MODULES_VADDR_NR,
 	MODULES_END_NR,
@@ -67,6 +70,7 @@ static struct addr_marker address_markers[] = {
 	{ PAGE_OFFSET,		"Low Kernel Mapping" },
 	{ VMALLOC_START,        "vmalloc() Area" },
 	{ VMEMMAP_START,        "Vmemmap" },
+	{ ESPFIX_BASE_ADDR,	"ESPfix Area", 16 },
 	{ __START_KERNEL_map,   "High Kernel Mapping" },
 	{ MODULES_VADDR,        "Modules" },
 	{ MODULES_END,          "End Modules" },
@@ -163,7 +167,7 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 		      pgprot_t new_prot, int level)
 {
 	pgprotval_t prot, cur;
-	static const char units[] = "KMGTPE";
+	static const char units[] = "BKMGTPE";
 
 	/*
 	 * If we have a "break" in the series, we need to flush the state that
@@ -178,6 +182,7 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 		st->current_prot = new_prot;
 		st->level = level;
 		st->marker = address_markers;
+		st->lines = 0;
 		seq_printf(m, "---[ %s ]---\n", st->marker->name);
 	} else if (prot != cur || level != st->level ||
 		   st->current_address >= st->marker[1].start_address) {
@@ -188,17 +193,21 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 		/*
 		 * Now print the actual finished series
 		 */
-		seq_printf(m, "0x%0*lx-0x%0*lx   ",
-			   width, st->start_address,
-			   width, st->current_address);
+		if (!st->marker->max_lines ||
+		    st->lines < st->marker->max_lines) {
+			seq_printf(m, "0x%0*lx-0x%0*lx   ",
+				   width, st->start_address,
+				   width, st->current_address);
 
-		delta = (st->current_address - st->start_address) >> 10;
-		while (!(delta & 1023) && unit[1]) {
-			delta >>= 10;
-			unit++;
+			delta = (st->current_address - st->start_address);
+			while (!(delta & 1023) && unit[1]) {
+				delta >>= 10;
+				unit++;
+			}
+			seq_printf(m, "%9lu%c ", delta, *unit);
+			printk_prot(m, st->current_prot, st->level);
 		}
-		seq_printf(m, "%9lu%c ", delta, *unit);
-		printk_prot(m, st->current_prot, st->level);
+		st->lines++;
 
 		/*
 		 * We print markers for special areas of address space,
@@ -206,7 +215,15 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 		 * This helps in the interpretation.
 		 */
 		if (st->current_address >= st->marker[1].start_address) {
+			if (st->marker->max_lines &&
+			    st->lines > st->marker->max_lines) {
+				unsigned long nskip =
+					st->lines - st->marker->max_lines;
+				seq_printf(m, "... %lu entr%s skipped ... \n",
+					   nskip, nskip == 1 ? "y" : "ies");
+			}
 			st->marker++;
+			st->lines = 0;
 			seq_printf(m, "---[ %s ]---\n", st->marker->name);
 		}
 
diff --git a/init/main.c b/init/main.c
index e83ac04fda97..600136515caf 100644
--- a/init/main.c
+++ b/init/main.c
@@ -605,6 +605,10 @@ asmlinkage void __init start_kernel(void)
 #ifdef CONFIG_X86
 	if (efi_enabled(EFI_RUNTIME_SERVICES))
 		efi_enter_virtual_mode();
+#endif
+#ifdef CONFIG_X86_64
+	/* Should be run before the first non-init thread is created */
+	init_espfix_bsp();
 #endif
 	thread_info_cache_init();
 	cred_init();

From ebb3e5b75860d94180cdfcdfe34387ca510a2d1f Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Thu, 1 May 2014 14:12:23 -0700
Subject: [PATCH 0126/1185] x86, espfix: Move espfix definitions into a
 separate header file

commit e1fe9ed8d2a4937510d0d60e20705035c2609aea upstream.

Sparse warns that the percpu variables aren't declared before they are
defined.  Rather than hacking around it, move espfix definitions into
a proper header file.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/espfix.h | 16 ++++++++++++++++
 arch/x86/include/asm/setup.h  |  5 ++---
 arch/x86/kernel/espfix_64.c   |  1 +
 3 files changed, 19 insertions(+), 3 deletions(-)
 create mode 100644 arch/x86/include/asm/espfix.h

diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
new file mode 100644
index 000000000000..729051c82b02
--- /dev/null
+++ b/arch/x86/include/asm/espfix.h
@@ -0,0 +1,16 @@
+#ifdef _ASM_X86_ESPFIX_H
+#define _ASM_X86_ESPFIX_H
+
+#ifdef CONFIG_X86_64
+
+#include <asm/percpu.h>
+
+DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
+DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
+
+extern void init_espfix_bsp(void);
+extern void init_espfix_ap(void);
+
+#endif /* CONFIG_X86_64 */
+
+#endif /* _ASM_X86_ESPFIX_H */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 93797d17ef32..2e327f114a1b 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -60,11 +60,10 @@ extern void x86_ce4100_early_setup(void);
 static inline void x86_ce4100_early_setup(void) { }
 #endif
 
-extern void init_espfix_bsp(void);
-extern void init_espfix_ap(void);
-
 #ifndef _SETUP
 
+#include <asm/espfix.h>
+
 /*
  * This is set up by the setup-routine at boot-time
  */
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 8a64da36310f..6afbb16e9b79 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -40,6 +40,7 @@
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/setup.h>
+#include <asm/espfix.h>
 
 /*
  * Note: we only need 6*8 = 48 bytes for the espfix stack, but round

From cd91e9adf307219809355ad19509e0411d6d0a8c Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Fri, 2 May 2014 11:33:51 -0700
Subject: [PATCH 0127/1185] x86, espfix: Fix broken header guard

commit 20b68535cd27183ebd3651ff313afb2b97dac941 upstream.

Header guard is #ifndef, not #ifdef...

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/espfix.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
index 729051c82b02..99efebb2f69d 100644
--- a/arch/x86/include/asm/espfix.h
+++ b/arch/x86/include/asm/espfix.h
@@ -1,4 +1,4 @@
-#ifdef _ASM_X86_ESPFIX_H
+#ifndef _ASM_X86_ESPFIX_H
 #define _ASM_X86_ESPFIX_H
 
 #ifdef CONFIG_X86_64

From cd4033c9ed9e9abf51a3fb033c34daff2ca8df24 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Sun, 4 May 2014 10:00:49 -0700
Subject: [PATCH 0128/1185] x86, espfix: Make espfix64 a Kconfig option, fix
 UML

commit 197725de65477bc8509b41388157c1a2283542bb upstream.

Make espfix64 a hidden Kconfig option.  This fixes the x86-64 UML
build which had broken due to the non-existence of init_espfix_bsp()
in UML: since UML uses its own Kconfig, this option does not appear in
the UML build.

This also makes it possible to make support for 16-bit segments a
configuration option, for the people who want to minimize the size of
the kernel.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Richard Weinberger <richard@nod.at>
Link: http://lkml.kernel.org/r/1398816946-3351-1-git-send-email-hpa@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/Kconfig          | 4 ++++
 arch/x86/kernel/Makefile  | 2 +-
 arch/x86/kernel/smpboot.c | 2 +-
 init/main.c               | 2 +-
 4 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index af88b27ce313..0c152c8200b8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -957,6 +957,10 @@ config VM86
 	  XFree86 to initialize some video cards via BIOS. Disabling this
 	  option saves about 6k.
 
+config X86_ESPFIX64
+	def_bool y
+	depends on X86_64
+
 config TOSHIBA
 	tristate "Toshiba Laptop support"
 	depends on X86_32
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0fde29333ca0..111eb356dbea 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -27,7 +27,7 @@ obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-y			+= syscall_$(BITS).o
 obj-$(CONFIG_X86_64)	+= vsyscall_64.o
 obj-$(CONFIG_X86_64)	+= vsyscall_emu_64.o
-obj-$(CONFIG_X86_64)	+= espfix_64.o
+obj-$(CONFIG_X86_ESPFIX64)	+= espfix_64.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9f009cc7fcb2..fe862750583b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -267,7 +267,7 @@ notrace static void __cpuinit start_secondary(void *unused)
 	/*
 	 * Enable the espfix hack for this CPU
 	 */
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_ESPFIX64
 	init_espfix_ap();
 #endif
 
diff --git a/init/main.c b/init/main.c
index 600136515caf..2132ffd5e031 100644
--- a/init/main.c
+++ b/init/main.c
@@ -606,7 +606,7 @@ asmlinkage void __init start_kernel(void)
 	if (efi_enabled(EFI_RUNTIME_SERVICES))
 		efi_enter_virtual_mode();
 #endif
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_ESPFIX64
 	/* Should be run before the first non-init thread is created */
 	init_espfix_bsp();
 #endif

From 044f72e1163f83cab78e92411033c8b6bf06d3de Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Sun, 4 May 2014 10:36:22 -0700
Subject: [PATCH 0129/1185] x86, espfix: Make it possible to disable 16-bit
 support

commit 34273f41d57ee8d854dcd2a1d754cbb546cb548f upstream.

Embedded systems, which may be very memory-size-sensitive, are
extremely unlikely to ever encounter any 16-bit software, so make it
a CONFIG_EXPERT option to turn off support for any 16-bit software
whatsoever.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Link: http://lkml.kernel.org/r/1398816946-3351-1-git-send-email-hpa@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/Kconfig           | 23 ++++++++++++++++++-----
 arch/x86/kernel/entry_32.S | 12 ++++++++++++
 arch/x86/kernel/entry_64.S |  8 ++++++++
 arch/x86/kernel/ldt.c      |  5 +++++
 4 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0c152c8200b8..a649cb686692 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -952,14 +952,27 @@ config VM86
 	default y
 	depends on X86_32
 	---help---
-	  This option is required by programs like DOSEMU to run 16-bit legacy
-	  code on X86 processors. It also may be needed by software like
-	  XFree86 to initialize some video cards via BIOS. Disabling this
-	  option saves about 6k.
+	  This option is required by programs like DOSEMU to run
+	  16-bit real mode legacy code on x86 processors. It also may
+	  be needed by software like XFree86 to initialize some video
+	  cards via BIOS. Disabling this option saves about 6K.
+
+config X86_16BIT
+	bool "Enable support for 16-bit segments" if EXPERT
+	default y
+	---help---
+	  This option is required by programs like Wine to run 16-bit
+	  protected mode legacy code on x86 processors.  Disabling
+	  this option saves about 300 bytes on i386, or around 6K text
+	  plus 16K runtime memory on x86-64,
+
+config X86_ESPFIX32
+	def_bool y
+	depends on X86_16BIT && X86_32
 
 config X86_ESPFIX64
 	def_bool y
-	depends on X86_64
+	depends on X86_16BIT && X86_64
 
 config TOSHIBA
 	tristate "Toshiba Laptop support"
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 08fa44443a01..5c38e2b298cd 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -532,6 +532,7 @@ syscall_exit:
 restore_all:
 	TRACE_IRQS_IRET
 restore_all_notrace:
+#ifdef CONFIG_X86_ESPFIX32
 	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS, SS and CS
 	# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
 	# are returning to the kernel.
@@ -542,6 +543,7 @@ restore_all_notrace:
 	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
 	CFI_REMEMBER_STATE
 	je ldt_ss			# returning to user-space with LDT SS
+#endif
 restore_nocheck:
 	RESTORE_REGS 4			# skip orig_eax/error_code
 irq_return:
@@ -554,6 +556,7 @@ ENTRY(iret_exc)
 .previous
 	_ASM_EXTABLE(irq_return,iret_exc)
 
+#ifdef CONFIG_X86_ESPFIX32
 	CFI_RESTORE_STATE
 ldt_ss:
 #ifdef CONFIG_PARAVIRT
@@ -597,6 +600,7 @@ ldt_ss:
 	lss (%esp), %esp		/* switch to espfix segment */
 	CFI_ADJUST_CFA_OFFSET -8
 	jmp restore_nocheck
+#endif
 	CFI_ENDPROC
 ENDPROC(system_call)
 
@@ -709,6 +713,7 @@ END(syscall_badsys)
  * the high word of the segment base from the GDT and swiches to the
  * normal stack and adjusts ESP with the matching offset.
  */
+#ifdef CONFIG_X86_ESPFIX32
 	/* fixup the stack */
 	mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
 	mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
@@ -718,8 +723,10 @@ END(syscall_badsys)
 	pushl_cfi %eax
 	lss (%esp), %esp		/* switch to the normal stack segment */
 	CFI_ADJUST_CFA_OFFSET -8
+#endif
 .endm
 .macro UNWIND_ESPFIX_STACK
+#ifdef CONFIG_X86_ESPFIX32
 	movl %ss, %eax
 	/* see if on espfix stack */
 	cmpw $__ESPFIX_SS, %ax
@@ -730,6 +737,7 @@ END(syscall_badsys)
 	/* switch to normal stack */
 	FIXUP_ESPFIX_STACK
 27:
+#endif
 .endm
 
 /*
@@ -1337,11 +1345,13 @@ END(debug)
 ENTRY(nmi)
 	RING0_INT_FRAME
 	ASM_CLAC
+#ifdef CONFIG_X86_ESPFIX32
 	pushl_cfi %eax
 	movl %ss, %eax
 	cmpw $__ESPFIX_SS, %ax
 	popl_cfi %eax
 	je nmi_espfix_stack
+#endif
 	cmpl $ia32_sysenter_target,(%esp)
 	je nmi_stack_fixup
 	pushl_cfi %eax
@@ -1381,6 +1391,7 @@ nmi_debug_stack_check:
 	FIX_STACK 24, nmi_stack_correct, 1
 	jmp nmi_stack_correct
 
+#ifdef CONFIG_X86_ESPFIX32
 nmi_espfix_stack:
 	/* We have a RING0_INT_FRAME here.
 	 *
@@ -1402,6 +1413,7 @@ nmi_espfix_stack:
 	lss 12+4(%esp), %esp		# back to espfix stack
 	CFI_ADJUST_CFA_OFFSET -24
 	jmp irq_return
+#endif
 	CFI_ENDPROC
 END(nmi)
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b44acb51ac8b..b7178d06ab54 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1060,8 +1060,10 @@ irq_return:
 	 * Are we returning to a stack segment from the LDT?  Note: in
 	 * 64-bit mode SS:RSP on the exception stack is always valid.
 	 */
+#ifdef CONFIG_X86_ESPFIX64
 	testb $4,(SS-RIP)(%rsp)
 	jnz irq_return_ldt
+#endif
 
 irq_return_iret:
 	INTERRUPT_RETURN
@@ -1073,6 +1075,7 @@ ENTRY(native_iret)
 	_ASM_EXTABLE(native_iret, bad_iret)
 #endif
 
+#ifdef CONFIG_X86_ESPFIX64
 irq_return_ldt:
 	pushq_cfi %rax
 	pushq_cfi %rdi
@@ -1096,6 +1099,7 @@ irq_return_ldt:
 	movq %rax,%rsp
 	popq_cfi %rax
 	jmp irq_return_iret
+#endif
 
 	.section .fixup,"ax"
 bad_iret:
@@ -1169,6 +1173,7 @@ END(common_interrupt)
 	 * modify the stack to make it look like we just entered
 	 * the #GP handler from user space, similar to bad_iret.
 	 */
+#ifdef CONFIG_X86_ESPFIX64
 	ALIGN
 __do_double_fault:
 	XCPT_FRAME 1 RDI+8
@@ -1194,6 +1199,9 @@ __do_double_fault:
 	retq
 	CFI_ENDPROC
 END(__do_double_fault)
+#else
+# define __do_double_fault do_double_fault
+#endif
 
 /*
  * End of kprobes section
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index ebc987398923..c37886d759cc 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -229,6 +229,11 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 		}
 	}
 
+	if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
+		error = -EINVAL;
+		goto out_unlock;
+	}
+
 	fill_ldt(&ldt, &ldt_info);
 	if (oldmode)
 		ldt.avl = 0;

From e824880110a452575e28a1547b95c5eb2bbb90e8 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Wed, 23 Jul 2014 08:34:11 -0700
Subject: [PATCH 0130/1185] x86_64/entry/xen: Do not invoke espfix64 on Xen

commit 7209a75d2009dbf7745e2fd354abf25c3deb3ca3 upstream.

This moves the espfix64 logic into native_iret.  To make this work,
it gets rid of the native patch for INTERRUPT_RETURN:
INTERRUPT_RETURN on native kernels is now 'jmp native_iret'.

This changes the 16-bit SS behavior on Xen from OOPSing to leaking
some bits of the Xen hypervisor's RSP (I think).

[ hpa: this is a nonzero cost on native, but probably not enough to
  measure. Xen needs to fix this in their own code, probably doing
  something equivalent to espfix64. ]

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/7b8f1d8ef6597cb16ae004a43c56980a7de3cf94.1406129132.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/irqflags.h     |  2 +-
 arch/x86/kernel/entry_64.S          | 28 ++++++++++------------------
 arch/x86/kernel/paravirt_patch_64.c |  2 --
 3 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index bba3cf88e624..0a8b519226b8 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -129,7 +129,7 @@ static inline notrace unsigned long arch_local_irq_save(void)
 
 #define PARAVIRT_ADJUST_EXCEPTION_FRAME	/*  */
 
-#define INTERRUPT_RETURN	iretq
+#define INTERRUPT_RETURN	jmp native_iret
 #define USERGS_SYSRET64				\
 	swapgs;					\
 	sysretq;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b7178d06ab54..39ba6914bbc6 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1056,27 +1056,24 @@ restore_args:
 	RESTORE_ARGS 1,8,1
 
 irq_return:
+	INTERRUPT_RETURN
+
+ENTRY(native_iret)
 	/*
 	 * Are we returning to a stack segment from the LDT?  Note: in
 	 * 64-bit mode SS:RSP on the exception stack is always valid.
 	 */
 #ifdef CONFIG_X86_ESPFIX64
 	testb $4,(SS-RIP)(%rsp)
-	jnz irq_return_ldt
+	jnz native_irq_return_ldt
 #endif
 
-irq_return_iret:
-	INTERRUPT_RETURN
-	_ASM_EXTABLE(irq_return_iret, bad_iret)
-
-#ifdef CONFIG_PARAVIRT
-ENTRY(native_iret)
+native_irq_return_iret:
 	iretq
-	_ASM_EXTABLE(native_iret, bad_iret)
-#endif
+	_ASM_EXTABLE(native_irq_return_iret, bad_iret)
 
 #ifdef CONFIG_X86_ESPFIX64
-irq_return_ldt:
+native_irq_return_ldt:
 	pushq_cfi %rax
 	pushq_cfi %rdi
 	SWAPGS
@@ -1098,7 +1095,7 @@ irq_return_ldt:
 	SWAPGS
 	movq %rax,%rsp
 	popq_cfi %rax
-	jmp irq_return_iret
+	jmp native_irq_return_iret
 #endif
 
 	.section .fixup,"ax"
@@ -1184,13 +1181,8 @@ __do_double_fault:
 	cmpl $__KERNEL_CS,CS(%rdi)
 	jne do_double_fault
 	movq RIP(%rdi),%rax
-	cmpq $irq_return_iret,%rax
-#ifdef CONFIG_PARAVIRT
-	je 1f
-	cmpq $native_iret,%rax
-#endif
+	cmpq $native_irq_return_iret,%rax
 	jne do_double_fault		/* This shouldn't happen... */
-1:
 	movq PER_CPU_VAR(kernel_stack),%rax
 	subq $(6*8-KERNEL_STACK_OFFSET),%rax	/* Reset to original stack */
 	movq %rax,RSP(%rdi)
@@ -1658,7 +1650,7 @@ error_sti:
  */
 error_kernelspace:
 	incl %ebx
-	leaq irq_return_iret(%rip),%rcx
+	leaq native_irq_return_iret(%rip),%rcx
 	cmpq %rcx,RIP+8(%rsp)
 	je error_swapgs
 	movl %ecx,%eax	/* zero extend */
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 3f08f34f93eb..a1da6737ba5b 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -6,7 +6,6 @@ DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
 DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
 DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
 DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
-DEF_NATIVE(pv_cpu_ops, iret, "iretq");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
@@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, save_fl);
 		PATCH_SITE(pv_irq_ops, irq_enable);
 		PATCH_SITE(pv_irq_ops, irq_disable);
-		PATCH_SITE(pv_cpu_ops, iret);
 		PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
 		PATCH_SITE(pv_cpu_ops, usergs_sysret32);
 		PATCH_SITE(pv_cpu_ops, usergs_sysret64);

From 66d0cfad7474f7f306589dab9cf83b47167e7cd0 Mon Sep 17 00:00:00 2001
From: Malcolm Priestley <tvboxspy@gmail.com>
Date: Wed, 23 Jul 2014 21:35:11 +0100
Subject: [PATCH 0131/1185] staging: vt6655: Fix Warning on boot
 handle_irq_event_percpu.

commit 6cff1f6ad4c615319c1a146b2aa0af1043c5e9f5 upstream.

WARNING: CPU: 0 PID: 929 at /home/apw/COD/linux/kernel/irq/handle.c:147 handle_irq_event_percpu+0x1d1/0x1e0()
irq 17 handler device_intr+0x0/0xa80 [vt6655_stage] enabled interrupts

Using spin_lock_irqsave appears to fix this.

Signed-off-by: Malcolm Priestley <tvboxspy@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/vt6655/device_main.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c
index 08b250f01dae..d170b6f9db7c 100644
--- a/drivers/staging/vt6655/device_main.c
+++ b/drivers/staging/vt6655/device_main.c
@@ -2434,6 +2434,7 @@ static  irqreturn_t  device_intr(int irq,  void *dev_instance) {
 	int             handled = 0;
 	unsigned char byData = 0;
 	int             ii = 0;
+	unsigned long flags;
 //    unsigned char byRSSI;
 
 	MACvReadISR(pDevice->PortOffset, &pDevice->dwIsr);
@@ -2459,7 +2460,8 @@ static  irqreturn_t  device_intr(int irq,  void *dev_instance) {
 
 	handled = 1;
 	MACvIntDisable(pDevice->PortOffset);
-	spin_lock_irq(&pDevice->lock);
+
+	spin_lock_irqsave(&pDevice->lock, flags);
 
 	//Make sure current page is 0
 	VNSvInPortB(pDevice->PortOffset + MAC_REG_PAGE1SEL, &byOrgPageSel);
@@ -2700,7 +2702,8 @@ static  irqreturn_t  device_intr(int irq,  void *dev_instance) {
 		MACvSelectPage1(pDevice->PortOffset);
 	}
 
-	spin_unlock_irq(&pDevice->lock);
+	spin_unlock_irqrestore(&pDevice->lock, flags);
+
 	MACvIntEnable(pDevice->PortOffset, IMR_MASK_VALUE);
 
 	return IRQ_RETVAL(handled);

From f62ec7353efa042524f769d0ada4cd18a9348433 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 7 Jul 2014 12:01:11 +0200
Subject: [PATCH 0132/1185] Revert "mac80211: move "bufferable MMPDU" check to
 fix AP mode scan"

commit 08b9939997df30e42a228e1ecb97f99e9c8ea84e upstream.

This reverts commit 277d916fc2e959c3f106904116bb4f7b1148d47a as it was
at least breaking iwlwifi by setting the IEEE80211_TX_CTL_NO_PS_BUFFER
flag in all kinds of interface modes, not only for AP mode where it is
appropriate.

To avoid reintroducing the original problem, explicitly check for probe
request frames in the multicast buffering code.

Fixes: 277d916fc2e9 ("mac80211: move "bufferable MMPDU" check to fix AP mode scan")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/tx.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index d566cdba24ec..10eea2326022 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -398,6 +398,9 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
 	if (ieee80211_has_order(hdr->frame_control))
 		return TX_CONTINUE;
 
+	if (ieee80211_is_probe_req(hdr->frame_control))
+		return TX_CONTINUE;
+
 	/* no stations in PS mode */
 	if (!atomic_read(&ps->num_sta_ps))
 		return TX_CONTINUE;
@@ -447,6 +450,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 {
 	struct sta_info *sta = tx->sta;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
 	struct ieee80211_local *local = tx->local;
 
 	if (unlikely(!sta))
@@ -457,6 +461,15 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 		     !(info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER))) {
 		int ac = skb_get_queue_mapping(tx->skb);
 
+		/* only deauth, disassoc and action are bufferable MMPDUs */
+		if (ieee80211_is_mgmt(hdr->frame_control) &&
+		    !ieee80211_is_deauth(hdr->frame_control) &&
+		    !ieee80211_is_disassoc(hdr->frame_control) &&
+		    !ieee80211_is_action(hdr->frame_control)) {
+			info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER;
+			return TX_CONTINUE;
+		}
+
 		ps_dbg(sta->sdata, "STA %pM aid %d: PS buffer for AC %d\n",
 		       sta->sta.addr, sta->sta.aid, ac);
 		if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER)
@@ -514,22 +527,8 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx)
 {
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
-
 	if (unlikely(tx->flags & IEEE80211_TX_PS_BUFFERED))
 		return TX_CONTINUE;
-
-	/* only deauth, disassoc and action are bufferable MMPDUs */
-	if (ieee80211_is_mgmt(hdr->frame_control) &&
-	    !ieee80211_is_deauth(hdr->frame_control) &&
-	    !ieee80211_is_disassoc(hdr->frame_control) &&
-	    !ieee80211_is_action(hdr->frame_control)) {
-		if (tx->flags & IEEE80211_TX_UNICAST)
-			info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER;
-		return TX_CONTINUE;
-	}
-
 	if (tx->flags & IEEE80211_TX_UNICAST)
 		return ieee80211_tx_h_unicast_ps_buf(tx);
 	else

From 5831364f63388662b37b92b2ff6c21a63e82d60d Mon Sep 17 00:00:00 2001
From: willy tarreau <w@1wt.eu>
Date: Thu, 16 Jan 2014 08:20:07 +0100
Subject: [PATCH 0133/1185] net: mvneta: increase the 64-bit rx/tx stats out of
 the hot path

commit dc4277dd41a80fd5f29a90412ea04bc3ba54fbf1 upstream.

It is better to count packets and bytes in 32-bit variables on the stack,
and then accumulate them once at the end.
and two memory barriers per packet. The incoming packet rate was
increased by 4.7% on the Openblocks AX3 thanks to this.

Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Gregory CLEMENT <gregory.clement@free-electrons.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Tested-by: Arnaud Ebalard <arno@natisbad.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/marvell/mvneta.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 658613021919..0c0b5763549a 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1354,6 +1354,8 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo,
 {
 	struct net_device *dev = pp->dev;
 	int rx_done, rx_filled;
+	u32 rcvd_pkts = 0;
+	u32 rcvd_bytes = 0;
 
 	/* Get number of received packets */
 	rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq);
@@ -1391,10 +1393,8 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo,
 
 		rx_bytes = rx_desc->data_size -
 			(ETH_FCS_LEN + MVNETA_MH_SIZE);
-		u64_stats_update_begin(&pp->rx_stats.syncp);
-		pp->rx_stats.packets++;
-		pp->rx_stats.bytes += rx_bytes;
-		u64_stats_update_end(&pp->rx_stats.syncp);
+		rcvd_pkts++;
+		rcvd_bytes += rx_bytes;
 
 		/* Linux processing */
 		skb_reserve(skb, MVNETA_MH_SIZE);
@@ -1415,6 +1415,13 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo,
 		}
 	}
 
+	if (rcvd_pkts) {
+		u64_stats_update_begin(&pp->rx_stats.syncp);
+		pp->rx_stats.packets += rcvd_pkts;
+		pp->rx_stats.bytes   += rcvd_bytes;
+		u64_stats_update_end(&pp->rx_stats.syncp);
+	}
+
 	/* Update rxq management counters */
 	mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_filled);
 

From 936e9bc5d7f64c98574bb864c4028378f5a01d0c Mon Sep 17 00:00:00 2001
From: willy tarreau <w@1wt.eu>
Date: Thu, 16 Jan 2014 08:20:08 +0100
Subject: [PATCH 0134/1185] net: mvneta: use per_cpu stats to fix an SMP lock
 up

commit 74c41b048db1073a04827d7f39e95ac1935524cc upstream.

Stats writers are mvneta_rx() and mvneta_tx(). They don't lock anything
when they update the stats, and as a result, it randomly happens that
the stats freeze on SMP if two updates happen during stats retrieval.
This is very easily reproducible by starting two HTTP servers and binding
each of them to a different CPU, then consulting /proc/net/dev in loops
during transfers, the interface should immediately lock up. This issue
also randomly happens upon link state changes during transfers, because
the stats are collected in this situation, but it takes more attempts to
reproduce it.

The comments in netdevice.h suggest using per_cpu stats instead to get
rid of this issue.

This patch implements this. It merges both rx_stats and tx_stats into
a single "stats" member with a single syncp. Both mvneta_rx() and
mvneta_tx() now only update a single CPU's counters.

In turn, mvneta_get_stats64() does the summing by iterating over all CPUs
to get their respective stats.

With this change, stats are still correct and no more lockup is encountered.

Note that this bug was present since the first import of the mvneta
driver.  It might make sense to backport it to some stable trees. If
so, it depends on "d33dc73 net: mvneta: increase the 64-bit rx/tx stats
out of the hot path".

Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Gregory CLEMENT <gregory.clement@free-electrons.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Tested-by: Arnaud Ebalard <arno@natisbad.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
[wt: port to 3.10 : u64_stats_init() does not exist in 3.10 and is not needed]
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/marvell/mvneta.c | 74 +++++++++++++++++----------
 1 file changed, 48 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 0c0b5763549a..49642c086109 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -219,10 +219,12 @@
 
 #define MVNETA_RX_BUF_SIZE(pkt_size)   ((pkt_size) + NET_SKB_PAD)
 
-struct mvneta_stats {
+struct mvneta_pcpu_stats {
 	struct	u64_stats_sync syncp;
-	u64	packets;
-	u64	bytes;
+	u64	rx_packets;
+	u64	rx_bytes;
+	u64	tx_packets;
+	u64	tx_bytes;
 };
 
 struct mvneta_port {
@@ -248,8 +250,7 @@ struct mvneta_port {
 	u8 mcast_count[256];
 	u16 tx_ring_size;
 	u16 rx_ring_size;
-	struct mvneta_stats tx_stats;
-	struct mvneta_stats rx_stats;
+	struct mvneta_pcpu_stats *stats;
 
 	struct mii_bus *mii_bus;
 	struct phy_device *phy_dev;
@@ -428,21 +429,29 @@ struct rtnl_link_stats64 *mvneta_get_stats64(struct net_device *dev,
 {
 	struct mvneta_port *pp = netdev_priv(dev);
 	unsigned int start;
+	int cpu;
 
-	memset(stats, 0, sizeof(struct rtnl_link_stats64));
+	for_each_possible_cpu(cpu) {
+		struct mvneta_pcpu_stats *cpu_stats;
+		u64 rx_packets;
+		u64 rx_bytes;
+		u64 tx_packets;
+		u64 tx_bytes;
 
-	do {
-		start = u64_stats_fetch_begin_bh(&pp->rx_stats.syncp);
-		stats->rx_packets = pp->rx_stats.packets;
-		stats->rx_bytes	= pp->rx_stats.bytes;
-	} while (u64_stats_fetch_retry_bh(&pp->rx_stats.syncp, start));
+		cpu_stats = per_cpu_ptr(pp->stats, cpu);
+		do {
+			start = u64_stats_fetch_begin_bh(&cpu_stats->syncp);
+			rx_packets = cpu_stats->rx_packets;
+			rx_bytes   = cpu_stats->rx_bytes;
+			tx_packets = cpu_stats->tx_packets;
+			tx_bytes   = cpu_stats->tx_bytes;
+		} while (u64_stats_fetch_retry_bh(&cpu_stats->syncp, start));
 
-
-	do {
-		start = u64_stats_fetch_begin_bh(&pp->tx_stats.syncp);
-		stats->tx_packets = pp->tx_stats.packets;
-		stats->tx_bytes	= pp->tx_stats.bytes;
-	} while (u64_stats_fetch_retry_bh(&pp->tx_stats.syncp, start));
+		stats->rx_packets += rx_packets;
+		stats->rx_bytes   += rx_bytes;
+		stats->tx_packets += tx_packets;
+		stats->tx_bytes   += tx_bytes;
+	}
 
 	stats->rx_errors	= dev->stats.rx_errors;
 	stats->rx_dropped	= dev->stats.rx_dropped;
@@ -1416,10 +1425,12 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo,
 	}
 
 	if (rcvd_pkts) {
-		u64_stats_update_begin(&pp->rx_stats.syncp);
-		pp->rx_stats.packets += rcvd_pkts;
-		pp->rx_stats.bytes   += rcvd_bytes;
-		u64_stats_update_end(&pp->rx_stats.syncp);
+		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+
+		u64_stats_update_begin(&stats->syncp);
+		stats->rx_packets += rcvd_pkts;
+		stats->rx_bytes   += rcvd_bytes;
+		u64_stats_update_end(&stats->syncp);
 	}
 
 	/* Update rxq management counters */
@@ -1552,11 +1563,12 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev)
 
 out:
 	if (frags > 0) {
-		u64_stats_update_begin(&pp->tx_stats.syncp);
-		pp->tx_stats.packets++;
-		pp->tx_stats.bytes += skb->len;
-		u64_stats_update_end(&pp->tx_stats.syncp);
+		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
 
+		u64_stats_update_begin(&stats->syncp);
+		stats->tx_packets++;
+		stats->tx_bytes  += skb->len;
+		u64_stats_update_end(&stats->syncp);
 	} else {
 		dev->stats.tx_dropped++;
 		dev_kfree_skb_any(skb);
@@ -2758,6 +2770,13 @@ static int mvneta_probe(struct platform_device *pdev)
 
 	clk_prepare_enable(pp->clk);
 
+	/* Alloc per-cpu stats */
+	pp->stats = alloc_percpu(struct mvneta_pcpu_stats);
+	if (!pp->stats) {
+		err = -ENOMEM;
+		goto err_clk;
+	}
+
 	pp->tx_done_timer.data = (unsigned long)dev;
 
 	pp->tx_ring_size = MVNETA_MAX_TXD;
@@ -2769,7 +2788,7 @@ static int mvneta_probe(struct platform_device *pdev)
 	err = mvneta_init(pp, phy_addr);
 	if (err < 0) {
 		dev_err(&pdev->dev, "can't init eth hal\n");
-		goto err_clk;
+		goto err_free_stats;
 	}
 	mvneta_port_power_up(pp, phy_mode);
 
@@ -2798,6 +2817,8 @@ static int mvneta_probe(struct platform_device *pdev)
 
 err_deinit:
 	mvneta_deinit(pp);
+err_free_stats:
+	free_percpu(pp->stats);
 err_clk:
 	clk_disable_unprepare(pp->clk);
 err_unmap:
@@ -2818,6 +2839,7 @@ static int mvneta_remove(struct platform_device *pdev)
 	unregister_netdev(dev);
 	mvneta_deinit(pp);
 	clk_disable_unprepare(pp->clk);
+	free_percpu(pp->stats);
 	iounmap(pp->base);
 	irq_dispose_mapping(dev->irq);
 	free_netdev(dev);

From aaf7035af079d55b52179c6b5bd5f8d82fec696b Mon Sep 17 00:00:00 2001
From: willy tarreau <w@1wt.eu>
Date: Thu, 16 Jan 2014 08:20:09 +0100
Subject: [PATCH 0135/1185] net: mvneta: do not schedule in mvneta_tx_timeout

commit 290213667ab53a95456397763205e4b1e30f46b5 upstream.

If a queue timeout is reported, we can oops because mvneta_tx_timeout()
sleeps (msleep() in mvneta_stop_dev()) while its caller is in atomic
context, as shown below:

  mvneta d0070000.ethernet eth0: tx timeout
  BUG: scheduling while atomic: bash/1528/0x00000100
  Modules linked in: slhttp_ethdiv(C) [last unloaded: slhttp_ethdiv]
  CPU: 2 PID: 1528 Comm: bash Tainted: G        WC   3.13.0-rc4-mvebu-nf #180
  [<c0011bd9>] (unwind_backtrace+0x1/0x98) from [<c000f1ab>] (show_stack+0xb/0xc)
  [<c000f1ab>] (show_stack+0xb/0xc) from [<c02ad323>] (dump_stack+0x4f/0x64)
  [<c02ad323>] (dump_stack+0x4f/0x64) from [<c02abe67>] (__schedule_bug+0x37/0x4c)
  [<c02abe67>] (__schedule_bug+0x37/0x4c) from [<c02ae261>] (__schedule+0x325/0x3ec)
  [<c02ae261>] (__schedule+0x325/0x3ec) from [<c02adb97>] (schedule_timeout+0xb7/0x118)
  [<c02adb97>] (schedule_timeout+0xb7/0x118) from [<c0020a67>] (msleep+0xf/0x14)
  [<c0020a67>] (msleep+0xf/0x14) from [<c01dcbe5>] (mvneta_stop_dev+0x21/0x194)
  [<c01dcbe5>] (mvneta_stop_dev+0x21/0x194) from [<c01dcfe9>] (mvneta_tx_timeout+0x19/0x24)
  [<c01dcfe9>] (mvneta_tx_timeout+0x19/0x24) from [<c024afc7>] (dev_watchdog+0x18b/0x1c4)
  [<c024afc7>] (dev_watchdog+0x18b/0x1c4) from [<c0020b53>] (call_timer_fn.isra.27+0x17/0x5c)
  [<c0020b53>] (call_timer_fn.isra.27+0x17/0x5c) from [<c0020cad>] (run_timer_softirq+0x115/0x170)
  [<c0020cad>] (run_timer_softirq+0x115/0x170) from [<c001ccb9>] (__do_softirq+0xbd/0x1a8)
  [<c001ccb9>] (__do_softirq+0xbd/0x1a8) from [<c001cfad>] (irq_exit+0x61/0x98)
  [<c001cfad>] (irq_exit+0x61/0x98) from [<c000d4bf>] (handle_IRQ+0x27/0x60)
  [<c000d4bf>] (handle_IRQ+0x27/0x60) from [<c000843b>] (armada_370_xp_handle_irq+0x33/0xc8)
  [<c000843b>] (armada_370_xp_handle_irq+0x33/0xc8) from [<c000fba9>] (__irq_usr+0x49/0x60)

Ben Hutchings proposed a better fix that moves the reset into scheduled
work, but while it fixed this panic, it caused other random freezes and
panics, proving that the reset sequence in the driver is unreliable and
that additional fixes should be investigated.

When sending multiple streams over a link limited to 100 Mbps, Tx timeouts
happen from time to time, and the driver recovers correctly only when the
timeout callback (mvneta_tx_timeout) is disabled, so remove it.

Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Gregory CLEMENT <gregory.clement@free-electrons.com>
Cc: Ben Hutchings <ben@decadent.org.uk>
Tested-by: Arnaud Ebalard <arno@natisbad.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/marvell/mvneta.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 49642c086109..d3459d81ca2f 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2207,16 +2207,6 @@ static void mvneta_stop_dev(struct mvneta_port *pp)
 	mvneta_rx_reset(pp);
 }
 
-/* tx timeout callback - display a message and stop/start the network device */
-static void mvneta_tx_timeout(struct net_device *dev)
-{
-	struct mvneta_port *pp = netdev_priv(dev);
-
-	netdev_info(dev, "tx timeout\n");
-	mvneta_stop_dev(pp);
-	mvneta_start_dev(pp);
-}
-
 /* Return positive if MTU is valid */
 static int mvneta_check_mtu_valid(struct net_device *dev, int mtu)
 {
@@ -2567,7 +2557,6 @@ static const struct net_device_ops mvneta_netdev_ops = {
 	.ndo_set_rx_mode     = mvneta_set_rx_mode,
 	.ndo_set_mac_address = mvneta_set_mac_addr,
 	.ndo_change_mtu      = mvneta_change_mtu,
-	.ndo_tx_timeout      = mvneta_tx_timeout,
 	.ndo_get_stats64     = mvneta_get_stats64,
 };
 

From 15ca23fcd05bd77099da3ced60ea64a21d7e9252 Mon Sep 17 00:00:00 2001
From: willy tarreau <w@1wt.eu>
Date: Thu, 16 Jan 2014 08:20:10 +0100
Subject: [PATCH 0136/1185] net: mvneta: add missing bit descriptions for
 interrupt masks and causes

commit 40ba35e74fa56866918d2f3bc0528b5b92725d5e upstream.

Marvell has not published the chip's datasheet yet, so it's very hard
to find the relevant bits to manipulate to change the IRQ behaviour.
Fortunately, these bits are described in the proprietary LSP patch set
which is publicly available here :

    http://www.plugcomputer.org/downloads/mirabox/

So let's put them back in the driver in order to reduce the burden of
current and future maintenance.

Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Gregory CLEMENT <gregory.clement@free-electrons.com>
Tested-by: Arnaud Ebalard <arno@natisbad.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/marvell/mvneta.c | 44 +++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index d3459d81ca2f..05906a19af81 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -99,16 +99,56 @@
 #define      MVNETA_CPU_RXQ_ACCESS_ALL_MASK      0x000000ff
 #define      MVNETA_CPU_TXQ_ACCESS_ALL_MASK      0x0000ff00
 #define MVNETA_RXQ_TIME_COAL_REG(q)              (0x2580 + ((q) << 2))
+
+/* Exception Interrupt Port/Queue Cause register */
+
 #define MVNETA_INTR_NEW_CAUSE                    0x25a0
-#define      MVNETA_RX_INTR_MASK(nr_rxqs)        (((1 << nr_rxqs) - 1) << 8)
 #define MVNETA_INTR_NEW_MASK                     0x25a4
+
+/* bits  0..7  = TXQ SENT, one bit per queue.
+ * bits  8..15 = RXQ OCCUP, one bit per queue.
+ * bits 16..23 = RXQ FREE, one bit per queue.
+ * bit  29 = OLD_REG_SUM, see old reg ?
+ * bit  30 = TX_ERR_SUM, one bit for 4 ports
+ * bit  31 = MISC_SUM,   one bit for 4 ports
+ */
+#define      MVNETA_TX_INTR_MASK(nr_txqs)        (((1 << nr_txqs) - 1) << 0)
+#define      MVNETA_TX_INTR_MASK_ALL             (0xff << 0)
+#define      MVNETA_RX_INTR_MASK(nr_rxqs)        (((1 << nr_rxqs) - 1) << 8)
+#define      MVNETA_RX_INTR_MASK_ALL             (0xff << 8)
+
 #define MVNETA_INTR_OLD_CAUSE                    0x25a8
 #define MVNETA_INTR_OLD_MASK                     0x25ac
+
+/* Data Path Port/Queue Cause Register */
 #define MVNETA_INTR_MISC_CAUSE                   0x25b0
 #define MVNETA_INTR_MISC_MASK                    0x25b4
+
+#define      MVNETA_CAUSE_PHY_STATUS_CHANGE      BIT(0)
+#define      MVNETA_CAUSE_LINK_CHANGE            BIT(1)
+#define      MVNETA_CAUSE_PTP                    BIT(4)
+
+#define      MVNETA_CAUSE_INTERNAL_ADDR_ERR      BIT(7)
+#define      MVNETA_CAUSE_RX_OVERRUN             BIT(8)
+#define      MVNETA_CAUSE_RX_CRC_ERROR           BIT(9)
+#define      MVNETA_CAUSE_RX_LARGE_PKT           BIT(10)
+#define      MVNETA_CAUSE_TX_UNDERUN             BIT(11)
+#define      MVNETA_CAUSE_PRBS_ERR               BIT(12)
+#define      MVNETA_CAUSE_PSC_SYNC_CHANGE        BIT(13)
+#define      MVNETA_CAUSE_SERDES_SYNC_ERR        BIT(14)
+
+#define      MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT    16
+#define      MVNETA_CAUSE_BMU_ALLOC_ERR_ALL_MASK   (0xF << MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT)
+#define      MVNETA_CAUSE_BMU_ALLOC_ERR_MASK(pool) (1 << (MVNETA_CAUSE_BMU_ALLOC_ERR_SHIFT + (pool)))
+
+#define      MVNETA_CAUSE_TXQ_ERROR_SHIFT        24
+#define      MVNETA_CAUSE_TXQ_ERROR_ALL_MASK     (0xFF << MVNETA_CAUSE_TXQ_ERROR_SHIFT)
+#define      MVNETA_CAUSE_TXQ_ERROR_MASK(q)      (1 << (MVNETA_CAUSE_TXQ_ERROR_SHIFT + (q)))
+
 #define MVNETA_INTR_ENABLE                       0x25b8
 #define      MVNETA_TXQ_INTR_ENABLE_ALL_MASK     0x0000ff00
-#define      MVNETA_RXQ_INTR_ENABLE_ALL_MASK     0xff000000
+#define      MVNETA_RXQ_INTR_ENABLE_ALL_MASK     0xff000000  // note: neta says it's 0x000000FF
+
 #define MVNETA_RXQ_CMD                           0x2680
 #define      MVNETA_RXQ_DISABLE_SHIFT            8
 #define      MVNETA_RXQ_ENABLE_MASK              0x000000ff

From a733b535a48be3b3bbf04de22f048d850dd5b0c6 Mon Sep 17 00:00:00 2001
From: willy tarreau <w@1wt.eu>
Date: Thu, 16 Jan 2014 08:20:11 +0100
Subject: [PATCH 0137/1185] net: mvneta: replace Tx timer with a real interrupt

commit 71f6d1b31fb1f278a345a30a2180515adc7d80ae upstream.

Right now the mvneta driver doesn't handle Tx IRQ, and relies on two
mechanisms to flush Tx descriptors : a flush at the end of mvneta_tx()
and a timer. If a burst of packets is emitted faster than the device
can send them, then the queue is stopped until next wake-up of the
timer 10ms later. This causes jerky output traffic with bursts and
pauses, making it difficult to reach line rate with very few streams.

A test on UDP traffic shows that it's not possible to go beyond 134
Mbps / 12 kpps of outgoing traffic with 1500-bytes IP packets. Routed
traffic tends to observe pauses as well if the traffic is bursty,
making it even burstier after the wake-up.

It seems that this feature was inherited from the original driver but
nothing there mentions any reason for not using the interrupt instead,
which the chip supports.

Thus, this patch enables Tx interrupts and removes the timer. It does
the two at once because it's not really possible to make the two
mechanisms coexist, so a split patch doesn't make sense.

First tests performed on a Mirabox (Armada 370) show that less CPU
seems to be used when sending traffic. One reason might be that we now
call the mvneta_tx_done_gbe() with a mask indicating which queues have
been done instead of looping over all of them.

The same UDP test above now happily reaches 987 Mbps / 87.7 kpps.
Single-stream TCP traffic can now more easily reach line rate. HTTP
transfers of 1 MB objects over a single connection went from 730 to
840 Mbps. It is even possible to go significantly higher (>900 Mbps)
by tweaking tcp_tso_win_divisor.

Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Gregory CLEMENT <gregory.clement@free-electrons.com>
Cc: Arnaud Ebalard <arno@natisbad.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Arnaud Ebalard <arno@natisbad.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/marvell/mvneta.c | 72 +++++----------------------
 1 file changed, 12 insertions(+), 60 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 05906a19af81..f8821ce27802 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -214,9 +214,6 @@
 #define MVNETA_RX_COAL_PKTS		32
 #define MVNETA_RX_COAL_USEC		100
 
-/* Timer */
-#define MVNETA_TX_DONE_TIMER_PERIOD	10
-
 /* Napi polling weight */
 #define MVNETA_RX_POLL_WEIGHT		64
 
@@ -272,16 +269,11 @@ struct mvneta_port {
 	void __iomem *base;
 	struct mvneta_rx_queue *rxqs;
 	struct mvneta_tx_queue *txqs;
-	struct timer_list tx_done_timer;
 	struct net_device *dev;
 
 	u32 cause_rx_tx;
 	struct napi_struct napi;
 
-	/* Flags */
-	unsigned long flags;
-#define MVNETA_F_TX_DONE_TIMER_BIT  0
-
 	/* Napi weight */
 	int weight;
 
@@ -1112,17 +1104,6 @@ static void mvneta_tx_done_pkts_coal_set(struct mvneta_port *pp,
 	txq->done_pkts_coal = value;
 }
 
-/* Trigger tx done timer in MVNETA_TX_DONE_TIMER_PERIOD msecs */
-static void mvneta_add_tx_done_timer(struct mvneta_port *pp)
-{
-	if (test_and_set_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags) == 0) {
-		pp->tx_done_timer.expires = jiffies +
-			msecs_to_jiffies(MVNETA_TX_DONE_TIMER_PERIOD);
-		add_timer(&pp->tx_done_timer);
-	}
-}
-
-
 /* Handle rx descriptor fill by setting buf_cookie and buf_phys_addr */
 static void mvneta_rx_desc_fill(struct mvneta_rx_desc *rx_desc,
 				u32 phys_addr, u32 cookie)
@@ -1614,15 +1595,6 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev)
 		dev_kfree_skb_any(skb);
 	}
 
-	if (txq->count >= MVNETA_TXDONE_COAL_PKTS)
-		mvneta_txq_done(pp, txq);
-
-	/* If after calling mvneta_txq_done, count equals
-	 * frags, we need to set the timer
-	 */
-	if (txq->count == frags && frags > 0)
-		mvneta_add_tx_done_timer(pp);
-
 	return NETDEV_TX_OK;
 }
 
@@ -1898,14 +1870,22 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
 
 	/* Read cause register */
 	cause_rx_tx = mvreg_read(pp, MVNETA_INTR_NEW_CAUSE) &
-		MVNETA_RX_INTR_MASK(rxq_number);
+		(MVNETA_RX_INTR_MASK(rxq_number) | MVNETA_TX_INTR_MASK(txq_number));
+
+	/* Release Tx descriptors */
+	if (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL) {
+		int tx_todo = 0;
+
+		mvneta_tx_done_gbe(pp, (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL), &tx_todo);
+		cause_rx_tx &= ~MVNETA_TX_INTR_MASK_ALL;
+	}
 
 	/* For the case where the last mvneta_poll did not process all
 	 * RX packets
 	 */
 	cause_rx_tx |= pp->cause_rx_tx;
 	if (rxq_number > 1) {
-		while ((cause_rx_tx != 0) && (budget > 0)) {
+		while ((cause_rx_tx & MVNETA_RX_INTR_MASK_ALL) && (budget > 0)) {
 			int count;
 			struct mvneta_rx_queue *rxq;
 			/* get rx queue number from cause_rx_tx */
@@ -1937,7 +1917,7 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
 		napi_complete(napi);
 		local_irq_save(flags);
 		mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-			    MVNETA_RX_INTR_MASK(rxq_number));
+			    MVNETA_RX_INTR_MASK(rxq_number) | MVNETA_TX_INTR_MASK(txq_number));
 		local_irq_restore(flags);
 	}
 
@@ -1945,26 +1925,6 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
 	return rx_done;
 }
 
-/* tx done timer callback */
-static void mvneta_tx_done_timer_callback(unsigned long data)
-{
-	struct net_device *dev = (struct net_device *)data;
-	struct mvneta_port *pp = netdev_priv(dev);
-	int tx_done = 0, tx_todo = 0;
-
-	if (!netif_running(dev))
-		return ;
-
-	clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags);
-
-	tx_done = mvneta_tx_done_gbe(pp,
-				     (((1 << txq_number) - 1) &
-				      MVNETA_CAUSE_TXQ_SENT_DESC_ALL_MASK),
-				     &tx_todo);
-	if (tx_todo > 0)
-		mvneta_add_tx_done_timer(pp);
-}
-
 /* Handle rxq fill: allocates rxq skbs; called when initializing a port */
 static int mvneta_rxq_fill(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 			   int num)
@@ -2214,7 +2174,7 @@ static void mvneta_start_dev(struct mvneta_port *pp)
 
 	/* Unmask interrupts */
 	mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-		    MVNETA_RX_INTR_MASK(rxq_number));
+		    MVNETA_RX_INTR_MASK(rxq_number) | MVNETA_TX_INTR_MASK(txq_number));
 
 	phy_start(pp->phy_dev);
 	netif_tx_start_all_queues(pp->dev);
@@ -2475,8 +2435,6 @@ static int mvneta_stop(struct net_device *dev)
 	free_irq(dev->irq, pp);
 	mvneta_cleanup_rxqs(pp);
 	mvneta_cleanup_txqs(pp);
-	del_timer(&pp->tx_done_timer);
-	clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags);
 
 	return 0;
 }
@@ -2777,10 +2735,6 @@ static int mvneta_probe(struct platform_device *pdev)
 
 	pp = netdev_priv(dev);
 
-	pp->tx_done_timer.function = mvneta_tx_done_timer_callback;
-	init_timer(&pp->tx_done_timer);
-	clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags);
-
 	pp->weight = MVNETA_RX_POLL_WEIGHT;
 	pp->phy_node = phy_node;
 	pp->phy_interface = phy_mode;
@@ -2806,8 +2760,6 @@ static int mvneta_probe(struct platform_device *pdev)
 		goto err_clk;
 	}
 
-	pp->tx_done_timer.data = (unsigned long)dev;
-
 	pp->tx_ring_size = MVNETA_MAX_TXD;
 	pp->rx_ring_size = MVNETA_MAX_RXD;
 

From 6718de2340c5865323f38644627b6b382e25fe26 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Mon, 14 Jul 2014 17:02:31 -0700
Subject: [PATCH 0138/1185] net/l2tp: don't fall back on UDP [get|set]sockopt

commit 3cf521f7dc87c031617fd47e4b7aa2593c2f3daf upstream.

The l2tp [get|set]sockopt() code has fallen back to the UDP functions
for socket option levels != SOL_PPPOL2TP since day one, but that has
never actually worked, since the l2tp socket isn't an inet socket.

As David Miller points out:

  "If we wanted this to work, it'd have to look up the tunnel and then
   use tunnel->sk, but I wonder how useful that would be"

Since this can never have worked, nobody could possibly have depended
on that functionality, so just remove the broken code and return -EINVAL.

Reported-by: Sasha Levin <sasha.levin@oracle.com>
Acked-by: James Chapman <jchapman@katalix.com>
Acked-by: David Miller <davem@davemloft.net>
Cc: Phil Turnbull <phil.turnbull@oracle.com>
Cc: Vegard Nossum <vegard.nossum@oracle.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/l2tp/l2tp_ppp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 9a0e5874e73e..164fa9dcd97d 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1365,7 +1365,7 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
 	int err;
 
 	if (level != SOL_PPPOL2TP)
-		return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+		return -EINVAL;
 
 	if (optlen < sizeof(int))
 		return -EINVAL;
@@ -1491,7 +1491,7 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
 	struct pppol2tp_session *ps;
 
 	if (level != SOL_PPPOL2TP)
-		return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+		return -EINVAL;
 
 	if (get_user(len, optlen))
 		return -EFAULT;

From 81513d14716eb555b443e7e2e2b862f094f8b731 Mon Sep 17 00:00:00 2001
From: Minfei Huang <huangminfei@ucloud.cn>
Date: Wed, 4 Jun 2014 16:11:53 -0700
Subject: [PATCH 0139/1185] lib/btree.c: fix leak of whole btree nodes

commit c75b53af2f0043aff500af0a6f878497bef41bca upstream.

I use btree from 3.14-rc2 in my own module.  When the btree module is
removed, a warning arises:

 kmem_cache_destroy btree_node: Slab cache still has objects
 CPU: 13 PID: 9150 Comm: rmmod Tainted: GF          O 3.14.0-rc2 #1
 Hardware name: Inspur NF5270M3/NF5270M3, BIOS CHEETAH_2.1.3 09/10/2013
 Call Trace:
   dump_stack+0x49/0x5d
   kmem_cache_destroy+0xcf/0xe0
   btree_module_exit+0x10/0x12 [btree]
   SyS_delete_module+0x198/0x1f0
   system_call_fastpath+0x16/0x1b

The cause is that it doesn't release the last btree node, when height = 1
and fill = 1.

[akpm@linux-foundation.org: remove unneeded test of NULL]
Signed-off-by: Minfei Huang <huangminfei@ucloud.cn>
Cc: Joern Engel <joern@logfs.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/btree.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/btree.c b/lib/btree.c
index f9a484676cb6..4264871ea1a0 100644
--- a/lib/btree.c
+++ b/lib/btree.c
@@ -198,6 +198,7 @@ EXPORT_SYMBOL_GPL(btree_init);
 
 void btree_destroy(struct btree_head *head)
 {
+	mempool_free(head->node, head->mempool);
 	mempool_destroy(head->mempool);
 	head->mempool = NULL;
 }

From d12cb4b5cd29230b0def8c8fec29f59c33592a46 Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Wed, 9 Jul 2014 13:18:18 -0400
Subject: [PATCH 0140/1185] x86/espfix/xen: Fix allocation of pages for
 paravirt page tables

commit 8762e5092828c4dc0f49da5a47a644c670df77f3 upstream.

init_espfix_ap() is currently off by one level when informing the hypervisor
that the allocated pages will be used for the ministacks' page tables.

The most immediate effect of this on a PV guest is that if
'stack_page = __get_free_page()' returns a non-zeroed-out page the hypervisor
will refuse to use it for a page table (which it shouldn't be anyway). This will
result in warnings by both Xen and Linux.

More importantly, a subsequent write to that page (again, by a PV guest) is
likely to result in a fatal page fault.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Link: http://lkml.kernel.org/r/1404926298-5565-1-git-send-email-boris.ostrovsky@oracle.com
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/espfix_64.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 6afbb16e9b79..94d857fb1033 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -175,7 +175,7 @@ void init_espfix_ap(void)
 	if (!pud_present(pud)) {
 		pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
 		pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
-		paravirt_alloc_pud(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
+		paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PUD_CLONES; n++)
 			set_pud(&pud_p[n], pud);
 	}
@@ -185,7 +185,7 @@ void init_espfix_ap(void)
 	if (!pmd_present(pmd)) {
 		pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
 		pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
-		paravirt_alloc_pmd(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
+		paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PMD_CLONES; n++)
 			set_pmd(&pmd_p[n], pmd);
 	}
@@ -193,7 +193,6 @@ void init_espfix_ap(void)
 	pte_p = pte_offset_kernel(&pmd, addr);
 	stack_page = (void *)__get_free_page(GFP_KERNEL);
 	pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
-	paravirt_alloc_pte(&init_mm, __pa(stack_page) >> PAGE_SHIFT);
 	for (n = 0; n < ESPFIX_PTE_CLONES; n++)
 		set_pte(&pte_p[n*PTE_STRIDE], pte);
 

From 4d36ba13d3adbeca71312803a98cf3793c8a0703 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 7 Aug 2014 14:42:40 -0700
Subject: [PATCH 0141/1185] Linux 3.10.52

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index f9f6ee59c61a..b94f00938acc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 10
-SUBLEVEL = 51
+SUBLEVEL = 52
 EXTRAVERSION =
 NAME = TOSSUG Baby Fish
 

From bbefc61c48f1ecb277112c56a6f1be37c7a4a7b9 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Mon, 11 Aug 2014 13:07:09 +0100
Subject: [PATCH 0142/1185] configs: Always enable DEBUG_INFO for perf tests

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 linaro/configs/linaro-base.conf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/linaro/configs/linaro-base.conf b/linaro/configs/linaro-base.conf
index eb9fe266ca04..0620d5ec3e1c 100644
--- a/linaro/configs/linaro-base.conf
+++ b/linaro/configs/linaro-base.conf
@@ -114,3 +114,4 @@ CONFIG_LSM_MMAP_MIN_ADDR=4096
 CONFIG_SECURITY_SELINUX=y
 CONFIG_EXT4_FS_SECURITY=y
 CONFIG_BLK_DEV_LOOP=y
+CONFIG_DEBUG_INFO=y

From 0beec5004ce4325838b327f45ddf716706890e75 Mon Sep 17 00:00:00 2001
From: Laura Abbott <lauraa@codeaurora.org>
Date: Fri, 24 Jan 2014 15:19:49 -0800
Subject: [PATCH 0143/1185] staging: android: ashmem: Avoid deadlock with
 mmap/shrink

Both ashmem_mmap and ashmem_shrink take the ashmem_mutex. It is
possible for ashmem_mmap to end up invoking ashmem_shrink through
memory reclaim while it still holds the mutex:

-000|mutex_lock(lock = 0x0)
-001|ashmem_shrink(?, sc = 0x0) <--- try to take ashmem_mutex again
-002|shrink_slab(shrink = 0xDA5F1CC0, nr_pages_scanned = 0, lru_pages
-002|=
-002|124)
-003|try_to_free_pages(zonelist = 0x0, ?, ?, ?)
-004|__alloc_pages_nodemask(gfp_mask = 21200, order = 1, zonelist =
-004|0xC11D0940,
-005|new_slab(s = 0xE4841E80, ?, node = -1)
-006|__slab_alloc.isra.43.constprop.50(s = 0xE4841E80, gfpflags =
-006|2148925462, ad
-007|kmem_cache_alloc(s = 0xE4841E80, gfpflags = 208)
-008|shmem_alloc_inode(?)
-009|alloc_inode(sb = 0xE480E800)
-010|new_inode_pseudo(?)
-011|new_inode(?)
-012|shmem_get_inode(sb = 0xE480E800, dir = 0x0, ?, dev = 0, flags =
-012|187)
-013|shmem_file_setup(?, ?, flags = 187)
-014|ashmem_mmap(?, vma = 0xC5D64210) <---- Acquire ashmem_mutex
-015|mmap_region(file = 0xDF8E2C00, addr = 1772974080, len = 233472,
-015|flags = 57,
-016|sys_mmap_pgoff(addr = 0, len = 230400, prot = 3, flags = 1, fd =
-016|157, pgoff
-017|ret_fast_syscall(asm)
-->|exception
-018|NUR:0x40097508(asm)
---|end of frame

Avoid this deadlock by using mutex_trylock in ashmem_shrink; if the mutex
is already held, do not attempt to shrink.

Change-Id: I222bbf55856d5849da813b730de0636c80966c8e
Reported-by: Matt Wagantall <mattw@codeaurora.org>
Reported-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
Reported-by: Osvaldo Banuelos <osvaldob@codeaurora.org>
Reported-by: Subbaraman Narayanamurthy <subbaram@codeaurora.org>
Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
---
 drivers/staging/android/ashmem.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index 3511b0840362..ccaef8b48eba 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -363,7 +363,9 @@ static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc)
 	if (!sc->nr_to_scan)
 		return lru_count;
 
-	mutex_lock(&ashmem_mutex);
+	if (!mutex_trylock(&ashmem_mutex))
+		return -1;
+
 	list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) {
 		loff_t start = range->pgstart * PAGE_SIZE;
 		loff_t end = (range->pgend + 1) * PAGE_SIZE;

From f83262408293795e5186e9d1bf66d525b24fdb12 Mon Sep 17 00:00:00 2001
From: Chris Redpath <chris.redpath@arm.com>
Date: Tue, 12 Aug 2014 14:50:54 +0100
Subject: [PATCH 0144/1185] HMP: Do not fork-boost tasks coming from PIDs <= 2

System services are generally started by init, whilst kernel threads
are started by kthreadd. We do not want to give those tasks a head
start, as this costs power for very little benefit. We do however
wish to do that for tasks which the user launches.

Further, some tasks allocate per-cpu timers directly after launch
which can lead to those tasks being always scheduled on a big CPU
when there is no computational need to do so. Not promoting services
to big CPUs on launch will prevent that unless a service allocates
its per-cpu resources after a period of intense computation, which
is not a common pattern.

Signed-off-by: Chris Redpath <chris.redpath@arm.com>
Signed-off-by: Jon Medhurst <tixy@linaro.org>
---
 include/linux/sched.h | 8 ++++++++
 kernel/sched/core.c   | 6 +++---
 kernel/sched/fair.c   | 2 +-
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0e2a546cdade..b36dd2de437d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -946,6 +946,14 @@ struct sched_avg {
 	u32 usage_avg_sum;
 };
 
+#ifdef CONFIG_SCHED_HMP
+/*
+ * We want to avoid boosting any processes forked from init (PID 1)
+ * and kthreadd (assumed to be PID 2).
+ */
+#define hmp_task_should_forkboost(task) ((task->parent && task->parent->pid > 2))
+#endif
+
 #ifdef CONFIG_SCHEDSTATS
 struct sched_statistics {
 	u64			wait_start;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5f242330ef85..65aaa1c78ca1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1629,9 +1629,9 @@ static void __sched_fork(struct task_struct *p)
 #ifdef CONFIG_SCHED_HMP
 	/* keep LOAD_AVG_MAX in sync with fair.c if load avg series is changed */
 #define LOAD_AVG_MAX 47742
-	if (p->mm) {
-		p->se.avg.hmp_last_up_migration = 0;
-		p->se.avg.hmp_last_down_migration = 0;
+	p->se.avg.hmp_last_up_migration = 0;
+	p->se.avg.hmp_last_down_migration = 0;
+	if (hmp_task_should_forkboost(p)) {
 		p->se.avg.load_avg_ratio = 1023;
 		p->se.avg.load_avg_contrib =
 				(1023 * scale_load_down(p->se.load.weight));
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 161da1ab3995..74a5adfefeb7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4358,7 +4358,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 
 #ifdef CONFIG_SCHED_HMP
 	/* always put non-kernel forking tasks on a big domain */
-	if (p->mm && (sd_flag & SD_BALANCE_FORK)) {
+	if (unlikely(sd_flag & SD_BALANCE_FORK) && hmp_task_should_forkboost(p)) {
 		new_cpu = hmp_select_faster_cpu(p, prev_cpu);
 		if (new_cpu != NR_CPUS) {
 			hmp_next_up_delay(&p->se, new_cpu);

From e482d95c1d1888f34cc3f7e6778806cfda6174ff Mon Sep 17 00:00:00 2001
From: Chris Redpath <chris.redpath@arm.com>
Date: Tue, 12 Aug 2014 14:50:55 +0100
Subject: [PATCH 0145/1185] hmp: Restrict ILB events if no CPU has > 1 task

Frequently in HMP, the big CPUs are only active with one task per
CPU and there may be idle CPUs in the big cluster. This patch avoids
triggering an idle balance in situations where none of the active
CPUs in the current HMP domain have > 1 tasks running.

When packing is enabled, only enforce this behaviour when we are
not in the smallest domain - there we idle balance whenever a CPU
is over the up_threshold regardless of tasks in case one needs to
be moved.

Signed-off-by: Chris Redpath <chris.redpath@arm.com>
Signed-off-by: Jon Medhurst <tixy@linaro.org>
---
 kernel/sched/fair.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 74a5adfefeb7..fd57f0be5b4e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6510,16 +6510,16 @@ static int nohz_test_cpu(int cpu)
  * Decide if the tasks on the busy CPUs in the
  * littlest domain would benefit from an idle balance
  */
-static int hmp_packing_ilb_needed(int cpu)
+static int hmp_packing_ilb_needed(int cpu, int ilb_needed)
 {
 	struct hmp_domain *hmp;
-	/* always allow ilb on non-slowest domain */
+	/* allow previous decision on non-slowest domain */
 	if (!hmp_cpu_is_slowest(cpu))
-		return 1;
+		return ilb_needed;
 
 	/* if disabled, use normal ILB behaviour */
 	if (!hmp_packing_enabled)
-		return 1;
+		return ilb_needed;
 
 	hmp = hmp_cpu_domain(cpu);
 	for_each_cpu_and(cpu, &hmp->cpus, nohz.idle_cpus_mask) {
@@ -6531,19 +6531,34 @@ static int hmp_packing_ilb_needed(int cpu)
 }
 #endif
 
+DEFINE_PER_CPU(cpumask_var_t, ilb_tmpmask);
+
 static inline int find_new_ilb(int call_cpu)
 {
 	int ilb = cpumask_first(nohz.idle_cpus_mask);
 #ifdef CONFIG_SCHED_HMP
-	int ilb_needed = 1;
+	int ilb_needed = 0;
+	int cpu;
+	struct cpumask* tmp = per_cpu(ilb_tmpmask, smp_processor_id());
 
 	/* restrict nohz balancing to occur in the same hmp domain */
 	ilb = cpumask_first_and(nohz.idle_cpus_mask,
 			&((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus);
 
+	/* check to see if it's necessary within this domain */
+	cpumask_andnot(tmp,
+			&((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus,
+			nohz.idle_cpus_mask);
+	for_each_cpu(cpu, tmp) {
+		if (cpu_rq(cpu)->nr_running > 1) {
+			ilb_needed = 1;
+			break;
+		}
+	}
+
 #ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
 	if (ilb < nr_cpu_ids)
-		ilb_needed = hmp_packing_ilb_needed(ilb);
+		ilb_needed = hmp_packing_ilb_needed(ilb, ilb_needed);
 #endif
 
 	if (ilb_needed && ilb < nr_cpu_ids && idle_cpu(ilb))

From 28cb7116eebfa0e4b36b154df0f1c6a251f3b97b Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 22 May 2013 18:28:38 +0200
Subject: [PATCH 0146/1185] firmware: Avoid deadlock of usermodehelper lock at
 shutdown

When a system reboots or shuts down, it tries to disable the
usermode helper via usermodehelper_disable().  This can block if a
driver started loading firmware beforehand and the load is stuck for
some reason.  For example, the dell_rbu driver loads its firmware in
non-hotplug mode and waits for user-space to clear the "loading" sysfs
flag.  If user-space never clears the flag, the driver waits forever
and thus blocks the reboot, too.

As a workaround, in this patch, the firmware class driver registers a
reboot notifier so that it can abort all pending f/w bufs before
issuing usermodehelper_disable().

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Acked-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Conflicts:
	drivers/base/firmware_class.c

Change-Id: I7ff6c198cd34090e55845b9d4035b1e5dc86226b
---
 drivers/base/firmware_class.c | 43 ++++++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 01e21037d8fe..55d682e6ecea 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -27,6 +27,7 @@
 #include <linux/pm.h>
 #include <linux/suspend.h>
 #include <linux/syscore_ops.h>
+#include <linux/reboot.h>
 
 #include <generated/utsrelease.h>
 
@@ -130,6 +131,7 @@ struct firmware_buf {
 	struct page **pages;
 	int nr_pages;
 	int page_array_size;
+	struct list_head pending_list;
 #endif
 	char fw_id[];
 };
@@ -171,6 +173,9 @@ static struct firmware_buf *__allocate_fw_buf(const char *fw_name,
 	strcpy(buf->fw_id, fw_name);
 	buf->fwc = fwc;
 	init_completion(&buf->completion);
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+	INIT_LIST_HEAD(&buf->pending_list);
+#endif
 
 	pr_debug("%s: fw-%s buf=%p\n", __func__, fw_name, buf);
 
@@ -446,10 +451,8 @@ static struct firmware_priv *to_firmware_priv(struct device *dev)
 	return container_of(dev, struct firmware_priv, dev);
 }
 
-static void fw_load_abort(struct firmware_priv *fw_priv)
+static void __fw_load_abort(struct firmware_buf *buf)
 {
-	struct firmware_buf *buf = fw_priv->buf;
-
 	/*
 	 * There is a small window in which user can write to 'loading'
 	 * between loading done and disappearance of 'loading'
@@ -457,8 +460,16 @@ static void fw_load_abort(struct firmware_priv *fw_priv)
 	if (test_bit(FW_STATUS_DONE, &buf->status))
 		return;
 
+	list_del_init(&buf->pending_list);
 	set_bit(FW_STATUS_ABORT, &buf->status);
 	complete_all(&buf->completion);
+}
+
+static void fw_load_abort(struct firmware_priv *fw_priv)
+{
+	struct firmware_buf *buf = fw_priv->buf;
+
+	__fw_load_abort(buf);
 
 	/* avoid user action after loading abort */
 	fw_priv->buf = NULL;
@@ -467,6 +478,25 @@ static void fw_load_abort(struct firmware_priv *fw_priv)
 #define is_fw_load_aborted(buf)	\
 	test_bit(FW_STATUS_ABORT, &(buf)->status)
 
+static LIST_HEAD(pending_fw_head);
+
+/* reboot notifier for avoid deadlock with usermode_lock */
+static int fw_shutdown_notify(struct notifier_block *unused1,
+			      unsigned long unused2, void *unused3)
+{
+	mutex_lock(&fw_lock);
+	while (!list_empty(&pending_fw_head))
+		__fw_load_abort(list_first_entry(&pending_fw_head,
+					       struct firmware_buf,
+					       pending_list));
+	mutex_unlock(&fw_lock);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block fw_shutdown_nb = {
+	.notifier_call = fw_shutdown_notify,
+};
+
 static ssize_t firmware_timeout_show(struct class *class,
 				     struct class_attribute *attr,
 				     char *buf)
@@ -619,6 +649,7 @@ static ssize_t firmware_loading_store(struct device *dev,
 			 * is completed.
 			 * */
 			fw_map_pages_buf(fw_buf);
+			list_del_init(&fw_buf->pending_list);
 			complete_all(&fw_buf->completion);
 			break;
 		}
@@ -868,6 +899,10 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent,
 		kobject_uevent(&fw_priv->dev.kobj, KOBJ_ADD);
 	}
 
+	mutex_lock(&fw_lock);
+	list_add(&buf->pending_list, &pending_fw_head);
+	mutex_unlock(&fw_lock);
+
 	wait_for_completion(&buf->completion);
 
 	cancel_delayed_work_sync(&fw_priv->timeout_work);
@@ -1526,6 +1561,7 @@ static int __init firmware_class_init(void)
 {
 	fw_cache_init();
 #ifdef CONFIG_FW_LOADER_USER_HELPER
+	register_reboot_notifier(&fw_shutdown_nb);
 	return class_register(&firmware_class);
 #else
 	return 0;
@@ -1539,6 +1575,7 @@ static void __exit firmware_class_exit(void)
 	unregister_pm_notifier(&fw_cache.pm_notify);
 #endif
 #ifdef CONFIG_FW_LOADER_USER_HELPER
+	unregister_reboot_notifier(&fw_shutdown_nb);
 	class_unregister(&firmware_class);
 #endif
 }

From f5980d099295d4113635ded88425834a38d17125 Mon Sep 17 00:00:00 2001
From: Dmitry Kravkov <Dmitry.Kravkov@qlogic.com>
Date: Thu, 24 Jul 2014 18:54:47 +0300
Subject: [PATCH 0147/1185] bnx2x: fix crash during TSO tunneling

[ Upstream commit fe26566d8a05151ba1dce75081f6270f73ec4ae1 ]

When a TSO packet is transmitted, an additional BD w/o mapping is used
to describe the packet. This BD needs special handling in tx
completion.

kernel: Call Trace:
kernel: <IRQ>  [<ffffffff815e19ba>] dump_stack+0x19/0x1b
kernel: [<ffffffff8105dee1>] warn_slowpath_common+0x61/0x80
kernel: [<ffffffff8105df5c>] warn_slowpath_fmt+0x5c/0x80
kernel: [<ffffffff814a8c0d>] ? find_iova+0x4d/0x90
kernel: [<ffffffff814ab0e2>] intel_unmap_page.part.36+0x142/0x160
kernel: [<ffffffff814ad0e6>] intel_unmap_page+0x26/0x30
kernel: [<ffffffffa01f55d7>] bnx2x_free_tx_pkt+0x157/0x2b0 [bnx2x]
kernel: [<ffffffffa01f8dac>] bnx2x_tx_int+0xac/0x220 [bnx2x]
kernel: [<ffffffff8101a0d9>] ? read_tsc+0x9/0x20
kernel: [<ffffffffa01f8fdb>] bnx2x_poll+0xbb/0x3c0 [bnx2x]
kernel: [<ffffffff814d041a>] net_rx_action+0x15a/0x250
kernel: [<ffffffff81067047>] __do_softirq+0xf7/0x290
kernel: [<ffffffff815f3a5c>] call_softirq+0x1c/0x30
kernel: [<ffffffff81014d25>] do_softirq+0x55/0x90
kernel: [<ffffffff810673e5>] irq_exit+0x115/0x120
kernel: [<ffffffff815f4358>] do_IRQ+0x58/0xf0
kernel: [<ffffffff815e94ad>] common_interrupt+0x6d/0x6d
kernel: <EOI>  [<ffffffff810bbff7>] ? clockevents_notify+0x127/0x140
kernel: [<ffffffff814834df>] ? cpuidle_enter_state+0x4f/0xc0
kernel: [<ffffffff81483615>] cpuidle_idle_call+0xc5/0x200
kernel: [<ffffffff8101bc7e>] arch_cpu_idle+0xe/0x30
kernel: [<ffffffff810b4725>] cpu_startup_entry+0xf5/0x290
kernel: [<ffffffff815cfee1>] start_secondary+0x265/0x27b
kernel: ---[ end trace 11aa7726f18d7e80 ]---

Fixes: a848ade408b ("bnx2x: add CSUM and TSO support for encapsulation protocols")
Reported-by: Yulong Pei <ypei@redhat.com>
Cc: Michal Schmidt <mschmidt@redhat.com>
Signed-off-by: Dmitry Kravkov <Dmitry.Kravkov@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h     | 1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 3dba2a70a00e..ec86177be1df 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -312,6 +312,7 @@ struct sw_tx_bd {
 	u8		flags;
 /* Set on the first BD descriptor when there is a split BD */
 #define BNX2X_TSO_SPLIT_BD		(1<<0)
+#define BNX2X_HAS_SECOND_PBD		(1<<1)
 };
 
 struct sw_rx_page {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index b04f7f128f49..372a7557e1fa 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -180,6 +180,12 @@ static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata,
 	--nbd;
 	bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
 
+	if (tx_buf->flags & BNX2X_HAS_SECOND_PBD) {
+		/* Skip second parse bd... */
+		--nbd;
+		bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
+	}
+
 	/* TSO headers+data bds share a common mapping. See bnx2x_tx_split() */
 	if (tx_buf->flags & BNX2X_TSO_SPLIT_BD) {
 		tx_data_bd = &txdata->tx_desc_ring[bd_idx].reg_bd;
@@ -3755,6 +3761,9 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			/* set encapsulation flag in start BD */
 			SET_FLAG(tx_start_bd->general_data,
 				 ETH_TX_START_BD_TUNNEL_EXIST, 1);
+
+			tx_buf->flags |= BNX2X_HAS_SECOND_PBD;
+
 			nbd++;
 		} else if (xmit_type & XMIT_CSUM) {
 			/* Set PBD in checksum offload case w/o encapsulation */

From ff1f69a89a613223c57c13190a6c9be928ac4b9d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 2 Jun 2014 05:26:03 -0700
Subject: [PATCH 0148/1185] inetpeer: get rid of ip_id_count

[ Upstream commit 73f156a6e8c1074ac6327e0abd1169e95eb66463 ]

Ideally, we would need to generate IP ID using a per destination IP
generator.

linux kernels used inet_peer cache for this purpose, but this had a huge
cost on servers disabling MTU discovery.

1) each inet_peer struct consumes 192 bytes

2) inetpeer cache uses a binary tree of inet_peer structs,
   with a nominal size of ~66000 elements under load.

3) lookups in this tree are hitting a lot of cache lines, as tree depth
   is about 20.

4) If server deals with many tcp flows, we have a high probability of
   not finding the inet_peer, allocating a fresh one, inserting it in
   the tree with same initial ip_id_count, (cf secure_ip_id())

5) We garbage collect inet_peer aggressively.

IP ID generation does not have to be 'perfect'

Goal is trying to avoid duplicates in a short period of time,
so that reassembly units have a chance to complete reassembly of
fragments belonging to one message before receiving other fragments
with a recycled ID.

We simply use an array of generators, and a Jenkins hash using the dst IP
as a key.

ipv6_select_ident() is put back into net/ipv6/ip6_output.c where it
belongs (it is only used from this file)

secure_ip_id() and secure_ipv6_id() no longer are needed.

Rename ip_select_ident_more() to ip_select_ident_segs() to avoid
unnecessary decrement/increment of the number of segments.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ppp/pptp.c          |  2 +-
 drivers/net/vxlan.c             |  2 +-
 include/net/inetpeer.h          | 16 ++---------
 include/net/ip.h                | 40 +++++++++++++++------------
 include/net/ipv6.h              | 11 +++++---
 include/net/secure_seq.h        |  2 --
 net/core/secure_seq.c           | 25 -----------------
 net/ipv4/igmp.c                 |  4 +--
 net/ipv4/inetpeer.c             | 18 ------------
 net/ipv4/ip_output.c            |  7 ++---
 net/ipv4/ip_tunnel.c            |  2 +-
 net/ipv4/ipmr.c                 |  2 +-
 net/ipv4/raw.c                  |  2 +-
 net/ipv4/route.c                | 49 ++++++++++++++-------------------
 net/ipv4/xfrm4_mode_tunnel.c    |  2 +-
 net/ipv6/ip6_output.c           | 15 ++++++++++
 net/ipv6/output_core.c          | 23 ----------------
 net/ipv6/sit.c                  |  2 +-
 net/netfilter/ipvs/ip_vs_xmit.c |  2 +-
 19 files changed, 81 insertions(+), 145 deletions(-)

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 7f10588fe668..8161c3f066a3 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -281,7 +281,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	nf_reset(skb);
 
 	skb->ip_summed = CHECKSUM_NONE;
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
 	ip_send_check(iph);
 
 	ip_local_out(skb);
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index fcbd4eee52cc..a1dc186c6f66 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1093,7 +1093,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	iph->daddr	= dst;
 	iph->saddr	= fl4.saddr;
 	iph->ttl	= ttl ? : ip4_dst_hoplimit(&rt->dst);
-	__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+	__ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
 
 	nf_reset(skb);
 
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 6ca347a0717e..bb06fd26a7bd 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -41,14 +41,13 @@ struct inet_peer {
 		struct rcu_head     gc_rcu;
 	};
 	/*
-	 * Once inet_peer is queued for deletion (refcnt == -1), following fields
-	 * are not available: rid, ip_id_count
+	 * Once inet_peer is queued for deletion (refcnt == -1), following field
+	 * is not available: rid
 	 * We can share memory with rcu_head to help keep inet_peer small.
 	 */
 	union {
 		struct {
 			atomic_t			rid;		/* Frag reception counter */
-			atomic_t			ip_id_count;	/* IP ID for the next packet */
 		};
 		struct rcu_head         rcu;
 		struct inet_peer	*gc_next;
@@ -166,7 +165,7 @@ extern void inetpeer_invalidate_tree(struct inet_peer_base *);
 extern void inetpeer_invalidate_family(int family);
 
 /*
- * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
+ * temporary check to make sure we dont access rid, tcp_ts,
  * tcp_ts_stamp if no refcount is taken on inet_peer
  */
 static inline void inet_peer_refcheck(const struct inet_peer *p)
@@ -174,13 +173,4 @@ static inline void inet_peer_refcheck(const struct inet_peer *p)
 	WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
 }
 
-
-/* can be called with or without local BH being disabled */
-static inline int inet_getid(struct inet_peer *p, int more)
-{
-	more++;
-	inet_peer_refcheck(p);
-	return atomic_add_return(more, &p->ip_id_count) - more;
-}
-
 #endif /* _NET_INETPEER_H */
diff --git a/include/net/ip.h b/include/net/ip.h
index 788f1d8a796f..dd72c8f93797 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -252,9 +252,19 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst)
 		 !(dst_metric_locked(dst, RTAX_MTU)));
 }
 
-extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);
+#define IP_IDENTS_SZ 2048u
+extern atomic_t *ip_idents;
 
-static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk)
+static inline u32 ip_idents_reserve(u32 hash, int segs)
+{
+	atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ;
+
+	return atomic_add_return(segs, id_ptr) - segs;
+}
+
+void __ip_select_ident(struct iphdr *iph, int segs);
+
+static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs)
 {
 	struct iphdr *iph = ip_hdr(skb);
 
@@ -264,24 +274,20 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s
 		 * does not change, they drop every other packet in
 		 * a TCP stream using header compression.
 		 */
-		iph->id = (sk && inet_sk(sk)->inet_daddr) ?
-					htons(inet_sk(sk)->inet_id++) : 0;
-	} else
-		__ip_select_ident(iph, dst, 0);
-}
-
-static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more)
-{
-	struct iphdr *iph = ip_hdr(skb);
-
-	if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) {
 		if (sk && inet_sk(sk)->inet_daddr) {
 			iph->id = htons(inet_sk(sk)->inet_id);
-			inet_sk(sk)->inet_id += 1 + more;
-		} else
+			inet_sk(sk)->inet_id += segs;
+		} else {
 			iph->id = 0;
-	} else
-		__ip_select_ident(iph, dst, more);
+		}
+	} else {
+		__ip_select_ident(iph, segs);
+	}
+}
+
+static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk)
+{
+	ip_select_ident_segs(skb, sk, 1);
 }
 
 /*
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 9e093fc33dab..087370ff05f1 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -530,14 +530,19 @@ static inline u32 ipv6_addr_hash(const struct in6_addr *a)
 }
 
 /* more secured version of ipv6_addr_hash() */
-static inline u32 ipv6_addr_jhash(const struct in6_addr *a)
+static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval)
 {
 	u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1];
 
 	return jhash_3words(v,
 			    (__force u32)a->s6_addr32[2],
 			    (__force u32)a->s6_addr32[3],
-			    ipv6_hash_secret);
+			    initval);
+}
+
+static inline u32 ipv6_addr_jhash(const struct in6_addr *a)
+{
+	return __ipv6_addr_jhash(a, ipv6_hash_secret);
 }
 
 static inline bool ipv6_addr_loopback(const struct in6_addr *a)
@@ -649,8 +654,6 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add
 	return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr));
 }
 
-extern void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt);
-
 /*
  *	Header manipulation
  */
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index c2e542b27a5a..b1c3d1c63c4e 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -3,8 +3,6 @@
 
 #include <linux/types.h>
 
-extern __u32 secure_ip_id(__be32 daddr);
-extern __u32 secure_ipv6_id(const __be32 daddr[4]);
 extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
 extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 				      __be16 dport);
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 8d9d05edd2eb..d0afc322b961 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -95,31 +95,6 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 #endif
 
 #ifdef CONFIG_INET
-__u32 secure_ip_id(__be32 daddr)
-{
-	u32 hash[MD5_DIGEST_WORDS];
-
-	net_secret_init();
-	hash[0] = (__force __u32) daddr;
-	hash[1] = net_secret[13];
-	hash[2] = net_secret[14];
-	hash[3] = net_secret[15];
-
-	md5_transform(hash, net_secret);
-
-	return hash[0];
-}
-
-__u32 secure_ipv6_id(const __be32 daddr[4])
-{
-	__u32 hash[4];
-
-	net_secret_init();
-	memcpy(hash, daddr, 16);
-	md5_transform(hash, net_secret);
-
-	return hash[0];
-}
 
 __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 				 __be16 sport, __be16 dport)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 38d63ca8a6b5..155adf8729c2 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -343,7 +343,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	pip->saddr    = fl4.saddr;
 	pip->protocol = IPPROTO_IGMP;
 	pip->tot_len  = 0;	/* filled in later */
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
 	((u8 *)&pip[1])[0] = IPOPT_RA;
 	((u8 *)&pip[1])[1] = 4;
 	((u8 *)&pip[1])[2] = 0;
@@ -687,7 +687,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	iph->daddr    = dst;
 	iph->saddr    = fl4.saddr;
 	iph->protocol = IPPROTO_IGMP;
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
 	((u8 *)&iph[1])[0] = IPOPT_RA;
 	((u8 *)&iph[1])[1] = 4;
 	((u8 *)&iph[1])[2] = 0;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 33d5537881ed..67140efc15fd 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -26,20 +26,7 @@
  *  Theory of operations.
  *  We keep one entry for each peer IP address.  The nodes contains long-living
  *  information about the peer which doesn't depend on routes.
- *  At this moment this information consists only of ID field for the next
- *  outgoing IP packet.  This field is incremented with each packet as encoded
- *  in inet_getid() function (include/net/inetpeer.h).
- *  At the moment of writing this notes identifier of IP packets is generated
- *  to be unpredictable using this code only for packets subjected
- *  (actually or potentially) to defragmentation.  I.e. DF packets less than
- *  PMTU in size when local fragmentation is disabled use a constant ID and do
- *  not use this code (see ip_select_ident() in include/net/ip.h).
  *
- *  Route cache entries hold references to our nodes.
- *  New cache entries get references via lookup by destination IP address in
- *  the avl tree.  The reference is grabbed only when it's needed i.e. only
- *  when we try to output IP packet which needs an unpredictable ID (see
- *  __ip_select_ident() in net/ipv4/route.c).
  *  Nodes are removed only when reference counter goes to 0.
  *  When it's happened the node may be removed when a sufficient amount of
  *  time has been passed since its last use.  The less-recently-used entry can
@@ -62,7 +49,6 @@
  *		refcnt: atomically against modifications on other CPU;
  *		   usually under some other lock to prevent node disappearing
  *		daddr: unchangeable
- *		ip_id_count: atomic value (no lock needed)
  */
 
 static struct kmem_cache *peer_cachep __read_mostly;
@@ -504,10 +490,6 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 		p->daddr = *daddr;
 		atomic_set(&p->refcnt, 1);
 		atomic_set(&p->rid, 0);
-		atomic_set(&p->ip_id_count,
-				(daddr->family == AF_INET) ?
-					secure_ip_id(daddr->addr.a4) :
-					secure_ipv6_id(daddr->addr.a6));
 		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 		p->rate_tokens = 0;
 		/* 60*HZ is arbitrary, but chosen enough high so that the first
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6ca5873d6175..5afbbbe03b0e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	iph->daddr    = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
 	iph->saddr    = saddr;
 	iph->protocol = sk->sk_protocol;
-	ip_select_ident(skb, &rt->dst, sk);
+	ip_select_ident(skb, sk);
 
 	if (opt && opt->opt.optlen) {
 		iph->ihl += opt->opt.optlen>>2;
@@ -394,8 +394,7 @@ int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
 		ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
 	}
 
-	ip_select_ident_more(skb, &rt->dst, sk,
-			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+	ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1);
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
@@ -1332,7 +1331,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 	iph->ttl = ttl;
 	iph->protocol = sk->sk_protocol;
 	ip_copy_addrs(iph, fl4);
-	ip_select_ident(skb, &rt->dst, sk);
+	ip_select_ident(skb, sk);
 
 	if (opt) {
 		iph->ihl += opt->optlen>>2;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 5642374cb751..84aa69caee59 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -691,7 +691,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 	iph->daddr	=	fl4.daddr;
 	iph->saddr	=	fl4.saddr;
 	iph->ttl	=	ttl;
-	__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+	__ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
 
 	iptunnel_xmit(skb, dev);
 	return;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 49797ed0917c..56d079b63ad3 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1661,7 +1661,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 	iph->protocol	=	IPPROTO_IPIP;
 	iph->ihl	=	5;
 	iph->tot_len	=	htons(skb->len);
-	ip_select_ident(skb, skb_dst(skb), NULL);
+	ip_select_ident(skb, NULL);
 	ip_send_check(iph);
 
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 402870fdfa0e..b4a1c42a627f 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -387,7 +387,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 		iph->check   = 0;
 		iph->tot_len = htons(length);
 		if (!iph->id)
-			ip_select_ident(skb, &rt->dst, NULL);
+			ip_select_ident(skb, NULL);
 
 		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2b9887becb5c..da2d4efc0bf7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -89,6 +89,7 @@
 #include <linux/rcupdate.h>
 #include <linux/times.h>
 #include <linux/slab.h>
+#include <linux/jhash.h>
 #include <net/dst.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
@@ -464,39 +465,23 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
 	return neigh_create(&arp_tbl, pkey, dev);
 }
 
-/*
- * Peer allocation may fail only in serious out-of-memory conditions.  However
- * we still can generate some output.
- * Random ID selection looks a bit dangerous because we have no chances to
- * select ID being unique in a reasonable period of time.
- * But broken packet identifier may be better than no packet at all.
- */
-static void ip_select_fb_ident(struct iphdr *iph)
+atomic_t *ip_idents __read_mostly;
+EXPORT_SYMBOL(ip_idents);
+
+void __ip_select_ident(struct iphdr *iph, int segs)
 {
-	static DEFINE_SPINLOCK(ip_fb_id_lock);
-	static u32 ip_fallback_id;
-	u32 salt;
+	static u32 ip_idents_hashrnd __read_mostly;
+	static bool hashrnd_initialized = false;
+	u32 hash, id;
 
-	spin_lock_bh(&ip_fb_id_lock);
-	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
-	iph->id = htons(salt & 0xFFFF);
-	ip_fallback_id = salt;
-	spin_unlock_bh(&ip_fb_id_lock);
-}
-
-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
-{
-	struct net *net = dev_net(dst->dev);
-	struct inet_peer *peer;
-
-	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
-	if (peer) {
-		iph->id = htons(inet_getid(peer, more));
-		inet_putpeer(peer);
-		return;
+	if (unlikely(!hashrnd_initialized)) {
+		hashrnd_initialized = true;
+		get_random_bytes(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
 	}
 
-	ip_select_fb_ident(iph);
+	hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd);
+	id = ip_idents_reserve(hash, segs);
+	iph->id = htons(id);
 }
 EXPORT_SYMBOL(__ip_select_ident);
 
@@ -2656,6 +2641,12 @@ int __init ip_rt_init(void)
 {
 	int rc = 0;
 
+	ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
+	if (!ip_idents)
+		panic("IP: failed to allocate ip_idents\n");
+
+	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
 	if (!ip_rt_acct)
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index b5663c37f089..e3f64831bc36 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -117,12 +117,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
 		0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
-	ip_select_ident(skb, dst->child, NULL);
 
 	top_iph->ttl = ip4_dst_hoplimit(dst->child);
 
 	top_iph->saddr = x->props.saddr.a4;
 	top_iph->daddr = x->id.daddr.a4;
+	ip_select_ident(skb, NULL);
 
 	return 0;
 }
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ffa8d295c56c..a425052ac274 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -540,6 +540,21 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	skb_copy_secmark(to, from);
 }
 
+static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
+{
+	static u32 ip6_idents_hashrnd __read_mostly;
+	static bool hashrnd_initialized = false;
+	u32 hash, id;
+
+	if (unlikely(!hashrnd_initialized)) {
+		hashrnd_initialized = true;
+		get_random_bytes(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
+	}
+	hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
+	id = ip_idents_reserve(hash, 1);
+	fhdr->identification = htonl(id);
+}
+
 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
 	struct sk_buff *frag;
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 3d2c81a66d6a..a5d465105b69 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -6,29 +6,6 @@
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
 
-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
-{
-	static atomic_t ipv6_fragmentation_id;
-	int ident;
-
-#if IS_ENABLED(CONFIG_IPV6)
-	if (rt && !(rt->dst.flags & DST_NOPEER)) {
-		struct inet_peer *peer;
-		struct net *net;
-
-		net = dev_net(rt->dst.dev);
-		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
-		if (peer) {
-			fhdr->identification = htonl(inet_getid(peer, 0));
-			inet_putpeer(peer);
-			return;
-		}
-	}
-#endif
-	ident = atomic_inc_return(&ipv6_fragmentation_id);
-	fhdr->identification = htonl(ident);
-}
-EXPORT_SYMBOL(ipv6_select_ident);
 
 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 {
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 540d58921007..8d22460a811b 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -919,7 +919,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 		iph->ttl	=	iph6->hop_limit;
 
 	skb->ip_summed = CHECKSUM_NONE;
-	ip_select_ident(skb, skb_dst(skb), NULL);
+	ip_select_ident(skb, NULL);
 	iptunnel_xmit(skb, dev);
 	return NETDEV_TX_OK;
 
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index c47444e4cf8c..7f0e1cf2d7e8 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -883,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	iph->daddr		=	cp->daddr.ip;
 	iph->saddr		=	saddr;
 	iph->ttl		=	old_iph->ttl;
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;

From 4176df017abbe847f0c09e5b839feb40e04b25ba Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 26 Jul 2014 08:58:10 +0200
Subject: [PATCH 0149/1185] ip: make IP identifiers less predictable

[ Upstream commit 04ca6973f7c1a0d8537f2d9906a0cf8e69886d75 ]

In "Counting Packets Sent Between Arbitrary Internet Hosts", Jeffrey and
Jedidiah describe ways of exploiting Linux IP identifier generation to
infer whether two machines are exchanging packets.

With commit 73f156a6e8c1 ("inetpeer: get rid of ip_id_count"), we
changed IP id generation, but this does not really prevent this
side-channel technique.

This patch adds a random amount of perturbation so that IP identifiers
for a given destination [1] are no longer monotonically increasing after
an idle period.

Note that prandom_u32_max(1) returns 0, so if a generator is used at most
once per jiffy, this patch inserts no hole in the ID sequence and does not
increase collision probability.

This is jiffies based, so in the worst case (HZ=1000), the id can
rollover after ~65 seconds of idle time, which should be fine.

We also change the hash used in __ip_select_ident() to not only hash
on daddr, but also saddr and protocol, so that ICMP probes can not be
used to infer information for other protocols.

For IPv6, add saddr into the hash as well, but not nexthdr.

If I ping the patched target, we can see that IDs are now hard to predict.

21:57:11.008086 IP (...)
    A > target: ICMP echo request, seq 1, length 64
21:57:11.010752 IP (... id 2081 ...)
    target > A: ICMP echo reply, seq 1, length 64

21:57:12.013133 IP (...)
    A > target: ICMP echo request, seq 2, length 64
21:57:12.015737 IP (... id 3039 ...)
    target > A: ICMP echo reply, seq 2, length 64

21:57:13.016580 IP (...)
    A > target: ICMP echo request, seq 3, length 64
21:57:13.019251 IP (... id 3437 ...)
    target > A: ICMP echo reply, seq 3, length 64

[1] TCP sessions use a per-flow ID generator not changed by this patch.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jeffrey Knockel <jeffk@cs.unm.edu>
Reported-by: Jedidiah R. Crandall <crandall@cs.unm.edu>
Cc: Willy Tarreau <w@1wt.eu>
Cc: Hannes Frederic Sowa <hannes@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/ip.h      | 11 +----------
 net/ipv4/route.c      | 36 +++++++++++++++++++++++++++++++++---
 net/ipv6/ip6_output.c |  2 ++
 3 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index dd72c8f93797..8695359982d1 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -252,16 +252,7 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst)
 		 !(dst_metric_locked(dst, RTAX_MTU)));
 }
 
-#define IP_IDENTS_SZ 2048u
-extern atomic_t *ip_idents;
-
-static inline u32 ip_idents_reserve(u32 hash, int segs)
-{
-	atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ;
-
-	return atomic_add_return(segs, id_ptr) - segs;
-}
-
+u32 ip_idents_reserve(u32 hash, int segs);
 void __ip_select_ident(struct iphdr *iph, int segs);
 
 static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index da2d4efc0bf7..d4d162eac4df 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -465,8 +465,35 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
 	return neigh_create(&arp_tbl, pkey, dev);
 }
 
-atomic_t *ip_idents __read_mostly;
-EXPORT_SYMBOL(ip_idents);
+#define IP_IDENTS_SZ 2048u
+struct ip_ident_bucket {
+	atomic_t	id;
+	u32		stamp32;
+};
+
+static struct ip_ident_bucket *ip_idents __read_mostly;
+
+/* In order to protect privacy, we add a perturbation to identifiers
+ * if one generator is seldom used. This makes hard for an attacker
+ * to infer how many packets were sent between two points in time.
+ */
+u32 ip_idents_reserve(u32 hash, int segs)
+{
+	struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ;
+	u32 old = ACCESS_ONCE(bucket->stamp32);
+	u32 now = (u32)jiffies;
+	u32 delta = 0;
+
+	if (old != now && cmpxchg(&bucket->stamp32, old, now) == old) {
+		u64 x = prandom_u32();
+
+		x *= (now - old);
+		delta = (u32)(x >> 32);
+	}
+
+	return atomic_add_return(segs + delta, &bucket->id) - segs;
+}
+EXPORT_SYMBOL(ip_idents_reserve);
 
 void __ip_select_ident(struct iphdr *iph, int segs)
 {
@@ -479,7 +506,10 @@ void __ip_select_ident(struct iphdr *iph, int segs)
 		get_random_bytes(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
 	}
 
-	hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd);
+	hash = jhash_3words((__force u32)iph->daddr,
+			    (__force u32)iph->saddr,
+			    iph->protocol,
+			    ip_idents_hashrnd);
 	id = ip_idents_reserve(hash, segs);
 	iph->id = htons(id);
 }
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a425052ac274..071edcba4158 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -551,6 +551,8 @@ static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
 		get_random_bytes(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
 	}
 	hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
+	hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash);
+
 	id = ip_idents_reserve(hash, 1);
 	fhdr->identification = htonl(id);
 }

From 15229fa9d4588b2d0e91ee81954c3a4f3c30dcb8 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Date: Sat, 26 Jul 2014 21:26:58 +0400
Subject: [PATCH 0150/1185] net: sendmsg: fix NULL pointer dereference

[ Upstream commit 40eea803c6b2cfaab092f053248cbeab3f368412 ]

Sasha's report:
	> While fuzzing with trinity inside a KVM tools guest running the latest -next
	> kernel with the KASAN patchset, I've stumbled on the following spew:
	>
	> [ 4448.949424] ==================================================================
	> [ 4448.951737] AddressSanitizer: user-memory-access on address 0
	> [ 4448.952988] Read of size 2 by thread T19638:
	> [ 4448.954510] CPU: 28 PID: 19638 Comm: trinity-c76 Not tainted 3.16.0-rc4-next-20140711-sasha-00046-g07d3099-dirty #813
	> [ 4448.956823]  ffff88046d86ca40 0000000000000000 ffff880082f37e78 ffff880082f37a40
	> [ 4448.958233]  ffffffffb6e47068 ffff880082f37a68 ffff880082f37a58 ffffffffb242708d
	> [ 4448.959552]  0000000000000000 ffff880082f37a88 ffffffffb24255b1 0000000000000000
	> [ 4448.961266] Call Trace:
	> [ 4448.963158] dump_stack (lib/dump_stack.c:52)
	> [ 4448.964244] kasan_report_user_access (mm/kasan/report.c:184)
	> [ 4448.965507] __asan_load2 (mm/kasan/kasan.c:352)
	> [ 4448.966482] ? netlink_sendmsg (net/netlink/af_netlink.c:2339)
	> [ 4448.967541] netlink_sendmsg (net/netlink/af_netlink.c:2339)
	> [ 4448.968537] ? get_parent_ip (kernel/sched/core.c:2555)
	> [ 4448.970103] sock_sendmsg (net/socket.c:654)
	> [ 4448.971584] ? might_fault (mm/memory.c:3741)
	> [ 4448.972526] ? might_fault (./arch/x86/include/asm/current.h:14 mm/memory.c:3740)
	> [ 4448.973596] ? verify_iovec (net/core/iovec.c:64)
	> [ 4448.974522] ___sys_sendmsg (net/socket.c:2096)
	> [ 4448.975797] ? put_lock_stats.isra.13 (./arch/x86/include/asm/preempt.h:98 kernel/locking/lockdep.c:254)
	> [ 4448.977030] ? lock_release_holdtime (kernel/locking/lockdep.c:273)
	> [ 4448.978197] ? lock_release_non_nested (kernel/locking/lockdep.c:3434 (discriminator 1))
	> [ 4448.979346] ? check_chain_key (kernel/locking/lockdep.c:2188)
	> [ 4448.980535] __sys_sendmmsg (net/socket.c:2181)
	> [ 4448.981592] ? trace_hardirqs_on_caller (kernel/locking/lockdep.c:2600)
	> [ 4448.982773] ? trace_hardirqs_on (kernel/locking/lockdep.c:2607)
	> [ 4448.984458] ? syscall_trace_enter (arch/x86/kernel/ptrace.c:1500 (discriminator 2))
	> [ 4448.985621] ? trace_hardirqs_on_caller (kernel/locking/lockdep.c:2600)
	> [ 4448.986754] SyS_sendmmsg (net/socket.c:2201)
	> [ 4448.987708] tracesys (arch/x86/kernel/entry_64.S:542)
	> [ 4448.988929] ==================================================================

This report means that we've come to netlink_sendmsg() with msg->msg_name == NULL and msg->msg_namelen > 0.

After this report there was no usual "Unable to handle kernel NULL pointer dereference"
and this gave me a clue that address 0 is mapped and contains valid socket address structure in it.

This bug was introduced in f3d3342602f8bcbf37d7c46641cb9bca7618eb1c
(net: rework recvmsg handler msg_name and msg_namelen logic).
Commit message states that:
	"Set msg->msg_name = NULL if user specified a NULL in msg_name but had a
	 non-null msg_namelen in verify_iovec/verify_compat_iovec. This doesn't
	 affect sendto as it would bail out earlier while trying to copy-in the
	 address."
But in fact this affects sendto when address 0 is mapped and contains
socket address structure in it. In such case copy-in address will succeed,
verify_iovec() function will successfully exit with msg->msg_namelen > 0
and msg->msg_name == NULL.

This patch fixes it by setting msg_namelen to 0 if msg_name == NULL.

Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Eric Dumazet <edumazet@google.com>
Cc: <stable@vger.kernel.org>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/compat.c     | 9 +++++----
 net/core/iovec.c | 6 +++---
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/compat.c b/net/compat.c
index f50161fb812e..cbc1a2a26587 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -85,7 +85,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
 {
 	int tot_len;
 
-	if (kern_msg->msg_namelen) {
+	if (kern_msg->msg_name && kern_msg->msg_namelen) {
 		if (mode == VERIFY_READ) {
 			int err = move_addr_to_kernel(kern_msg->msg_name,
 						      kern_msg->msg_namelen,
@@ -93,10 +93,11 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
 			if (err < 0)
 				return err;
 		}
-		if (kern_msg->msg_name)
-			kern_msg->msg_name = kern_address;
-	} else
+		kern_msg->msg_name = kern_address;
+	} else {
 		kern_msg->msg_name = NULL;
+		kern_msg->msg_namelen = 0;
+	}
 
 	tot_len = iov_from_user_compat_to_kern(kern_iov,
 					  (struct compat_iovec __user *)kern_msg->msg_iov,
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 9a31515fb8e3..2145b7150beb 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -39,7 +39,7 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
 {
 	int size, ct, err;
 
-	if (m->msg_namelen) {
+	if (m->msg_name && m->msg_namelen) {
 		if (mode == VERIFY_READ) {
 			void __user *namep;
 			namep = (void __user __force *) m->msg_name;
@@ -48,10 +48,10 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
 			if (err < 0)
 				return err;
 		}
-		if (m->msg_name)
-			m->msg_name = address;
+		m->msg_name = address;
 	} else {
 		m->msg_name = NULL;
+		m->msg_namelen = 0;
 	}
 
 	size = m->msg_iovlen * sizeof(struct iovec);

From 822fc8dd9dec94a3505998c5febdcb41729f7d57 Mon Sep 17 00:00:00 2001
From: Christoph Paasch <christoph.paasch@uclouvain.be>
Date: Tue, 29 Jul 2014 12:07:27 +0200
Subject: [PATCH 0151/1185] tcp: Fix integer-overflows in TCP veno

[ Upstream commit 45a07695bc64b3ab5d6d2215f9677e5b8c05a7d0 ]

In veno we do a multiplication of the cwnd and the rtt. This
may overflow and thus their result is stored in a u64. However, we first
need to cast the cwnd so that actually 64-bit arithmetic is done.

A first attempt at fixing 76f1017757aa0 ([TCP]: TCP Veno congestion
control) was made by 159131149c2 (tcp: Overflow bug in Vegas), but it
failed to add the required cast in tcp_veno_cong_avoid().

Fixes: 76f1017757aa0 ([TCP]: TCP Veno congestion control)
Signed-off-by: Christoph Paasch <christoph.paasch@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_veno.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index ac43cd747bce..b4d1858be550 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -144,7 +144,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 
 		rtt = veno->minrtt;
 
-		target_cwnd = (tp->snd_cwnd * veno->basertt);
+		target_cwnd = (u64)tp->snd_cwnd * veno->basertt;
 		target_cwnd <<= V_PARAM_SHIFT;
 		do_div(target_cwnd, rtt);
 

From 5191776cd6a25948c6a6622d40e6fe4c2442794a Mon Sep 17 00:00:00 2001
From: Christoph Paasch <christoph.paasch@uclouvain.be>
Date: Tue, 29 Jul 2014 13:40:57 +0200
Subject: [PATCH 0152/1185] tcp: Fix integer-overflow in TCP vegas

[ Upstream commit 1f74e613ded11517db90b2bd57e9464d9e0fb161 ]

In vegas we do a multiplication of the cwnd and the rtt. This
may overflow and thus their result is stored in a u64. However, we first
need to cast the cwnd so that actually 64-bit arithmetic is done.

Then, we need to do do_div to allow this to be used on 32-bit arches.

Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Laight <David.Laight@ACULAB.COM>
Cc: Doug Leith <doug.leith@nuim.ie>
Fixes: 8d3a564da34e (tcp: tcp_vegas cong avoid fix)
Signed-off-by: Christoph Paasch <christoph.paasch@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_vegas.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 80fa2bfd7ede..c042e529a11e 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -218,7 +218,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 			 * This is:
 			 *     (actual rate in segments) * baseRTT
 			 */
-			target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt;
+			target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT;
+			do_div(target_cwnd, rtt);
 
 			/* Calculate the difference between the window we had,
 			 * and the window we would like to have. This quantity

From 495d049f3e499227d28c89800bca27cc726b3bb2 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Tue, 22 Jul 2014 15:22:45 +0200
Subject: [PATCH 0153/1185] net: sctp: inherit auth_capable on INIT collisions

[ Upstream commit 1be9a950c646c9092fb3618197f7b6bfb50e82aa ]

Jason reported an oops caused by SCTP on his ARM machine with
SCTP authentication enabled:

Internal error: Oops: 17 [#1] ARM
CPU: 0 PID: 104 Comm: sctp-test Not tainted 3.13.0-68744-g3632f30c9b20-dirty #1
task: c6eefa40 ti: c6f52000 task.ti: c6f52000
PC is at sctp_auth_calculate_hmac+0xc4/0x10c
LR is at sg_init_table+0x20/0x38
pc : [<c024bb80>]    lr : [<c00f32dc>]    psr: 40000013
sp : c6f538e8  ip : 00000000  fp : c6f53924
r10: c6f50d80  r9 : 00000000  r8 : 00010000
r7 : 00000000  r6 : c7be4000  r5 : 00000000  r4 : c6f56254
r3 : c00c8170  r2 : 00000001  r1 : 00000008  r0 : c6f1e660
Flags: nZcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
Control: 0005397f  Table: 06f28000  DAC: 00000015
Process sctp-test (pid: 104, stack limit = 0xc6f521c0)
Stack: (0xc6f538e8 to 0xc6f54000)
[...]
Backtrace:
[<c024babc>] (sctp_auth_calculate_hmac+0x0/0x10c) from [<c0249af8>] (sctp_packet_transmit+0x33c/0x5c8)
[<c02497bc>] (sctp_packet_transmit+0x0/0x5c8) from [<c023e96c>] (sctp_outq_flush+0x7fc/0x844)
[<c023e170>] (sctp_outq_flush+0x0/0x844) from [<c023ef78>] (sctp_outq_uncork+0x24/0x28)
[<c023ef54>] (sctp_outq_uncork+0x0/0x28) from [<c0234364>] (sctp_side_effects+0x1134/0x1220)
[<c0233230>] (sctp_side_effects+0x0/0x1220) from [<c02330b0>] (sctp_do_sm+0xac/0xd4)
[<c0233004>] (sctp_do_sm+0x0/0xd4) from [<c023675c>] (sctp_assoc_bh_rcv+0x118/0x160)
[<c0236644>] (sctp_assoc_bh_rcv+0x0/0x160) from [<c023d5bc>] (sctp_inq_push+0x6c/0x74)
[<c023d550>] (sctp_inq_push+0x0/0x74) from [<c024a6b0>] (sctp_rcv+0x7d8/0x888)

While we already had various kind of bugs in that area
ec0223ec48a9 ("net: sctp: fix sctp_sf_do_5_1D_ce to verify if
we/peer is AUTH capable") and b14878ccb7fa ("net: sctp: cache
auth_enable per endpoint"), this one is a bit of a different
kind.

Giving a bit more background on why SCTP authentication is
needed can be found in RFC4895:

  SCTP uses 32-bit verification tags to protect itself against
  blind attackers. These values are not changed during the
  lifetime of an SCTP association.

  Looking at new SCTP extensions, there is the need to have a
  method of proving that an SCTP chunk(s) was really sent by
  the original peer that started the association and not by a
  malicious attacker.

To cause this bug, we're triggering an INIT collision between
peers; normal SCTP handshake where both sides intend to
authenticate packets contains RANDOM; CHUNKS; HMAC-ALGO
parameters that are being negotiated among peers:

  ---------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ---------->
  <------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] ---------
  -------------------- COOKIE-ECHO -------------------->
  <-------------------- COOKIE-ACK ---------------------

RFC4895 says that each endpoint therefore knows its own random
number and the peer's random number *after* the association
has been established. The local and peer's random number along
with the shared key are then part of the secret used for
calculating the HMAC in the AUTH chunk.

Now, in our scenario, we have 2 threads with 1 non-blocking
SEQ_PACKET socket each, setting up common shared SCTP_AUTH_KEY
and SCTP_AUTH_ACTIVE_KEY properly, and each of them calling
sctp_bindx(3), listen(2) and connect(2) against each other,
thus the handshake looks similar to this, e.g.:

  ---------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ---------->
  <------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] ---------
  <--------- INIT[RANDOM; CHUNKS; HMAC-ALGO] -----------
  -------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] -------->
  ...

Since such collisions can also happen with verification tags,
the RFC4895 for AUTH rather vaguely says under section 6.1:

  In case of INIT collision, the rules governing the handling
  of this Random Number follow the same pattern as those for
  the Verification Tag, as explained in Section 5.2.4 of
  RFC 2960 [5]. Therefore, each endpoint knows its own Random
  Number and the peer's Random Number after the association
  has been established.

In RFC2960, section 5.2.4, we're eventually hitting Action B:

  B) In this case, both sides may be attempting to start an
     association at about the same time but the peer endpoint
     started its INIT after responding to the local endpoint's
     INIT. Thus it may have picked a new Verification Tag not
     being aware of the previous Tag it had sent this endpoint.
     The endpoint should stay in or enter the ESTABLISHED
     state but it MUST update its peer's Verification Tag from
     the State Cookie, stop any init or cookie timers that may
     running and send a COOKIE ACK.

In other words, the handling of the Random parameter is the
same as behavior for the Verification Tag as described in
Action B of section 5.2.4.

Looking at the code, we exactly hit the sctp_sf_do_dupcook_b()
case which triggers an SCTP_CMD_UPDATE_ASSOC command to the
side effect interpreter, and in fact it properly copies over
peer_{random, hmacs, chunks} parameters from the newly created
association to update the existing one.

Also, the old asoc_shared_key is being released and based on
the new params, sctp_auth_asoc_init_active_key() updated.
However, the issue observed in this case is that the previous
asoc->peer.auth_capable was 0, and has *not* been updated, so
that instead of creating a new secret, we're doing an early
return from the function sctp_auth_asoc_init_active_key()
leaving asoc->asoc_shared_key as NULL. However, we now have to
authenticate chunks from the updated chunk list (e.g. COOKIE-ACK).

That in fact causes the server side when responding with ...

  <------------------ AUTH; COOKIE-ACK -----------------

... to trigger a NULL pointer dereference, since in
sctp_packet_transmit(), it discovers that an AUTH chunk is
being queued for xmit, and thus it calls sctp_auth_calculate_hmac().

Since the asoc->active_key_id is still inherited from the
endpoint, and the same as encoded into the chunk, it uses
asoc->asoc_shared_key, which is still NULL, as an asoc_key
and dereferences it in ...

  crypto_hash_setkey(desc.tfm, &asoc_key->data[0], asoc_key->len)

... causing an oops. All this happens because sctp_make_cookie_ack()
called with the *new* association has the peer.auth_capable=1
and therefore marks the chunk with auth=1 after checking
sctp_auth_send_cid(), but it is *actually* sent later on over
the then *updated* association's transport that didn't initialize
its shared key due to peer.auth_capable=0. Since control chunks
in that case are not sent by the temporary association which
are scheduled for deletion, they are issued for xmit via
SCTP_CMD_REPLY in the interpreter with the context of the
*updated* association. peer.auth_capable was 0 in the updated
association (which went from COOKIE_WAIT into ESTABLISHED state),
since all previous processing that performed sctp_process_init()
was being done on temporary associations, that we eventually
throw away each time.

The correct fix is to update to the new peer.auth_capable
value as well in the collision case via sctp_assoc_update(),
so that in case the collision migrated from 0 -> 1,
sctp_auth_asoc_init_active_key() can properly recalculate
the secret. This therefore fixes the observed server panic.

Fixes: 730fc3d05cd4 ("[SCTP]: Implete SCTP-AUTH parameter processing")
Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Tested-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Vlad Yasevich <vyasevich@gmail.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sctp/associola.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 229b3c3fb6c9..62e86d98bc36 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1213,6 +1213,7 @@ void sctp_assoc_update(struct sctp_association *asoc,
 	asoc->c = new->c;
 	asoc->peer.rwnd = new->peer.rwnd;
 	asoc->peer.sack_needed = new->peer.sack_needed;
+	asoc->peer.auth_capable = new->peer.auth_capable;
 	asoc->peer.i = new->peer.i;
 	sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
 			 asoc->peer.i.initial_tsn, GFP_ATOMIC);

From d09fdc66ecb543136f9e3304c1b6071d3dae9792 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Thu, 31 Jul 2014 10:30:25 -0400
Subject: [PATCH 0154/1185] macvlan: Initialize vlan_features to turn on
 offload support.

[ Upstream commit 081e83a78db9b0ae1f5eabc2dedecc865f509b98 ]

Macvlan devices do not initialize vlan_features.  As a result,
any vlan devices configured on top of macvlans perform very poorly.
Initialize vlan_features based on the vlan features of the lower-level
device.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/macvlan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 155ef4bbde91..9be91cb4f4a3 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -500,6 +500,7 @@ static int macvlan_init(struct net_device *dev)
 				  (lowerdev->state & MACVLAN_STATE_MASK);
 	dev->features 		= lowerdev->features & MACVLAN_FEATURES;
 	dev->features		|= NETIF_F_LLTX;
+	dev->vlan_features	= lowerdev->vlan_features & MACVLAN_FEATURES;
 	dev->gso_max_size	= lowerdev->gso_max_size;
 	dev->iflink		= lowerdev->ifindex;
 	dev->hard_header_len	= lowerdev->hard_header_len;

From c290a4ef1fe38bd8d29573468a189e49dbaa0681 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Thu, 31 Jul 2014 10:33:06 -0400
Subject: [PATCH 0155/1185] net: Correctly set segment mac_len in
 skb_segment().

[ Upstream commit fcdfe3a7fa4cb74391d42b6a26dc07c20dab1d82 ]

When performing segmentation, the mac_len value is copied right
out of the original skb.  However, this value is not always set correctly
(like when the packet is VLAN-tagged) and we'll end up copying a bad
value.

One way to demonstrate this is to configure a VM which tags
packets internally and turn off VLAN acceleration on the forwarding
bridge port.  The packets show up corrupt like this:
16:18:24.985548 52:54:00:ab:be:25 > 52:54:00:26:ce:a3, ethertype 802.1Q
(0x8100), length 1518: vlan 100, p 0, ethertype 0x05e0,
        0x0000:  8cdb 1c7c 8cdb 0064 4006 b59d 0a00 6402 ...|...d@.....d.
        0x0010:  0a00 6401 9e0d b441 0a5e 64ec 0330 14fa ..d....A.^d..0..
        0x0020:  29e3 01c9 f871 0000 0101 080a 000a e833)....q.........3
        0x0030:  000f 8c75 6e65 7470 6572 6600 6e65 7470 ...unetperf.netp
        0x0040:  6572 6600 6e65 7470 6572 6600 6e65 7470 erf.netperf.netp
        0x0050:  6572 6600 6e65 7470 6572 6600 6e65 7470 erf.netperf.netp
        0x0060:  6572 6600 6e65 7470 6572 6600 6e65 7470 erf.netperf.netp
        ...

This also leads to awful throughput as GSO packets are dropped and
cause retransmissions.

The solution is to set the mac_len using the values already available
in the new skb.  We've already adjusted all of the header offset, so we
might as well correctly figure out the mac_len using skb_reset_mac_len().
After this change, packets are segmented correctly and performance
is restored.

CC: Eric Dumazet <edumazet@google.com>
Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/skbuff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 9f84a5f7404d..6148716884ae 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2810,7 +2810,6 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 		tail = nskb;
 
 		__copy_skb_header(nskb, skb);
-		nskb->mac_len = skb->mac_len;
 
 		/* nskb and skb might have different headroom */
 		if (nskb->ip_summed == CHECKSUM_PARTIAL)
@@ -2820,6 +2819,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 		skb_set_network_header(nskb, skb->mac_len);
 		nskb->transport_header = (nskb->network_header +
 					  skb_network_header_len(skb));
+		skb_reset_mac_len(nskb);
 
 		skb_copy_from_linear_data_offset(skb, -tnl_hlen,
 						 nskb->data - tnl_hlen,

From 9d868b94a6f52a17a2a94b34e544800d741b7852 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Thu, 31 Jul 2014 23:00:35 -0400
Subject: [PATCH 0156/1185] iovec: make sure the caller actually wants anything
 in memcpy_fromiovecend

[ Upstream commit 06ebb06d49486676272a3c030bfeef4bd969a8e6 ]

Check for cases when the caller requests 0 bytes instead of running off
and dereferencing potentially invalid iovecs.

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/iovec.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/core/iovec.c b/net/core/iovec.c
index 2145b7150beb..1117a26a8548 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -107,6 +107,10 @@ EXPORT_SYMBOL(memcpy_toiovecend);
 int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
 			int offset, int len)
 {
+	/* No data? Done! */
+	if (len == 0)
+		return 0;
+
 	/* Skip over the finished iovecs */
 	while (offset >= iov->iov_len) {
 		offset -= iov->iov_len;

From 6e5f6266d635809c560f7fb48a710701a1d54139 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 5 Aug 2014 16:49:52 +0200
Subject: [PATCH 0157/1185] sctp: fix possible seqlock deadlock in
 sctp_packet_transmit()

[ Upstream commit 757efd32d5ce31f67193cc0e6a56e4dffcc42fb1 ]

Dave reported following splat, caused by improper use of
IP_INC_STATS_BH() in process context.

BUG: using __this_cpu_add() in preemptible [00000000] code: trinity-c117/14551
caller is __this_cpu_preempt_check+0x13/0x20
CPU: 3 PID: 14551 Comm: trinity-c117 Not tainted 3.16.0+ #33
 ffffffff9ec898f0 0000000047ea7e23 ffff88022d32f7f0 ffffffff9e7ee207
 0000000000000003 ffff88022d32f818 ffffffff9e397eaa ffff88023ee70b40
 ffff88022d32f970 ffff8801c026d580 ffff88022d32f828 ffffffff9e397ee3
Call Trace:
 [<ffffffff9e7ee207>] dump_stack+0x4e/0x7a
 [<ffffffff9e397eaa>] check_preemption_disabled+0xfa/0x100
 [<ffffffff9e397ee3>] __this_cpu_preempt_check+0x13/0x20
 [<ffffffffc0839872>] sctp_packet_transmit+0x692/0x710 [sctp]
 [<ffffffffc082a7f2>] sctp_outq_flush+0x2a2/0xc30 [sctp]
 [<ffffffff9e0d985c>] ? mark_held_locks+0x7c/0xb0
 [<ffffffff9e7f8c6d>] ? _raw_spin_unlock_irqrestore+0x5d/0x80
 [<ffffffffc082b99a>] sctp_outq_uncork+0x1a/0x20 [sctp]
 [<ffffffffc081e112>] sctp_cmd_interpreter.isra.23+0x1142/0x13f0 [sctp]
 [<ffffffffc081c86b>] sctp_do_sm+0xdb/0x330 [sctp]
 [<ffffffff9e0b8f1b>] ? preempt_count_sub+0xab/0x100
 [<ffffffffc083b350>] ? sctp_cname+0x70/0x70 [sctp]
 [<ffffffffc08389ca>] sctp_primitive_ASSOCIATE+0x3a/0x50 [sctp]
 [<ffffffffc083358f>] sctp_sendmsg+0x88f/0xe30 [sctp]
 [<ffffffff9e0d673a>] ? lock_release_holdtime.part.28+0x9a/0x160
 [<ffffffff9e0d62ce>] ? put_lock_stats.isra.27+0xe/0x30
 [<ffffffff9e73b624>] inet_sendmsg+0x104/0x220
 [<ffffffff9e73b525>] ? inet_sendmsg+0x5/0x220
 [<ffffffff9e68ac4e>] sock_sendmsg+0x9e/0xe0
 [<ffffffff9e1c0c09>] ? might_fault+0xb9/0xc0
 [<ffffffff9e1c0bae>] ? might_fault+0x5e/0xc0
 [<ffffffff9e68b234>] SYSC_sendto+0x124/0x1c0
 [<ffffffff9e0136b0>] ? syscall_trace_enter+0x250/0x330
 [<ffffffff9e68c3ce>] SyS_sendto+0xe/0x10
 [<ffffffff9e7f9be4>] tracesys+0xdd/0xe2

This is a followup of commits f1d8cba61c3c4b ("inet: fix possible
seqlock deadlocks") and 7f88c6b23afbd315 ("ipv6: fix possible seqlock
deadlock in ip6_finish_output2")

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Reported-by: Dave Jones <davej@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sctp/output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/output.c b/net/sctp/output.c
index 0beb2f9c8a7c..b6f5fc3127b9 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -618,7 +618,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	return err;
 no_route:
 	kfree_skb(nskb);
-	IP_INC_STATS_BH(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
+	IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
 
 	/* FIXME: Returning the 'err' will effect all the associations
 	 * associated with a socket, although only one of the paths of the

From af25a08645b730f1d327110235d480266fa18acb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 30 Apr 2014 19:37:48 -0700
Subject: [PATCH 0158/1185] sparc64: Fix argument sign extension for
 compat_sys_futex().

[ Upstream commit aa3449ee9c87d9b7660dd1493248abcc57769e31 ]

Only the second argument, 'op', is signed.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/kernel/sys32.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index f7c72b6efc27..d066eb18650c 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -44,7 +44,7 @@ SIGN1(sys32_timer_settime, compat_sys_timer_settime, %o1)
 SIGN1(sys32_io_submit, compat_sys_io_submit, %o1)
 SIGN1(sys32_mq_open, compat_sys_mq_open, %o1)
 SIGN1(sys32_select, compat_sys_select, %o0)
-SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5)
+SIGN1(sys32_futex, compat_sys_futex, %o1)
 SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0)
 SIGN1(sys32_recvmsg, compat_sys_recvmsg, %o0)
 SIGN1(sys32_sendmsg, compat_sys_sendmsg, %o0)

From f07030c1e2eadec2ee9863e1b7e06036a2166886 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <tkhai@yandex.ru>
Date: Thu, 17 Apr 2014 00:45:24 +0400
Subject: [PATCH 0159/1185] sparc64: Make itc_sync_lock raw

[ Upstream commit 49b6c01f4c1de3b5e5427ac5aba80f9f6d27837a ]

One more place where we must not be able
to be preempted or to be interrupted in RT.

Always actually disable interrupts during
synchronization cycle.

Signed-off-by: Kirill Tkhai <tkhai@yandex.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/kernel/smp_64.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 77539eda928c..8565ecd7d48a 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -150,7 +150,7 @@ void cpu_panic(void)
 #define NUM_ROUNDS	64	/* magic value */
 #define NUM_ITERS	5	/* likewise */
 
-static DEFINE_SPINLOCK(itc_sync_lock);
+static DEFINE_RAW_SPINLOCK(itc_sync_lock);
 static unsigned long go[SLAVE + 1];
 
 #define DEBUG_TICK_SYNC	0
@@ -258,7 +258,7 @@ static void smp_synchronize_one_tick(int cpu)
 	go[MASTER] = 0;
 	membar_safe("#StoreLoad");
 
-	spin_lock_irqsave(&itc_sync_lock, flags);
+	raw_spin_lock_irqsave(&itc_sync_lock, flags);
 	{
 		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
 			while (!go[MASTER])
@@ -269,7 +269,7 @@ static void smp_synchronize_one_tick(int cpu)
 			membar_safe("#StoreLoad");
 		}
 	}
-	spin_unlock_irqrestore(&itc_sync_lock, flags);
+	raw_spin_unlock_irqrestore(&itc_sync_lock, flags);
 }
 
 #if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)

From ff156a9ba7c160c8fc1c8d4bb0fb935771e105dc Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 28 Apr 2014 23:50:08 -0700
Subject: [PATCH 0160/1185] sparc64: Handle 32-bit tasks properly in
 compute_effective_address().

[ Upstream commit d037d16372bbe4d580342bebbb8826821ad9edf0 ]

If we have a 32-bit task we must chop off the top 32-bits of the
64-bit value just as the cpu would.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/kernel/unaligned_64.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c
index 8201c25e7669..4db8898199f7 100644
--- a/arch/sparc/kernel/unaligned_64.c
+++ b/arch/sparc/kernel/unaligned_64.c
@@ -163,17 +163,23 @@ static unsigned long *fetch_reg_addr(unsigned int reg, struct pt_regs *regs)
 unsigned long compute_effective_address(struct pt_regs *regs,
 					unsigned int insn, unsigned int rd)
 {
+	int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
 	unsigned int rs1 = (insn >> 14) & 0x1f;
 	unsigned int rs2 = insn & 0x1f;
-	int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
+	unsigned long addr;
 
 	if (insn & 0x2000) {
 		maybe_flush_windows(rs1, 0, rd, from_kernel);
-		return (fetch_reg(rs1, regs) + sign_extend_imm13(insn));
+		addr = (fetch_reg(rs1, regs) + sign_extend_imm13(insn));
 	} else {
 		maybe_flush_windows(rs1, rs2, rd, from_kernel);
-		return (fetch_reg(rs1, regs) + fetch_reg(rs2, regs));
+		addr = (fetch_reg(rs1, regs) + fetch_reg(rs2, regs));
 	}
+
+	if (!from_kernel && test_thread_flag(TIF_32BIT))
+		addr &= 0xffffffff;
+
+	return addr;
 }
 
 /* This is just to make gcc think die_if_kernel does return... */

From f2701cd605156eee1cd5edbc709f989a8bbe218a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 28 Apr 2014 23:52:11 -0700
Subject: [PATCH 0161/1185] sparc64: Fix top-level fault handling bugs.

[ Upstream commit 70ffc6ebaead783ac8dafb1e87df0039bb043596 ]

Make get_user_insn() able to cope with huge PMDs.

Next, make do_fault_siginfo() more robust when get_user_insn() can't
actually fetch the instruction.  In particular, use the MMU announced
fault address when that happens, instead of calling
compute_effective_address() and computing garbage.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/mm/fault_64.c | 84 +++++++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 31 deletions(-)

diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 5062ff389e83..1992fa04095f 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -95,38 +95,51 @@ static unsigned int get_user_insn(unsigned long tpc)
 	pte_t *ptep, pte;
 	unsigned long pa;
 	u32 insn = 0;
-	unsigned long pstate;
 
-	if (pgd_none(*pgdp))
-		goto outret;
+	if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp)))
+		goto out;
 	pudp = pud_offset(pgdp, tpc);
-	if (pud_none(*pudp))
-		goto outret;
-	pmdp = pmd_offset(pudp, tpc);
-	if (pmd_none(*pmdp))
-		goto outret;
-
-	/* This disables preemption for us as well. */
-	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
-	__asm__ __volatile__("wrpr %0, %1, %%pstate"
-				: : "r" (pstate), "i" (PSTATE_IE));
-	ptep = pte_offset_map(pmdp, tpc);
-	pte = *ptep;
-	if (!pte_present(pte))
+	if (pud_none(*pudp) || unlikely(pud_bad(*pudp)))
 		goto out;
 
-	pa  = (pte_pfn(pte) << PAGE_SHIFT);
-	pa += (tpc & ~PAGE_MASK);
+	/* This disables preemption for us as well. */
+	local_irq_disable();
 
-	/* Use phys bypass so we don't pollute dtlb/dcache. */
-	__asm__ __volatile__("lduwa [%1] %2, %0"
-			     : "=r" (insn)
-			     : "r" (pa), "i" (ASI_PHYS_USE_EC));
+	pmdp = pmd_offset(pudp, tpc);
+	if (pmd_none(*pmdp) || unlikely(pmd_bad(*pmdp)))
+		goto out_irq_enable;
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (pmd_trans_huge(*pmdp)) {
+		if (pmd_trans_splitting(*pmdp))
+			goto out_irq_enable;
+
+		pa  = pmd_pfn(*pmdp) << PAGE_SHIFT;
+		pa += tpc & ~HPAGE_MASK;
+
+		/* Use phys bypass so we don't pollute dtlb/dcache. */
+		__asm__ __volatile__("lduwa [%1] %2, %0"
+				     : "=r" (insn)
+				     : "r" (pa), "i" (ASI_PHYS_USE_EC));
+	} else
+#endif
+	{
+		ptep = pte_offset_map(pmdp, tpc);
+		pte = *ptep;
+		if (pte_present(pte)) {
+			pa  = (pte_pfn(pte) << PAGE_SHIFT);
+			pa += (tpc & ~PAGE_MASK);
+
+			/* Use phys bypass so we don't pollute dtlb/dcache. */
+			__asm__ __volatile__("lduwa [%1] %2, %0"
+					     : "=r" (insn)
+					     : "r" (pa), "i" (ASI_PHYS_USE_EC));
+		}
+		pte_unmap(ptep);
+	}
+out_irq_enable:
+	local_irq_enable();
 out:
-	pte_unmap(ptep);
-	__asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate));
-outret:
 	return insn;
 }
 
@@ -152,7 +165,8 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
 }
 
 static void do_fault_siginfo(int code, int sig, struct pt_regs *regs,
-			     unsigned int insn, int fault_code)
+			     unsigned long fault_addr, unsigned int insn,
+			     int fault_code)
 {
 	unsigned long addr;
 	siginfo_t info;
@@ -160,10 +174,18 @@ static void do_fault_siginfo(int code, int sig, struct pt_regs *regs,
 	info.si_code = code;
 	info.si_signo = sig;
 	info.si_errno = 0;
-	if (fault_code & FAULT_CODE_ITLB)
+	if (fault_code & FAULT_CODE_ITLB) {
 		addr = regs->tpc;
-	else
-		addr = compute_effective_address(regs, insn, 0);
+	} else {
+		/* If we were able to probe the faulting instruction, use it
+		 * to compute a precise fault address.  Otherwise use the fault
+		 * time provided address which may only have page granularity.
+		 */
+		if (insn)
+			addr = compute_effective_address(regs, insn, 0);
+		else
+			addr = fault_addr;
+	}
 	info.si_addr = (void __user *) addr;
 	info.si_trapno = 0;
 
@@ -238,7 +260,7 @@ static void __kprobes do_kernel_fault(struct pt_regs *regs, int si_code,
 		/* The si_code was set to make clear whether
 		 * this was a SEGV_MAPERR or SEGV_ACCERR fault.
 		 */
-		do_fault_siginfo(si_code, SIGSEGV, regs, insn, fault_code);
+		do_fault_siginfo(si_code, SIGSEGV, regs, address, insn, fault_code);
 		return;
 	}
 
@@ -519,7 +541,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
 	 * Send a sigbus, regardless of whether we were in kernel
 	 * or user mode.
 	 */
-	do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, insn, fault_code);
+	do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, address, insn, fault_code);
 
 	/* Kernel mode? Handle exceptions or die */
 	if (regs->tstate & TSTATE_PRIV)

From e5bd62b99772098f5123bff888247dbc8e68f602 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 6 May 2014 21:27:37 -0700
Subject: [PATCH 0162/1185] sparc64: Don't bark so loudly about 32-bit tasks
 generating 64-bit fault addresses.

[ Upstream commit e5c460f46ae7ee94831cb55cb980f942aa9e5a85 ]

This was found using Dave Jones's trinity tool.

When a user process which is 32-bit performs a load or a store, the
cpu chops off the top 32-bits of the effective address before
translating it.

This is because we run 32-bit tasks with the PSTATE_AM (address
masking) bit set.

We can't run the kernel with that bit set, so when the kernel accesses
userspace no address masking occurs.

Since a 32-bit process will have no mappings in that region we will
properly fault, so we don't try to handle this using access_ok(),
which can safely just be a NOP on sparc64.

Real faults from 32-bit processes should never generate such addresses
so a bug check was added long ago, and it barks in the logs if this
happens.

But it also barks when a kernel user access causes this condition, and
that _can_ happen.  For example, if a pointer passed into a system call
is "0xfffffffc" and the kernel accesses 4 bytes offset from that pointer.

Just handle such faults normally via the exception entries.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/mm/fault_64.c | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 1992fa04095f..ea83f82464da 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -280,18 +280,6 @@ static void noinline __kprobes bogus_32bit_fault_tpc(struct pt_regs *regs)
 	show_regs(regs);
 }
 
-static void noinline __kprobes bogus_32bit_fault_address(struct pt_regs *regs,
-							 unsigned long addr)
-{
-	static int times;
-
-	if (times++ < 10)
-		printk(KERN_ERR "FAULT[%s:%d]: 32-bit process "
-		       "reports 64-bit fault address [%lx]\n",
-		       current->comm, current->pid, addr);
-	show_regs(regs);
-}
-
 asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
 {
 	struct mm_struct *mm = current->mm;
@@ -320,10 +308,8 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
 				goto intr_or_no_mm;
 			}
 		}
-		if (unlikely((address >> 32) != 0)) {
-			bogus_32bit_fault_address(regs, address);
+		if (unlikely((address >> 32) != 0))
 			goto intr_or_no_mm;
-		}
 	}
 
 	if (regs->tstate & TSTATE_PRIV) {

From 720910a60cc2509160ec043459081f274c12ccce Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 7 May 2014 14:07:32 -0700
Subject: [PATCH 0163/1185] sparc64: Fix huge TSB mapping on pre-UltraSPARC-III
 cpus.

[ Upstream commit b18eb2d779240631a098626cb6841ee2dd34fda0 ]

Access to the TSB hash tables during TLB misses requires that there be
an atomic 128-bit quad load available so that we fetch a matching TAG
and DATA field at the same time.

On cpus prior to UltraSPARC-III only virtual address based quad loads
are available.  UltraSPARC-III and later provide physical address
based variants which are easier to use.

When we only have virtual address based quad loads available this
means that we have to lock the TSB into the TLB at a fixed virtual
address on each cpu when it runs that process.  We can't just access
the PAGE_OFFSET based aliased mapping of these TSBs because we cannot
take a recursive TLB miss inside of the TLB miss handler without
risking running out of hardware trap levels (some trap combinations
can be deep, such as those generated by register window spill and fill
traps).

Without huge pages it's working perfectly fine, but when the huge TSB
got added another chunk of fixed virtual address space was not
allocated for this second TSB mapping.

So we were mapping both the 8K and 4MB TSBs to the same exact virtual
address, causing multiple TLB matches which gives undefined behavior.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/include/asm/pgtable_64.h |  6 ++++--
 arch/sparc/mm/tsb.c                 | 14 +++++++++++++-
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index dfb0019bf05b..6663604a902a 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -24,7 +24,8 @@
 
 /* The kernel image occupies 0x4000000 to 0x6000000 (4MB --> 96MB).
  * The page copy blockops can use 0x6000000 to 0x8000000.
- * The TSB is mapped in the 0x8000000 to 0xa000000 range.
+ * The 8K TSB is mapped in the 0x8000000 to 0x8400000 range.
+ * The 4M TSB is mapped in the 0x8400000 to 0x8800000 range.
  * The PROM resides in an area spanning 0xf0000000 to 0x100000000.
  * The vmalloc area spans 0x100000000 to 0x200000000.
  * Since modules need to be in the lowest 32-bits of the address space,
@@ -33,7 +34,8 @@
  * 0x400000000.
  */
 #define	TLBTEMP_BASE		_AC(0x0000000006000000,UL)
-#define	TSBMAP_BASE		_AC(0x0000000008000000,UL)
+#define	TSBMAP_8K_BASE		_AC(0x0000000008000000,UL)
+#define	TSBMAP_4M_BASE		_AC(0x0000000008400000,UL)
 #define MODULES_VADDR		_AC(0x0000000010000000,UL)
 #define MODULES_LEN		_AC(0x00000000e0000000,UL)
 #define MODULES_END		_AC(0x00000000f0000000,UL)
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index 2cc3bce5ee91..71d99a6c75a7 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -133,7 +133,19 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign
 	mm->context.tsb_block[tsb_idx].tsb_nentries =
 		tsb_bytes / sizeof(struct tsb);
 
-	base = TSBMAP_BASE;
+	switch (tsb_idx) {
+	case MM_TSB_BASE:
+		base = TSBMAP_8K_BASE;
+		break;
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+	case MM_TSB_HUGE:
+		base = TSBMAP_4M_BASE;
+		break;
+#endif
+	default:
+		BUG();
+	}
+
 	tte = pgprot_val(PAGE_KERNEL_LOCKED);
 	tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
 	BUG_ON(tsb_paddr & (tsb_bytes - 1UL));

From 921df8ed4c86310d215b62579703cc89741d6361 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 17 May 2014 11:28:05 -0700
Subject: [PATCH 0164/1185] sparc64: Add membar to Niagara2 memcpy code.

[ Upstream commit 5aa4ecfd0ddb1e6dcd1c886e6c49677550f581aa ]

This is to prevent previous stores from overlapping the block stores
done by the memcpy loop.

Based upon a glibc patch by Jose E. Marchesi

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/lib/NG2memcpy.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
index 2c20ad63ddbf..30eee6e8a81b 100644
--- a/arch/sparc/lib/NG2memcpy.S
+++ b/arch/sparc/lib/NG2memcpy.S
@@ -236,6 +236,7 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	 */
 	VISEntryHalf
 
+	membar		#Sync
 	alignaddr	%o1, %g0, %g0
 
 	add		%o1, (64 - 1), %o4

From 5b36a75bc06a00ede43bf8d9e921052357384072 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 4 Aug 2014 16:34:01 -0700
Subject: [PATCH 0165/1185] sparc64: Do not insert non-valid PTEs into the TSB
 hash table.

[ Upstream commit 18f38132528c3e603c66ea464727b29e9bbcb91b ]

The assumption was that update_mmu_cache() (and the equivalent for PMDs) would
only be called when the PTE being installed will be accessible by the user.

This is not true for code paths originating from remove_migration_pte().

There are dire consequences for placing a non-valid PTE into the TSB.  The TLB
miss framework assumes that when a TSB entry matches we can just load it into
the TLB and return from the TLB miss trap.

So if a non-valid PTE is in there, we will deadlock taking the TLB miss over
and over, never satisfying the miss.

Just exit early from update_mmu_cache() and friends in this situation.

Based upon a report and patch from Christopher Alexander Tobias Schulze.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/mm/init_64.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 04fd55a6e461..d6c1c85ff1c6 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -350,6 +350,10 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
 
 	mm = vma->vm_mm;
 
+	/* Don't insert a non-valid PTE into the TSB, we'll deadlock.  */
+	if (!pte_accessible(mm, pte))
+		return;
+
 	spin_lock_irqsave(&mm->context.lock, flags);
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)

From 38f4577f05042a9151ae920d0107404fdb21e59a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 4 Aug 2014 20:07:37 -0700
Subject: [PATCH 0166/1185] sparc64: Guard against flushing openfirmware
 mappings.

[ Upstream commit 4ca9a23765da3260058db3431faf5b4efd8cf926 ]

Based almost entirely upon a patch by Christopher Alexander Tobias
Schulze.

In commit db64fe02258f1507e13fe5212a989922323685ce ("mm: rewrite vmap
layer") lazy VMAP tlb flushing was added to the vmalloc layer.  This
causes problems on sparc64.

Sparc64 has two VMAP mapped regions and they are not contiguous with
each other.  First we have the malloc mapping area, then another
unrelated region, then the vmalloc region.

This "another unrelated region" is where the firmware is mapped.

If the lazy TLB flushing logic in the vmalloc code triggers after
we've had both a module unload and a vfree or similar, it will pass an
address range that goes from somewhere inside the malloc region to
somewhere inside the vmalloc region, and thus covering the
openfirmware area entirely.

The sparc64 kernel learns about openfirmware's dynamic mappings in
this region early in the boot, and then services TLB misses in this
area.  But openfirmware has some locked TLB entries which are not
mentioned in those dynamic mappings and we should thus not disturb
them.

These huge lazy TLB flush ranges cause those openfirmware locked TLB
entries to be removed, resulting in all kinds of problems including
hard hangs and crashes during reboot/reset.

Besides causing problems like this, such huge TLB flush ranges are
also incredibly inefficient.  A plea has been made with the author of
the VMAP lazy TLB flushing code, but for now we'll put a safety guard
into our flush_tlb_kernel_range() implementation.

Since the implementation has become non-trivial, stop defining it as a
macro and instead make it a function in a C source file.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/include/asm/tlbflush_64.h | 12 ++----------
 arch/sparc/mm/init_64.c              | 23 +++++++++++++++++++++++
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/arch/sparc/include/asm/tlbflush_64.h b/arch/sparc/include/asm/tlbflush_64.h
index f0d6a9700f4c..1a4bb971e06d 100644
--- a/arch/sparc/include/asm/tlbflush_64.h
+++ b/arch/sparc/include/asm/tlbflush_64.h
@@ -35,6 +35,8 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 {
 }
 
+void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+
 #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
 
 extern void flush_tlb_pending(void);
@@ -49,11 +51,6 @@ extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
 #ifndef CONFIG_SMP
 
-#define flush_tlb_kernel_range(start,end) \
-do {	flush_tsb_kernel_range(start,end); \
-	__flush_tlb_kernel_range(start,end); \
-} while (0)
-
 static inline void global_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr)
 {
 	__flush_tlb_page(CTX_HWBITS(mm->context), vaddr);
@@ -64,11 +61,6 @@ static inline void global_flush_tlb_page(struct mm_struct *mm, unsigned long vad
 extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end);
 extern void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr);
 
-#define flush_tlb_kernel_range(start, end) \
-do {	flush_tsb_kernel_range(start,end); \
-	smp_flush_tlb_kernel_range(start, end); \
-} while (0)
-
 #define global_flush_tlb_page(mm, vaddr) \
 	smp_flush_tlb_page(mm, vaddr)
 
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index d6c1c85ff1c6..a751023dbdcd 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2768,3 +2768,26 @@ void hugetlb_setup(struct pt_regs *regs)
 	}
 }
 #endif
+
+#ifdef CONFIG_SMP
+#define do_flush_tlb_kernel_range	smp_flush_tlb_kernel_range
+#else
+#define do_flush_tlb_kernel_range	__flush_tlb_kernel_range
+#endif
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	if (start < HI_OBP_ADDRESS && end > LOW_OBP_ADDRESS) { /* hits OBP */
+		if (start < LOW_OBP_ADDRESS) {	/* part below the OBP window */
+			flush_tsb_kernel_range(start, LOW_OBP_ADDRESS);
+			do_flush_tlb_kernel_range(start, LOW_OBP_ADDRESS);
+		}
+		if (end > HI_OBP_ADDRESS) {	/* part above the OBP window */
+			flush_tsb_kernel_range(HI_OBP_ADDRESS, end);
+			do_flush_tlb_kernel_range(HI_OBP_ADDRESS, end);
+		}
+	} else {
+		flush_tsb_kernel_range(start, end);
+		do_flush_tlb_kernel_range(start, end);
+	}
+}

From c1853d6752caa7843dfd51121df1f95697514499 Mon Sep 17 00:00:00 2001
From: Christopher Alexander Tobias Schulze <cat.schulze@alice-dsl.net>
Date: Sun, 3 Aug 2014 15:44:52 +0200
Subject: [PATCH 0167/1185] bbc-i2c: Fix BBC I2C envctrl on SunBlade 2000

[ Upstream commit 5cdceab3d5e02eb69ea0f5d8fa9181800baf6f77 ]

Fix regression in bbc i2c temperature and fan control on some Sun systems
that causes the driver to refuse to load due to the bbc_i2c_bussel resource not
being present on the (second) i2c bus where the temperature sensors and fan
control are located. (The check for the number of resources was removed when
the driver was ported to a pure OF driver in mid 2008.)

Signed-off-by: Christopher Alexander Tobias Schulze <cat.schulze@alice-dsl.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/sbus/char/bbc_envctrl.c |  6 ++++++
 drivers/sbus/char/bbc_i2c.c     | 11 ++++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/sbus/char/bbc_envctrl.c b/drivers/sbus/char/bbc_envctrl.c
index 160e7510aca6..0787b9756165 100644
--- a/drivers/sbus/char/bbc_envctrl.c
+++ b/drivers/sbus/char/bbc_envctrl.c
@@ -452,6 +452,9 @@ static void attach_one_temp(struct bbc_i2c_bus *bp, struct platform_device *op,
 	if (!tp)
 		return;
 
+	INIT_LIST_HEAD(&tp->bp_list);
+	INIT_LIST_HEAD(&tp->glob_list);
+
 	tp->client = bbc_i2c_attach(bp, op);
 	if (!tp->client) {
 		kfree(tp);
@@ -497,6 +500,9 @@ static void attach_one_fan(struct bbc_i2c_bus *bp, struct platform_device *op,
 	if (!fp)
 		return;
 
+	INIT_LIST_HEAD(&fp->bp_list);
+	INIT_LIST_HEAD(&fp->glob_list);
+
 	fp->client = bbc_i2c_attach(bp, op);
 	if (!fp->client) {
 		kfree(fp);
diff --git a/drivers/sbus/char/bbc_i2c.c b/drivers/sbus/char/bbc_i2c.c
index c1441ed282eb..e0e6cd605cca 100644
--- a/drivers/sbus/char/bbc_i2c.c
+++ b/drivers/sbus/char/bbc_i2c.c
@@ -301,13 +301,18 @@ static struct bbc_i2c_bus * attach_one_i2c(struct platform_device *op, int index
 	if (!bp)
 		return NULL;
 
+	INIT_LIST_HEAD(&bp->temps);
+	INIT_LIST_HEAD(&bp->fans);
+
 	bp->i2c_control_regs = of_ioremap(&op->resource[0], 0, 0x2, "bbc_i2c_regs");
 	if (!bp->i2c_control_regs)
 		goto fail;
 
-	bp->i2c_bussel_reg = of_ioremap(&op->resource[1], 0, 0x1, "bbc_i2c_bussel");
-	if (!bp->i2c_bussel_reg)
-		goto fail;
+	if (op->num_resources == 2) {
+		bp->i2c_bussel_reg = of_ioremap(&op->resource[1], 0, 0x1, "bbc_i2c_bussel");
+		if (!bp->i2c_bussel_reg)
+			goto fail;
+	}
 
 	bp->waiting = 0;
 	init_waitqueue_head(&bp->wq);

From ff0adae5df0ff94592f03f84c952e624f0945a7c Mon Sep 17 00:00:00 2001
From: Christopher Alexander Tobias Schulze <cat.schulze@alice-dsl.net>
Date: Sun, 3 Aug 2014 16:01:53 +0200
Subject: [PATCH 0168/1185] sunsab: Fix detection of BREAK on sunsab serial
 console

[ Upstream commit fe418231b195c205701c0cc550a03f6c9758fd9e ]

Fix detection of BREAK on sunsab serial console: BREAK detection was only
performed when there were also serial characters received simultaneously.
To handle all BREAKs correctly, the check for BREAK and the corresponding
call to uart_handle_break() must also be done if count == 0, therefore
duplicate this code fragment and pull it out of the loop over the received
characters.

Patch applies to 3.16-rc6.

Signed-off-by: Christopher Alexander Tobias Schulze <cat.schulze@alice-dsl.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sunsab.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/tty/serial/sunsab.c b/drivers/tty/serial/sunsab.c
index a422c8b55a47..aa53fee1df63 100644
--- a/drivers/tty/serial/sunsab.c
+++ b/drivers/tty/serial/sunsab.c
@@ -157,6 +157,15 @@ receive_chars(struct uart_sunsab_port *up,
 	    (up->port.line == up->port.cons->index))
 		saw_console_brk = 1;
 
+	if (count == 0) {
+		if (unlikely(stat->sreg.isr1 & SAB82532_ISR1_BRK)) {
+			stat->sreg.isr0 &= ~(SAB82532_ISR0_PERR |
+					     SAB82532_ISR0_FERR);
+			up->port.icount.brk++;
+			uart_handle_break(&up->port);
+		}
+	}
+
 	for (i = 0; i < count; i++) {
 		unsigned char ch = buf[i], flag;
 

From 87c64964c0f424ec6b992f4d81516d698eb0a413 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Fri, 1 Aug 2014 09:50:40 -0400
Subject: [PATCH 0169/1185] sparc64: ldc_connect() should not return EINVAL
 when handshake is in progress.

[ Upstream commit 4ec1b01029b4facb651b8ef70bc20a4be4cebc63 ]

The LDC handshake could have been asynchronously triggered
after ldc_bind() enables the ldc_rx() receive interrupt-handler
(and thus intercepts incoming control packets)
and before vio_port_up() calls ldc_connect(). If that is the case,
ldc_connect() should return 0 and let the state-machine
progress.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Karl Volz <karl.volz@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/kernel/ldc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c
index 54df554b82d9..fa4c900a0d1f 100644
--- a/arch/sparc/kernel/ldc.c
+++ b/arch/sparc/kernel/ldc.c
@@ -1336,7 +1336,7 @@ int ldc_connect(struct ldc_channel *lp)
 	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
 	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
 	    lp->hs_state != LDC_HS_OPEN)
-		err = -EINVAL;
+		err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL);
 	else
 		err = start_handshake(lp);
 

From df6023479d431628fff9bd425ecc05ee593561c1 Mon Sep 17 00:00:00 2001
From: Andrey Utkin <andrey.krieger.utkin@gmail.com>
Date: Mon, 4 Aug 2014 23:47:41 +0300
Subject: [PATCH 0170/1185] arch/sparc/math-emu/math_32.c: drop stray break
 operator

[ Upstream commit 093758e3daede29cb4ce6aedb111becf9d4bfc57 ]

This commit is a guesswork, but it seems to make sense to drop this
break, as otherwise the following line is never executed and becomes
dead code. And that following line actually saves the result of
local calculation by the pointer given in function argument. So the
proposed change makes sense if this code in the whole makes sense (but I
am unable to analyze it in the whole).

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=81641
Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: Andrey Utkin <andrey.krieger.utkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/math-emu/math_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/math-emu/math_32.c b/arch/sparc/math-emu/math_32.c
index aa4d55b0bdf0..5ce8f2f64604 100644
--- a/arch/sparc/math-emu/math_32.c
+++ b/arch/sparc/math-emu/math_32.c
@@ -499,7 +499,7 @@ static int do_one_mathemu(u32 insn, unsigned long *pfsr, unsigned long *fregs)
 		case 0: fsr = *pfsr;
 			if (IR == -1) IR = 2;
 			/* fcc is always fcc0 */
-			fsr &= ~0xc00; fsr |= (IR << 10); break;
+			fsr &= ~0xc00; fsr |= (IR << 10);
 			*pfsr = fsr;
 			break;
 		case 1: rd->s = IR; break;

From 7f363d2d04aa06a58619ffb5f22a84ae4f362c17 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 14 Aug 2014 09:24:29 +0800
Subject: [PATCH 0171/1185] Linux 3.10.53

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index b94f00938acc..2ac415a7e937 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 10
-SUBLEVEL = 52
+SUBLEVEL = 53
 EXTRAVERSION =
 NAME = TOSSUG Baby Fish
 

From 3b947a25956554edf17da7d2cdce0828114cee2a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Mon, 18 Aug 2014 11:54:38 -0500
Subject: [PATCH 0172/1185] configs: Enable fanotify

Some LTP tests use it.

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 linaro/configs/linaro-base.conf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/linaro/configs/linaro-base.conf b/linaro/configs/linaro-base.conf
index 0620d5ec3e1c..bbd0f160398f 100644
--- a/linaro/configs/linaro-base.conf
+++ b/linaro/configs/linaro-base.conf
@@ -115,3 +115,4 @@ CONFIG_SECURITY_SELINUX=y
 CONFIG_EXT4_FS_SECURITY=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_DEBUG_INFO=y
+CONFIG_FANOTIFY=y

From df77f00e9623348a89970c00bdb0b3d72789bfa8 Mon Sep 17 00:00:00 2001
From: Maxime Bizon <mbizon@freebox.fr>
Date: Thu, 29 Aug 2013 20:28:13 +0200
Subject: [PATCH 0173/1185] firmware loader: fix pending_fw_head list
 corruption

Got the following oops just before reboot:

Unable to handle kernel NULL pointer dereference at virtual address 00000000
[<8028d300>] (__list_del_entry+0x44/0xac)
[<802e3320>] (__fw_load_abort.part.13+0x1c/0x50)
[<802e337c>] (fw_shutdown_notify+0x28/0x50)
[<80034f80>] (notifier_call_chain.isra.1+0x5c/0x9c)
[<800350ec>] (__blocking_notifier_call_chain+0x44/0x58)
[<80035114>] (blocking_notifier_call_chain+0x14/0x18)
[<80035d64>] (kernel_restart_prepare+0x14/0x38)
[<80035d94>] (kernel_restart+0xc/0x50)

The following race condition triggers here:

  _request_firmware_load()
  device_create_file(...)
  kobject_uevent(...)
  (schedule)
                                       (resume)
                                       firmware_loading_store(1)
                                       firmware_loading_store(0)
                                       list_del_init(&buf->pending_list)
                                       (schedule)
  (resume)
  list_add(&buf->pending_list, &pending_fw_head);
  wait_for_completion(&buf->completion);

causing an oops later when walking pending_list after the firmware has
been released.

The proposed fix is to move the list_add() before sysfs attribute
creation.

Signed-off-by: Maxime Bizon <mbizon@freebox.fr>
Acked-by: Ming Lei <ming.lei@canonical.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/firmware_class.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 55d682e6ecea..d7872b96019d 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -884,8 +884,15 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent,
 		goto err_del_dev;
 	}
 
+	mutex_lock(&fw_lock);
+	list_add(&buf->pending_list, &pending_fw_head);
+	mutex_unlock(&fw_lock);
+
 	retval = device_create_file(f_dev, &dev_attr_loading);
 	if (retval) {
+		mutex_lock(&fw_lock);
+		list_del_init(&buf->pending_list);
+		mutex_unlock(&fw_lock);
 		dev_err(f_dev, "%s: device_create_file failed\n", __func__);
 		goto err_del_bin_attr;
 	}
@@ -899,10 +906,6 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent,
 		kobject_uevent(&fw_priv->dev.kobj, KOBJ_ADD);
 	}
 
-	mutex_lock(&fw_lock);
-	list_add(&buf->pending_list, &pending_fw_head);
-	mutex_unlock(&fw_lock);
-
 	wait_for_completion(&buf->completion);
 
 	cancel_delayed_work_sync(&fw_priv->timeout_work);

From c0964a59ad8b55def7f6dbdb0d1b68fddd131226 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 4 Aug 2014 16:42:51 +0100
Subject: [PATCH 0174/1185] usb: Fix H20AHB driver for big-endian

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/usb/host/ehci-h20ahb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/ehci-h20ahb.c b/drivers/usb/host/ehci-h20ahb.c
index 3ee3c7aa6e5b..7724bab1828b 100644
--- a/drivers/usb/host/ehci-h20ahb.c
+++ b/drivers/usb/host/ehci-h20ahb.c
@@ -58,12 +58,12 @@ struct h20ahb_hcd {
 
 static inline void ehci_write(void __iomem *base, u32 reg, u32 val)
 {
-	__raw_writel(val, base + reg);
+	writel_relaxed(val, base + reg);
 }
 
 static inline u32 ehci_read(void __iomem *base, u32 reg)
 {
-	return __raw_readl(base + reg);
+	return readl_relaxed(base + reg);
 }
 
 /* configure so an HC device and id are always provided */

From eb56dbabd13b33a681ff0393b8af108d831ace1d Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Thu, 12 Jun 2014 22:30:34 +0530
Subject: [PATCH 0175/1185] mailbox: rename pl320-ipc specific mailbox.h

The patch 30058677 "ARM / highbank: add support for pl320 IPC"
added a pl320 IPC specific header file as a generic mailbox.h.
This file has been renamed appropriately to allow the
introduction of the generic mailbox API framework.

Acked-by: Mark Langsdorf <mark.langsdorf@calxeda.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Suman Anna <s-anna@ti.com>
Reviewed-by: Mark Brown <broonie@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Mark Brown <broonie@linaro.org>

Conflicts:
	arch/arm/mach-highbank/highbank.c
---
 arch/arm/mach-highbank/highbank.c        | 1 +
 drivers/cpufreq/highbank-cpufreq.c       | 2 +-
 drivers/mailbox/pl320-ipc.c              | 2 +-
 include/linux/{mailbox.h => pl320-ipc.h} | 0
 4 files changed, 3 insertions(+), 2 deletions(-)
 rename include/linux/{mailbox.h => pl320-ipc.h} (100%)

diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c
index e7df2dd43a40..eec13a1fbb25 100644
--- a/arch/arm/mach-highbank/highbank.c
+++ b/arch/arm/mach-highbank/highbank.c
@@ -21,6 +21,7 @@
 #include <linux/irq.h>
 #include <linux/irqchip.h>
 #include <linux/irqdomain.h>
+#include <linux/pl320-ipc.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c
index b61b5a3fad64..3118b87a37bc 100644
--- a/drivers/cpufreq/highbank-cpufreq.c
+++ b/drivers/cpufreq/highbank-cpufreq.c
@@ -19,7 +19,7 @@
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/of.h>
-#include <linux/mailbox.h>
+#include <linux/pl320-ipc.h>
 #include <linux/platform_device.h>
 
 #define HB_CPUFREQ_CHANGE_NOTE	0x80000001
diff --git a/drivers/mailbox/pl320-ipc.c b/drivers/mailbox/pl320-ipc.c
index d873cbae2fbb..f3755e0aa935 100644
--- a/drivers/mailbox/pl320-ipc.c
+++ b/drivers/mailbox/pl320-ipc.c
@@ -26,7 +26,7 @@
 #include <linux/device.h>
 #include <linux/amba/bus.h>
 
-#include <linux/mailbox.h>
+#include <linux/pl320-ipc.h>
 
 #define IPCMxSOURCE(m)		((m) * 0x40)
 #define IPCMxDSET(m)		(((m) * 0x40) + 0x004)
diff --git a/include/linux/mailbox.h b/include/linux/pl320-ipc.h
similarity index 100%
rename from include/linux/mailbox.h
rename to include/linux/pl320-ipc.h

From c3b04f9d63bd8c1366cbe971b38759fa9938830b Mon Sep 17 00:00:00 2001
From: xerox_lin <xerox_lin@htc.com>
Date: Mon, 18 Aug 2014 21:54:23 +0800
Subject: [PATCH 0176/1185] USB: rndis: Free the rndis response queue during
 REMOTE_NDIS_RESET_MSG

When rndis data transfer is in progress, some Windows7 Host PC is not
sending the GET_ENCAPSULATED_RESPONSE command for receiving the response
for the previous SEND_ENCAPSULATED_COMMAND processed.

The rndis function driver appends each response for the
SEND_ENCAPSULATED_COMMAND in a queue. As the above process got corrupted,
the Host sends a REMOTE_NDIS_RESET_MSG command to do a soft-reset.
As the rndis response queue is not freed, the previous response is sent
as a part of this REMOTE_NDIS_RESET_MSG's reset response and the Host
blocks any more Rndis transfers.

Hence free the rndis response queue as a part of this soft-reset so that
the current response for REMOTE_NDIS_RESET_MSG is sent properly during the
response command.

Change-Id: I8eff3849db452fe01b7d1fe4140ef1f1ad3f4fd4
Signed-off-by: Rajkumar Raghupathy <raghup@codeaurora.org>
Signed-off-by: Xerox Lin <xerox_lin@htc.com>
---
 drivers/usb/gadget/rndis.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c
index 693f0c24d516..5378dc6574fb 100644
--- a/drivers/usb/gadget/rndis.c
+++ b/drivers/usb/gadget/rndis.c
@@ -686,6 +686,12 @@ static int rndis_reset_response(int configNr, rndis_reset_msg_type *buf)
 	rndis_reset_cmplt_type *resp;
 	rndis_resp_t *r;
 	struct rndis_params *params = rndis_per_dev_params + configNr;
+	u32 length;
+	u8 *xbuf;
+
+	/* drain the response queue */
+	while ((xbuf = rndis_get_next_response(configNr, &length)))
+		rndis_free_response(configNr, xbuf);
 
 	r = rndis_add_response(configNr, sizeof(rndis_reset_cmplt_type));
 	if (!r)

From 919668e64fd0332465b05b4a5aa08cd7a1d94696 Mon Sep 17 00:00:00 2001
From: Jassi Brar <jaswinder.singh@linaro.org>
Date: Thu, 12 Jun 2014 22:31:19 +0530
Subject: [PATCH 0177/1185] mailbox: Introduce framework for mailbox

Introduce common framework for client/protocol drivers and
controller drivers of Inter-Processor-Communication (IPC).

Client driver developers should have a look at
 include/linux/mailbox_client.h to understand the part of
the API exposed to client drivers.
Similarly controller driver developers should have a look
at include/linux/mailbox_controller.h

Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
Reviewed-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 MAINTAINERS                        |   8 +
 drivers/mailbox/Makefile           |   4 +
 drivers/mailbox/mailbox.c          | 467 +++++++++++++++++++++++++++++
 include/linux/mailbox_client.h     |  46 +++
 include/linux/mailbox_controller.h | 135 +++++++++
 5 files changed, 660 insertions(+)
 create mode 100644 drivers/mailbox/mailbox.c
 create mode 100644 include/linux/mailbox_client.h
 create mode 100644 include/linux/mailbox_controller.h

diff --git a/MAINTAINERS b/MAINTAINERS
index ad7e322ad17b..d5a14e676330 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5152,6 +5152,14 @@ S:	Maintained
 F:	drivers/net/macvlan.c
 F:	include/linux/if_macvlan.h
 
+MAILBOX API
+M:	Jassi Brar <jassisinghbrar@gmail.com>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/mailbox/
+F:	include/linux/mailbox_client.h
+F:	include/linux/mailbox_controller.h
+
 MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
 M:	Michael Kerrisk <mtk.manpages@gmail.com>
 W:	http://www.kernel.org/doc/man-pages
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index 543ad6a79505..fefef7ebcbec 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -1 +1,5 @@
+# Generic MAILBOX API
+
+obj-$(CONFIG_MAILBOX)		+= mailbox.o
+
 obj-$(CONFIG_PL320_MBOX)	+= pl320-ipc.o
diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
new file mode 100644
index 000000000000..9a937ef35068
--- /dev/null
+++ b/drivers/mailbox/mailbox.c
@@ -0,0 +1,467 @@
+/*
+ * Mailbox: Common code for Mailbox controllers and users
+ *
+ * Copyright (C) 2013-2014 Linaro Ltd.
+ * Author: Jassi Brar <jassisinghbrar@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/bitops.h>
+#include <linux/mailbox_client.h>
+#include <linux/mailbox_controller.h>
+
+#define TXDONE_BY_IRQ	BIT(0) /* controller has remote RTR irq */
+#define TXDONE_BY_POLL	BIT(1) /* controller can read status of last TX */
+#define TXDONE_BY_ACK	BIT(2) /* S/W ACK received by Client ticks the TX */
+
+static LIST_HEAD(mbox_cons);
+static DEFINE_MUTEX(con_mutex);
+
+static int add_to_rbuf(struct mbox_chan *chan, void *mssg)
+{
+	int idx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	/* See if there is any space left */
+	if (chan->msg_count == MBOX_TX_QUEUE_LEN) {
+		spin_unlock_irqrestore(&chan->lock, flags);
+		return -ENOBUFS;
+	}
+
+	idx = chan->msg_free;
+	chan->msg_data[idx] = mssg;
+	chan->msg_count++;
+
+	if (idx == MBOX_TX_QUEUE_LEN - 1)
+		chan->msg_free = 0;
+	else
+		chan->msg_free++;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return idx;
+}
+
+static void msg_submit(struct mbox_chan *chan)
+{
+	unsigned count, idx;
+	unsigned long flags;
+	void *data;
+	int err;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	if (!chan->msg_count || chan->active_req)
+		goto exit;
+
+	count = chan->msg_count;
+	idx = chan->msg_free;
+	if (idx >= count)
+		idx -= count;
+	else
+		idx += MBOX_TX_QUEUE_LEN - count;
+
+	data = chan->msg_data[idx];
+
+	/* Try to submit a message to the MBOX controller */
+	err = chan->mbox->ops->send_data(chan, data);
+	if (!err) {
+		chan->active_req = data;
+		chan->msg_count--;
+	}
+exit:
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+static void tx_tick(struct mbox_chan *chan, int r)
+{
+	unsigned long flags;
+	void *mssg;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	mssg = chan->active_req;
+	chan->active_req = NULL;
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	/* Submit next message */
+	msg_submit(chan);
+
+	/* Notify the client */
+	if (mssg && chan->cl->tx_done)
+		chan->cl->tx_done(chan->cl, mssg, r);
+
+	if (chan->cl->tx_block)
+		complete(&chan->tx_complete);
+}
+
+static void poll_txdone(unsigned long data)
+{
+	struct mbox_controller *mbox = (struct mbox_controller *)data;
+	bool txdone, resched = false;
+	int i;
+
+	for (i = 0; i < mbox->num_chans; i++) {
+		struct mbox_chan *chan = &mbox->chans[i];
+
+		if (chan->active_req && chan->cl) {
+			resched = true;
+			txdone = chan->mbox->ops->last_tx_done(chan);
+			if (txdone)
+				tx_tick(chan, 0);
+		}
+	}
+
+	if (resched)
+		mod_timer(&mbox->poll, jiffies +
+				msecs_to_jiffies(mbox->period));
+}
+
+/**
+ * mbox_chan_received_data - A way for controller driver to push data
+ *				received from remote to the upper layer.
+ * @chan: Pointer to the mailbox channel on which RX happened.
+ * @mssg: Client specific message typecasted as void *
+ *
+ * After startup and before shutdown any data received on the chan
+ * is passed on to the API via atomic mbox_chan_received_data().
+ * The controller should ACK the RX only after this call returns.
+ */
+void mbox_chan_received_data(struct mbox_chan *chan, void *mssg)
+{
+	/* No buffering the received data */
+	if (chan->cl->rx_callback)
+		chan->cl->rx_callback(chan->cl, mssg);
+}
+EXPORT_SYMBOL_GPL(mbox_chan_received_data);
+
+/**
+ * mbox_chan_txdone - A way for controller driver to notify the
+ *			framework that the last TX has completed.
+ * @chan: Pointer to the mailbox chan on which TX happened.
+ * @r: Status of last TX - OK or ERROR
+ *
+ * The controller that has IRQ for TX ACK calls this atomic API
+ * to tick the TX state machine. It works only if txdone_irq
+ * is set by the controller.
+ */
+void mbox_chan_txdone(struct mbox_chan *chan, int r)
+{
+	if (unlikely(!(chan->txdone_method & TXDONE_BY_IRQ))) {
+		dev_err(chan->mbox->dev,
+		       "Controller can't run the TX ticker\n");
+		return;
+	}
+
+	tx_tick(chan, r);
+}
+EXPORT_SYMBOL_GPL(mbox_chan_txdone);
+
+/**
+ * mbox_client_txdone - The way for a client to run the TX state machine.
+ * @chan: Mailbox channel assigned to this client.
+ * @r: Success status of last transmission.
+ *
+ * The client/protocol had received some 'ACK' packet and it notifies
+ * the API that the last packet was sent successfully. This only works
+ * if the controller can't sense TX-Done.
+ */
+void mbox_client_txdone(struct mbox_chan *chan, int r)
+{
+	if (unlikely(!(chan->txdone_method & TXDONE_BY_ACK))) {
+		dev_err(chan->mbox->dev, "Client can't run the TX ticker\n");
+		return;
+	}
+
+	tx_tick(chan, r);
+}
+EXPORT_SYMBOL_GPL(mbox_client_txdone);
+
+/**
+ * mbox_client_peek_data - A way for client driver to pull data
+ *			received from remote by the controller.
+ * @chan: Mailbox channel assigned to this client.
+ *
+ * A poke to controller driver for any received data.
+ * The data is actually passed onto client via the
+ * mbox_chan_received_data()
+ * The call can be made from atomic context, so the controller's
+ * implementation of peek_data() must not sleep.
+ *
+ * Return: True, if controller has, and is going to push after this,
+ *          some data.
+ *         False, if controller doesn't have any data to be read.
+ */
+bool mbox_client_peek_data(struct mbox_chan *chan)
+{
+	if (chan->mbox->ops->peek_data)
+		return chan->mbox->ops->peek_data(chan);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(mbox_client_peek_data);
+
+/**
+ * mbox_send_message -	For client to submit a message to be
+ *				sent to the remote.
+ * @chan: Mailbox channel assigned to this client.
+ * @mssg: Client specific message typecasted.
+ *
+ * For client to submit data to the controller destined for a remote
+ * processor. If the client had set 'tx_block', the call will return
+ * either when the remote receives the data or when 'tx_tout' millisecs
+ * run out.
+ *  In non-blocking mode, the requests are buffered by the API and a
+ * non-negative token is returned for each queued request. If the request
+ * is not queued, a negative token is returned. Upon failure or successful
+ * TX, the API calls 'tx_done' from atomic context, from which the client
+ * could submit yet another request.
+ * The pointer to message should be preserved until it is sent
+ * over the chan, i.e., until tx_done() is called.
+ * This function could be called from atomic context as it simply
+ * queues the data and returns a token against the request.
+ *
+ * Return: Non-negative integer for successful submission (non-blocking mode)
+ *	or transmission over chan (blocking mode).
+ *	Negative value denotes failure.
+ */
+int mbox_send_message(struct mbox_chan *chan, void *mssg)
+{
+	int t;
+
+	if (!chan || !chan->cl)
+		return -EINVAL;
+
+	t = add_to_rbuf(chan, mssg);
+	if (t < 0) {
+		dev_err(chan->mbox->dev, "Try increasing MBOX_TX_QUEUE_LEN\n");
+		return t;
+	}
+
+	msg_submit(chan);
+
+	INIT_COMPLETION(chan->tx_complete);
+
+	if (chan->txdone_method	== TXDONE_BY_POLL)
+		poll_txdone((unsigned long)chan->mbox);
+
+	if (chan->cl->tx_block && chan->active_req) {
+		unsigned long wait;
+		int ret;
+
+		if (!chan->cl->tx_tout) /* wait forever */
+			wait = msecs_to_jiffies(3600000);
+		else
+			wait = msecs_to_jiffies(chan->cl->tx_tout);
+
+		ret = wait_for_completion_timeout(&chan->tx_complete, wait);
+		if (ret == 0) {
+			t = -EIO;
+			tx_tick(chan, -EIO);
+		}
+	}
+
+	return t;
+}
+EXPORT_SYMBOL_GPL(mbox_send_message);
+
+/**
+ * mbox_request_channel - Request a mailbox channel.
+ * @cl: Identity of the client requesting the channel.
+ * @index: Index of mailbox specifier in 'mboxes' property.
+ *
+ * The Client specifies its requirements and capabilities while asking for
+ * a mailbox channel. It can't be called from atomic context.
+ * The channel is exclusively allocated and can't be used by another
+ * client before the owner calls mbox_free_channel.
+ * After assignment, any packet received on this channel will be
+ * handed over to the client via the 'rx_callback'.
+ * The framework holds reference to the client, so the mbox_client
+ * structure shouldn't be modified until the mbox_free_channel returns.
+ *
+ * Return: Pointer to the channel assigned to the client if successful.
+ *		ERR_PTR for request failure.
+ */
+struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index)
+{
+	struct device *dev = cl->dev;
+	struct mbox_controller *mbox;
+	struct of_phandle_args spec;
+	struct mbox_chan *chan;
+	unsigned long flags;
+	int ret;
+
+	if (!dev || !dev->of_node) {
+		pr_debug("%s: No owner device node\n", __func__);
+		return ERR_PTR(-ENODEV);
+	}
+
+	mutex_lock(&con_mutex);
+
+	if (of_parse_phandle_with_args(dev->of_node, "mboxes",
+				       "#mbox-cells", index, &spec)) {
+		dev_dbg(dev, "%s: can't parse \"mboxes\" property\n", __func__);
+		mutex_unlock(&con_mutex);
+		return ERR_PTR(-ENODEV);
+	}
+
+	chan = NULL;
+	list_for_each_entry(mbox, &mbox_cons, node)
+		if (mbox->dev->of_node == spec.np) {
+			chan = mbox->of_xlate(mbox, &spec);
+			break;
+		}
+
+	of_node_put(spec.np);
+
+	if (!chan || chan->cl || !try_module_get(mbox->dev->driver->owner)) {
+		dev_dbg(dev, "%s: mailbox not free\n", __func__);
+		mutex_unlock(&con_mutex);
+		return ERR_PTR(-EBUSY);
+	}
+
+	spin_lock_irqsave(&chan->lock, flags);
+	chan->msg_free = 0;
+	chan->msg_count = 0;
+	chan->active_req = NULL;
+	chan->cl = cl;
+	init_completion(&chan->tx_complete);
+
+	if (chan->txdone_method	== TXDONE_BY_POLL && cl->knows_txdone)
+		chan->txdone_method |= TXDONE_BY_ACK;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	ret = chan->mbox->ops->startup(chan);
+	if (ret) {
+		dev_err(dev, "Unable to startup the chan (%d)\n", ret);
+		mbox_free_channel(chan);
+		chan = ERR_PTR(ret);
+	}
+
+	mutex_unlock(&con_mutex);
+	return chan;
+}
+EXPORT_SYMBOL_GPL(mbox_request_channel);
+
+/**
+ * mbox_free_channel - The client relinquishes control of a mailbox
+ *			channel by this call.
+ * @chan: The mailbox channel to be freed.
+ */
+void mbox_free_channel(struct mbox_chan *chan)
+{
+	unsigned long flags;
+
+	if (!chan || !chan->cl)
+		return;
+
+	chan->mbox->ops->shutdown(chan);
+
+	/* The queued TX requests are simply aborted, no callbacks are made */
+	spin_lock_irqsave(&chan->lock, flags);
+	chan->cl = NULL;
+	chan->active_req = NULL;
+	if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))
+		chan->txdone_method = TXDONE_BY_POLL;
+
+	module_put(chan->mbox->dev->driver->owner);
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+EXPORT_SYMBOL_GPL(mbox_free_channel);
+
+static struct mbox_chan *
+of_mbox_index_xlate(struct mbox_controller *mbox,
+		    const struct of_phandle_args *sp)
+{
+	int ind = sp->args[0];
+
+	if (ind >= mbox->num_chans)
+		return NULL;
+
+	return &mbox->chans[ind];
+}
+
+/**
+ * mbox_controller_register - Register the mailbox controller
+ * @mbox:	Pointer to the mailbox controller.
+ *
+ * The controller driver registers its communication channels
+ */
+int mbox_controller_register(struct mbox_controller *mbox)
+{
+	int i, txdone;
+
+	/* Sanity check */
+	if (!mbox || !mbox->dev || !mbox->ops || !mbox->num_chans)
+		return -EINVAL;
+
+	if (mbox->txdone_irq)
+		txdone = TXDONE_BY_IRQ;
+	else if (mbox->txdone_poll)
+		txdone = TXDONE_BY_POLL;
+	else /* It has to be ACK then */
+		txdone = TXDONE_BY_ACK;
+
+	if (txdone == TXDONE_BY_POLL) {
+		mbox->poll.function = &poll_txdone;
+		mbox->poll.data = (unsigned long)mbox;
+		init_timer(&mbox->poll);
+	}
+
+	for (i = 0; i < mbox->num_chans; i++) {
+		struct mbox_chan *chan = &mbox->chans[i];
+
+		chan->cl = NULL;
+		chan->mbox = mbox;
+		chan->txdone_method = txdone;
+		spin_lock_init(&chan->lock);
+	}
+
+	if (!mbox->of_xlate)
+		mbox->of_xlate = of_mbox_index_xlate;
+
+	mutex_lock(&con_mutex);
+	list_add_tail(&mbox->node, &mbox_cons);
+	mutex_unlock(&con_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mbox_controller_register);
+
+/**
+ * mbox_controller_unregister - Unregister the mailbox controller
+ * @mbox:	Pointer to the mailbox controller.
+ */
+void mbox_controller_unregister(struct mbox_controller *mbox)
+{
+	int i;
+
+	if (!mbox)
+		return;
+
+	mutex_lock(&con_mutex);
+
+	list_del(&mbox->node);
+
+	for (i = 0; i < mbox->num_chans; i++)
+		mbox_free_channel(&mbox->chans[i]);
+
+	if (mbox->txdone_poll)
+		del_timer_sync(&mbox->poll);
+
+	mutex_unlock(&con_mutex);
+}
+EXPORT_SYMBOL_GPL(mbox_controller_unregister);
diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h
new file mode 100644
index 000000000000..307d9cab2026
--- /dev/null
+++ b/include/linux/mailbox_client.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2013-2014 Linaro Ltd.
+ * Author: Jassi Brar <jassisinghbrar@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MAILBOX_CLIENT_H
+#define __MAILBOX_CLIENT_H
+
+#include <linux/of.h>
+#include <linux/device.h>
+
+struct mbox_chan;
+
+/**
+ * struct mbox_client - User of a mailbox
+ * @dev:		The client device
+ * @tx_block:		If the mbox_send_message should block until data is
+ *			transmitted.
+ * @tx_tout:		Max block period in ms before TX is assumed failure
+ * @knows_txdone:	If the client could run the TX state machine. Usually
+ *			if the client receives some ACK packet for transmission.
+ *			Unused if the controller already has TX_Done/RTR IRQ.
+ * @rx_callback:	Atomic callback to provide client the data received
+ * @tx_done:		Atomic callback to tell client of data transmission
+ */
+struct mbox_client {
+	struct device *dev;
+	bool tx_block;
+	unsigned long tx_tout;
+	bool knows_txdone;
+
+	void (*rx_callback)(struct mbox_client *cl, void *mssg);
+	void (*tx_done)(struct mbox_client *cl, void *mssg, int r);
+};
+
+struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index);
+int mbox_send_message(struct mbox_chan *chan, void *mssg);
+void mbox_client_txdone(struct mbox_chan *chan, int r); /* atomic */
+bool mbox_client_peek_data(struct mbox_chan *chan); /* atomic */
+void mbox_free_channel(struct mbox_chan *chan); /* may sleep */
+
+#endif /* __MAILBOX_CLIENT_H */
diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h
new file mode 100644
index 000000000000..9ee195b02444
--- /dev/null
+++ b/include/linux/mailbox_controller.h
@@ -0,0 +1,135 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MAILBOX_CONTROLLER_H
+#define __MAILBOX_CONTROLLER_H
+
+#include <linux/of.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/device.h>
+#include <linux/completion.h>
+
+struct mbox_chan;
+
+/**
+ * struct mbox_chan_ops - methods to control mailbox channels
+ * @send_data:	The API asks the MBOX controller driver, in atomic
+ *		context try to transmit a message on the bus. Returns 0 if
+ *		data is accepted for transmission, -EBUSY while rejecting
+ *		if the remote hasn't yet read the last data sent. Actual
+ *		transmission of data is reported by the controller via
+ *		mbox_chan_txdone (if it has some TX ACK irq). It must not
+ *		sleep.
+ * @startup:	Called when a client requests the chan. The controller
+ *		could ask clients for additional parameters of communication
+ *		to be provided via client's chan_data. This call may
+ *		block. After this call the Controller must forward any
+ *		data received on the chan by calling mbox_chan_received_data.
+ *		The controller may do stuff that need to sleep.
+ * @shutdown:	Called when a client relinquishes control of a chan.
+ *		This call may block too. The controller must not forward
+ *		any received data anymore.
+ *		The controller may do stuff that need to sleep.
+ * @last_tx_done: If the controller sets 'txdone_poll', the API calls
+ *		  this to poll status of last TX. The controller must
+ *		  give priority to IRQ method over polling and never
+ *		  set both txdone_poll and txdone_irq. Only in polling
+ *		  mode 'send_data' is expected to return -EBUSY.
+ *		  The controller may do stuff that need to sleep/block.
+ *		  Used only if txdone_poll:=true && txdone_irq:=false
+ * @peek_data: Atomic check for any received data. Return true if controller
+ *		  has some data to push to the client. False otherwise.
+ */
+struct mbox_chan_ops {
+	int (*send_data)(struct mbox_chan *chan, void *data);
+	int (*startup)(struct mbox_chan *chan);
+	void (*shutdown)(struct mbox_chan *chan);
+	bool (*last_tx_done)(struct mbox_chan *chan);
+	bool (*peek_data)(struct mbox_chan *chan);
+};
+
+/**
+ * struct mbox_controller - Controller of a class of communication channels
+ * @dev:		Device backing this controller
+ * @ops:		Operators that work on each communication chan
+ * @chans:		Array of channels
+ * @num_chans:		Number of channels in the 'chans' array.
+ * @txdone_irq:		Indicates if the controller can report to API when
+ *			the last transmitted data was read by the remote.
+ *			Eg, if it has some TX ACK irq.
+ * @txdone_poll:	If the controller can read but not report the TX
+ *			done. Ex, some register shows the TX status but
+ *			no interrupt rises. Ignored if 'txdone_irq' is set.
+ * @txpoll_period:	If 'txdone_poll' is in effect, the API polls for
+ *			last TX's status after these many millisecs
+ * @of_xlate:		Controller driver specific mapping of channel via DT
+ * @poll:		API private. Used to poll for TXDONE on all channels.
+ * @period:		API private. Polling period.
+ * @node:		API private. To hook into list of controllers.
+ */
+struct mbox_controller {
+	struct device *dev;
+	struct mbox_chan_ops *ops;
+	struct mbox_chan *chans;
+	int num_chans;
+	bool txdone_irq;
+	bool txdone_poll;
+	unsigned txpoll_period;
+	struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox,
+				      const struct of_phandle_args *sp);
+	/* Internal to API */
+	struct timer_list poll;
+	unsigned period;
+	struct list_head node;
+};
+
+/*
+ * The length of circular buffer for queuing messages from a client.
+ * 'msg_count' tracks the number of buffered messages while 'msg_free'
+ * is the index where the next message would be buffered.
+ * We shouldn't need it too big because every transfer is interrupt
+ * triggered and if we have lots of data to transfer, the interrupt
+ * latencies are going to be the bottleneck, not the buffer length.
+ * Besides, mbox_send_message could be called from atomic context and
+ * the client could also queue another message from the notifier 'tx_done'
+ * of the last transfer done.
+ * REVISIT: If too many platforms see the "Try increasing MBOX_TX_QUEUE_LEN"
+ * print, it needs to be taken from config option or somesuch.
+ */
+#define MBOX_TX_QUEUE_LEN	20
+
+/**
+ * struct mbox_chan - s/w representation of a communication chan
+ * @mbox:		Pointer to the parent/provider of this channel
+ * @txdone_method:	Way to detect TXDone chosen by the API
+ * @cl:			Pointer to the current owner of this channel
+ * @tx_complete:	Transmission completion
+ * @active_req:		Currently active request hook
+ * @msg_count:		No. of mssg currently queued
+ * @msg_free:		Index of next available mssg slot
+ * @msg_data:		Hook for data packet
+ * @lock:		Serialise access to the channel
+ * @con_priv:		Hook for controller driver to attach private data
+ */
+struct mbox_chan {
+	struct mbox_controller *mbox;
+	unsigned txdone_method;
+	struct mbox_client *cl;
+	struct completion tx_complete;
+	void *active_req;
+	unsigned msg_count, msg_free;
+	void *msg_data[MBOX_TX_QUEUE_LEN];
+	spinlock_t lock; /* Serialise access to the channel */
+	void *con_priv;
+};
+
+int mbox_controller_register(struct mbox_controller *mbox); /* can sleep */
+void mbox_controller_unregister(struct mbox_controller *mbox); /* can sleep */
+void mbox_chan_received_data(struct mbox_chan *chan, void *data); /* atomic */
+void mbox_chan_txdone(struct mbox_chan *chan, int r); /* atomic */
+
+#endif /* __MAILBOX_CONTROLLER_H */

From 9177ccfca43770f6b69938be465da01a8cb72176 Mon Sep 17 00:00:00 2001
From: Jassi Brar <jaswinder.singh@linaro.org>
Date: Tue, 22 Jul 2014 20:05:58 +0530
Subject: [PATCH 0178/1185] doc: add documentation for mailbox framework

 Some explanations, with examples, of how to implement users
and providers of the mailbox framework.

Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 Documentation/mailbox.txt | 122 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 122 insertions(+)
 create mode 100644 Documentation/mailbox.txt

diff --git a/Documentation/mailbox.txt b/Documentation/mailbox.txt
new file mode 100644
index 000000000000..60f43ff629aa
--- /dev/null
+++ b/Documentation/mailbox.txt
@@ -0,0 +1,122 @@
+		The Common Mailbox Framework
+		Jassi Brar <jaswinder.singh@linaro.org>
+
+ This document aims to help developers write client and controller
+drivers for the API. But before we start, let us note that the
+client (especially) and controller drivers are likely going to be
+very platform specific because the remote firmware is likely to be
+proprietary and implement non-standard protocol. So even if two
+platforms employ, say, PL320 controller, the client drivers can't
+be shared across them. Even the PL320 driver might need to accommodate
+some platform specific quirks. So the API is meant mainly to avoid
+similar copies of code written for each platform. Having said that,
+nothing prevents the remote f/w from also being Linux based and using
+the same API there. However none of that helps us locally because we only
+ever deal at client's protocol level.
+ Some of the choices made during implementation are the result of this
+peculiarity of this "common" framework.
+
+
+
+	Part 1 - Controller Driver (See include/linux/mailbox_controller.h)
+
+ Allocate mbox_controller and the array of mbox_chan.
+Populate mbox_chan_ops; all callbacks except peek_data() are mandatory.
+The controller driver might know a message has been consumed
+by the remote by getting an IRQ or polling some hardware flag
+or it can never know (the client knows by way of the protocol).
+The method in order of preference is IRQ -> Poll -> None, which
+the controller driver should set via 'txdone_irq' or 'txdone_poll'
+or neither.
+
+
+	Part 2 - Client Driver (See include/linux/mailbox_client.h)
+
+ The client might want to operate in blocking mode (synchronously
+send a message through before returning) or non-blocking/async mode (submit
+a message and a callback function to the API and return immediately).
+
+
+struct demo_client {
+	struct mbox_client cl;
+	struct mbox_chan *mbox;
+	struct completion c;
+	bool async;
+	/* ... */
+};
+
+/*
+ * This is the handler for data received from remote. The behaviour is purely
+ * dependent upon the protocol. This is just an example.
+ */
+static void message_from_remote(struct mbox_client *cl, void *mssg)
+{
+	struct demo_client *dc = container_of(cl,
+						struct demo_client, cl);
+	if (dc->async) {
+		if (is_an_ack(mssg)) {
+			/* An ACK to our last sample sent */
+			return; /* Or do something else here */
+		} else { /* A new message from remote */
+			queue_req(mssg);
+		}
+	} else {
+		/* Remote f/w sends only ACK packets on this channel */
+		return;
+	}
+}
+
+static void sample_sent(struct mbox_client *cl, void *mssg, int r)
+{
+	struct demo_client *dc = container_of(cl,
+						struct demo_client, cl);
+	complete(&dc->c);
+}
+
+static void client_demo(struct platform_device *pdev)
+{
+	struct demo_client *dc_sync, *dc_async;
+	/* The controller already knows async_pkt and sync_pkt */
+	struct async_pkt ap;
+	struct sync_pkt sp;
+
+	dc_sync = kzalloc(sizeof(*dc_sync), GFP_KERNEL);
+	dc_async = kzalloc(sizeof(*dc_async), GFP_KERNEL);
+
+	/* Populate non-blocking mode client */
+	dc_async->cl.dev = &pdev->dev;
+	dc_async->cl.rx_callback = message_from_remote;
+	dc_async->cl.tx_done = sample_sent;
+	dc_async->cl.tx_block = false;
+	dc_async->cl.tx_tout = 0; /* doesn't matter here */
+	dc_async->cl.knows_txdone = false; /* depending upon protocol */
+	dc_async->async = true;
+	init_completion(&dc_async->c);
+
+	/* Populate blocking mode client */
+	dc_sync->cl.dev = &pdev->dev;
+	dc_sync->cl.rx_callback = message_from_remote;
+	dc_sync->cl.tx_done = NULL; /* operate in blocking mode */
+	dc_sync->cl.tx_block = true;
+	dc_sync->cl.tx_tout = 500; /* by half a second */
+	dc_sync->cl.knows_txdone = false; /* depending upon protocol */
+	dc_sync->async = false;
+
+	/* ASync mailbox is listed second in 'mboxes' property */
+	dc_async->mbox = mbox_request_channel(&dc_async->cl, 1);
+	/* Populate data packet */
+	/* ap.xxx = 123; etc */
+	/* Send async message to remote */
+	mbox_send_message(dc_async->mbox, &ap);
+
+	/* Sync mailbox is listed first in 'mboxes' property */
+	dc_sync->mbox = mbox_request_channel(&dc_sync->cl, 0);
+	/* Populate data packet */
+	/* sp.abc = 123; etc */
+	/* Send message to remote in blocking mode */
+	mbox_send_message(dc_sync->mbox, &sp);
+	/* At this point 'sp' has been sent */
+
+	/* Now wait for async chan to be done */
+	wait_for_completion(&dc_async->c);
+}

From 35fe1a70140269da083f3b64d459a4b0dba83608 Mon Sep 17 00:00:00 2001
From: Jassi Brar <jaswinder.singh@linaro.org>
Date: Tue, 22 Jul 2014 20:40:04 +0530
Subject: [PATCH 0179/1185] dt: mailbox: add generic bindings

Define generic bindings for the framework clients to
request mailbox channels.

Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
Reviewed-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 .../devicetree/bindings/mailbox/mailbox.txt   | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mailbox/mailbox.txt

diff --git a/Documentation/devicetree/bindings/mailbox/mailbox.txt b/Documentation/devicetree/bindings/mailbox/mailbox.txt
new file mode 100644
index 000000000000..1a2cd3d266db
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/mailbox.txt
@@ -0,0 +1,38 @@
+* Generic Mailbox Controller and client driver bindings
+
+Generic binding to provide a way for Mailbox controller drivers to
+assign appropriate mailbox channel to client drivers.
+
+* Mailbox Controller
+
+Required property:
+- #mbox-cells: Must be at least 1. Number of cells in a mailbox
+		specifier.
+
+Example:
+	mailbox: mailbox {
+		...
+		#mbox-cells = <1>;
+	};
+
+
+* Mailbox Client
+
+Required property:
+- mboxes: List of phandle and mailbox channel specifiers.
+
+Optional property:
+- mbox-names: List of identifier strings for each mailbox channel
+		required by the client. The use of this property
+		is discouraged in favor of using index in list of
+		'mboxes' while requesting a mailbox. Instead the
+		platforms may define channel indices, in DT headers,
+		to something legible.
+
+Example:
+	pwr_cntrl: power {
+		...
+		mbox-names = "pwr-ctrl", "rpc";
+		mboxes = <&mailbox 0
+			&mailbox 1>;
+	};

From 9bc0c15675840178cee1486c2a7f25faead1518e Mon Sep 17 00:00:00 2001
From: Colin Cross <ccross@android.com>
Date: Tue, 5 Aug 2014 12:05:17 -0700
Subject: [PATCH 0180/1185] mm: fix prctl_set_vma_anon_name

prctl_set_vma_anon_name could attempt to set the name across
two vmas at the same time due to a typo, which might corrupt
the vma list.  Fix it to use tmp instead of end to limit
the name setting to a single vma at a time.

Change-Id: Ie32d8ddb0fd547efbeedd6528acdab5ca5b308b4
Reported-by: Jed Davis <jld@mozilla.com>
Signed-off-by: Colin Cross <ccross@android.com>
---
 kernel/sys.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sys.c b/kernel/sys.c
index ab7fda5fbe18..65d3e55bd282 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2187,7 +2187,7 @@ static int prctl_set_vma_anon_name(unsigned long start, unsigned long end,
 			tmp = end;
 
 		/* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
-		error = prctl_update_vma_anon_name(vma, &prev, start, end,
+		error = prctl_update_vma_anon_name(vma, &prev, start, tmp,
 				(const char __user *)arg);
 		if (error)
 			return error;

From a3d5a3f5affd45d9aa21daf04d4eb543fd3ed444 Mon Sep 17 00:00:00 2001
From: Dmitry Shmidt <dimitrysh@google.com>
Date: Fri, 22 Aug 2014 14:40:18 -0700
Subject: [PATCH 0181/1185] Add flags parameter to get_country_code template

Change-Id: Ic3f173db144a301ea104f544fc8ec723241c1d59
Signed-off-by: Dmitry Shmidt <dimitrysh@google.com>
---
 include/linux/wlan_plat.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/wlan_plat.h b/include/linux/wlan_plat.h
index 40ec3482d1ef..8ad2dbd0c296 100644
--- a/include/linux/wlan_plat.h
+++ b/include/linux/wlan_plat.h
@@ -15,13 +15,15 @@
 #ifndef _LINUX_WLAN_PLAT_H_
 #define _LINUX_WLAN_PLAT_H_
 
+#define WLAN_PLAT_NODFS_FLAG	0x01
+
 struct wifi_platform_data {
 	int (*set_power)(int val);
 	int (*set_reset)(int val);
 	int (*set_carddetect)(int val);
 	void *(*mem_prealloc)(int section, unsigned long size);
 	int (*get_mac_addr)(unsigned char *buf);
-	void *(*get_country_code)(char *ccode);
+	void *(*get_country_code)(char *ccode, u32 flags);
 };
 
 #endif

From a327898fa354ae18e9fe07d3e203304f4ef5c0d2 Mon Sep 17 00:00:00 2001
From: xerox_lin <xerox_lin@htc.com>
Date: Thu, 14 Aug 2014 14:48:44 +0800
Subject: [PATCH 0182/1185] usb: Add support for rndis uplink aggregation

The RNDIS protocol supports data aggregation on the uplink, which can
help reduce MIPS by reducing the number of interrupts on the device.
Throughput also improves by 20-30%. Aggregation is disabled by setting
the aggregation packet size to 1. To improve UL throughput, UL
aggregation is set to 3 RNDIS packets per transfer by default. It can be
configured via the module parameter rndis_ul_max_pkt_per_xfer.

Change-Id: I0b62a21a5c3ceb6b04933d0d6da33301dbafe493
Signed-off-by: Vamsi Krishna <vskrishn@codeaurora.org>
Signed-off-by: Xerox Lin <xerox_lin@htc.com>
---
 drivers/usb/gadget/f_rndis.c |  13 ++++-
 drivers/usb/gadget/rndis.c   | 105 +++++++++++++++++++++++++++++------
 drivers/usb/gadget/rndis.h   |   2 +
 drivers/usb/gadget/u_ether.c |   6 ++
 drivers/usb/gadget/u_ether.h |   4 ++
 5 files changed, 111 insertions(+), 19 deletions(-)

diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c
index 21c5ee2482d6..7646a564bfda 100644
--- a/drivers/usb/gadget/f_rndis.c
+++ b/drivers/usb/gadget/f_rndis.c
@@ -25,7 +25,6 @@
 #include "u_ether.h"
 #include "rndis.h"
 
-
 /*
  * This function is an RNDIS Ethernet port -- a Microsoft protocol that's
  * been promoted instead of the standard CDC Ethernet.  The published RNDIS
@@ -67,6 +66,16 @@
  *   - MS-Windows drivers sometimes emit undocumented requests.
  */
 
+static bool rndis_multipacket_dl_disable;
+module_param(rndis_multipacket_dl_disable, bool, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(rndis_multipacket_dl_disable,
+	"Disable RNDIS Multi-packet support in DownLink");
+
+static unsigned int rndis_ul_max_pkt_per_xfer = 3;
+module_param(rndis_ul_max_pkt_per_xfer, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(rndis_ul_max_pkt_per_xfer,
+       "Maximum packets per transfer for UL aggregation");
+
 struct f_rndis {
 	struct gether			port;
 	u8				ctrl_id, data_id;
@@ -748,6 +757,7 @@ rndis_bind(struct usb_configuration *c, struct usb_function *f)
 
 	rndis_set_param_medium(rndis->config, RNDIS_MEDIUM_802_3, 0);
 	rndis_set_host_mac(rndis->config, rndis->ethaddr);
+	rndis_set_max_pkt_xfer(rndis->config, rndis_ul_max_pkt_per_xfer);
 
 	if (rndis->manufacturer && rndis->vendorID &&
 			rndis_set_param_vendor(rndis->config, rndis->vendorID,
@@ -854,6 +864,7 @@ rndis_bind_config_vendor(struct usb_configuration *c, u8 ethaddr[ETH_ALEN],
 	rndis->port.header_len = sizeof(struct rndis_packet_msg_type);
 	rndis->port.wrap = rndis_add_header;
 	rndis->port.unwrap = rndis_rm_hdr;
+	rndis->port.ul_max_pkts_per_xfer = rndis_ul_max_pkt_per_xfer;
 
 	rndis->port.func.name = "rndis";
 	rndis->port.func.strings = rndis_strings;
diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c
index 5378dc6574fb..cb2767df3fba 100644
--- a/drivers/usb/gadget/rndis.c
+++ b/drivers/usb/gadget/rndis.c
@@ -59,6 +59,16 @@ MODULE_PARM_DESC (rndis_debug, "enable debugging");
 
 #define RNDIS_MAX_CONFIGS	1
 
+int rndis_ul_max_pkt_per_xfer_rcvd;
+module_param(rndis_ul_max_pkt_per_xfer_rcvd, int, S_IRUGO);
+MODULE_PARM_DESC(rndis_ul_max_pkt_per_xfer_rcvd,
+		"Max num of REMOTE_NDIS_PACKET_MSGs received in a single transfer");
+
+int rndis_ul_max_xfer_size_rcvd;
+module_param(rndis_ul_max_xfer_size_rcvd, int, S_IRUGO);
+MODULE_PARM_DESC(rndis_ul_max_xfer_size_rcvd,
+		"Max size of bus transfer received");
+
 
 static rndis_params rndis_per_dev_params[RNDIS_MAX_CONFIGS];
 
@@ -585,12 +595,12 @@ static int rndis_init_response(int configNr, rndis_init_msg_type *buf)
 	resp->MinorVersion = cpu_to_le32(RNDIS_MINOR_VERSION);
 	resp->DeviceFlags = cpu_to_le32(RNDIS_DF_CONNECTIONLESS);
 	resp->Medium = cpu_to_le32(RNDIS_MEDIUM_802_3);
-	resp->MaxPacketsPerTransfer = cpu_to_le32(1);
-	resp->MaxTransferSize = cpu_to_le32(
-		  params->dev->mtu
+	resp->MaxPacketsPerTransfer = cpu_to_le32(params->max_pkt_per_xfer);
+	resp->MaxTransferSize = cpu_to_le32(params->max_pkt_per_xfer *
+		(params->dev->mtu
 		+ sizeof(struct ethhdr)
 		+ sizeof(struct rndis_packet_msg_type)
-		+ 22);
+		+ 22));
 	resp->PacketAlignmentFactor = cpu_to_le32(0);
 	resp->AFListOffset = cpu_to_le32(0);
 	resp->AFListSize = cpu_to_le32(0);
@@ -916,6 +926,8 @@ int rndis_set_param_dev(u8 configNr, struct net_device *dev, u16 *cdc_filter)
 	rndis_per_dev_params[configNr].dev = dev;
 	rndis_per_dev_params[configNr].filter = cdc_filter;
 
+	rndis_ul_max_xfer_size_rcvd = 0;
+	rndis_ul_max_pkt_per_xfer_rcvd = 0;
 	return 0;
 }
 
@@ -942,6 +954,13 @@ int rndis_set_param_medium(u8 configNr, u32 medium, u32 speed)
 	return 0;
 }
 
+void rndis_set_max_pkt_xfer(u8 configNr, u8 max_pkt_per_xfer)
+{
+	pr_debug("%s:\n", __func__);
+
+	rndis_per_dev_params[configNr].max_pkt_per_xfer = max_pkt_per_xfer;
+}
+
 void rndis_add_hdr(struct sk_buff *skb)
 {
 	struct rndis_packet_msg_type *header;
@@ -1014,23 +1033,73 @@ int rndis_rm_hdr(struct gether *port,
 			struct sk_buff *skb,
 			struct sk_buff_head *list)
 {
-	/* tmp points to a struct rndis_packet_msg_type */
-	__le32 *tmp = (void *)skb->data;
+	int num_pkts = 1;
 
-	/* MessageType, MessageLength */
-	if (cpu_to_le32(RNDIS_MSG_PACKET)
-			!= get_unaligned(tmp++)) {
-		dev_kfree_skb_any(skb);
-		return -EINVAL;
-	}
-	tmp++;
+	if (skb->len > rndis_ul_max_xfer_size_rcvd)
+		rndis_ul_max_xfer_size_rcvd = skb->len;
 
-	/* DataOffset, DataLength */
-	if (!skb_pull(skb, get_unaligned_le32(tmp++) + 8)) {
-		dev_kfree_skb_any(skb);
-		return -EOVERFLOW;
+	while (skb->len) {
+		struct rndis_packet_msg_type *hdr;
+		struct sk_buff          *skb2;
+		u32             msg_len, data_offset, data_len;
+
+		/* some rndis hosts send extra byte to avoid zlp, ignore it */
+		if (skb->len == 1) {
+			dev_kfree_skb_any(skb);
+			return 0;
+		}
+
+		if (skb->len < sizeof *hdr) {
+			pr_err("invalid rndis pkt: skblen:%u hdr_len:%u",
+					skb->len, sizeof *hdr);
+			dev_kfree_skb_any(skb);
+			return -EINVAL;
+		}
+
+		hdr = (void *)skb->data;
+		msg_len = le32_to_cpu(hdr->MessageLength);
+		data_offset = le32_to_cpu(hdr->DataOffset);
+		data_len = le32_to_cpu(hdr->DataLength);
+
+		if (skb->len < msg_len ||
+				((data_offset + data_len + 8) > msg_len)) {
+			pr_err("invalid rndis message: %d/%d/%d/%d, len:%d\n",
+					le32_to_cpu(hdr->MessageType),
+					msg_len, data_offset, data_len, skb->len);
+			dev_kfree_skb_any(skb);
+			return -EOVERFLOW;
+		}
+		if (le32_to_cpu(hdr->MessageType) != RNDIS_MSG_PACKET) {
+			pr_err("invalid rndis message: %d/%d/%d/%d, len:%d\n",
+					le32_to_cpu(hdr->MessageType),
+					msg_len, data_offset, data_len, skb->len);
+			dev_kfree_skb_any(skb);
+			return -EINVAL;
+		}
+
+		skb_pull(skb, data_offset + 8);
+
+		if (msg_len == skb->len) {
+			skb_trim(skb, data_len);
+			break;
+		}
+
+		skb2 = skb_clone(skb, GFP_ATOMIC);
+		if (!skb2) {
+			pr_err("%s:skb clone failed\n", __func__);
+			dev_kfree_skb_any(skb);
+			return -ENOMEM;
+		}
+
+		skb_pull(skb, msg_len - sizeof *hdr);
+		skb_trim(skb2, data_len);
+		skb_queue_tail(list, skb2);
+
+		num_pkts++;
 	}
-	skb_trim(skb, get_unaligned_le32(tmp++));
+
+	if (num_pkts > rndis_ul_max_pkt_per_xfer_rcvd)
+		rndis_ul_max_pkt_per_xfer_rcvd = num_pkts;
 
 	skb_queue_tail(list, skb);
 	return 0;
diff --git a/drivers/usb/gadget/rndis.h b/drivers/usb/gadget/rndis.h
index 0647f2f34e89..12045b31a311 100644
--- a/drivers/usb/gadget/rndis.h
+++ b/drivers/usb/gadget/rndis.h
@@ -189,6 +189,7 @@ typedef struct rndis_params
 	struct net_device	*dev;
 
 	u32			vendorID;
+	u8			max_pkt_per_xfer;
 	const char		*vendorDescr;
 	void			(*resp_avail)(void *v);
 	void			*v;
@@ -204,6 +205,7 @@ int  rndis_set_param_dev (u8 configNr, struct net_device *dev,
 int  rndis_set_param_vendor (u8 configNr, u32 vendorID,
 			    const char *vendorDescr);
 int  rndis_set_param_medium (u8 configNr, u32 medium, u32 speed);
+void rndis_set_max_pkt_xfer(u8 configNr, u8 max_pkt_per_xfer);
 void rndis_add_hdr (struct sk_buff *skb);
 int rndis_rm_hdr(struct gether *port, struct sk_buff *skb,
 			struct sk_buff_head *list);
diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index 4b76124ce96b..801e326e54c0 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -64,6 +64,7 @@ struct eth_dev {
 	struct sk_buff_head	rx_frames;
 
 	unsigned		header_len;
+	unsigned		ul_max_pkts_per_xfer;
 	struct sk_buff		*(*wrap)(struct gether *, struct sk_buff *skb);
 	int			(*unwrap)(struct gether *,
 						struct sk_buff *skb,
@@ -226,9 +227,13 @@ rx_submit(struct eth_dev *dev, struct usb_request *req, gfp_t gfp_flags)
 	size += out->maxpacket - 1;
 	size -= size % out->maxpacket;
 
+	if (dev->ul_max_pkts_per_xfer)
+		size *= dev->ul_max_pkts_per_xfer;
+
 	if (dev->port_usb->is_fixed)
 		size = max_t(size_t, size, dev->port_usb->fixed_out_len);
 
+	DBG(dev, "%s: size: %d\n", __func__, size);
 	skb = alloc_skb(size + NET_IP_ALIGN, gfp_flags);
 	if (skb == NULL) {
 		DBG(dev, "no rx skb\n");
@@ -882,6 +887,7 @@ struct net_device *gether_connect(struct gether *link)
 		dev->header_len = link->header_len;
 		dev->unwrap = link->unwrap;
 		dev->wrap = link->wrap;
+		dev->ul_max_pkts_per_xfer = link->ul_max_pkts_per_xfer;
 
 		spin_lock(&dev->lock);
 		dev->port_usb = link;
diff --git a/drivers/usb/gadget/u_ether.h b/drivers/usb/gadget/u_ether.h
index 02522338a708..ce803d415887 100644
--- a/drivers/usb/gadget/u_ether.h
+++ b/drivers/usb/gadget/u_ether.h
@@ -54,6 +54,10 @@ struct gether {
 	bool				is_fixed;
 	u32				fixed_out_len;
 	u32				fixed_in_len;
+	unsigned		ul_max_pkts_per_xfer;
+/* Max number of SKB packets to be used to create Multi Packet RNDIS */
+#define TX_SKB_HOLD_THRESHOLD		3
+	bool				multi_pkt_xfer;
 	struct sk_buff			*(*wrap)(struct gether *port,
 						struct sk_buff *skb);
 	int				(*unwrap)(struct gether *port,

From 945a225e77ffa18377e397654867620c71f82998 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 9 Nov 2013 00:51:56 +0100
Subject: [PATCH 0183/1185] ARM: 7888/1: seccomp: not compatible with ARM OABI

Make sure that seccomp filter won't be built when ARM OABI is in use,
since there is work needed to distinguish calling conventions. Until
that is done (which is likely never since OABI is deprecated), make
sure seccomp filter is unavailable in the OABI world.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Will Drewry <wad@chromium.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Kconfig | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 99887aaa04bb..0dde9b9e91c2 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -22,7 +22,7 @@ config ARM
 	select HAVE_AOUT
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
 	select HAVE_ARCH_KGDB
-	select HAVE_ARCH_SECCOMP_FILTER
+	select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_BPF_JIT
 	select HAVE_C_RECORDMCOUNT
@@ -1681,6 +1681,11 @@ config OABI_COMPAT
 	  in memory differs between the legacy ABI and the new ARM EABI
 	  (only for non "thumb" binaries). This option adds a tiny
 	  overhead to all syscalls and produces a slightly larger kernel.
+
+	  The seccomp filter system will not be available when this is
+	  selected, since there is no way yet to sensibly distinguish
+	  between calling conventions during filtering.
+
 	  If you know you'll be using only pure EABI user space then you
 	  can say N here. If this option is not selected and you attempt
 	  to execute a legacy ABI binary then the result will be

From c2da3eba6ac4b2df16d4aa13ce0f9bd360000baa Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Apr 2014 10:54:34 -0700
Subject: [PATCH 0184/1185] seccomp: fix memory leak on filter attach

This sets the correct error code when final filter memory is unavailable,
and frees the raw filter no matter what.

unreferenced object 0xffff8800d6ea4000 (size 512):
  comm "sshd", pid 278, jiffies 4294898315 (age 46.653s)
  hex dump (first 32 bytes):
    21 00 00 00 04 00 00 00 15 00 01 00 3e 00 00 c0  !...........>...
    06 00 00 00 00 00 00 00 21 00 00 00 00 00 00 00  ........!.......
  backtrace:
    [<ffffffff8151414e>] kmemleak_alloc+0x4e/0xb0
    [<ffffffff811a3a40>] __kmalloc+0x280/0x320
    [<ffffffff8110842e>] prctl_set_seccomp+0x11e/0x3b0
    [<ffffffff8107bb6b>] SyS_prctl+0x3bb/0x4a0
    [<ffffffff8152ef2d>] system_call_fastpath+0x1a/0x1f
    [<ffffffffffffffff>] 0xffffffffffffffff

Reported-by: Masami Ichikawa <masami256@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Masami Ichikawa <masami256@gmail.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Conflicts:
	kernel/seccomp.c

Change-Id: Ide3c27bf378397f8faf4218e75c31e4b8bc43c4c
---
 kernel/seccomp.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index b7a10048a32c..260ff1fce63d 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -273,7 +273,23 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	/* Check and rewrite the fprog for seccomp use */
 	ret = seccomp_check_filter(filter->insns, filter->len);
 	if (ret)
-		goto fail;
+		goto free_prog;
+
+	/* Allocate a new seccomp_filter */
+	ret = -ENOMEM;
+	filter = kzalloc(sizeof(struct seccomp_filter) +
+			 sizeof(struct sock_filter_int) * new_len,
+			 GFP_KERNEL|__GFP_NOWARN);
+	if (!filter)
+		goto free_prog;
+
+	ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len);
+	if (ret)
+		goto free_filter;
+	kfree(fp);
+
+	atomic_set(&filter->usage, 1);
+	filter->len = new_len;
 
 	/*
 	 * If there is an existing filter, make it the prev and don't drop its

From 1ba2ccbc8cdcb0881dd91212b2f04318884dfe4a Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 18 Jul 2014 11:28:33 -0700
Subject: [PATCH 0185/1185] MAINTAINERS: create seccomp entry

Add myself as seccomp maintainer.

Suggested-by: James Morris <jmorris@namei.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 MAINTAINERS | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index ad7e322ad17b..614483608ba4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7131,6 +7131,16 @@ S:	Maintained
 F:	drivers/mmc/host/sdhci.*
 F:	drivers/mmc/host/sdhci-pltfm.[ch]
 
+SECURE COMPUTING
+M:	Kees Cook <keescook@chromium.org>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git seccomp
+S:	Supported
+F:	kernel/seccomp.c
+F:	include/uapi/linux/seccomp.h
+F:	include/linux/seccomp.h
+K:	\bsecure_computing
+K:	\bTIF_SECCOMP\b
+
 SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF)
 M:	Anton Vorontsov <avorontsov@ru.mvista.com>
 L:	linuxppc-dev@lists.ozlabs.org

From 6862b0143649033ca56fd460684e632b35a59abc Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 21 May 2014 15:02:11 -0700
Subject: [PATCH 0186/1185] seccomp: create internal mode-setting function

In preparation for having other callers of the seccomp mode setting
logic, split the prctl entry point away from the core logic that performs
seccomp mode setting.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 kernel/seccomp.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 260ff1fce63d..bf1cd927707f 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -483,7 +483,7 @@ long prctl_get_seccomp(void)
 }
 
 /**
- * prctl_set_seccomp: configures current->seccomp.mode
+ * seccomp_set_mode: internal function for setting seccomp mode
  * @seccomp_mode: requested mode to use
  * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
  *
@@ -496,7 +496,7 @@ long prctl_get_seccomp(void)
  *
  * Returns 0 on success or -EINVAL on failure.
  */
-long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
+static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter)
 {
 	long ret = -EINVAL;
 
@@ -527,3 +527,15 @@ long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
 out:
 	return ret;
 }
+
+/**
+ * prctl_set_seccomp: configures current->seccomp.mode
+ * @seccomp_mode: requested mode to use
+ * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
+ *
+ * Returns 0 on success or -EINVAL on failure.
+ */
+long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
+{
+	return seccomp_set_mode(seccomp_mode, filter);
+}

From c208e4e9f119bc9cd0c444344de6407c0d373a2c Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 25 Jun 2014 15:38:02 -0700
Subject: [PATCH 0187/1185] seccomp: extract check/assign mode helpers

To support splitting mode 1 from mode 2, extract the mode checking and
assignment logic into common functions.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 kernel/seccomp.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index bf1cd927707f..f627d2cb0815 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -219,7 +219,23 @@ static u32 seccomp_run_filters(int syscall)
 	}
 	return ret;
 }
+#endif /* CONFIG_SECCOMP_FILTER */
 
+static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
+{
+	if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
+		return false;
+
+	return true;
+}
+
+static inline void seccomp_assign_mode(unsigned long seccomp_mode)
+{
+	current->seccomp.mode = seccomp_mode;
+	set_tsk_thread_flag(current, TIF_SECCOMP);
+}
+
+#ifdef CONFIG_SECCOMP_FILTER
 /**
  * seccomp_attach_filter: Attaches a seccomp filter to current.
  * @fprog: BPF program to install
@@ -500,8 +516,7 @@ static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter)
 {
 	long ret = -EINVAL;
 
-	if (current->seccomp.mode &&
-	    current->seccomp.mode != seccomp_mode)
+	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
 	switch (seccomp_mode) {
@@ -522,8 +537,7 @@ static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter)
 		goto out;
 	}
 
-	current->seccomp.mode = seccomp_mode;
-	set_thread_flag(TIF_SECCOMP);
+	seccomp_assign_mode(seccomp_mode);
 out:
 	return ret;
 }

From 1a63bcec4fb055acbed31a5cf1cc40e968cf0664 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 25 Jun 2014 15:55:25 -0700
Subject: [PATCH 0188/1185] seccomp: split mode setting routines

Separates the two mode setting paths to make things more readable with
fewer #ifdefs within function bodies.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 kernel/seccomp.c | 71 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 23 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index f627d2cb0815..793bec19b6c7 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -499,48 +499,66 @@ long prctl_get_seccomp(void)
 }
 
 /**
- * seccomp_set_mode: internal function for setting seccomp mode
- * @seccomp_mode: requested mode to use
- * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
- *
- * This function may be called repeatedly with a @seccomp_mode of
- * SECCOMP_MODE_FILTER to install additional filters.  Every filter
- * successfully installed will be evaluated (in reverse order) for each system
- * call the task makes.
+ * seccomp_set_mode_strict: internal function for setting strict seccomp
  *
  * Once current->seccomp.mode is non-zero, it may not be changed.
  *
  * Returns 0 on success or -EINVAL on failure.
  */
-static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter)
+static long seccomp_set_mode_strict(void)
 {
+	const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
 	long ret = -EINVAL;
 
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
-	switch (seccomp_mode) {
-	case SECCOMP_MODE_STRICT:
-		ret = 0;
 #ifdef TIF_NOTSC
-		disable_TSC();
+	disable_TSC();
 #endif
-		break;
+	seccomp_assign_mode(seccomp_mode);
+	ret = 0;
+
+out:
+
+	return ret;
+}
+
 #ifdef CONFIG_SECCOMP_FILTER
-	case SECCOMP_MODE_FILTER:
-		ret = seccomp_attach_user_filter(filter);
-		if (ret)
-			goto out;
-		break;
-#endif
-	default:
+/**
+ * seccomp_set_mode_filter: internal function for setting seccomp filter
+ * @filter: struct sock_fprog containing filter
+ *
+ * This function may be called repeatedly to install additional filters.
+ * Every filter successfully installed will be evaluated (in reverse order)
+ * for each system call the task makes.
+ *
+ * Once current->seccomp.mode is non-zero, it may not be changed.
+ *
+ * Returns 0 on success or -EINVAL on failure.
+ */
+static long seccomp_set_mode_filter(char __user *filter)
+{
+	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
+	long ret = -EINVAL;
+
+	if (!seccomp_may_assign_mode(seccomp_mode))
+		goto out;
+
+	ret = seccomp_attach_user_filter(filter);
+	if (ret)
 		goto out;
-	}
 
 	seccomp_assign_mode(seccomp_mode);
 out:
 	return ret;
 }
+#else
+static inline long seccomp_set_mode_filter(char __user *filter)
+{
+	return -EINVAL;
+}
+#endif
 
 /**
  * prctl_set_seccomp: configures current->seccomp.mode
@@ -551,5 +569,12 @@ static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter)
  */
 long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
 {
-	return seccomp_set_mode(seccomp_mode, filter);
+	switch (seccomp_mode) {
+	case SECCOMP_MODE_STRICT:
+		return seccomp_set_mode_strict();
+	case SECCOMP_MODE_FILTER:
+		return seccomp_set_mode_filter(filter);
+	default:
+		return -EINVAL;
+	}
 }

From a75a29b16e1f3eb2afcb9b2567edfc4cd8899635 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 25 Jun 2014 16:08:24 -0700
Subject: [PATCH 0189/1185] seccomp: add "seccomp" syscall

This adds the new "seccomp" syscall with both an "operation" and "flags"
parameter for future expansion. The third argument is a pointer value,
used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must
be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...).

In addition to the TSYNC flag later in this patch series, there is a
non-zero chance that this syscall could be used for configuring a fixed
argument area for seccomp-tracer-aware processes to pass syscall arguments
in the future. Hence, the use of "seccomp" not simply "seccomp_add_filter"
for this syscall. Additionally, this syscall uses operation, flags,
and user pointer for arguments because strictly passing arguments via
a user pointer would mean seccomp itself would be unable to trivially
filter the seccomp syscall itself.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>

Conflicts:
	arch/x86/syscalls/syscall_32.tbl
	arch/x86/syscalls/syscall_64.tbl
	include/uapi/asm-generic/unistd.h
	kernel/seccomp.c

Change-Id: Id7a365079829fd9164315dec75d6ee415c29b176
---
 arch/Kconfig                      |  1 +
 arch/x86/syscalls/syscall_32.tbl  |  5 +++
 arch/x86/syscalls/syscall_64.tbl  |  5 +++
 include/linux/syscalls.h          |  2 ++
 include/uapi/asm-generic/unistd.h | 12 ++++++-
 include/uapi/linux/seccomp.h      |  4 +++
 kernel/seccomp.c                  | 55 ++++++++++++++++++++++++++++---
 kernel/sys_ni.c                   |  3 ++
 8 files changed, 81 insertions(+), 6 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index a4429bcd609e..84c94a89e75b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -331,6 +331,7 @@ config HAVE_ARCH_SECCOMP_FILTER
 	  - secure_computing is called from a ptrace_event()-safe context
 	  - secure_computing return value is checked and a return value of -1
 	    results in the system call being skipped immediately.
+	  - seccomp syscall wired up
 
 config SECCOMP_FILTER
 	def_bool y
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index aabfb8380a1c..de6d048d0305 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -357,3 +357,8 @@
 348	i386	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
 349	i386	kcmp			sys_kcmp
 350	i386	finit_module		sys_finit_module
+# Backporting seccomp, skip a few ...
+# 351	i386	sched_setattr		sys_sched_setattr
+# 352	i386	sched_getattr		sys_sched_getattr
+# 353	i386	renameat2		sys_renameat2
+354	i386	seccomp			sys_seccomp
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 38ae65dfd14f..a40bd6eda554 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -320,6 +320,11 @@
 311	64	process_vm_writev	sys_process_vm_writev
 312	common	kcmp			sys_kcmp
 313	common	finit_module		sys_finit_module
+# Backporting seccomp, skip a few ...
+# 314	common	sched_setattr		sys_sched_setattr
+# 315	common	sched_getattr		sys_sched_getattr
+# 316	common	renameat2		sys_renameat2
+317	common	seccomp			sys_seccomp
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 4147d700a293..2a955dcc863c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -841,4 +841,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
 			 unsigned long idx1, unsigned long idx2);
 asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
+asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
+			    const char __user *uargs);
 #endif
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 0cc74c4403e4..b422ad5d238b 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -692,9 +692,19 @@ __SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
 __SYSCALL(__NR_kcmp, sys_kcmp)
 #define __NR_finit_module 273
 __SYSCALL(__NR_finit_module, sys_finit_module)
+/* Backporting seccomp, skip a few ...
+ * #define __NR_sched_setattr 274
+__SYSCALL(__NR_sched_setattr, sys_sched_setattr)
+ * #define __NR_sched_getattr 275
+__SYSCALL(__NR_sched_getattr, sys_sched_getattr)
+ * #define __NR_renameat2 276
+__SYSCALL(__NR_renameat2, sys_renameat2)
+ */
+#define __NR_seccomp 277
+__SYSCALL(__NR_seccomp, sys_seccomp)
 
 #undef __NR_syscalls
-#define __NR_syscalls 274
+#define __NR_syscalls 278
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index ac2dc9f72973..b258878ba754 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -10,6 +10,10 @@
 #define SECCOMP_MODE_STRICT	1 /* uses hard-coded filter. */
 #define SECCOMP_MODE_FILTER	2 /* uses user-supplied filter. */
 
+/* Valid operations for seccomp syscall. */
+#define SECCOMP_SET_MODE_STRICT	0
+#define SECCOMP_SET_MODE_FILTER	1
+
 /*
  * All BPF programs must return a 32-bit value.
  * The bottom 16-bits are for optional return data.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 793bec19b6c7..47dc5c59621e 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -18,6 +18,7 @@
 #include <linux/compat.h>
 #include <linux/sched.h>
 #include <linux/seccomp.h>
+#include <linux/syscalls.h>
 
 /* #define SECCOMP_DEBUG 1 */
 
@@ -325,7 +326,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
  *
  * Returns 0 on success and non-zero otherwise.
  */
-long seccomp_attach_user_filter(char __user *user_filter)
+static long seccomp_attach_user_filter(const char __user *user_filter)
 {
 	struct sock_fprog fprog;
 	long ret = -EFAULT;
@@ -527,6 +528,7 @@ static long seccomp_set_mode_strict(void)
 #ifdef CONFIG_SECCOMP_FILTER
 /**
  * seccomp_set_mode_filter: internal function for setting seccomp filter
+ * @flags:  flags to change filter behavior
  * @filter: struct sock_fprog containing filter
  *
  * This function may be called repeatedly to install additional filters.
@@ -537,11 +539,16 @@ static long seccomp_set_mode_strict(void)
  *
  * Returns 0 on success or -EINVAL on failure.
  */
-static long seccomp_set_mode_filter(char __user *filter)
+static long seccomp_set_mode_filter(unsigned int flags,
+				    const char __user *filter)
 {
 	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
 	long ret = -EINVAL;
 
+	/* Validate flags. */
+	if (flags != 0)
+		goto out;
+
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
@@ -554,12 +561,35 @@ static long seccomp_set_mode_filter(char __user *filter)
 	return ret;
 }
 #else
-static inline long seccomp_set_mode_filter(char __user *filter)
+static inline long seccomp_set_mode_filter(unsigned int flags,
+					   const char __user *filter)
 {
 	return -EINVAL;
 }
 #endif
 
+/* Common entry point for both prctl and syscall. */
+static long do_seccomp(unsigned int op, unsigned int flags,
+		       const char __user *uargs)
+{
+	switch (op) {
+	case SECCOMP_SET_MODE_STRICT:
+		if (flags != 0 || uargs != NULL)
+			return -EINVAL;
+		return seccomp_set_mode_strict();
+	case SECCOMP_SET_MODE_FILTER:
+		return seccomp_set_mode_filter(flags, uargs);
+	default:
+		return -EINVAL;
+	}
+}
+
+SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
+			 const char __user *, uargs)
+{
+	return do_seccomp(op, flags, uargs);
+}
+
 /**
  * prctl_set_seccomp: configures current->seccomp.mode
  * @seccomp_mode: requested mode to use
@@ -569,12 +599,27 @@ static inline long seccomp_set_mode_filter(char __user *filter)
  */
 long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
 {
+	unsigned int op;
+	char __user *uargs;
+
 	switch (seccomp_mode) {
 	case SECCOMP_MODE_STRICT:
-		return seccomp_set_mode_strict();
+		op = SECCOMP_SET_MODE_STRICT;
+		/*
+		 * Setting strict mode through prctl always ignored filter,
+		 * so make sure it is always NULL here to pass the internal
+		 * check in do_seccomp().
+		 */
+		uargs = NULL;
+		break;
 	case SECCOMP_MODE_FILTER:
-		return seccomp_set_mode_filter(filter);
+		op = SECCOMP_SET_MODE_FILTER;
+		uargs = filter;
+		break;
 	default:
 		return -EINVAL;
 	}
+
+	/* prctl interface doesn't have flags, so they are always zero. */
+	return do_seccomp(op, 0, uargs);
 }
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 7078052284fd..7e7fc0a082c4 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -209,3 +209,6 @@ cond_syscall(compat_sys_open_by_handle_at);
 
 /* compare kernel pointers */
 cond_syscall(sys_kcmp);
+
+/* operate on Secure Computing state */
+cond_syscall(sys_seccomp);

From 83f1ccba87b06575966b65352db565c363af7bcf Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 10 Jun 2014 15:40:23 -0700
Subject: [PATCH 0190/1185] ARM: add seccomp syscall

Wires up the new seccomp syscall.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>

Conflicts:
	arch/arm/include/uapi/asm/unistd.h
	arch/arm/kernel/calls.S

Change-Id: Ia519993681f70bd38699a73d8897ae9b44b3f0af
---
 arch/arm/include/uapi/asm/unistd.h | 6 ++++++
 arch/arm/kernel/calls.S            | 4 ++++
 2 files changed, 10 insertions(+)

diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index af33b44990ed..bbe80a7cba0c 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -406,6 +406,12 @@
 #define __NR_process_vm_writev		(__NR_SYSCALL_BASE+377)
 #define __NR_kcmp			(__NR_SYSCALL_BASE+378)
 #define __NR_finit_module		(__NR_SYSCALL_BASE+379)
+/* Backporting seccomp, skip a few ...
+ * #define __NR_sched_setattr		(__NR_SYSCALL_BASE+380)
+ * #define __NR_sched_getattr		(__NR_SYSCALL_BASE+381)
+ * #define __NR_renameat2			(__NR_SYSCALL_BASE+382)
+ */
+#define __NR_seccomp			(__NR_SYSCALL_BASE+383)
 
 /*
  * This may need to be greater than __NR_last_syscall+1 in order to
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index c6ca7e376773..1a2e529a1340 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -389,6 +389,10 @@
 		CALL(sys_process_vm_writev)
 		CALL(sys_kcmp)
 		CALL(sys_finit_module)
+/* 380 */	CALL(sys_ni_syscall) /* CALL(sys_sched_setattr) */
+		CALL(sys_ni_syscall) /* CALL(sys_sched_getattr) */
+		CALL(sys_ni_syscall) /* CALL(sys_renameat2) */
+		CALL(sys_seccomp)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted

From 2c6d7de579c2cfebbc9378e5209c641a93839f0a Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 10 Jun 2014 15:45:09 -0700
Subject: [PATCH 0191/1185] MIPS: add seccomp syscall

Wires up the new seccomp syscall.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>

Conflicts:
	arch/mips/include/uapi/asm/unistd.h
	arch/mips/kernel/scall32-o32.S
	arch/mips/kernel/scall64-64.S
	arch/mips/kernel/scall64-n32.S
	arch/mips/kernel/scall64-o32.S

Change-Id: I7031bdbec7c90292aeb7e255c73cb36e6ec43af2
---
 arch/mips/include/uapi/asm/unistd.h | 30 +++++++++++++++++++++++------
 arch/mips/kernel/scall32-o32.S      |  6 ++++++
 arch/mips/kernel/scall64-64.S       |  4 ++++
 arch/mips/kernel/scall64-n32.S      |  4 ++++
 arch/mips/kernel/scall64-o32.S      |  6 +++++-
 5 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h
index 1dee279f9665..af4d5c0a2f02 100644
--- a/arch/mips/include/uapi/asm/unistd.h
+++ b/arch/mips/include/uapi/asm/unistd.h
@@ -369,16 +369,22 @@
 #define __NR_process_vm_writev		(__NR_Linux + 346)
 #define __NR_kcmp			(__NR_Linux + 347)
 #define __NR_finit_module		(__NR_Linux + 348)
+/* Backporting seccomp, skip a few ...
+ * #define __NR_sched_setattr		(__NR_Linux + 349)
+ * #define __NR_sched_getattr		(__NR_Linux + 350)
+ * #define __NR_renameat2			(__NR_Linux + 351)
+ */
+#define __NR_seccomp			(__NR_Linux + 352)
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls		348
+#define __NR_Linux_syscalls		352
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux			4000
-#define __NR_O32_Linux_syscalls		348
+#define __NR_O32_Linux_syscalls		352
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
@@ -695,16 +701,22 @@
 #define __NR_kcmp			(__NR_Linux + 306)
 #define __NR_finit_module		(__NR_Linux + 307)
 #define __NR_getdents64			(__NR_Linux + 308)
+/* Backporting seccomp, skip a few ...
+ * #define __NR_sched_setattr		(__NR_Linux + 309)
+ * #define __NR_sched_getattr		(__NR_Linux + 310)
+ * #define __NR_renameat2			(__NR_Linux + 311)
+ */
+#define __NR_seccomp			(__NR_Linux + 312)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls		308
+#define __NR_Linux_syscalls		312
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux			5000
-#define __NR_64_Linux_syscalls		308
+#define __NR_64_Linux_syscalls		312
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
@@ -1025,15 +1037,21 @@
 #define __NR_process_vm_writev		(__NR_Linux + 310)
 #define __NR_kcmp			(__NR_Linux + 311)
 #define __NR_finit_module		(__NR_Linux + 312)
+/* Backporting seccomp, skip a few ...
+ * #define __NR_sched_setattr		(__NR_Linux + 313)
+ * #define __NR_sched_getattr		(__NR_Linux + 314)
+ * #define __NR_renameat2			(__NR_Linux + 315)
+ */
+#define __NR_seccomp			(__NR_Linux + 316)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls		312
+#define __NR_Linux_syscalls		316
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux			6000
-#define __NR_N32_Linux_syscalls		312
+#define __NR_N32_Linux_syscalls		316
 
 #endif /* _UAPI_ASM_UNISTD_H */
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 9b36424b03c5..bcb2184e8a47 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -593,6 +593,12 @@ einval: li	v0, -ENOSYS
 	sys	sys_process_vm_writev	6
 	sys	sys_kcmp		5
 	sys	sys_finit_module	3
+	/* Backporting seccomp, skip a few ... */
+	sys sys_ni_syscall		0	/* sys_sched_setattr */
+	sys sys_ni_syscall		0	/* sys_sched_getattr */		/* 4350 */
+	sys sys_ni_syscall		0	/* sys_renameat2 */
+	sys	sys_seccomp 3
+
 	.endm
 
 	/* We pre-compute the number of _instruction_ bytes needed to
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 97a5909a61cf..285872f9d6d1 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -424,4 +424,8 @@ sys_call_table:
 	PTR	sys_kcmp
 	PTR	sys_finit_module
 	PTR	sys_getdents64
+	sys sys_ni_syscall	/* sys_sched_setattr */
+	sys sys_ni_syscall	/* sys_sched_getattr */		/* 5310 */
+	sys sys_ni_syscall	/* sys_renameat2 */
+	sys	sys_seccomp
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index edcb6594e7b5..bdee1a1ed1c2 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -417,4 +417,8 @@ EXPORT(sysn32_call_table)
 	PTR	compat_sys_process_vm_writev	/* 6310 */
 	PTR	sys_kcmp
 	PTR	sys_finit_module
+	sys sys_ni_syscall	/* sys_sched_setattr */
+	sys sys_ni_syscall	/* sys_sched_getattr */
+	sys sys_ni_syscall	/* sys_renameat2 */			/* 6315 */
+	sys	sys_seccomp
 	.size	sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 74f485d3c0ef..a1f826a24578 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -541,4 +541,8 @@ sys_call_table:
 	PTR	compat_sys_process_vm_writev
 	PTR	sys_kcmp
 	PTR	sys_finit_module
-	.size	sys_call_table,.-sys_call_table
+	sys sys_ni_syscall	/* sys_sched_setattr */
+	sys sys_ni_syscall	/* sys_sched_getattr */		/* 4350 */
+	sys sys_ni_syscall	/* sys_renameat2 */
+	sys	sys_seccomp
+	.size	sys32_call_table,.-sys32_call_table

From 3497a88f5510c809ca10deac3030493eabba65d7 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 27 Jun 2014 15:16:33 -0700
Subject: [PATCH 0192/1185] seccomp: split filter prep from check and apply

In preparation for adding seccomp locking, move filter creation away
from where it is checked and applied. This will allow for locking where
no memory allocation is happening. The validation, filter attachment,
and seccomp mode setting can all happen under the future locks.

For extreme defensiveness, I've added a BUG_ON check for the calculated
size of the buffer allocation in case BPF_MAXINSNS ever changes, which
shouldn't ever happen. The compiler should actually optimize out this
check since the test above it makes it impossible.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>

Conflicts:
	kernel/seccomp.c

Change-Id: I8d89f80a5b4f2826d90474dcea441c41f0af6594
---
 kernel/seccomp.c | 113 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 77 insertions(+), 36 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 47dc5c59621e..5737aafa6c2f 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -18,6 +18,7 @@
 #include <linux/compat.h>
 #include <linux/sched.h>
 #include <linux/seccomp.h>
+#include <linux/slab.h>
 #include <linux/syscalls.h>
 
 /* #define SECCOMP_DEBUG 1 */
@@ -27,7 +28,6 @@
 #include <linux/filter.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
-#include <linux/slab.h>
 #include <linux/tracehook.h>
 #include <linux/uaccess.h>
 
@@ -238,25 +238,23 @@ static inline void seccomp_assign_mode(unsigned long seccomp_mode)
 
 #ifdef CONFIG_SECCOMP_FILTER
 /**
- * seccomp_attach_filter: Attaches a seccomp filter to current.
+ * seccomp_prepare_filter: Prepares a seccomp filter for use.
  * @fprog: BPF program to install
  *
- * Returns 0 on success or an errno on failure.
+ * Returns filter on success or an ERR_PTR on failure.
  */
-static long seccomp_attach_filter(struct sock_fprog *fprog)
+static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
 {
 	struct seccomp_filter *filter;
-	unsigned long fp_size = fprog->len * sizeof(struct sock_filter);
-	unsigned long total_insns = fprog->len;
+	unsigned long fp_size;
+	struct sock_filter *fp;
+	int new_len;
 	long ret;
 
 	if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
-		return -EINVAL;
-
-	for (filter = current->seccomp.filter; filter; filter = filter->prev)
-		total_insns += filter->len + 4;  /* include a 4 instr penalty */
-	if (total_insns > MAX_INSNS_PER_PATH)
-		return -ENOMEM;
+		return ERR_PTR(-EINVAL);
+	BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
+	fp_size = fprog->len * sizeof(struct sock_filter);
 
 	/*
 	 * Installing a seccomp filter requires that the task have
@@ -267,15 +265,11 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	if (!current->no_new_privs &&
 	    security_capable_noaudit(current_cred(), current_user_ns(),
 				     CAP_SYS_ADMIN) != 0)
-		return -EACCES;
+		return ERR_PTR(-EACCES);
 
-	/* Allocate a new seccomp_filter */
-	filter = kzalloc(sizeof(struct seccomp_filter) + fp_size,
-			 GFP_KERNEL|__GFP_NOWARN);
-	if (!filter)
-		return -ENOMEM;
-	atomic_set(&filter->usage, 1);
-	filter->len = fprog->len;
+	fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN);
+	if (!fp)
+		return ERR_PTR(-ENOMEM);
 
 	/* Copy the instructions from fprog. */
 	ret = -EFAULT;
@@ -308,28 +302,28 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	atomic_set(&filter->usage, 1);
 	filter->len = new_len;
 
-	/*
-	 * If there is an existing filter, make it the prev and don't drop its
-	 * task reference.
-	 */
-	filter->prev = current->seccomp.filter;
-	current->seccomp.filter = filter;
-	return 0;
-fail:
+	return filter;
+
+free_filter_prog:
+	kfree(filter->prog);
+free_filter:
 	kfree(filter);
-	return ret;
+free_prog:
+	kfree(fp);
+	return ERR_PTR(ret);
 }
 
 /**
- * seccomp_attach_user_filter - attaches a user-supplied sock_fprog
+ * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
  * @user_filter: pointer to the user data containing a sock_fprog.
  *
  * Returns 0 on success and non-zero otherwise.
  */
-static long seccomp_attach_user_filter(const char __user *user_filter)
+static struct seccomp_filter *
+seccomp_prepare_user_filter(const char __user *user_filter)
 {
 	struct sock_fprog fprog;
-	long ret = -EFAULT;
+	struct seccomp_filter *filter = ERR_PTR(-EFAULT);
 
 #ifdef CONFIG_COMPAT
 	if (is_compat_task()) {
@@ -342,9 +336,39 @@ static long seccomp_attach_user_filter(const char __user *user_filter)
 #endif
 	if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
 		goto out;
-	ret = seccomp_attach_filter(&fprog);
+	filter = seccomp_prepare_filter(&fprog);
 out:
-	return ret;
+	return filter;
+}
+
+/**
+ * seccomp_attach_filter: validate and attach filter
+ * @flags:  flags to change filter behavior
+ * @filter: seccomp filter to add to the current process
+ *
+ * Returns 0 on success, -ve on error.
+ */
+static long seccomp_attach_filter(unsigned int flags,
+				  struct seccomp_filter *filter)
+{
+	unsigned long total_insns;
+	struct seccomp_filter *walker;
+
+	/* Validate resulting filter length. */
+	total_insns = filter->prog->len;
+	for (walker = current->seccomp.filter; walker; walker = walker->prev)
+		total_insns += walker->prog->len + 4;  /* 4 instr penalty */
+	if (total_insns > MAX_INSNS_PER_PATH)
+		return -ENOMEM;
+
+	/*
+	 * If there is an existing filter, make it the prev and don't drop its
+	 * task reference.
+	 */
+	filter->prev = current->seccomp.filter;
+	current->seccomp.filter = filter;
+
+	return 0;
 }
 
 /* get_seccomp_filter - increments the reference count of the filter on @tsk */
@@ -357,6 +381,14 @@ void get_seccomp_filter(struct task_struct *tsk)
 	atomic_inc(&orig->usage);
 }
 
+static inline void seccomp_filter_free(struct seccomp_filter *filter)
+{
+	if (filter) {
+		sk_filter_free(filter->prog);
+		kfree(filter);
+	}
+}
+
 /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
 void put_seccomp_filter(struct task_struct *tsk)
 {
@@ -365,7 +397,7 @@ void put_seccomp_filter(struct task_struct *tsk)
 	while (orig && atomic_dec_and_test(&orig->usage)) {
 		struct seccomp_filter *freeme = orig;
 		orig = orig->prev;
-		kfree(freeme);
+		seccomp_filter_free(freeme);
 	}
 }
 
@@ -543,21 +575,30 @@ static long seccomp_set_mode_filter(unsigned int flags,
 				    const char __user *filter)
 {
 	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
+	struct seccomp_filter *prepared = NULL;
 	long ret = -EINVAL;
 
 	/* Validate flags. */
 	if (flags != 0)
 		goto out;
 
+	/* Prepare the new filter before holding any locks. */
+	prepared = seccomp_prepare_user_filter(filter);
+	if (IS_ERR(prepared))
+		return PTR_ERR(prepared);
+
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
-	ret = seccomp_attach_user_filter(filter);
+	ret = seccomp_attach_filter(flags, prepared);
 	if (ret)
 		goto out;
+	/* Do not free the successfully attached filter. */
+	prepared = NULL;
 
 	seccomp_assign_mode(seccomp_mode);
 out:
+	seccomp_filter_free(prepared);
 	return ret;
 }
 #else

From 743266ae255c44edeb50911c191bf8083ea683dc Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 27 Jun 2014 15:18:48 -0700
Subject: [PATCH 0193/1185] seccomp: introduce writer locking

Normally, task_struct.seccomp.filter is only ever read or modified by
the task that owns it (current). This property aids in fast access
during system call filtering as read access is lockless.

Updating the pointer from another task, however, opens up race
conditions. To allow cross-thread filter pointer updates, writes to the
seccomp fields are now protected by the sighand spinlock (which is shared
by all threads in the thread group). Read access remains lockless because
pointer updates themselves are atomic.  However, writes (or cloning)
often entail additional checking (like maximum instruction counts)
which require locking to perform safely.

In the case of cloning threads, the child is invisible to the system
until it enters the task list. To make sure a child can't be cloned from
a thread and left in a prior state, seccomp duplication is additionally
moved under the sighand lock. Then parent and child are certain to have
the same seccomp state when they exit the lock.

Based on patches by Will Drewry and David Drysdale.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>

Conflicts:
	kernel/fork.c

Change-Id: Ie01ece43b610867013f7d0e0a2a7be0b9077630f
---
 include/linux/seccomp.h |  6 ++---
 kernel/fork.c           | 49 ++++++++++++++++++++++++++++++++++++++++-
 kernel/seccomp.c        | 16 +++++++++++++-
 3 files changed, 66 insertions(+), 5 deletions(-)

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 6f19cfd1840e..9ab63a574d40 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -14,11 +14,11 @@ struct seccomp_filter;
  *
  * @mode:  indicates one of the valid values above for controlled
  *         system calls available to a process.
- * @filter: The metadata and ruleset for determining what system calls
- *          are allowed for a task.
+ * @filter: must always point to a valid seccomp-filter or NULL as it is
+ *          accessed without locking during system call entry.
  *
  *          @filter must only be accessed from the context of current as there
- *          is no locking.
+ *          is no read locking.
  */
 struct seccomp {
 	int mode;
diff --git a/kernel/fork.c b/kernel/fork.c
index 41671a5d637d..8a3e9a91130c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -327,6 +327,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		goto free_ti;
 
 	tsk->stack = ti;
+#ifdef CONFIG_SECCOMP
+	/*
+	 * We must handle setting up seccomp filters once we're under
+	 * the sighand lock in case orig has changed between now and
+	 * then. Until then, filter must be NULL to avoid messing up
+	 * the usage counts on the error path calling free_task.
+	 */
+	tsk->seccomp.filter = NULL;
+#endif
 
 	setup_thread_stack(tsk, orig);
 	clear_user_return_notifier(tsk);
@@ -1102,6 +1111,39 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 	p->flags = new_flags;
 }
 
+static void copy_seccomp(struct task_struct *p)
+{
+#ifdef CONFIG_SECCOMP
+	/*
+	 * Must be called with sighand->lock held, which is common to
+	 * all threads in the group. Holding cred_guard_mutex is not
+	 * needed because this new task is not yet running and cannot
+	 * be racing exec.
+	 */
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
+	/* Ref-count the new filter user, and assign it. */
+	get_seccomp_filter(current);
+	p->seccomp = current->seccomp;
+
+	/*
+	 * Explicitly enable no_new_privs here in case it got set
+	 * between the task_struct being duplicated and holding the
+	 * sighand lock. The seccomp state and nnp must be in sync.
+	 */
+	if (task_no_new_privs(current))
+		task_set_no_new_privs(p);
+
+	/*
+	 * If the parent gained a seccomp mode after copying thread
+	 * flags but before we took the sighand lock, we have
+	 * to manually enable the seccomp thread flag here.
+	 */
+	if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
+		set_tsk_thread_flag(p, TIF_SECCOMP);
+#endif
+}
+
 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 {
 	current->clear_child_tid = tidptr;
@@ -1205,7 +1247,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto fork_out;
 
 	ftrace_graph_init_task(p);
-	get_seccomp_filter(p);
 
 	rt_mutex_init_task(p);
 
@@ -1447,6 +1488,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	spin_lock(&current->sighand->siglock);
 
+	/*
+	 * Copy seccomp details explicitly here, in case they were changed
+	 * before holding sighand lock.
+	 */
+	copy_seccomp(p);
+
 	/*
 	 * Process group and session signals need to be delivered to just the
 	 * parent before the fork or both the parent and the child after the
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 5737aafa6c2f..616e109ccd2e 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -224,6 +224,8 @@ static u32 seccomp_run_filters(int syscall)
 
 static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
 {
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
 	if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
 		return false;
 
@@ -232,6 +234,8 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
 
 static inline void seccomp_assign_mode(unsigned long seccomp_mode)
 {
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
 	current->seccomp.mode = seccomp_mode;
 	set_tsk_thread_flag(current, TIF_SECCOMP);
 }
@@ -346,6 +350,8 @@ seccomp_prepare_user_filter(const char __user *user_filter)
  * @flags:  flags to change filter behavior
  * @filter: seccomp filter to add to the current process
  *
+ * Caller must be holding current->sighand->siglock lock.
+ *
  * Returns 0 on success, -ve on error.
  */
 static long seccomp_attach_filter(unsigned int flags,
@@ -354,6 +360,8 @@ static long seccomp_attach_filter(unsigned int flags,
 	unsigned long total_insns;
 	struct seccomp_filter *walker;
 
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
 	/* Validate resulting filter length. */
 	total_insns = filter->prog->len;
 	for (walker = current->seccomp.filter; walker; walker = walker->prev)
@@ -543,6 +551,8 @@ static long seccomp_set_mode_strict(void)
 	const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
 	long ret = -EINVAL;
 
+	spin_lock_irq(&current->sighand->siglock);
+
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
@@ -553,6 +563,7 @@ static long seccomp_set_mode_strict(void)
 	ret = 0;
 
 out:
+	spin_unlock_irq(&current->sighand->siglock);
 
 	return ret;
 }
@@ -580,13 +591,15 @@ static long seccomp_set_mode_filter(unsigned int flags,
 
 	/* Validate flags. */
 	if (flags != 0)
-		goto out;
+		return -EINVAL;
 
 	/* Prepare the new filter before holding any locks. */
 	prepared = seccomp_prepare_user_filter(filter);
 	if (IS_ERR(prepared))
 		return PTR_ERR(prepared);
 
+	spin_lock_irq(&current->sighand->siglock);
+
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
@@ -598,6 +611,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
 
 	seccomp_assign_mode(seccomp_mode);
 out:
+	spin_unlock_irq(&current->sighand->siglock);
 	seccomp_filter_free(prepared);
 	return ret;
 }

From d92472791131c6809f21142a8bf75c72cff6ee19 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 27 Jun 2014 15:01:35 -0700
Subject: [PATCH 0194/1185] seccomp: allow mode setting across threads

This changes the mode setting helper to allow threads to change the
seccomp mode from another thread. We must maintain barriers to keep
TIF_SECCOMP synchronized with the rest of the seccomp state.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>

Conflicts:
	kernel/seccomp.c

Change-Id: I091ffa55d8f4e83ff02558a55e2b4dc76ac26905
---
 kernel/seccomp.c | 42 ++++++++++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 616e109ccd2e..1d25f3cdc1be 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -202,19 +202,26 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
  */
 static u32 seccomp_run_filters(int syscall)
 {
-	struct seccomp_filter *f;
+	struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
+	struct seccomp_data sd;
 	u32 ret = SECCOMP_RET_ALLOW;
 
 	/* Ensure unexpected behavior doesn't result in failing open. */
-	if (WARN_ON(current->seccomp.filter == NULL))
+	if (unlikely(WARN_ON(f == NULL)))
 		return SECCOMP_RET_KILL;
 
+	/* Make sure cross-thread synced filter points somewhere sane. */
+	smp_read_barrier_depends();
+
+	populate_seccomp_data(&sd);
+
 	/*
 	 * All filters in the list are evaluated and the lowest BPF return
 	 * value always takes priority (ignoring the DATA).
 	 */
-	for (f = current->seccomp.filter; f; f = f->prev) {
-		u32 cur_ret = sk_run_filter(NULL, f->insns);
+	for (; f; f = f->prev) {
+		u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd);
+
 		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
 			ret = cur_ret;
 	}
@@ -232,12 +239,18 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
 	return true;
 }
 
-static inline void seccomp_assign_mode(unsigned long seccomp_mode)
+static inline void seccomp_assign_mode(struct task_struct *task,
+				       unsigned long seccomp_mode)
 {
-	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+	BUG_ON(!spin_is_locked(&task->sighand->siglock));
 
-	current->seccomp.mode = seccomp_mode;
-	set_tsk_thread_flag(current, TIF_SECCOMP);
+	task->seccomp.mode = seccomp_mode;
+	/*
+	 * Make sure TIF_SECCOMP cannot be set before the mode (and
+	 * filter) is set.
+	 */
+	smp_mb__before_atomic();
+	set_tsk_thread_flag(task, TIF_SECCOMP);
 }
 
 #ifdef CONFIG_SECCOMP_FILTER
@@ -449,12 +462,17 @@ static int mode1_syscalls_32[] = {
 
 int __secure_computing(int this_syscall)
 {
-	int mode = current->seccomp.mode;
 	int exit_sig = 0;
 	int *syscall;
 	u32 ret;
 
-	switch (mode) {
+	/*
+	 * Make sure that any changes to mode from another thread have
+	 * been seen after TIF_SECCOMP was seen.
+	 */
+	rmb();
+
+	switch (current->seccomp.mode) {
 	case SECCOMP_MODE_STRICT:
 		syscall = mode1_syscalls;
 #ifdef CONFIG_COMPAT
@@ -559,7 +577,7 @@ static long seccomp_set_mode_strict(void)
 #ifdef TIF_NOTSC
 	disable_TSC();
 #endif
-	seccomp_assign_mode(seccomp_mode);
+	seccomp_assign_mode(current, seccomp_mode);
 	ret = 0;
 
 out:
@@ -609,7 +627,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	/* Do not free the successfully attached filter. */
 	prepared = NULL;
 
-	seccomp_assign_mode(seccomp_mode);
+	seccomp_assign_mode(current, seccomp_mode);
 out:
 	spin_unlock_irq(&current->sighand->siglock);
 	seccomp_filter_free(prepared);

From 81ff7fa232f4d404346ba6a22ac70f7e3385c632 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 5 Jun 2014 00:23:17 -0700
Subject: [PATCH 0195/1185] seccomp: implement SECCOMP_FILTER_FLAG_TSYNC

Applying restrictive seccomp filter programs to large or diverse
codebases often requires handling threads which may be started early in
the process lifetime (e.g., by code that is linked in). While it is
possible to apply permissive programs prior to process start up, it is
difficult to further restrict the kernel ABI to those threads after that
point.

This change adds a new seccomp syscall flag to SECCOMP_SET_MODE_FILTER for
synchronizing thread group seccomp filters at filter installation time.

When calling seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
filter) an attempt will be made to synchronize all threads in current's
threadgroup to its new seccomp filter program. This is possible iff all
threads are using a filter that is an ancestor to the filter current is
attempting to synchronize to. NULL filters (where the task is running as
SECCOMP_MODE_DISABLED) are also treated as ancestors allowing threads to be
transitioned into SECCOMP_MODE_FILTER. If prctl(PR_SET_NO_NEW_PRIVS,
...) has been set on the calling thread, no_new_privs will be set for
all synchronized threads too. On success, 0 is returned. On failure,
the pid of one of the failing threads will be returned and no filters
will have been applied.

The race conditions against another thread are:
- requesting TSYNC (already handled by sighand lock)
- performing a clone (already handled by sighand lock)
- changing its filter (already handled by sighand lock)
- calling exec (handled by cred_guard_mutex)
The clone case is assisted by the fact that new threads will have their
seccomp state duplicated from their parent before appearing on the tasklist.

Holding cred_guard_mutex means that seccomp filters cannot be assigned
while in the middle of another thread's exec (potentially bypassing
no_new_privs or similar). The call to de_thread() may kill threads waiting
for the mutex.

Changes across threads to the filter pointer includes a barrier.

Based on patches by Will Drewry.

Suggested-by: Julien Tinnes <jln@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 fs/exec.c                    |   2 +-
 include/linux/seccomp.h      |   2 +
 include/uapi/linux/seccomp.h |   3 +
 kernel/seccomp.c             | 135 ++++++++++++++++++++++++++++++++++-
 4 files changed, 140 insertions(+), 2 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index ffd7a813ad3d..b331086ace95 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1220,7 +1220,7 @@ EXPORT_SYMBOL(install_exec_creds);
 /*
  * determine how safe it is to execute the proposed program
  * - the caller must hold ->cred_guard_mutex to protect against
- *   PTRACE_ATTACH
+ *   PTRACE_ATTACH or seccomp thread-sync
  */
 static int check_unsafe_exec(struct linux_binprm *bprm)
 {
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 9ab63a574d40..9687691799ff 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -3,6 +3,8 @@
 
 #include <uapi/linux/seccomp.h>
 
+#define SECCOMP_FILTER_FLAG_MASK	(SECCOMP_FILTER_FLAG_TSYNC)
+
 #ifdef CONFIG_SECCOMP
 
 #include <linux/thread_info.h>
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index b258878ba754..0f238a43ff1e 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -14,6 +14,9 @@
 #define SECCOMP_SET_MODE_STRICT	0
 #define SECCOMP_SET_MODE_FILTER	1
 
+/* Valid flags for SECCOMP_SET_MODE_FILTER */
+#define SECCOMP_FILTER_FLAG_TSYNC	1
+
 /*
  * All BPF programs must return a 32-bit value.
  * The bottom 16-bits are for optional return data.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 1d25f3cdc1be..2d13b264d850 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -26,6 +26,7 @@
 #ifdef CONFIG_SECCOMP_FILTER
 #include <asm/syscall.h>
 #include <linux/filter.h>
+#include <linux/pid.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
 #include <linux/tracehook.h>
@@ -254,6 +255,114 @@ static inline void seccomp_assign_mode(struct task_struct *task,
 }
 
 #ifdef CONFIG_SECCOMP_FILTER
+/* Returns 1 if the parent is an ancestor of the child. */
+static int is_ancestor(struct seccomp_filter *parent,
+		       struct seccomp_filter *child)
+{
+	/* NULL is the root ancestor. */
+	if (parent == NULL)
+		return 1;
+	for (; child; child = child->prev)
+		if (child == parent)
+			return 1;
+	return 0;
+}
+
+/**
+ * seccomp_can_sync_threads: checks if all threads can be synchronized
+ *
+ * Expects sighand and cred_guard_mutex locks to be held.
+ *
+ * Returns 0 on success, -ve on error, or the pid of a thread which was
+ * either not in the correct seccomp mode or did not have an ancestral
+ * seccomp filter.
+ */
+static inline pid_t seccomp_can_sync_threads(void)
+{
+	struct task_struct *thread, *caller;
+
+	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
+	/* Validate all threads being eligible for synchronization. */
+	caller = current;
+	for_each_thread(caller, thread) {
+		pid_t failed;
+
+		/* Skip current, since it is initiating the sync. */
+		if (thread == caller)
+			continue;
+
+		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
+		    (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
+		     is_ancestor(thread->seccomp.filter,
+				 caller->seccomp.filter)))
+			continue;
+
+		/* Return the first thread that cannot be synchronized. */
+		failed = task_pid_vnr(thread);
+		/* If the pid cannot be resolved, then return -ESRCH */
+		if (unlikely(WARN_ON(failed == 0)))
+			failed = -ESRCH;
+		return failed;
+	}
+
+	return 0;
+}
+
+/**
+ * seccomp_sync_threads: sets all threads to use current's filter
+ *
+ * Expects sighand and cred_guard_mutex locks to be held, and for
+ * seccomp_can_sync_threads() to have returned success already
+ * without dropping the locks.
+ *
+ */
+static inline void seccomp_sync_threads(void)
+{
+	struct task_struct *thread, *caller;
+
+	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
+	/* Synchronize all threads. */
+	caller = current;
+	for_each_thread(caller, thread) {
+		/* Skip current, since it needs no changes. */
+		if (thread == caller)
+			continue;
+
+		/* Get a task reference for the new leaf node. */
+		get_seccomp_filter(caller);
+		/*
+		 * Drop the task reference to the shared ancestor since
+		 * current's path will hold a reference.  (This also
+		 * allows a put before the assignment.)
+		 */
+		put_seccomp_filter(thread);
+		smp_store_release(&thread->seccomp.filter,
+				  caller->seccomp.filter);
+		/*
+		 * Opt the other thread into seccomp if needed.
+		 * As threads are considered to be trust-realm
+		 * equivalent (see ptrace_may_access), it is safe to
+		 * allow one thread to transition the other.
+		 */
+		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
+			/*
+			 * Don't let an unprivileged task work around
+			 * the no_new_privs restriction by creating
+			 * a thread that sets it up, enters seccomp,
+			 * then dies.
+			 */
+			if (task_no_new_privs(caller))
+				task_set_no_new_privs(thread);
+
+			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
+		}
+	}
+}
+
 /**
  * seccomp_prepare_filter: Prepares a seccomp filter for use.
  * @fprog: BPF program to install
@@ -382,6 +491,15 @@ static long seccomp_attach_filter(unsigned int flags,
 	if (total_insns > MAX_INSNS_PER_PATH)
 		return -ENOMEM;
 
+	/* If thread sync has been requested, check that it is possible. */
+	if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
+		int ret;
+
+		ret = seccomp_can_sync_threads();
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * If there is an existing filter, make it the prev and don't drop its
 	 * task reference.
@@ -389,6 +507,10 @@ static long seccomp_attach_filter(unsigned int flags,
 	filter->prev = current->seccomp.filter;
 	current->seccomp.filter = filter;
 
+	/* Now that the new filter is in place, synchronize to all threads. */
+	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
+		seccomp_sync_threads();
+
 	return 0;
 }
 
@@ -608,7 +730,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	long ret = -EINVAL;
 
 	/* Validate flags. */
-	if (flags != 0)
+	if (flags & ~SECCOMP_FILTER_FLAG_MASK)
 		return -EINVAL;
 
 	/* Prepare the new filter before holding any locks. */
@@ -616,6 +738,14 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	if (IS_ERR(prepared))
 		return PTR_ERR(prepared);
 
+	/*
+	 * Make sure we cannot change seccomp or nnp state via TSYNC
+	 * while another thread is in the middle of calling exec.
+	 */
+	if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
+	    mutex_lock_killable(&current->signal->cred_guard_mutex))
+		goto out_free;
+
 	spin_lock_irq(&current->sighand->siglock);
 
 	if (!seccomp_may_assign_mode(seccomp_mode))
@@ -630,6 +760,9 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	seccomp_assign_mode(current, seccomp_mode);
 out:
 	spin_unlock_irq(&current->sighand->siglock);
+	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
+		mutex_unlock(&current->signal->cred_guard_mutex);
+out_free:
 	seccomp_filter_free(prepared);
 	return ret;
 }

From f56b1aa3553e8e75178a8f61f13d6d1ab896143c Mon Sep 17 00:00:00 2001
From: JP Abgrall <jpa@google.com>
Date: Wed, 27 Aug 2014 20:30:29 -0700
Subject: [PATCH 0196/1185] arm: fixup NR_syscalls to accommodate the new
 seccomp syscall

This belongs in
  commit: 83f1ccba87b06575966b65352db565c363af7bcf
  https://android-review.googlesource.com/#/c/104520

Change-Id: Id5037cbebac9b86c863da79c3b8729e627e65f8e
Signed-off-by: JP Abgrall <jpa@google.com>
---
 arch/arm/include/asm/unistd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 141baa3f9a72..acabef1a75df 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -15,7 +15,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define __NR_syscalls  (380)
+#define __NR_syscalls  (384)
 #define __ARM_NR_cmpxchg		(__ARM_NR_BASE+0x00fff0)
 
 #define __ARCH_WANT_STAT64

From 42db8d1ff86afb45a5f1d4930e43042bd3c448bc Mon Sep 17 00:00:00 2001
From: JP Abgrall <jpa@google.com>
Date: Wed, 27 Aug 2014 19:07:30 -0700
Subject: [PATCH 0197/1185] arm64/crypto: SHA-1 using ARMv8 Crypto Extensions

This patch adds support for the SHA-1 Secure Hash Algorithm for CPUs that
have support for the SHA-1 part of the ARM v8 Crypto Extensions.

Change-Id: I29fafd308e17aff6e0d59938c106fae6ad7fe78e
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>

Conflicts:
	arch/arm64/Makefile
---
 arch/arm64/Kconfig               |   3 +
 arch/arm64/Makefile              |   1 +
 arch/arm64/crypto/Kconfig        |  16 +++
 arch/arm64/crypto/Makefile       |  12 +++
 arch/arm64/crypto/sha1-ce-core.S | 153 +++++++++++++++++++++++++++
 arch/arm64/crypto/sha1-ce-glue.c | 174 +++++++++++++++++++++++++++++++
 6 files changed, 359 insertions(+)
 create mode 100644 arch/arm64/crypto/Kconfig
 create mode 100644 arch/arm64/crypto/Makefile
 create mode 100644 arch/arm64/crypto/sha1-ce-core.S
 create mode 100644 arch/arm64/crypto/sha1-ce-glue.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 02f4d1cfd7b3..8832ac02ffef 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -273,5 +273,8 @@ source "arch/arm64/Kconfig.debug"
 source "security/Kconfig"
 
 source "crypto/Kconfig"
+if CRYPTO
+source "arch/arm64/crypto/Kconfig"
+endif
 
 source "lib/Kconfig"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index d69354dbd789..fb8c8f0fbf4f 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -37,6 +37,7 @@ TEXT_OFFSET := 0x00080000
 export	TEXT_OFFSET GZFLAGS
 
 core-y		+= arch/arm64/kernel/ arch/arm64/mm/
+core-$(CONFIG_CRYPTO) += arch/arm64/crypto/
 libs-y		:= arch/arm64/lib/ $(libs-y)
 libs-y		+= $(LIBGCC)
 
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
new file mode 100644
index 000000000000..7956881b5986
--- /dev/null
+++ b/arch/arm64/crypto/Kconfig
@@ -0,0 +1,16 @@
+
+menuconfig ARM64_CRYPTO
+	bool "ARM64 Accelerated Cryptographic Algorithms"
+	depends on ARM64
+	help
+	  Say Y here to choose from a selection of cryptographic algorithms
+	  implemented using ARM64 specific CPU features or instructions.
+
+if ARM64_CRYPTO
+
+config CRYPTO_SHA1_ARM64_CE
+	tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
+	depends on ARM64 && KERNEL_MODE_NEON
+	select CRYPTO_HASH
+
+endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
new file mode 100644
index 000000000000..0ed3caaec81b
--- /dev/null
+++ b/arch/arm64/crypto/Makefile
@@ -0,0 +1,12 @@
+#
+# linux/arch/arm64/crypto/Makefile
+#
+# Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
+sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
new file mode 100644
index 000000000000..09d57d98609c
--- /dev/null
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -0,0 +1,153 @@
+/*
+ * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+	.arch		armv8-a+crypto
+
+	k0		.req	v0
+	k1		.req	v1
+	k2		.req	v2
+	k3		.req	v3
+
+	t0		.req	v4
+	t1		.req	v5
+
+	dga		.req	q6
+	dgav		.req	v6
+	dgb		.req	s7
+	dgbv		.req	v7
+
+	dg0q		.req	q12
+	dg0s		.req	s12
+	dg0v		.req	v12
+	dg1s		.req	s13
+	dg1v		.req	v13
+	dg2s		.req	s14
+
+	.macro		add_only, op, ev, rc, s0, dg1
+	.ifc		\ev, ev
+	add		t1.4s, v\s0\().4s, \rc\().4s
+	sha1h		dg2s, dg0s
+	.ifnb		\dg1
+	sha1\op		dg0q, \dg1, t0.4s
+	.else
+	sha1\op		dg0q, dg1s, t0.4s
+	.endif
+	.else
+	.ifnb		\s0
+	add		t0.4s, v\s0\().4s, \rc\().4s
+	.endif
+	sha1h		dg1s, dg0s
+	sha1\op		dg0q, dg2s, t1.4s
+	.endif
+	.endm
+
+	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
+	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
+	add_only	\op, \ev, \rc, \s1, \dg1
+	sha1su1		v\s0\().4s, v\s3\().4s
+	.endm
+
+	/*
+	 * The SHA1 round constants
+	 */
+	.align		4
+.Lsha1_rcon:
+	.word		0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
+
+	/*
+	 * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
+	 * 			  u8 *head, long bytes)
+	 */
+ENTRY(sha1_ce_transform)
+	/* load round constants */
+	adr		x6, .Lsha1_rcon
+	ld1r		{k0.4s}, [x6], #4
+	ld1r		{k1.4s}, [x6], #4
+	ld1r		{k2.4s}, [x6], #4
+	ld1r		{k3.4s}, [x6]
+
+	/* load state */
+	ldr		dga, [x2]
+	ldr		dgb, [x2, #16]
+
+	/* load partial input (if supplied) */
+	cbz		x3, 0f
+	ld1		{v8.4s-v11.4s}, [x3]
+	b		1f
+
+	/* load input */
+0:	ld1		{v8.4s-v11.4s}, [x1], #64
+	sub		w0, w0, #1
+
+1:
+CPU_LE(	rev32		v8.16b, v8.16b		)
+CPU_LE(	rev32		v9.16b, v9.16b		)
+CPU_LE(	rev32		v10.16b, v10.16b	)
+CPU_LE(	rev32		v11.16b, v11.16b	)
+
+2:	add		t0.4s, v8.4s, k0.4s
+	mov		dg0v.16b, dgav.16b
+
+	add_update	c, ev, k0,  8,  9, 10, 11, dgb
+	add_update	c, od, k0,  9, 10, 11,  8
+	add_update	c, ev, k0, 10, 11,  8,  9
+	add_update	c, od, k0, 11,  8,  9, 10
+	add_update	c, ev, k1,  8,  9, 10, 11
+
+	add_update	p, od, k1,  9, 10, 11,  8
+	add_update	p, ev, k1, 10, 11,  8,  9
+	add_update	p, od, k1, 11,  8,  9, 10
+	add_update	p, ev, k1,  8,  9, 10, 11
+	add_update	p, od, k2,  9, 10, 11,  8
+
+	add_update	m, ev, k2, 10, 11,  8,  9
+	add_update	m, od, k2, 11,  8,  9, 10
+	add_update	m, ev, k2,  8,  9, 10, 11
+	add_update	m, od, k2,  9, 10, 11,  8
+	add_update	m, ev, k3, 10, 11,  8,  9
+
+	add_update	p, od, k3, 11,  8,  9, 10
+	add_only	p, ev, k3,  9
+	add_only	p, od, k3, 10
+	add_only	p, ev, k3, 11
+	add_only	p, od
+
+	/* update state */
+	add		dgbv.2s, dgbv.2s, dg1v.2s
+	add		dgav.4s, dgav.4s, dg0v.4s
+
+	cbnz		w0, 0b
+
+	/*
+	 * Final block: add padding and total bit count.
+	 * Skip if we have no total byte count in x4. In that case, the input
+	 * size was not a round multiple of the block size, and the padding is
+	 * handled by the C code.
+	 */
+	cbz		x4, 3f
+	movi		v9.2d, #0
+	mov		x8, #0x80000000
+	movi		v10.2d, #0
+	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
+	fmov		d8, x8
+	mov		x4, #0
+	mov		v11.d[0], xzr
+	mov		v11.d[1], x7
+	b		2b
+
+	/* store new state */
+3:	str		dga, [x2]
+	str		dgb, [x2, #16]
+	ret
+ENDPROC(sha1_ce_transform)
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
new file mode 100644
index 000000000000..6fe83f37a750
--- /dev/null
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -0,0 +1,174 @@
+/*
+ * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
+				  u8 *head, long bytes);
+
+static int sha1_init(struct shash_desc *desc)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	*sctx = (struct sha1_state){
+		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+	};
+	return 0;
+}
+
+static int sha1_update(struct shash_desc *desc, const u8 *data,
+		       unsigned int len)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+
+	sctx->count += len;
+
+	if ((partial + len) >= SHA1_BLOCK_SIZE) {
+		int blocks;
+
+		if (partial) {
+			int p = SHA1_BLOCK_SIZE - partial;
+
+			memcpy(sctx->buffer + partial, data, p);
+			data += p;
+			len -= p;
+		}
+
+		blocks = len / SHA1_BLOCK_SIZE;
+		len %= SHA1_BLOCK_SIZE;
+
+		kernel_neon_begin_partial(16);
+		sha1_ce_transform(blocks, data, sctx->state,
+				  partial ? sctx->buffer : NULL, 0);
+		kernel_neon_end();
+
+		data += blocks * SHA1_BLOCK_SIZE;
+		partial = 0;
+	}
+	if (len)
+		memcpy(sctx->buffer + partial, data, len);
+	return 0;
+}
+
+static int sha1_final(struct shash_desc *desc, u8 *out)
+{
+	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	__be64 bits = cpu_to_be64(sctx->count << 3);
+	__be32 *dst = (__be32 *)out;
+	int i;
+
+	u32 padlen = SHA1_BLOCK_SIZE
+		     - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
+
+	sha1_update(desc, padding, padlen);
+	sha1_update(desc, (const u8 *)&bits, sizeof(bits));
+
+	for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
+		put_unaligned_be32(sctx->state[i], dst++);
+
+	*sctx = (struct sha1_state){};
+	return 0;
+}
+
+static int sha1_finup(struct shash_desc *desc, const u8 *data,
+		      unsigned int len, u8 *out)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	__be32 *dst = (__be32 *)out;
+	int blocks;
+	int i;
+
+	if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
+		sha1_update(desc, data, len);
+		return sha1_final(desc, out);
+	}
+
+	/*
+	 * Use a fast path if the input is a multiple of 64 bytes. In
+	 * this case, there is no need to copy data around, and we can
+	 * perform the entire digest calculation in a single invocation
+	 * of sha1_ce_transform()
+	 */
+	blocks = len / SHA1_BLOCK_SIZE;
+
+	kernel_neon_begin_partial(16);
+	sha1_ce_transform(blocks, data, sctx->state, NULL, len);
+	kernel_neon_end();
+
+	for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
+		put_unaligned_be32(sctx->state[i], dst++);
+
+	*sctx = (struct sha1_state){};
+	return 0;
+}
+
+static int sha1_export(struct shash_desc *desc, void *out)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	struct sha1_state *dst = out;
+
+	*dst = *sctx;
+	return 0;
+}
+
+static int sha1_import(struct shash_desc *desc, const void *in)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	struct sha1_state const *src = in;
+
+	*sctx = *src;
+	return 0;
+}
+
+static struct shash_alg alg = {
+	.init			= sha1_init,
+	.update			= sha1_update,
+	.final			= sha1_final,
+	.finup			= sha1_finup,
+	.export			= sha1_export,
+	.import			= sha1_import,
+	.descsize		= sizeof(struct sha1_state),
+	.digestsize		= SHA1_DIGEST_SIZE,
+	.statesize		= sizeof(struct sha1_state),
+	.base			= {
+		.cra_name		= "sha1",
+		.cra_driver_name	= "sha1-ce",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA1_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+};
+
+static int __init sha1_ce_mod_init(void)
+{
+	return crypto_register_shash(&alg);
+}
+
+static void __exit sha1_ce_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_cpu_feature_match(SHA1, sha1_ce_mod_init);
+module_exit(sha1_ce_mod_fini);

From 77278cdb9d7f93f00c0b43b544daf73b755e2dcd Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 20 Mar 2014 15:35:40 +0100
Subject: [PATCH 0198/1185] arm64/crypto: SHA-224/SHA-256 using ARMv8 Crypto
 Extensions

This patch adds support for the SHA-224 and SHA-256 Secure Hash Algorithms
for CPUs that have support for the SHA-2 part of the ARM v8 Crypto Extensions.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig        |   5 +
 arch/arm64/crypto/Makefile       |   3 +
 arch/arm64/crypto/sha2-ce-core.S | 156 +++++++++++++++++++
 arch/arm64/crypto/sha2-ce-glue.c | 255 +++++++++++++++++++++++++++++++
 4 files changed, 419 insertions(+)
 create mode 100644 arch/arm64/crypto/sha2-ce-core.S
 create mode 100644 arch/arm64/crypto/sha2-ce-glue.c

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 7956881b5986..eb1e99770c21 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -13,4 +13,9 @@ config CRYPTO_SHA1_ARM64_CE
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_HASH
 
+config CRYPTO_SHA2_ARM64_CE
+	tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
+	depends on ARM64 && KERNEL_MODE_NEON
+	select CRYPTO_HASH
+
 endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 0ed3caaec81b..0b3885a60d43 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -10,3 +10,6 @@
 
 obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
 sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
+
+obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o
+sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
new file mode 100644
index 000000000000..7f29fc031ea8
--- /dev/null
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -0,0 +1,156 @@
+/*
+ * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+	.arch		armv8-a+crypto
+
+	dga		.req	q20
+	dgav		.req	v20
+	dgb		.req	q21
+	dgbv		.req	v21
+
+	t0		.req	v22
+	t1		.req	v23
+
+	dg0q		.req	q24
+	dg0v		.req	v24
+	dg1q		.req	q25
+	dg1v		.req	v25
+	dg2q		.req	q26
+	dg2v		.req	v26
+
+	.macro		add_only, ev, rc, s0
+	mov		dg2v.16b, dg0v.16b
+	.ifeq		\ev
+	add		t1.4s, v\s0\().4s, \rc\().4s
+	sha256h		dg0q, dg1q, t0.4s
+	sha256h2	dg1q, dg2q, t0.4s
+	.else
+	.ifnb		\s0
+	add		t0.4s, v\s0\().4s, \rc\().4s
+	.endif
+	sha256h		dg0q, dg1q, t1.4s
+	sha256h2	dg1q, dg2q, t1.4s
+	.endif
+	.endm
+
+	.macro		add_update, ev, rc, s0, s1, s2, s3
+	sha256su0	v\s0\().4s, v\s1\().4s
+	add_only	\ev, \rc, \s1
+	sha256su1	v\s0\().4s, v\s2\().4s, v\s3\().4s
+	.endm
+
+	/*
+	 * The SHA-256 round constants
+	 */
+	.align		4
+.Lsha2_rcon:
+	.word		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+	.word		0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+	.word		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+	.word		0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+	.word		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+	.word		0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+	.word		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+	.word		0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+	.word		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+	.word		0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+	.word		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+	.word		0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+	.word		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+	.word		0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+	.word		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+	.word		0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+
+	/*
+	 * void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
+	 *                        u8 *head, long bytes)
+	 */
+ENTRY(sha2_ce_transform)
+	/* load round constants */
+	adr		x8, .Lsha2_rcon
+	ld1		{ v0.4s- v3.4s}, [x8], #64
+	ld1		{ v4.4s- v7.4s}, [x8], #64
+	ld1		{ v8.4s-v11.4s}, [x8], #64
+	ld1		{v12.4s-v15.4s}, [x8]
+
+	/* load state */
+	ldp		dga, dgb, [x2]
+
+	/* load partial input (if supplied) */
+	cbz		x3, 0f
+	ld1		{v16.4s-v19.4s}, [x3]
+	b		1f
+
+	/* load input */
+0:	ld1		{v16.4s-v19.4s}, [x1], #64
+	sub		w0, w0, #1
+
+1:
+CPU_LE(	rev32		v16.16b, v16.16b	)
+CPU_LE(	rev32		v17.16b, v17.16b	)
+CPU_LE(	rev32		v18.16b, v18.16b	)
+CPU_LE(	rev32		v19.16b, v19.16b	)
+
+2:	add		t0.4s, v16.4s, v0.4s
+	mov		dg0v.16b, dgav.16b
+	mov		dg1v.16b, dgbv.16b
+
+	add_update	0,  v1, 16, 17, 18, 19
+	add_update	1,  v2, 17, 18, 19, 16
+	add_update	0,  v3, 18, 19, 16, 17
+	add_update	1,  v4, 19, 16, 17, 18
+
+	add_update	0,  v5, 16, 17, 18, 19
+	add_update	1,  v6, 17, 18, 19, 16
+	add_update	0,  v7, 18, 19, 16, 17
+	add_update	1,  v8, 19, 16, 17, 18
+
+	add_update	0,  v9, 16, 17, 18, 19
+	add_update	1, v10, 17, 18, 19, 16
+	add_update	0, v11, 18, 19, 16, 17
+	add_update	1, v12, 19, 16, 17, 18
+
+	add_only	0, v13, 17
+	add_only	1, v14, 18
+	add_only	0, v15, 19
+	add_only	1
+
+	/* update state */
+	add		dgav.4s, dgav.4s, dg0v.4s
+	add		dgbv.4s, dgbv.4s, dg1v.4s
+
+	/* handled all input blocks? */
+	cbnz		w0, 0b
+
+	/*
+	 * Final block: add padding and total bit count.
+	 * Skip if we have no total byte count in x4. In that case, the input
+	 * size was not a round multiple of the block size, and the padding is
+	 * handled by the C code.
+	 */
+	cbz		x4, 3f
+	movi		v17.2d, #0
+	mov		x8, #0x80000000
+	movi		v18.2d, #0
+	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
+	fmov		d16, x8
+	mov		x4, #0
+	mov		v19.d[0], xzr
+	mov		v19.d[1], x7
+	b		2b
+
+	/* store new state */
+3:	stp		dga, dgb, [x2]
+	ret
+ENDPROC(sha2_ce_transform)
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
new file mode 100644
index 000000000000..c294e67d3925
--- /dev/null
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -0,0 +1,255 @@
+/*
+ * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
+				  u8 *head, long bytes);
+
+static int sha224_init(struct shash_desc *desc)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	*sctx = (struct sha256_state){
+		.state = {
+			SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
+			SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
+		}
+	};
+	return 0;
+}
+
+static int sha256_init(struct shash_desc *desc)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	*sctx = (struct sha256_state){
+		.state = {
+			SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
+			SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
+		}
+	};
+	return 0;
+}
+
+static int sha2_update(struct shash_desc *desc, const u8 *data,
+		       unsigned int len)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
+
+	sctx->count += len;
+
+	if ((partial + len) >= SHA256_BLOCK_SIZE) {
+		int blocks;
+
+		if (partial) {
+			int p = SHA256_BLOCK_SIZE - partial;
+
+			memcpy(sctx->buf + partial, data, p);
+			data += p;
+			len -= p;
+		}
+
+		blocks = len / SHA256_BLOCK_SIZE;
+		len %= SHA256_BLOCK_SIZE;
+
+		kernel_neon_begin_partial(28);
+		sha2_ce_transform(blocks, data, sctx->state,
+				  partial ? sctx->buf : NULL, 0);
+		kernel_neon_end();
+
+		data += blocks * SHA256_BLOCK_SIZE;
+		partial = 0;
+	}
+	if (len)
+		memcpy(sctx->buf + partial, data, len);
+	return 0;
+}
+
+static void sha2_final(struct shash_desc *desc)
+{
+	static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
+
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	__be64 bits = cpu_to_be64(sctx->count << 3);
+	u32 padlen = SHA256_BLOCK_SIZE
+		     - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
+
+	sha2_update(desc, padding, padlen);
+	sha2_update(desc, (const u8 *)&bits, sizeof(bits));
+}
+
+static int sha224_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	__be32 *dst = (__be32 *)out;
+	int i;
+
+	sha2_final(desc);
+
+	for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
+		put_unaligned_be32(sctx->state[i], dst++);
+
+	*sctx = (struct sha256_state){};
+	return 0;
+}
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	__be32 *dst = (__be32 *)out;
+	int i;
+
+	sha2_final(desc);
+
+	for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
+		put_unaligned_be32(sctx->state[i], dst++);
+
+	*sctx = (struct sha256_state){};
+	return 0;
+}
+
+static void sha2_finup(struct shash_desc *desc, const u8 *data,
+		       unsigned int len)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	int blocks;
+
+	if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) {
+		sha2_update(desc, data, len);
+		sha2_final(desc);
+		return;
+	}
+
+	/*
+	 * Fast path: nothing has been hashed yet and the input is a
+	 * non-zero multiple of 64 bytes, so no data needs to be copied
+	 * around. Passing the byte count lets sha2_ce_transform() add
+	 * the final padding block itself, which completes the digest
+	 * in a single invocation.
+	 */
+	blocks = len / SHA256_BLOCK_SIZE;
+
+	kernel_neon_begin_partial(28);
+	sha2_ce_transform(blocks, data, sctx->state, NULL, len);
+	kernel_neon_end();
+}
+
+static int sha224_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	__be32 *dst = (__be32 *)out;
+	int i;
+
+	sha2_finup(desc, data, len);
+
+	for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
+		put_unaligned_be32(sctx->state[i], dst++);
+
+	*sctx = (struct sha256_state){};
+	return 0;
+}
+
+static int sha256_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	__be32 *dst = (__be32 *)out;
+	int i;
+
+	sha2_finup(desc, data, len);
+
+	for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
+		put_unaligned_be32(sctx->state[i], dst++);
+
+	*sctx = (struct sha256_state){};
+	return 0;
+}
+
+static int sha2_export(struct shash_desc *desc, void *out)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	struct sha256_state *dst = out;
+
+	*dst = *sctx;
+	return 0;
+}
+
+static int sha2_import(struct shash_desc *desc, const void *in)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	struct sha256_state const *src = in;
+
+	*sctx = *src;
+	return 0;
+}
+
+static struct shash_alg algs[] = { {
+	.init			= sha224_init,
+	.update			= sha2_update,
+	.final			= sha224_final,
+	.finup			= sha224_finup,
+	.export			= sha2_export,
+	.import			= sha2_import,
+	.descsize		= sizeof(struct sha256_state),
+	.digestsize		= SHA224_DIGEST_SIZE,
+	.statesize		= sizeof(struct sha256_state),
+	.base			= {
+		.cra_name		= "sha224",
+		.cra_driver_name	= "sha224-ce",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA256_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+}, {
+	.init			= sha256_init,
+	.update			= sha2_update,
+	.final			= sha256_final,
+	.finup			= sha256_finup,
+	.export			= sha2_export,
+	.import			= sha2_import,
+	.descsize		= sizeof(struct sha256_state),
+	.digestsize		= SHA256_DIGEST_SIZE,
+	.statesize		= sizeof(struct sha256_state),
+	.base			= {
+		.cra_name		= "sha256",
+		.cra_driver_name	= "sha256-ce",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA256_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+} };
+
+static int __init sha2_ce_mod_init(void)
+{
+	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha2_ce_mod_fini(void)
+{
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_cpu_feature_match(SHA2, sha2_ce_mod_init);
+module_exit(sha2_ce_mod_fini);

From 0224264fee47605f30771a988448375a0f84b5e9 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 26 Mar 2014 20:53:05 +0100
Subject: [PATCH 0199/1185] arm64/crypto: GHASH secure hash using ARMv8 Crypto
 Extensions

This is a port to ARMv8 (Crypto Extensions) of the Intel implementation of the
GHASH Secure Hash (used in the Galois/Counter chaining mode). It relies on the
optional PMULL/PMULL2 instruction (polynomial multiply long, what Intel call
carry-less multiply).

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig         |   6 ++
 arch/arm64/crypto/Makefile        |   3 +
 arch/arm64/crypto/ghash-ce-core.S |  95 ++++++++++++++++++
 arch/arm64/crypto/ghash-ce-glue.c | 155 ++++++++++++++++++++++++++++++
 4 files changed, 259 insertions(+)
 create mode 100644 arch/arm64/crypto/ghash-ce-core.S
 create mode 100644 arch/arm64/crypto/ghash-ce-glue.c

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index eb1e99770c21..0c50859ee7b9 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -18,4 +18,10 @@ config CRYPTO_SHA2_ARM64_CE
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_HASH
 
+
+config CRYPTO_GHASH_ARM64_CE
+	tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
+	depends on ARM64 && KERNEL_MODE_NEON
+	select CRYPTO_HASH
+
 endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 0b3885a60d43..e8c81a068868 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -13,3 +13,6 @@ sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
 
 obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o
 sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
+
+obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
+ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
new file mode 100644
index 000000000000..b9e6eaf41c9b
--- /dev/null
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -0,0 +1,95 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ *
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * Based on arch/x86/crypto/ghash-pmullni-intel_asm.S
+ *
+ * Copyright (c) 2009 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *           Vinodh Gopal
+ *           Erdinc Ozturk
+ *           Deniz Karakoyunlu
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	DATA	.req	v0
+	SHASH	.req	v1
+	IN1	.req	v2
+	T1	.req	v2
+	T2	.req	v3
+	T3	.req	v4
+	VZR	.req	v5
+
+	.text
+	.arch		armv8-a+crypto
+
+	/*
+	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+	 *			   struct ghash_key const *k, const char *head)
+	 */
+ENTRY(pmull_ghash_update)
+	ld1		{DATA.16b}, [x1]
+	ld1		{SHASH.16b}, [x3]
+	eor		VZR.16b, VZR.16b, VZR.16b
+
+	/* do the head block first, if supplied */
+	cbz		x4, 0f
+	ld1		{IN1.2d}, [x4]
+	b		1f
+
+0:	ld1		{IN1.2d}, [x2], #16
+	sub		w0, w0, #1
+1:	ext		IN1.16b, IN1.16b, IN1.16b, #8
+CPU_LE(	rev64		IN1.16b, IN1.16b	)
+	eor		DATA.16b, DATA.16b, IN1.16b
+
+	/* multiply DATA by SHASH in GF(2^128) */
+	ext		T2.16b, DATA.16b, DATA.16b, #8
+	ext		T3.16b, SHASH.16b, SHASH.16b, #8
+	eor		T2.16b, T2.16b, DATA.16b
+	eor		T3.16b, T3.16b, SHASH.16b
+
+	pmull2		T1.1q, SHASH.2d, DATA.2d	// a1 * b1
+	pmull		DATA.1q, SHASH.1d, DATA.1d	// a0 * b0
+	pmull		T2.1q, T2.1d, T3.1d		// (a1 + a0)(b1 + b0)
+	eor		T2.16b, T2.16b, T1.16b		// (a0 * b1) + (a1 * b0)
+	eor		T2.16b, T2.16b, DATA.16b
+
+	ext		T3.16b, VZR.16b, T2.16b, #8
+	ext		T2.16b, T2.16b, VZR.16b, #8
+	eor		DATA.16b, DATA.16b, T3.16b
+	eor		T1.16b, T1.16b, T2.16b	// <T1:DATA> is result of
+						// carry-less multiplication
+
+	/* first phase of the reduction */
+	shl		T3.2d, DATA.2d, #1
+	eor		T3.16b, T3.16b, DATA.16b
+	shl		T3.2d, T3.2d, #5
+	eor		T3.16b, T3.16b, DATA.16b
+	shl		T3.2d, T3.2d, #57
+	ext		T2.16b, VZR.16b, T3.16b, #8
+	ext		T3.16b, T3.16b, VZR.16b, #8
+	eor		DATA.16b, DATA.16b, T2.16b
+	eor		T1.16b, T1.16b, T3.16b
+
+	/* second phase of the reduction */
+	ushr		T2.2d, DATA.2d, #5
+	eor		T2.16b, T2.16b, DATA.16b
+	ushr		T2.2d, T2.2d, #1
+	eor		T2.16b, T2.16b, DATA.16b
+	ushr		T2.2d, T2.2d, #1
+	eor		T1.16b, T1.16b, T2.16b
+	eor		DATA.16b, DATA.16b, T1.16b
+
+	cbnz		w0, 0b
+
+	st1		{DATA.16b}, [x1]
+	ret
+ENDPROC(pmull_ghash_update)
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
new file mode 100644
index 000000000000..b92baf3f68c7
--- /dev/null
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -0,0 +1,155 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ *
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+#define GHASH_BLOCK_SIZE	16
+#define GHASH_DIGEST_SIZE	16
+
+struct ghash_key {
+	u64 a;	/* derived from key bytes 8..15, see ghash_setkey() */
+	u64 b;	/* derived from key bytes 0..7, see ghash_setkey() */
+};
+
+struct ghash_desc_ctx {
+	u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];	/* running hash value */
+	u8 buf[GHASH_BLOCK_SIZE];	/* input that does not fill a block yet */
+	u32 count;			/* total # of bytes passed to update */
+};
+
+asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+				   struct ghash_key const *k, const char *head);
+
+static int ghash_init(struct shash_desc *desc)
+{
+	struct ghash_desc_ctx *state = shash_desc_ctx(desc);
+
+	*state = (struct ghash_desc_ctx){};	/* digest, buf, count := 0 */
+	return 0;
+}
+
+static int ghash_update(struct shash_desc *desc, const u8 *src,
+			unsigned int len)
+{
+	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; /* bytes in buf */
+
+	ctx->count += len;
+	if ((partial + len) >= GHASH_BLOCK_SIZE) {
+		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+		int blocks;
+
+		if (partial) {
+			int p = GHASH_BLOCK_SIZE - partial;
+
+			memcpy(ctx->buf + partial, src, p);
+			src += p;
+			len -= p;
+		}
+		/* hash buf (if in use) as 'head', then the whole src blocks */
+		blocks = len / GHASH_BLOCK_SIZE;
+		len %= GHASH_BLOCK_SIZE;
+
+		kernel_neon_begin_partial(6);
+		pmull_ghash_update(blocks, ctx->digest, src, key,
+				   partial ? ctx->buf : NULL);
+		kernel_neon_end();
+		src += blocks * GHASH_BLOCK_SIZE;
+		partial = 0;	/* buf was consumed, tail goes to buf[0] */
+	}
+	if (len)	/* stash the tail for the next call */
+		memcpy(ctx->buf + partial, src, len);
+	return 0;
+}
+
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+	struct ghash_desc_ctx *state = shash_desc_ctx(desc);
+	unsigned int pending = state->count % GHASH_BLOCK_SIZE;
+
+	if (pending != 0) {
+		struct ghash_key *h = crypto_shash_ctx(desc->tfm);
+
+		/* zero-pad the buffered tail to a full block and hash it */
+		memset(&state->buf[pending], 0, GHASH_BLOCK_SIZE - pending);
+		kernel_neon_begin_partial(6);
+		pmull_ghash_update(1, state->digest, state->buf, h, NULL);
+		kernel_neon_end();
+	}
+	put_unaligned_be64(state->digest[0], dst + 8);
+	put_unaligned_be64(state->digest[1], dst);
+
+	*state = (struct ghash_desc_ctx){};	/* reset for the next use */
+	return 0;
+}
+
+static int ghash_setkey(struct crypto_shash *tfm,
+			const u8 *inkey, unsigned int keylen)
+{
+	struct ghash_key *h = crypto_shash_ctx(tfm);
+	u64 lo, hi;
+
+	if (keylen != GHASH_BLOCK_SIZE) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	/* multiply the key by 'x' in GF(2^128), as two 64-bit halves */
+	hi = get_unaligned_be64(&inkey[0]);
+	lo = get_unaligned_be64(&inkey[8]);
+
+	h->a = (lo << 1) | (hi >> 63);
+	h->b = (hi << 1) | (lo >> 63);
+
+	if ((hi >> 63) != 0)	/* top bit of the key was set */
+		h->b ^= 0xc200000000000000UL;
+
+	return 0;
+}
+
+static struct shash_alg ghash_alg = {
+	.digestsize	= GHASH_DIGEST_SIZE,
+	.init		= ghash_init,
+	.update		= ghash_update,
+	.final		= ghash_final,
+	.setkey		= ghash_setkey,
+	.descsize	= sizeof(struct ghash_desc_ctx), /* see shash_desc_ctx() uses */
+	.base		= {
+		.cra_name		= "ghash",
+		.cra_driver_name	= "ghash-ce",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= GHASH_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct ghash_key), /* see ghash_setkey() */
+		.cra_module		= THIS_MODULE,
+	},
+};
+
+static int __init ghash_ce_mod_init(void)
+{
+	return crypto_register_shash(&ghash_alg); /* result is passed on as is */
+}
+
+static void __exit ghash_ce_mod_exit(void)
+{
+	crypto_unregister_shash(&ghash_alg);	/* undo ghash_ce_mod_init() */
+}
+
+module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+module_exit(ghash_ce_mod_exit);

From b42ec6c2c7a7ea0ccfcdb580eeb3806311b3479b Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 5 Feb 2014 18:13:38 +0100
Subject: [PATCH 0200/1185] arm64/crypto: AES using ARMv8 Crypto Extensions

This patch adds support for the AES symmetric encryption algorithm for CPUs
that have support for the AES part of the ARM v8 Crypto Extensions.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig         |   7 +-
 arch/arm64/crypto/Makefile        |   3 +
 arch/arm64/crypto/aes-ce-cipher.c | 155 ++++++++++++++++++++++++++++++
 3 files changed, 164 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/crypto/aes-ce-cipher.c

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 0c50859ee7b9..9ba32c0da871 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -18,10 +18,15 @@ config CRYPTO_SHA2_ARM64_CE
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_HASH
 
-
 config CRYPTO_GHASH_ARM64_CE
 	tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_HASH
 
+config CRYPTO_AES_ARM64_CE
+	tristate "AES core cipher using ARMv8 Crypto Extensions"
+	depends on ARM64 && KERNEL_MODE_NEON
+	select CRYPTO_ALGAPI
+	select CRYPTO_AES
+
 endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index e8c81a068868..908abd9242b1 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -16,3 +16,6 @@ sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
 
 obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
 ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
+CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
new file mode 100644
index 000000000000..2075e1acae6b
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -0,0 +1,155 @@
+/*
+ * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <crypto/aes.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+struct aes_block {
+	u8 b[AES_BLOCK_SIZE];	/* one block, used for the asm "Q" operands */
+};
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+	/*
+	 * AES prescribes 10, 12 or 14 rounds for keys of 128, 192 or
+	 * 256 bits respectively. With the key length n given in bytes
+	 * (ctx->key_length), that is n/4 + 6 rounds:
+	 * 16 bytes => 10, 24 bytes => 12, 32 bytes => 14
+	 * No check of the key length is made here.
+	 */
+	return ctx->key_length / 4 + 6;
+}
+
+static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
+{
+	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aes_block *out = (struct aes_block *)dst;
+	struct aes_block const *in = (struct aes_block *)src;
+	void *dummy0;	/* scratch: the asm post-increments [key] */
+	int dummy1;	/* scratch: the asm counts down [rounds] */
+
+	kernel_neon_begin_partial(4);	/* the asm below uses v0-v3 only */
+
+	__asm__("	ld1	{v0.16b}, %[in]			;"
+		"	ld1	{v1.2d}, [%[key]], #16		;"
+		"	cmp	%w[rounds], #10			;" /* # of rounds - 2 */
+		"	bmi	0f				;" /* 10 rounds */
+		"	bne	3f				;" /* 14 rounds */
+		"	mov	v3.16b, v1.16b			;" /* 12 rounds */
+		"	b	2f				;"
+		"0:	mov	v2.16b, v1.16b			;"
+		"	ld1	{v3.2d}, [%[key]], #16		;"
+		"1:	aese	v0.16b, v2.16b			;"
+		"	aesmc	v0.16b, v0.16b			;"
+		"2:	ld1	{v1.2d}, [%[key]], #16		;"
+		"	aese	v0.16b, v3.16b			;"
+		"	aesmc	v0.16b, v0.16b			;"
+		"3:	ld1	{v2.2d}, [%[key]], #16		;"
+		"	subs	%w[rounds], %w[rounds], #3	;"
+		"	aese	v0.16b, v1.16b			;"
+		"	aesmc	v0.16b, v0.16b			;"
+		"	ld1	{v3.2d}, [%[key]], #16		;"
+		"	bpl	1b				;" /* 3 rounds per pass */
+		"	aese	v0.16b, v2.16b			;"
+		"	eor	v0.16b, v0.16b, v3.16b		;" /* last round key */
+		"	st1	{v0.16b}, %[out]		;"
+
+	:	[out]		"=Q"(*out),
+		[key]		"=r"(dummy0),
+		[rounds]	"=r"(dummy1)
+	:	[in]		"Q"(*in),
+				"1"(ctx->key_enc),
+				"2"(num_rounds(ctx) - 2)
+	:	"cc");
+
+	kernel_neon_end();
+}
+
+static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
+{
+	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aes_block *out = (struct aes_block *)dst;
+	struct aes_block const *in = (struct aes_block *)src;
+	void *dummy0;	/* scratch: the asm post-increments [key] */
+	int dummy1;	/* scratch: the asm counts down [rounds] */
+
+	kernel_neon_begin_partial(4);	/* the asm below uses v0-v3 only */
+
+	__asm__("	ld1	{v0.16b}, %[in]			;"
+		"	ld1	{v1.2d}, [%[key]], #16		;"
+		"	cmp	%w[rounds], #10			;" /* # of rounds - 2 */
+		"	bmi	0f				;" /* 10 rounds */
+		"	bne	3f				;" /* 14 rounds */
+		"	mov	v3.16b, v1.16b			;" /* 12 rounds */
+		"	b	2f				;"
+		"0:	mov	v2.16b, v1.16b			;"
+		"	ld1	{v3.2d}, [%[key]], #16		;"
+		"1:	aesd	v0.16b, v2.16b			;"
+		"	aesimc	v0.16b, v0.16b			;"
+		"2:	ld1	{v1.2d}, [%[key]], #16		;"
+		"	aesd	v0.16b, v3.16b			;"
+		"	aesimc	v0.16b, v0.16b			;"
+		"3:	ld1	{v2.2d}, [%[key]], #16		;"
+		"	subs	%w[rounds], %w[rounds], #3	;"
+		"	aesd	v0.16b, v1.16b			;"
+		"	aesimc	v0.16b, v0.16b			;"
+		"	ld1	{v3.2d}, [%[key]], #16		;"
+		"	bpl	1b				;" /* 3 rounds per pass */
+		"	aesd	v0.16b, v2.16b			;"
+		"	eor	v0.16b, v0.16b, v3.16b		;" /* last round key */
+		"	st1	{v0.16b}, %[out]		;"
+
+	:	[out]		"=Q"(*out),
+		[key]		"=r"(dummy0),
+		[rounds]	"=r"(dummy1)
+	:	[in]		"Q"(*in),
+				"1"(ctx->key_dec),
+				"2"(num_rounds(ctx) - 2)
+	:	"cc");
+
+	kernel_neon_end();
+}
+
+static struct crypto_alg aes_alg = {
+	.cra_name		= "aes",
+	.cra_driver_name	= "aes-ce",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx), /* see crypto_tfm_ctx() uses */
+	.cra_module		= THIS_MODULE,
+	.cra_cipher = {
+		.cia_min_keysize	= AES_MIN_KEY_SIZE,
+		.cia_max_keysize	= AES_MAX_KEY_SIZE,
+		.cia_setkey		= crypto_aes_set_key,	/* not defined in this file */
+		.cia_encrypt		= aes_cipher_encrypt,
+		.cia_decrypt		= aes_cipher_decrypt
+	}
+};
+
+static int __init aes_mod_init(void)
+{
+	return crypto_register_alg(&aes_alg);	/* result is passed on as is */
+}
+
+static void __exit aes_mod_exit(void)
+{
+	crypto_unregister_alg(&aes_alg);	/* undo aes_mod_init() */
+}
+
+module_cpu_feature_match(AES, aes_mod_init);
+module_exit(aes_mod_exit);

From d8cb4d92e89d59ccfddb38e0e07b3f70efae6754 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 10 Feb 2014 11:26:29 +0100
Subject: [PATCH 0201/1185] arm64/crypto: AES in CCM mode using ARMv8 Crypto
 Extensions

This patch adds support for the AES-CCM encryption algorithm for CPUs that
have support for the AES part of the ARM v8 Crypto Extensions.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig           |   7 +
 arch/arm64/crypto/Makefile          |   3 +
 arch/arm64/crypto/aes-ce-ccm-core.S | 222 +++++++++++++++++++++
 arch/arm64/crypto/aes-ce-ccm-glue.c | 297 ++++++++++++++++++++++++++++
 4 files changed, 529 insertions(+)
 create mode 100644 arch/arm64/crypto/aes-ce-ccm-core.S
 create mode 100644 arch/arm64/crypto/aes-ce-ccm-glue.c

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 9ba32c0da871..8fffd5af65ef 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -29,4 +29,11 @@ config CRYPTO_AES_ARM64_CE
 	select CRYPTO_ALGAPI
 	select CRYPTO_AES
 
+config CRYPTO_AES_ARM64_CE_CCM
+	tristate "AES in CCM mode using ARMv8 Crypto Extensions"
+	depends on ARM64 && KERNEL_MODE_NEON
+	select CRYPTO_ALGAPI
+	select CRYPTO_AES
+	select CRYPTO_AEAD
+
 endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 908abd9242b1..311287d68078 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -19,3 +19,6 @@ ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
 
 obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
 CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
+aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
new file mode 100644
index 000000000000..432e4841cd81
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -0,0 +1,222 @@
+/*
+ * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+	.text
+	.arch	armv8-a+crypto
+
+	/*
+	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
+	 *			     u32 *macp, u8 const rk[], u32 rounds);
+	 */
+ENTRY(ce_aes_ccm_auth_data)
+	ldr	w8, [x3]			/* leftover from prev round? */
+	ld1	{v0.2d}, [x0]			/* load mac */
+	cbz	w8, 1f
+	sub	w8, w8, #16			/* -(bytes missing from block) */
+	eor	v1.16b, v1.16b, v1.16b
+0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
+	subs	w2, w2, #1
+	add	w8, w8, #1
+	ins	v1.b[0], w7
+	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
+	beq	8f				/* out of input? */
+	cbnz	w8, 0b
+	eor	v0.16b, v0.16b, v1.16b
+1:	ld1	{v3.2d}, [x4]			/* load first round key */
+	prfm	pldl1strm, [x1]
+	cmp	w5, #12				/* which key size? */
+	add	x6, x4, #16
+	sub	w7, w5, #2			/* modified # of rounds */
+	bmi	2f
+	bne	5f
+	mov	v5.16b, v3.16b
+	b	4f
+2:	mov	v4.16b, v3.16b
+	ld1	{v5.2d}, [x6], #16		/* load 2nd round key */
+3:	aese	v0.16b, v4.16b
+	aesmc	v0.16b, v0.16b
+4:	ld1	{v3.2d}, [x6], #16		/* load next round key */
+	aese	v0.16b, v5.16b
+	aesmc	v0.16b, v0.16b
+5:	ld1	{v4.2d}, [x6], #16		/* load next round key */
+	subs	w7, w7, #3
+	aese	v0.16b, v3.16b
+	aesmc	v0.16b, v0.16b
+	ld1	{v5.2d}, [x6], #16		/* load next round key */
+	bpl	3b
+	aese	v0.16b, v4.16b
+	subs	w2, w2, #16			/* last data? */
+	eor	v0.16b, v0.16b, v5.16b		/* final round */
+	bmi	6f
+	ld1	{v1.16b}, [x1], #16		/* load next input block */
+	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
+	bne	1b
+6:	st1	{v0.2d}, [x0]			/* store mac */
+	beq	10f				/* input ended on a block */
+	adds	w2, w2, #16
+	beq	10f
+	mov	w8, w2				/* tail length is new macp */
+7:	ldrb	w7, [x1], #1
+	umov	w6, v0.b[0]
+	eor	w6, w6, w7
+	strb	w6, [x0], #1
+	subs	w2, w2, #1
+	beq	10f
+	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
+	b	7b
+8:	orr	w7, w8, #0xfffffff0		/* w8 == 0: 16 rotations, no-op */
+	and	w8, w8, #15			/* new macp, 0 if block is full */
+9:	ext	v1.16b, v1.16b, v1.16b, #1
+	adds	w7, w7, #1
+	bne	9b
+	eor	v0.16b, v0.16b, v1.16b
+	st1	{v0.2d}, [x0]
+10:	str	w8, [x3]
+	ret
+ENDPROC(ce_aes_ccm_auth_data)
+
+	/*
+	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
+	 * 			 u32 rounds);
+	 */
+ENTRY(ce_aes_ccm_final)
+	ld1	{v3.2d}, [x2], #16		/* load first round key */
+	ld1	{v0.2d}, [x0]			/* load mac */
+	cmp	w3, #12				/* which key size? */
+	sub	w3, w3, #2			/* modified # of rounds */
+	ld1	{v1.2d}, [x1]			/* load 1st ctriv */
+	bmi	0f				/* fewer than 12 rounds */
+	bne	3f				/* more than 12 rounds */
+	mov	v5.16b, v3.16b
+	b	2f
+0:	mov	v4.16b, v3.16b
+1:	ld1	{v5.2d}, [x2], #16		/* load next round key */
+	aese	v0.16b, v4.16b
+	aese	v1.16b, v4.16b
+	aesmc	v0.16b, v0.16b
+	aesmc	v1.16b, v1.16b
+2:	ld1	{v3.2d}, [x2], #16		/* load next round key */
+	aese	v0.16b, v5.16b
+	aese	v1.16b, v5.16b
+	aesmc	v0.16b, v0.16b
+	aesmc	v1.16b, v1.16b
+3:	ld1	{v4.2d}, [x2], #16		/* load next round key */
+	subs	w3, w3, #3
+	aese	v0.16b, v3.16b
+	aese	v1.16b, v3.16b
+	aesmc	v0.16b, v0.16b
+	aesmc	v1.16b, v1.16b
+	bpl	1b				/* 3 rounds per iteration */
+	aese	v0.16b, v4.16b
+	aese	v1.16b, v4.16b
+	/* final round key cancels out */
+	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
+	st1	{v0.2d}, [x0]			/* store result */
+	ret
+ENDPROC(ce_aes_ccm_final)
+
+	.macro	aes_ccm_do_crypt,enc
+	ldr	x8, [x6, #8]			/* load lower ctr */
+	ld1	{v0.2d}, [x5]			/* load mac */
+	rev	x8, x8				/* keep swabbed ctr in reg */
+0:	/* outer loop */
+	ld1	{v1.1d}, [x6]			/* load upper ctr */
+	prfm	pldl1strm, [x1]
+	add	x8, x8, #1
+	rev	x9, x8
+	cmp	w4, #12				/* which key size? */
+	sub	w7, w4, #2			/* get modified # of rounds */
+	ins	v1.d[1], x9			/* no carry in lower ctr */
+	ld1	{v3.2d}, [x3]			/* load first round key */
+	add	x10, x3, #16
+	bmi	1f
+	bne	4f
+	mov	v5.16b, v3.16b
+	b	3f
+1:	mov	v4.16b, v3.16b
+	ld1	{v5.2d}, [x10], #16		/* load 2nd round key */
+2:	/* inner loop: 3 rounds, 2x interleaved */
+	aese	v0.16b, v4.16b
+	aese	v1.16b, v4.16b
+	aesmc	v0.16b, v0.16b
+	aesmc	v1.16b, v1.16b
+3:	ld1	{v3.2d}, [x10], #16		/* load next round key */
+	aese	v0.16b, v5.16b
+	aese	v1.16b, v5.16b
+	aesmc	v0.16b, v0.16b
+	aesmc	v1.16b, v1.16b
+4:	ld1	{v4.2d}, [x10], #16		/* load next round key */
+	subs	w7, w7, #3
+	aese	v0.16b, v3.16b
+	aese	v1.16b, v3.16b
+	aesmc	v0.16b, v0.16b
+	aesmc	v1.16b, v1.16b
+	ld1	{v5.2d}, [x10], #16		/* load next round key */
+	bpl	2b
+	aese	v0.16b, v4.16b
+	aese	v1.16b, v4.16b
+	subs	w2, w2, #16			/* a full block of input left? */
+	bmi	6f				/* partial block? */
+	ld1	{v2.16b}, [x1], #16		/* load next input block */
+	.if	\enc == 1
+	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
+	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
+	.else
+	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
+	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
+	.endif
+	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
+	st1	{v1.16b}, [x0], #16		/* write output block */
+	bne	0b				/* flags from subs w2 above */
+	rev	x8, x8
+	st1	{v0.2d}, [x5]			/* store mac */
+	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
+5:	ret					/* tail path: ctr not stored */
+
+6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
+	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
+	st1	{v0.2d}, [x5]			/* store mac */
+	add	w2, w2, #16			/* process partial tail block */
+7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
+	umov	w6, v1.b[0]			/* get top crypted ctr byte */
+	umov	w7, v0.b[0]			/* get top mac byte */
+	.if	\enc == 1
+	eor	w7, w7, w9
+	eor	w9, w9, w6
+	.else
+	eor	w9, w9, w6
+	eor	w7, w7, w9
+	.endif
+	strb	w9, [x0], #1			/* store out byte */
+	strb	w7, [x5], #1			/* store mac byte */
+	subs	w2, w2, #1
+	beq	5b
+	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
+	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
+	b	7b
+	.endm
+
+	/*
+	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
+	 * 			   u8 const rk[], u32 rounds, u8 mac[],
+	 * 			   u8 ctr[]);
+	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
+	 * 			   u8 const rk[], u32 rounds, u8 mac[],
+	 * 			   u8 ctr[]);
+	 */
+ENTRY(ce_aes_ccm_encrypt)
+	aes_ccm_do_crypt	1
+ENDPROC(ce_aes_ccm_encrypt)
+
+ENTRY(ce_aes_ccm_decrypt)
+	aes_ccm_do_crypt	0
+ENDPROC(ce_aes_ccm_decrypt)
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
new file mode 100644
index 000000000000..9e6cdde9b43d
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -0,0 +1,297 @@
+/*
+ * aes-ce-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/scatterwalk.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+	/*
+	 * AES prescribes 10, 12 or 14 rounds for keys of 128, 192 or
+	 * 256 bits respectively. With the key length n given in bytes
+	 * (ctx->key_length), that is n/4 + 6 rounds:
+	 * 16 bytes => 10, 24 bytes => 12, 32 bytes => 14
+	 * No check of the key length is made here.
+	 */
+	return ctx->key_length / 4 + 6;
+}
+
+asmlinkage void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
+				     u32 *macp, u32 const rk[], u32 rounds);
+
+asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
+				   u32 const rk[], u32 rounds, u8 mac[],
+				   u8 ctr[]);
+
+asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
+				   u32 const rk[], u32 rounds, u8 mac[],
+				   u8 ctr[]);
+
+asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
+				 u32 rounds);
+
+static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
+		      unsigned int key_len)
+{
+	struct crypto_aes_ctx *aes = crypto_aead_ctx(tfm);
+
+	/* any key expansion failure is reported as a bad key length */
+	if (crypto_aes_expand_key(aes, in_key, key_len) != 0) {
+		tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	/* accept even tag sizes of 4 bytes and up, reject anything else */
+	if (authsize >= 4 && (authsize % 2) == 0)
+		return 0;
+	return -EINVAL;
+}
+
+static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	__be32 *lenfield = (__be32 *)(maciv + AES_BLOCK_SIZE - 8);
+	u32 lsize = req->iv[0] + 1;
+
+	/* the IV must carry a valid CCM 'L' dimension, i.e., 2..8 bytes */
+	if (lsize > 8 || lsize < 2)
+		return -EINVAL;
+
+	/* msglen must fit into the L bytes that are set aside for it */
+	if (lsize < 4 && (msglen >> (8 * lsize)) != 0)
+		return -EOVERFLOW;
+
+	/*
+	 * CCM allows L values of up to 8, but since the Linux cryptoapi
+	 * passes msglen as a u32, the upper 4 bytes are always zero.
+	 */
+	lenfield[0] = 0;
+	lenfield[1] = cpu_to_be32(msglen);
+
+	memcpy(maciv, req->iv, AES_BLOCK_SIZE - lsize);
+
+	/*
+	 * Layout of byte 0 as per the CCM spec (RFC 3610/NIST 800-38C)
+	 * - bits 0..2	: max # of bytes needed to represent msglen, minus 1
+	 *                (supplied by the caller as part of the IV)
+	 * - bits 3..5	: size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc)
+	 * - bit 6	: set if authenticate-only data is present
+	 */
+	maciv[0] |= (crypto_aead_authsize(tfm) - 2) << 2;
+	if (req->assoclen != 0)
+		maciv[0] |= 0x40;
+
+	memset(req->iv + AES_BLOCK_SIZE - lsize, 0, lsize);
+	return 0;
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+	struct __packed { __be16 l; __be32 h; u16 len; } ltag;
+	struct scatter_walk walk;
+	u32 len = req->assoclen;	/* AAD bytes still to be processed */
+	u32 macp = 0;	/* updated by ce_aes_ccm_auth_data() on each call */
+
+	/* prepend the AAD with a length tag */
+	if (len < 0xff00) {
+		ltag.l = cpu_to_be16(len);
+		ltag.len = 2;	/* only ltag.l is passed on */
+	} else  {
+		ltag.l = cpu_to_be16(0xfffe);
+		put_unaligned_be32(len, &ltag.h);
+		ltag.len = 6;	/* ltag.l followed by ltag.h */
+	}
+
+	ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
+			     num_rounds(ctx));
+	scatterwalk_start(&walk, req->assoc);
+
+	do {
+		u32 n = scatterwalk_clamp(&walk, len);
+		u8 *p;
+
+		if (!n) {	/* n == 0: restart the walk on the next sg entry */
+			scatterwalk_start(&walk, sg_next(walk.sg));
+			n = scatterwalk_clamp(&walk, len);
+		}
+		p = scatterwalk_map(&walk);
+		ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
+				     num_rounds(ctx));
+		len -= n;
+
+		scatterwalk_unmap(p);
+		scatterwalk_advance(&walk, n);
+		scatterwalk_done(&walk, 0, len);
+	} while (len);
+}
+
+static int ccm_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+	struct blkcipher_desc desc = { .info = req->iv };
+	struct blkcipher_walk walk;
+	u8 __aligned(8) mac[AES_BLOCK_SIZE];
+	u8 buf[AES_BLOCK_SIZE];	/* copy of the iv as left by ccm_init_mac() */
+	u32 len = req->cryptlen;	/* bytes still to be encrypted */
+	int err;
+
+	err = ccm_init_mac(req, mac, len);
+	if (err)
+		return err;
+
+	kernel_neon_begin_partial(6);
+
+	if (req->assoclen)
+		ccm_calculate_auth_mac(req, mac);
+
+	/* preserve the original iv for the final round */
+	memcpy(buf, req->iv, AES_BLOCK_SIZE);
+
+	blkcipher_walk_init(&walk, req->dst, req->src, len);
+	err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
+					     AES_BLOCK_SIZE);
+
+	while (walk.nbytes) {
+		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+
+		if (walk.nbytes == len)	/* last chunk: include partial block */
+			tail = 0;
+
+		ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				   walk.nbytes - tail, ctx->key_enc,
+				   num_rounds(ctx), mac, walk.iv);
+
+		len -= walk.nbytes - tail;
+		err = blkcipher_walk_done(&desc, &walk, tail);
+	}
+	if (!err)	/* finish the mac using the saved iv copy */
+		ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+
+	kernel_neon_end();
+
+	if (err)
+		return err;
+
+	/* copy authtag to end of dst */
+	scatterwalk_map_and_copy(mac, req->dst, req->cryptlen,
+				 crypto_aead_authsize(aead), 1);
+
+	return 0;
+}
+
+static int ccm_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int authsize = crypto_aead_authsize(aead);
+	struct blkcipher_desc desc = { .info = req->iv };
+	struct blkcipher_walk walk;
+	u8 __aligned(8) mac[AES_BLOCK_SIZE];
+	u8 buf[AES_BLOCK_SIZE];	/* iv copy first, stored auth tag later */
+	u32 len = req->cryptlen - authsize;	/* ciphertext w/o the tag */
+	int err;
+
+	/* the input must at least hold the tag, or 'len' has wrapped around */
+	err = req->cryptlen < authsize ? -EINVAL : ccm_init_mac(req, mac, len);
+	if (err)
+		return err;
+	kernel_neon_begin_partial(6);
+
+	if (req->assoclen)
+		ccm_calculate_auth_mac(req, mac);
+
+	/* preserve the original iv for the final round */
+	memcpy(buf, req->iv, AES_BLOCK_SIZE);
+
+	blkcipher_walk_init(&walk, req->dst, req->src, len);
+	err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
+					     AES_BLOCK_SIZE);
+
+	while (walk.nbytes) {
+		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+
+		if (walk.nbytes == len)	/* last chunk: include partial block */
+			tail = 0;
+
+		ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				   walk.nbytes - tail, ctx->key_enc,
+				   num_rounds(ctx), mac, walk.iv);
+
+		len -= walk.nbytes - tail;
+		err = blkcipher_walk_done(&desc, &walk, tail);
+	}
+	if (!err)	/* finish the mac using the saved iv copy */
+		ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+
+	kernel_neon_end();
+
+	if (err)
+		return err;
+
+	/* compare calculated auth tag with the stored one, in constant time */
+	scatterwalk_map_and_copy(buf, req->src, req->cryptlen - authsize,
+				 authsize, 0);
+
+	if (crypto_memneq(mac, buf, authsize))
+		return -EBADMSG;
+	return 0;
+}
+
+static struct crypto_alg ccm_aes_alg = {
+	.cra_name		= "ccm(aes)",
+	.cra_driver_name	= "ccm-aes-ce",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_AEAD,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx), /* see crypto_aead_ctx() uses */
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_aead_type,
+	.cra_module		= THIS_MODULE,
+	.cra_aead = {
+		.ivsize		= AES_BLOCK_SIZE,
+		.maxauthsize	= AES_BLOCK_SIZE,	/* mac[] is one AES block */
+		.setkey		= ccm_setkey,
+		.setauthsize	= ccm_setauthsize,
+		.encrypt	= ccm_encrypt,
+		.decrypt	= ccm_decrypt,
+	}
+};
+
+static int __init aes_mod_init(void)
+{
+	if (!(elf_hwcap & HWCAP_AES))	/* AES hwcap bit not set */
+		return -ENODEV;
+	return crypto_register_alg(&ccm_aes_alg);
+}
+
+static void __exit aes_mod_exit(void)
+{
+	crypto_unregister_alg(&ccm_aes_alg);	/* undo aes_mod_init() */
+}
+
+module_init(aes_mod_init);
+module_exit(aes_mod_exit);
+
+MODULE_DESCRIPTION("Synchronous AES in CCM mode using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("ccm(aes)");

From a3025a1a97c04833415b658283702e37648a7b7e Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 21 Mar 2014 10:19:17 +0100
Subject: [PATCH 0202/1185] arm64/crypto: AES-ECB/CBC/CTR/XTS using ARMv8 NEON
 and Crypto Extensions

This adds ARMv8 implementations of AES in ECB, CBC, CTR and XTS modes,
both for ARMv8 with Crypto Extensions and for plain ARMv8 NEON.

The Crypto Extensions version can only run on ARMv8 implementations that
have support for these optional extensions.

The plain NEON version is a table based yet time invariant implementation.
All S-box substitutions are performed in parallel, leveraging the wide range
of ARMv8's tbl/tbx instructions, and the huge NEON register file, which can
comfortably hold the entire S-box and still have room to spare for doing the
actual computations.

The key expansion routines were borrowed from aes_generic.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig     |  14 +
 arch/arm64/crypto/Makefile    |  14 +
 arch/arm64/crypto/aes-ce.S    | 133 +++++++++
 arch/arm64/crypto/aes-glue.c  | 446 ++++++++++++++++++++++++++++
 arch/arm64/crypto/aes-modes.S | 532 ++++++++++++++++++++++++++++++++++
 arch/arm64/crypto/aes-neon.S  | 382 ++++++++++++++++++++++++
 6 files changed, 1521 insertions(+)
 create mode 100644 arch/arm64/crypto/aes-ce.S
 create mode 100644 arch/arm64/crypto/aes-glue.c
 create mode 100644 arch/arm64/crypto/aes-modes.S
 create mode 100644 arch/arm64/crypto/aes-neon.S

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 8fffd5af65ef..5562652c5316 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -36,4 +36,18 @@ config CRYPTO_AES_ARM64_CE_CCM
 	select CRYPTO_AES
 	select CRYPTO_AEAD
 
+config CRYPTO_AES_ARM64_CE_BLK
+	tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
+	depends on ARM64 && KERNEL_MODE_NEON
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_AES
+	select CRYPTO_ABLK_HELPER
+
+config CRYPTO_AES_ARM64_NEON_BLK
+	tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
+	depends on ARM64 && KERNEL_MODE_NEON
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_AES
+	select CRYPTO_ABLK_HELPER
+
 endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 311287d68078..2070a56ecc46 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -22,3 +22,17 @@ CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
 
 obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
 aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE_BLK) += aes-ce-blk.o
+aes-ce-blk-y := aes-glue-ce.o aes-ce.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
+aes-neon-blk-y := aes-glue-neon.o aes-neon.o
+
+AFLAGS_aes-ce.o		:= -DINTERLEAVE=2 -DINTERLEAVE_INLINE
+AFLAGS_aes-neon.o	:= -DINTERLEAVE=4
+
+CFLAGS_aes-glue-ce.o	:= -DUSE_V8_CRYPTO_EXTENSIONS
+
+$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
+	$(call if_changed_dep,cc_o_c)
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
new file mode 100644
index 000000000000..685a18f731eb
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce.S
@@ -0,0 +1,133 @@
+/*
+ * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
+ *                                    Crypto Extensions
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#define AES_ENTRY(func)		ENTRY(ce_ ## func)
+#define AES_ENDPROC(func)	ENDPROC(ce_ ## func)
+
+	.arch		armv8-a+crypto
+
+	/* preload all round keys */
+	.macro		load_round_keys, rounds, rk
+	cmp		\rounds, #12
+	blo		2222f		/* 128 bits */
+	beq		1111f		/* 192 bits */
+	ld1		{v17.16b-v18.16b}, [\rk], #32	/* 256 bits only */
+1111:	ld1		{v19.16b-v20.16b}, [\rk], #32
+2222:	ld1		{v21.16b-v24.16b}, [\rk], #64
+	ld1		{v25.16b-v28.16b}, [\rk], #64
+	ld1		{v29.16b-v31.16b}, [\rk]	/* last key ends up in v31 */
+	.endm
+
+	/* prepare for encryption with key in rk[] */
+	.macro		enc_prepare, rounds, rk, ignore
+	load_round_keys	\rounds, \rk
+	.endm
+
+	/* prepare for encryption (again) but with new key in rk[] */
+	.macro		enc_switch_key, rounds, rk, ignore
+	load_round_keys	\rounds, \rk
+	.endm
+
+	/* prepare for decryption with key in rk[] */
+	.macro		dec_prepare, rounds, rk, ignore
+	load_round_keys	\rounds, \rk
+	.endm
+
+	.macro		do_enc_Nx, de, mc, k, i0, i1, i2, i3
+	aes\de		\i0\().16b, \k\().16b
+	.ifnb		\i1
+	aes\de		\i1\().16b, \k\().16b
+	.ifnb		\i3		/* \i2 is only used if \i3 is given */
+	aes\de		\i2\().16b, \k\().16b
+	aes\de		\i3\().16b, \k\().16b
+	.endif
+	.endif
+	aes\mc		\i0\().16b, \i0\().16b
+	.ifnb		\i1
+	aes\mc		\i1\().16b, \i1\().16b
+	.ifnb		\i3		/* \i2 is only used if \i3 is given */
+	aes\mc		\i2\().16b, \i2\().16b
+	aes\mc		\i3\().16b, \i3\().16b
+	.endif
+	.endif
+	.endm
+
+	/* up to 4 interleaved encryption rounds with the same round key */
+	.macro		round_Nx, enc, k, i0, i1, i2, i3
+	.ifc		\enc, e		/* 'e' => aese/aesmc */
+	do_enc_Nx	e, mc, \k, \i0, \i1, \i2, \i3
+	.else				/* otherwise aesd/aesimc */
+	do_enc_Nx	d, imc, \k, \i0, \i1, \i2, \i3
+	.endif
+	.endm
+
+	/* up to 4 interleaved final rounds */
+	.macro		fin_round_Nx, de, k, k2, i0, i1, i2, i3
+	aes\de		\i0\().16b, \k\().16b
+	.ifnb		\i1
+	aes\de		\i1\().16b, \k\().16b
+	.ifnb		\i3		/* \i2 is only used if \i3 is given */
+	aes\de		\i2\().16b, \k\().16b
+	aes\de		\i3\().16b, \k\().16b
+	.endif
+	.endif
+	eor		\i0\().16b, \i0\().16b, \k2\().16b	/* xor with \k2 */
+	.ifnb		\i1
+	eor		\i1\().16b, \i1\().16b, \k2\().16b
+	.ifnb		\i3		/* \i2 is only used if \i3 is given */
+	eor		\i2\().16b, \i2\().16b, \k2\().16b
+	eor		\i3\().16b, \i3\().16b, \k2\().16b
+	.endif
+	.endif
+	.endm
+
+	/* up to 4 interleaved blocks */
+	.macro		do_block_Nx, enc, rounds, i0, i1, i2, i3
+	cmp		\rounds, #12
+	blo		2222f		/* 128 bits */
+	beq		1111f		/* 192 bits */
+	round_Nx	\enc, v17, \i0, \i1, \i2, \i3	/* 256 bits only */
+	round_Nx	\enc, v18, \i0, \i1, \i2, \i3
+1111:	round_Nx	\enc, v19, \i0, \i1, \i2, \i3
+	round_Nx	\enc, v20, \i0, \i1, \i2, \i3
+2222:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
+	round_Nx	\enc, \key, \i0, \i1, \i2, \i3
+	.endr
+	fin_round_Nx	\enc, v30, v31, \i0, \i1, \i2, \i3	/* keys in v30, v31 */
+	.endm
+
+	.macro		encrypt_block, in, rounds, t0, t1, t2
+	do_block_Nx	e, \rounds, \in
+	.endm
+
+	.macro		encrypt_block2x, i0, i1, rounds, t0, t1, t2
+	do_block_Nx	e, \rounds, \i0, \i1
+	.endm
+
+	.macro		encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
+	do_block_Nx	e, \rounds, \i0, \i1, \i2, \i3
+	.endm
+
+	.macro		decrypt_block, in, rounds, t0, t1, t2
+	do_block_Nx	d, \rounds, \in
+	.endm
+
+	.macro		decrypt_block2x, i0, i1, rounds, t0, t1, t2
+	do_block_Nx	d, \rounds, \i0, \i1
+	.endm
+
+	.macro		decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
+	do_block_Nx	d, \rounds, \i0, \i1, \i2, \i3
+	.endm
+
+#include "aes-modes.S"
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
new file mode 100644
index 000000000000..60f2f4c12256
--- /dev/null
+++ b/arch/arm64/crypto/aes-glue.c
@@ -0,0 +1,446 @@
+/*
+ * linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/hwcap.h>
+#include <crypto/aes.h>
+#include <crypto/ablk_helper.h>
+#include <crypto/algapi.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+
+#ifdef USE_V8_CRYPTO_EXTENSIONS	/* this build binds to the ce_* asm routines */
+#define MODE			"ce"	/* suffix used in the algorithm names below */
+#define PRIO			300	/* cra_priority of the public algorithms */
+#define aes_ecb_encrypt		ce_aes_ecb_encrypt
+#define aes_ecb_decrypt		ce_aes_ecb_decrypt
+#define aes_cbc_encrypt		ce_aes_cbc_encrypt
+#define aes_cbc_decrypt		ce_aes_cbc_decrypt
+#define aes_ctr_encrypt		ce_aes_ctr_encrypt
+#define aes_xts_encrypt		ce_aes_xts_encrypt
+#define aes_xts_decrypt		ce_aes_xts_decrypt
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
+#else	/* otherwise bind to the neon_* asm routines */
+#define MODE			"neon"
+#define PRIO			200	/* lower than the 300 used by the "ce" build */
+#define aes_ecb_encrypt		neon_aes_ecb_encrypt
+#define aes_ecb_decrypt		neon_aes_ecb_decrypt
+#define aes_cbc_encrypt		neon_aes_cbc_encrypt
+#define aes_cbc_decrypt		neon_aes_cbc_decrypt
+#define aes_ctr_encrypt		neon_aes_ctr_encrypt
+#define aes_xts_encrypt		neon_aes_xts_encrypt
+#define aes_xts_decrypt		neon_aes_xts_decrypt
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
+MODULE_ALIAS("ecb(aes)");	/* aliases are declared in this branch only */
+MODULE_ALIAS("cbc(aes)");
+MODULE_ALIAS("ctr(aes)");
+MODULE_ALIAS("xts(aes)");
+#endif
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+/*
+ * defined in aes-modes.S
+ *
+ * The aes_* names are remapped by the #defines above to the ce_* or
+ * neon_* symbols.  Common arguments: out/in are the destination and source
+ * buffers, rk (rk1) the expanded round keys, rounds the number of AES
+ * rounds and blocks the number of 16-byte blocks to process.  A non-zero
+ * 'first' makes the routine set up its keys and load the IV/counter; with
+ * first == 0 it continues from the state the previous call left in the
+ * NEON registers.  For XTS, rk2 is the key used to encrypt the initial
+ * tweak.  aes_ctr_encrypt() treats a negative block count as a request to
+ * process a single half block (8 bytes).
+ */
+asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				int rounds, int blocks, int first);
+asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+				int rounds, int blocks, int first);
+
+asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				int rounds, int blocks, u8 iv[], int first);
+asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+				int rounds, int blocks, u8 iv[], int first);
+
+asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				int rounds, int blocks, u8 ctr[], int first);
+
+asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
+				int rounds, int blocks, u8 const rk2[], u8 iv[],
+				int first);
+asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
+				int rounds, int blocks, u8 const rk2[], u8 iv[],
+				int first);
+
+struct crypto_aes_xts_ctx {	/* tfm context of the XTS algorithm: two expanded keys */
+	struct crypto_aes_ctx key1;	/* first key half; handed to the asm as rk1 */
+	struct crypto_aes_ctx __aligned(8) key2;	/* second key half; handed to the asm as rk2 */
+};
+
+/*
+ * Split the supplied XTS key in two and expand each half into its own AES
+ * key schedule: the first half into key1, the second half into key2.
+ *
+ * Returns 0 on success.  If either expansion fails,
+ * CRYPTO_TFM_RES_BAD_KEY_LEN is set on the tfm and -EINVAL is returned.
+ */
+static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len)
+{
+	struct crypto_aes_xts_ctx *xts = crypto_tfm_ctx(tfm);
+	unsigned int half = key_len / 2;
+
+	/* the second expansion is attempted only if the first succeeded */
+	if (crypto_aes_expand_key(&xts->key1, in_key, half) ||
+	    crypto_aes_expand_key(&xts->key2, &in_key[half], half)) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * ECB encryption of a scatterlist.
+ *
+ * Every walk step hands its whole blocks to the asm routine; 'first' is
+ * non-zero for the first call only, so later calls skip the key setup.
+ * Bytes of a step that do not make up a whole block are reported back to
+ * blkcipher_walk_done() as unprocessed, so the walker presents them again
+ * instead of treating them as done.
+ *
+ * CRYPTO_TFM_REQ_MAY_SLEEP is cleared, presumably so the walk cannot sleep
+ * between kernel_neon_begin() and kernel_neon_end().
+ *
+ * Returns the status of the last walk operation.
+ */
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	/* key_length is in bytes: 16/24/32 -> 10/12/14 rounds */
+	int err, first, rounds = 6 + ctx->key_length / 4;
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				(u8 *)ctx->key_enc, rounds, blocks, first);
+		/* only whole blocks were consumed; hand back the residue */
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+	return err;
+}
+
+/*
+ * ECB decryption of a scatterlist, using the decryption key schedule.
+ *
+ * Each walk step passes its whole blocks to the asm routine, with 'first'
+ * set on the first call only.  The part of a step that is not a whole
+ * block is returned to blkcipher_walk_done() as unprocessed rather than
+ * being reported as done.
+ *
+ * CRYPTO_TFM_REQ_MAY_SLEEP is cleared, presumably so the walk cannot sleep
+ * between kernel_neon_begin() and kernel_neon_end().
+ *
+ * Returns the status of the last walk operation.
+ */
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	/* key_length is in bytes: 16/24/32 -> 10/12/14 rounds */
+	int err, first, rounds = 6 + ctx->key_length / 4;
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				(u8 *)ctx->key_dec, rounds, blocks, first);
+		/* only whole blocks were consumed; hand back the residue */
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+	return err;
+}
+
+/*
+ * CBC encryption of a scatterlist.
+ *
+ * walk.iv is passed on every call, but the asm routine only reads it when
+ * 'first' is set; afterwards it continues from the chaining value it kept
+ * in a NEON register.  Whole blocks of each walk step are processed and
+ * the remaining bytes of the step are reported to blkcipher_walk_done()
+ * as unprocessed.
+ *
+ * CRYPTO_TFM_REQ_MAY_SLEEP is cleared, presumably so the walk cannot sleep
+ * between kernel_neon_begin() and kernel_neon_end().
+ *
+ * Returns the status of the last walk operation.
+ */
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	/* key_length is in bytes: 16/24/32 -> 10/12/14 rounds */
+	int err, first, rounds = 6 + ctx->key_length / 4;
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
+				first);
+		/* only whole blocks were consumed; hand back the residue */
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+	return err;
+}
+
+/*
+ * CBC decryption of a scatterlist, using the decryption key schedule.
+ *
+ * The asm routine loads walk.iv only on the 'first' call and afterwards
+ * keeps the previous ciphertext block in a NEON register.  Whole blocks of
+ * each walk step are processed; whatever is left of the step is reported
+ * to blkcipher_walk_done() as unprocessed.
+ *
+ * CRYPTO_TFM_REQ_MAY_SLEEP is cleared, presumably so the walk cannot sleep
+ * between kernel_neon_begin() and kernel_neon_end().
+ *
+ * Returns the status of the last walk operation.
+ */
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	/* key_length is in bytes: 16/24/32 -> 10/12/14 rounds */
+	int err, first, rounds = 6 + ctx->key_length / 4;
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				(u8 *)ctx->key_dec, rounds, blocks, walk.iv,
+				first);
+		/* only whole blocks were consumed; hand back the residue */
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+	return err;
+}
+
+/*
+ * CTR en/decryption of a scatterlist (the same routine serves both).
+ *
+ * Whole blocks of each walk step go straight to the asm routine.  'nbytes'
+ * tracks what is still to be done; when what remains is exactly the
+ * sub-block residue of the current step, the loop stops without calling
+ * back into the walker and the residue is handled below through a bounce
+ * buffer.
+ *
+ * The tail is entered only when the current walk step really contains a
+ * partial block.  Testing the residual byte count alone is not enough: if
+ * the walk terminates early with an error, walk.nbytes is 0 while nbytes
+ * is still non-zero, and the mapped src/dst addresses must not be touched.
+ *
+ * CRYPTO_TFM_REQ_MAY_SLEEP is cleared, presumably so the walk cannot sleep
+ * between kernel_neon_begin() and kernel_neon_end().
+ *
+ * Returns the status of the last walk operation.
+ */
+static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	/* key_length is in bytes: 16/24/32 -> 10/12/14 rounds */
+	int err, first, rounds = 6 + ctx->key_length / 4;
+	struct blkcipher_walk walk;
+	int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+
+	first = 1;
+	kernel_neon_begin();
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
+				first);
+		first = 0;
+		nbytes -= blocks * AES_BLOCK_SIZE;
+		/* final partial block is in this step: finish it below */
+		if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
+			break;
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % AES_BLOCK_SIZE);
+	}
+	if (walk.nbytes % AES_BLOCK_SIZE) {
+		u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
+		u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+		u8 __aligned(8) tail[AES_BLOCK_SIZE];
+
+		/*
+		 * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
+		 * to tell aes_ctr_encrypt() to only read half a block.
+		 */
+		blocks = (nbytes <= 8) ? -1 : 1;
+
+		/* output goes to the bounce buffer; copy only nbytes of it */
+		aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
+				blocks, walk.iv, first);
+		memcpy(tdst, tail, nbytes);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+	kernel_neon_end();
+
+	return err;
+}
+
+/*
+ * XTS encryption of a scatterlist.
+ *
+ * key1 encrypts the data and key2 encrypts the initial tweak; the asm
+ * routine derives the first tweak from walk.iv only when 'first' is set
+ * and keeps the running tweak in a NEON register afterwards.  The round
+ * count is taken from key1.  Whole blocks of each walk step are processed
+ * and the rest of the step is reported to blkcipher_walk_done() as
+ * unprocessed.
+ *
+ * CRYPTO_TFM_REQ_MAY_SLEEP is cleared, presumably so the walk cannot sleep
+ * between kernel_neon_begin() and kernel_neon_end().
+ *
+ * Returns the status of the last walk operation.
+ */
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	/* key_length is in bytes: 16/24/32 -> 10/12/14 rounds */
+	int err, first, rounds = 6 + ctx->key1.key_length / 4;
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				(u8 *)ctx->key1.key_enc, rounds, blocks,
+				(u8 *)ctx->key2.key_enc, walk.iv, first);
+		/* only whole blocks were consumed; hand back the residue */
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+
+	return err;
+}
+
+/*
+ * XTS decryption of a scatterlist.
+ *
+ * The data is decrypted with key1's decryption schedule, while the tweak
+ * is still produced with key2's encryption schedule.  The asm routine
+ * derives the first tweak from walk.iv only when 'first' is set.  Whole
+ * blocks of each walk step are processed and the rest of the step is
+ * reported to blkcipher_walk_done() as unprocessed.
+ *
+ * CRYPTO_TFM_REQ_MAY_SLEEP is cleared, presumably so the walk cannot sleep
+ * between kernel_neon_begin() and kernel_neon_end().
+ *
+ * Returns the status of the last walk operation.
+ */
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	/* key_length is in bytes: 16/24/32 -> 10/12/14 rounds */
+	int err, first, rounds = 6 + ctx->key1.key_length / 4;
+	struct blkcipher_walk walk;
+	unsigned int blocks;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	kernel_neon_begin();
+	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				(u8 *)ctx->key1.key_dec, rounds, blocks,
+				(u8 *)ctx->key2.key_enc, walk.iv, first);
+		/* only whole blocks were consumed; hand back the residue */
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+
+	return err;
+}
+
+static struct crypto_alg aes_algs[] = { {	/* entries 0-3: blkcipher implementations, priority 0 */
+	.cra_name		= "__ecb-aes-" MODE,
+	.cra_driver_name	= "__driver-ecb-aes-" MODE,
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_blkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,	/* NOTE(review): ecb_encrypt() never reads an IV - confirm this is intended */
+		.setkey		= crypto_aes_set_key,
+		.encrypt	= ecb_encrypt,
+		.decrypt	= ecb_decrypt,
+	},
+}, {
+	.cra_name		= "__cbc-aes-" MODE,
+	.cra_driver_name	= "__driver-cbc-aes-" MODE,
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_blkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= crypto_aes_set_key,
+		.encrypt	= cbc_encrypt,
+		.decrypt	= cbc_decrypt,
+	},
+}, {
+	.cra_name		= "__ctr-aes-" MODE,
+	.cra_driver_name	= "__driver-ctr-aes-" MODE,
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,	/* ctr_encrypt() also handles a partial final block */
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_blkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= crypto_aes_set_key,
+		.encrypt	= ctr_encrypt,
+		.decrypt	= ctr_encrypt,	/* same routine for both directions */
+	},
+}, {
+	.cra_name		= "__xts-aes-" MODE,
+	.cra_driver_name	= "__driver-xts-aes-" MODE,
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),	/* two key schedules */
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_blkcipher = {
+		.min_keysize	= 2 * AES_MIN_KEY_SIZE,	/* the key is split in half by xts_set_key() */
+		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= xts_set_key,
+		.encrypt	= xts_encrypt,
+		.decrypt	= xts_decrypt,
+	},
+}, {	/* entries 4-7: async algorithms under the generic names, using the ablk_* helpers */
+	.cra_name		= "ecb(aes)",
+	.cra_driver_name	= "ecb-aes-" MODE,
+	.cra_priority		= PRIO,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,	/* NOTE(review): same ECB ivsize question as entry 0 */
+		.setkey		= ablk_set_key,
+		.encrypt	= ablk_encrypt,
+		.decrypt	= ablk_decrypt,
+	}
+}, {
+	.cra_name		= "cbc(aes)",
+	.cra_driver_name	= "cbc-aes-" MODE,
+	.cra_priority		= PRIO,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= ablk_set_key,
+		.encrypt	= ablk_encrypt,
+		.decrypt	= ablk_decrypt,
+	}
+}, {
+	.cra_name		= "ctr(aes)",
+	.cra_driver_name	= "ctr-aes-" MODE,
+	.cra_priority		= PRIO,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= ablk_set_key,
+		.encrypt	= ablk_encrypt,
+		.decrypt	= ablk_decrypt,
+	}
+}, {
+	.cra_name		= "xts(aes)",
+	.cra_driver_name	= "xts-aes-" MODE,
+	.cra_priority		= PRIO,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_ablkcipher = {
+		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= ablk_set_key,
+		.encrypt	= ablk_encrypt,
+		.decrypt	= ablk_decrypt,
+	}
+} };
+
+/* Module entry point: register every entry of aes_algs[] in one call. */
+static int __init aes_init(void)
+{
+	int err;
+
+	err = crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+	return err;
+}
+
+/* Module exit point: unregister everything aes_init() registered. */
+static void __exit aes_exit(void)
+{
+	struct crypto_alg *algs = aes_algs;
+
+	crypto_unregister_algs(algs, ARRAY_SIZE(aes_algs));
+}
+
+#ifdef USE_V8_CRYPTO_EXTENSIONS	/* "ce" build */
+module_cpu_feature_match(AES, aes_init);	/* presumably ties loading/init to the CPU's AES feature - confirm against linux/cpufeature.h */
+#else
+module_init(aes_init);	/* "neon" build: plain module init */
+#endif
+module_exit(aes_exit);
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
new file mode 100644
index 000000000000..f6e372c528eb
--- /dev/null
+++ b/arch/arm64/crypto/aes-modes.S
@@ -0,0 +1,532 @@
+/*
+ * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* included by aes-ce.S and aes-neon.S */
+
+	.text
+	.align		4
+
+/*
+ * There are several ways to instantiate this code:
+ * - no interleave, all inline
+ * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
+ * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
+ * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
+ * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
+ *
+ * Macros imported by this code:
+ * - enc_prepare	- setup NEON registers for encryption
+ * - dec_prepare	- setup NEON registers for decryption
+ * - enc_switch_key	- change to new key after having prepared for encryption
+ * - encrypt_block	- encrypt a single block
+ * - decrypt_block	- decrypt a single block
+ * - encrypt_block2x	- encrypt 2 blocks in parallel (if INTERLEAVE == 2)
+ * - decrypt_block2x	- decrypt 2 blocks in parallel (if INTERLEAVE == 2)
+ * - encrypt_block4x	- encrypt 4 blocks in parallel (if INTERLEAVE == 4)
+ * - decrypt_block4x	- decrypt 4 blocks in parallel (if INTERLEAVE == 4)
+ */
+
+#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)	/* interleaved, helpers called out of line */
+#define FRAME_PUSH	stp x29, x30, [sp,#-16]! ; mov x29, sp
+#define FRAME_POP	ldp x29, x30, [sp],#16
+
+#if INTERLEAVE == 2
+
+aes_encrypt_block2x:	/* out-of-line body: encrypt_block2x on v0-v1 */
+	encrypt_block2x	v0, v1, w3, x2, x6, w7
+	ret
+ENDPROC(aes_encrypt_block2x)
+
+aes_decrypt_block2x:	/* out-of-line body: decrypt_block2x on v0-v1 */
+	decrypt_block2x	v0, v1, w3, x2, x6, w7
+	ret
+ENDPROC(aes_decrypt_block2x)
+
+#elif INTERLEAVE == 4
+
+aes_encrypt_block4x:	/* out-of-line body: encrypt_block4x on v0-v3 */
+	encrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
+	ret
+ENDPROC(aes_encrypt_block4x)
+
+aes_decrypt_block4x:	/* out-of-line body: decrypt_block4x on v0-v3 */
+	decrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
+	ret
+ENDPROC(aes_decrypt_block4x)
+
+#else
+#error INTERLEAVE should equal 2 or 4
+#endif
+
+	.macro		do_encrypt_block2x	/* bl overwrites x30, which FRAME_PUSH saved */
+	bl		aes_encrypt_block2x
+	.endm
+
+	.macro		do_decrypt_block2x
+	bl		aes_decrypt_block2x
+	.endm
+
+	.macro		do_encrypt_block4x
+	bl		aes_encrypt_block4x
+	.endm
+
+	.macro		do_decrypt_block4x
+	bl		aes_decrypt_block4x
+	.endm
+
+#else	/* no interleave, or interleave fully inline: no calls, so no frame */
+#define FRAME_PUSH
+#define FRAME_POP
+
+	.macro		do_encrypt_block2x	/* expands the cipher in place */
+	encrypt_block2x	v0, v1, w3, x2, x6, w7
+	.endm
+
+	.macro		do_decrypt_block2x
+	decrypt_block2x	v0, v1, w3, x2, x6, w7
+	.endm
+
+	.macro		do_encrypt_block4x
+	encrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
+	.endm
+
+	.macro		do_decrypt_block4x
+	decrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
+	.endm
+
+#endif
+
+	/*
+	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks, int first)
+	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks, int first)
+	 *
+	 * Register use on entry: x0 = out, x1 = in, x2 = rk, w3 = rounds,
+	 * w4 = blocks, w5 = first.  The key setup (enc_prepare/dec_prepare)
+	 * is skipped when first == 0.  With INTERLEAVE >= 2 the input is
+	 * processed INTERLEAVE blocks at a time and the remainder one block
+	 * at a time.  The single-block loop tests its counter after the
+	 * body, so without interleaving 'blocks' must be at least 1.
+	 */
+
+AES_ENTRY(aes_ecb_encrypt)
+	FRAME_PUSH
+	cbz		w5, .LecbencloopNx	/* first == 0: skip key setup */
+
+	enc_prepare	w3, x2, x5
+
+.LecbencloopNx:
+#if INTERLEAVE >= 2
+	subs		w4, w4, #INTERLEAVE	/* a full batch left? */
+	bmi		.Lecbenc1x		/* no: go one block at a time */
+#if INTERLEAVE == 2
+	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 pt blocks */
+	do_encrypt_block2x
+	st1		{v0.16b-v1.16b}, [x0], #32
+#else
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
+	do_encrypt_block4x
+	st1		{v0.16b-v3.16b}, [x0], #64
+#endif
+	b		.LecbencloopNx
+.Lecbenc1x:
+	adds		w4, w4, #INTERLEAVE	/* undo the subtraction above */
+	beq		.Lecbencout		/* nothing left over */
+#endif
+.Lecbencloop:
+	ld1		{v0.16b}, [x1], #16		/* get next pt block */
+	encrypt_block	v0, w3, x2, x5, w6
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
+	bne		.Lecbencloop
+.Lecbencout:
+	FRAME_POP
+	ret
+AES_ENDPROC(aes_ecb_encrypt)
+
+
+AES_ENTRY(aes_ecb_decrypt)	/* x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, w5 = first */
+	FRAME_PUSH
+	cbz		w5, .LecbdecloopNx	/* first == 0: skip key setup */
+
+	dec_prepare	w3, x2, x5
+
+.LecbdecloopNx:
+#if INTERLEAVE >= 2
+	subs		w4, w4, #INTERLEAVE	/* a full batch left? */
+	bmi		.Lecbdec1x		/* no: go one block at a time */
+#if INTERLEAVE == 2
+	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
+	do_decrypt_block2x
+	st1		{v0.16b-v1.16b}, [x0], #32
+#else
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
+	do_decrypt_block4x
+	st1		{v0.16b-v3.16b}, [x0], #64
+#endif
+	b		.LecbdecloopNx
+.Lecbdec1x:
+	adds		w4, w4, #INTERLEAVE	/* undo the subtraction above */
+	beq		.Lecbdecout		/* nothing left over */
+#endif
+.Lecbdecloop:	/* tests w4 after the body: expects at least 1 block here */
+	ld1		{v0.16b}, [x1], #16		/* get next ct block */
+	decrypt_block	v0, w3, x2, x5, w6
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
+	bne		.Lecbdecloop
+.Lecbdecout:
+	FRAME_POP
+	ret
+AES_ENDPROC(aes_ecb_decrypt)
+
+
+	/*
+	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks, u8 iv[], int first)
+	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks, u8 iv[], int first)
+	 *
+	 * Register use on entry: x0 = out, x1 = in, x2 = rk, w3 = rounds,
+	 * w4 = blocks, x5 = iv, w6 = first.  The IV is read from iv[] only
+	 * when first != 0; otherwise the chaining value left in the NEON
+	 * registers by the previous call is used (v0 for encryption, v7 for
+	 * decryption).  Neither routine stores the chaining value back to
+	 * iv[].  Encryption always works one block at a time because each
+	 * block is XORed with the previous ciphertext before being encrypted.
+	 */
+
+AES_ENTRY(aes_cbc_encrypt)
+	cbz		w6, .Lcbcencloop	/* first == 0: keep v0 and keys */
+
+	ld1		{v0.16b}, [x5]			/* get iv */
+	enc_prepare	w3, x2, x5	/* x5 is free to use as a temp from here on */
+
+.Lcbcencloop:	/* tests w4 after the body: expects at least 1 block */
+	ld1		{v1.16b}, [x1], #16		/* get next pt block */
+	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with iv */
+	encrypt_block	v0, w3, x2, x5, w6
+	st1		{v0.16b}, [x0], #16	/* v0 is also the next chaining value */
+	subs		w4, w4, #1
+	bne		.Lcbcencloop
+	ret
+AES_ENDPROC(aes_cbc_encrypt)
+
+
+AES_ENTRY(aes_cbc_decrypt)	/* x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = iv, w6 = first */
+	FRAME_PUSH
+	cbz		w6, .LcbcdecloopNx	/* first == 0: keep v7 and keys */
+
+	ld1		{v7.16b}, [x5]			/* get iv */
+	dec_prepare	w3, x2, x5
+
+.LcbcdecloopNx:
+#if INTERLEAVE >= 2
+	subs		w4, w4, #INTERLEAVE	/* a full batch left? */
+	bmi		.Lcbcdec1x		/* no: go one block at a time */
+#if INTERLEAVE == 2
+	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
+	mov		v2.16b, v0.16b	/* keep both ct blocks for chaining */
+	mov		v3.16b, v1.16b
+	do_decrypt_block2x
+	eor		v0.16b, v0.16b, v7.16b	/* block 0 xor iv/previous ct */
+	eor		v1.16b, v1.16b, v2.16b	/* block 1 xor ct 0 */
+	mov		v7.16b, v3.16b	/* last ct is the next iv */
+	st1		{v0.16b-v1.16b}, [x0], #32
+#else
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
+	mov		v4.16b, v0.16b	/* keep ct 0-2 for chaining */
+	mov		v5.16b, v1.16b
+	mov		v6.16b, v2.16b
+	do_decrypt_block4x
+	sub		x1, x1, #16	/* step back to the 4th ct block */
+	eor		v0.16b, v0.16b, v7.16b
+	eor		v1.16b, v1.16b, v4.16b
+	ld1		{v7.16b}, [x1], #16		/* reload 1 ct block */
+	eor		v2.16b, v2.16b, v5.16b
+	eor		v3.16b, v3.16b, v6.16b
+	st1		{v0.16b-v3.16b}, [x0], #64
+#endif
+	b		.LcbcdecloopNx
+.Lcbcdec1x:
+	adds		w4, w4, #INTERLEAVE	/* undo the subtraction above */
+	beq		.Lcbcdecout		/* nothing left over */
+#endif
+.Lcbcdecloop:	/* tests w4 after the body: expects at least 1 block here */
+	ld1		{v1.16b}, [x1], #16		/* get next ct block */
+	mov		v0.16b, v1.16b			/* ...and copy to v0 */
+	decrypt_block	v0, w3, x2, x5, w6
+	eor		v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
+	mov		v7.16b, v1.16b			/* ct is next iv */
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
+	bne		.Lcbcdecloop
+.Lcbcdecout:
+	FRAME_POP
+	ret
+AES_ENDPROC(aes_cbc_decrypt)
+
+
+	/*
+	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks, u8 ctr[], int first)
+	 *
+	 * Register use on entry: x0 = out, x1 = in, x2 = rk, w3 = rounds,
+	 * w4 = blocks, x5 = ctr, w6 = first.  The counter block is read from
+	 * ctr[] only when first != 0; it is then kept in v4, and x5 is
+	 * reused for the byte-swapped low 64 bits of the counter.  Nothing
+	 * in this routine stores the counter back to ctr[].
+	 *
+	 * The interleaved path only updates the low 64 bits, so it is used
+	 * only when adding 'blocks' to the low 32 bits of the counter does
+	 * not carry; otherwise the single-block path, which propagates the
+	 * carry into the upper 64 bits, handles the whole request.
+	 *
+	 * A negative block count requests one half block: 8 bytes are read
+	 * from in[] and 8 bytes are written to out[].
+	 */
+
+AES_ENTRY(aes_ctr_encrypt)
+	FRAME_PUSH
+	cbnz		w6, .Lctrfirst		/* 1st time around? */
+	umov		x5, v4.d[1]		/* keep swabbed ctr in reg */
+	rev		x5, x5
+#if INTERLEAVE >= 2
+	cmn		w5, w4			/* 32 bit overflow? */
+	bcs		.Lctrinc	/* yes: use the carry-aware 1x path */
+	add		x5, x5, #1		/* increment BE ctr */
+	b		.LctrincNx
+#else
+	b		.Lctrinc	/* v4 holds the last value used: advance it */
+#endif
+.Lctrfirst:
+	enc_prepare	w3, x2, x6
+	ld1		{v4.16b}, [x5]	/* load the counter block */
+	umov		x5, v4.d[1]		/* keep swabbed ctr in reg */
+	rev		x5, x5
+#if INTERLEAVE >= 2
+	cmn		w5, w4			/* 32 bit overflow? */
+	bcs		.Lctrloop	/* yes: use the carry-aware 1x path */
+.LctrloopNx:
+	subs		w4, w4, #INTERLEAVE	/* a full batch left? */
+	bmi		.Lctr1x			/* no: go one block at a time */
+#if INTERLEAVE == 2
+	mov		v0.8b, v4.8b	/* first 8 bytes are common to both blocks */
+	mov		v1.8b, v4.8b
+	rev		x7, x5
+	add		x5, x5, #1
+	ins		v0.d[1], x7	/* block 0 uses ctr */
+	rev		x7, x5
+	add		x5, x5, #1
+	ins		v1.d[1], x7	/* block 1 uses ctr + 1 */
+	ld1		{v2.16b-v3.16b}, [x1], #32	/* get 2 input blocks */
+	do_encrypt_block2x
+	eor		v0.16b, v0.16b, v2.16b
+	eor		v1.16b, v1.16b, v3.16b
+	st1		{v0.16b-v1.16b}, [x0], #32
+#else
+	ldr		q8, =0x30000000200000001	/* addends 1,2,3[,0] */
+	dup		v7.4s, w5	/* low 32 bits of ctr in every lane */
+	mov		v0.16b, v4.16b	/* block 0 uses ctr as is */
+	add		v7.4s, v7.4s, v8.4s	/* ctr + 1, + 2, + 3 */
+	mov		v1.16b, v4.16b
+	rev32		v8.16b, v7.16b	/* back to big endian */
+	mov		v2.16b, v4.16b
+	mov		v3.16b, v4.16b
+	mov		v1.s[3], v8.s[0]
+	mov		v2.s[3], v8.s[1]
+	mov		v3.s[3], v8.s[2]
+	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
+	do_encrypt_block4x
+	eor		v0.16b, v5.16b, v0.16b
+	ld1		{v5.16b}, [x1], #16		/* get 1 input block  */
+	eor		v1.16b, v6.16b, v1.16b
+	eor		v2.16b, v7.16b, v2.16b
+	eor		v3.16b, v5.16b, v3.16b
+	st1		{v0.16b-v3.16b}, [x0], #64
+	add		x5, x5, #INTERLEAVE
+#endif
+	cbz		w4, .LctroutNx
+.LctrincNx:
+	rev		x7, x5	/* write the updated counter back into v4 */
+	ins		v4.d[1], x7
+	b		.LctrloopNx
+.LctroutNx:
+	sub		x5, x5, #1	/* leave v4 at the last counter value used */
+	rev		x7, x5
+	ins		v4.d[1], x7
+	b		.Lctrout
+.Lctr1x:
+	adds		w4, w4, #INTERLEAVE	/* undo the subtraction above */
+	beq		.Lctrout		/* nothing left over */
+#endif
+.Lctrloop:
+	mov		v0.16b, v4.16b
+	encrypt_block	v0, w3, x2, x6, w7	/* v0 = keystream block */
+	subs		w4, w4, #1
+	bmi		.Lctrhalfblock		/* blocks < 0 means 1/2 block */
+	ld1		{v3.16b}, [x1], #16
+	eor		v3.16b, v0.16b, v3.16b
+	st1		{v3.16b}, [x0], #16
+	beq		.Lctrout	/* flags are still those of the subs above */
+.Lctrinc:
+	adds		x5, x5, #1		/* increment BE ctr */
+	rev		x7, x5
+	ins		v4.d[1], x7
+	bcc		.Lctrloop		/* no overflow? */
+	umov		x7, v4.d[0]		/* load upper word of ctr  */
+	rev		x7, x7			/* ... to handle the carry */
+	add		x7, x7, #1
+	rev		x7, x7
+	ins		v4.d[0], x7
+	b		.Lctrloop
+.Lctrhalfblock:
+	ld1		{v3.8b}, [x1]	/* only 8 bytes in ... */
+	eor		v3.8b, v0.8b, v3.8b
+	st1		{v3.8b}, [x0]	/* ... and 8 bytes out */
+.Lctrout:
+	FRAME_POP
+	ret
+AES_ENDPROC(aes_ctr_encrypt)
+	.ltorg
+
+
+	/*
+	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
+	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
+	 */
+
+	.macro		next_tweak, out, in, const, tmp	/* out = in times x in GF(2^128); const = .Lxts_mul_x; clobbers tmp */
+	sshr		\tmp\().2d,  \in\().2d,   #63	/* all ones in each half whose top bit is set */
+	and		\tmp\().16b, \tmp\().16b, \const\().16b	/* keep 1 (low half) or 0x87 (high half) */
+	add		\out\().2d,  \in\().2d,   \in\().2d	/* shift both halves left by one bit */
+	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8	/* swap halves: carries cross over */
+	eor		\out\().16b, \out\().16b, \tmp\().16b	/* apply carry and reduction */
+	.endm
+
+.Lxts_mul_x:	/* mask constant for next_tweak: 64-bit halves 0x1 and 0x87 */
+	.word		1, 0, 0x87, 0
+
+AES_ENTRY(aes_xts_encrypt)	/* x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks, x5 = rk2, x6 = iv, w7 = first */
+	FRAME_PUSH
+	cbz		w7, .LxtsencloopNx	/* first == 0: continue from the tweak in v4 */
+
+	ld1		{v4.16b}, [x6]	/* load iv */
+	enc_prepare	w3, x5, x6	/* set up with rk2 */
+	encrypt_block	v4, w3, x5, x6, w7		/* first tweak */
+	enc_switch_key	w3, x2, x6	/* switch to rk1 for the data */
+	ldr		q7, .Lxts_mul_x
+	b		.LxtsencNx
+
+.LxtsencloopNx:
+	ldr		q7, .Lxts_mul_x	/* (re)load: the 4x path overwrites v7 */
+	next_tweak	v4, v4, v7, v8	/* v4 held the last tweak used */
+.LxtsencNx:
+#if INTERLEAVE >= 2
+	subs		w4, w4, #INTERLEAVE	/* a full batch left? */
+	bmi		.Lxtsenc1x		/* no: go one block at a time */
+#if INTERLEAVE == 2
+	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 pt blocks */
+	next_tweak	v5, v4, v7, v8	/* tweak for block 1 */
+	eor		v0.16b, v0.16b, v4.16b	/* xor tweaks in before the cipher */
+	eor		v1.16b, v1.16b, v5.16b
+	do_encrypt_block2x
+	eor		v0.16b, v0.16b, v4.16b	/* ... and again afterwards */
+	eor		v1.16b, v1.16b, v5.16b
+	st1		{v0.16b-v1.16b}, [x0], #32
+	cbz		w4, .LxtsencoutNx
+	next_tweak	v4, v5, v7, v8
+	b		.LxtsencNx
+.LxtsencoutNx:
+	mov		v4.16b, v5.16b	/* leave the last tweak used in v4 */
+	b		.Lxtsencout
+#else
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
+	next_tweak	v5, v4, v7, v8
+	eor		v0.16b, v0.16b, v4.16b
+	next_tweak	v6, v5, v7, v8
+	eor		v1.16b, v1.16b, v5.16b
+	eor		v2.16b, v2.16b, v6.16b
+	next_tweak	v7, v6, v7, v8	/* 4th tweak replaces the constant in v7 */
+	eor		v3.16b, v3.16b, v7.16b
+	do_encrypt_block4x
+	eor		v3.16b, v3.16b, v7.16b
+	eor		v0.16b, v0.16b, v4.16b
+	eor		v1.16b, v1.16b, v5.16b
+	eor		v2.16b, v2.16b, v6.16b
+	st1		{v0.16b-v3.16b}, [x0], #64
+	mov		v4.16b, v7.16b	/* leave the last tweak used in v4 */
+	cbz		w4, .Lxtsencout
+	b		.LxtsencloopNx
+#endif
+.Lxtsenc1x:
+	adds		w4, w4, #INTERLEAVE	/* undo the subtraction above */
+	beq		.Lxtsencout		/* nothing left over */
+#endif
+.Lxtsencloop:
+	ld1		{v1.16b}, [x1], #16
+	eor		v0.16b, v1.16b, v4.16b
+	encrypt_block	v0, w3, x2, x6, w7
+	eor		v0.16b, v0.16b, v4.16b
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
+	beq		.Lxtsencout	/* exit before advancing the tweak */
+	next_tweak	v4, v4, v7, v8
+	b		.Lxtsencloop
+.Lxtsencout:
+	FRAME_POP
+	ret
+AES_ENDPROC(aes_xts_encrypt)
+
+
+AES_ENTRY(aes_xts_decrypt)	/* x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks, x5 = rk2, x6 = iv, w7 = first */
+	FRAME_PUSH
+	cbz		w7, .LxtsdecloopNx	/* first == 0: continue from the tweak in v4 */
+
+	ld1		{v4.16b}, [x6]	/* load iv */
+	enc_prepare	w3, x5, x6	/* the tweak is encrypted, also when decrypting */
+	encrypt_block	v4, w3, x5, x6, w7		/* first tweak */
+	dec_prepare	w3, x2, x6	/* set up rk1 for decrypting the data */
+	ldr		q7, .Lxts_mul_x
+	b		.LxtsdecNx
+
+.LxtsdecloopNx:
+	ldr		q7, .Lxts_mul_x	/* (re)load: the 4x path overwrites v7 */
+	next_tweak	v4, v4, v7, v8	/* v4 held the last tweak used */
+.LxtsdecNx:
+#if INTERLEAVE >= 2
+	subs		w4, w4, #INTERLEAVE	/* a full batch left? */
+	bmi		.Lxtsdec1x		/* no: go one block at a time */
+#if INTERLEAVE == 2
+	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
+	next_tweak	v5, v4, v7, v8	/* tweak for block 1 */
+	eor		v0.16b, v0.16b, v4.16b	/* xor tweaks in before the cipher */
+	eor		v1.16b, v1.16b, v5.16b
+	do_decrypt_block2x
+	eor		v0.16b, v0.16b, v4.16b	/* ... and again afterwards */
+	eor		v1.16b, v1.16b, v5.16b
+	st1		{v0.16b-v1.16b}, [x0], #32
+	cbz		w4, .LxtsdecoutNx
+	next_tweak	v4, v5, v7, v8
+	b		.LxtsdecNx
+.LxtsdecoutNx:
+	mov		v4.16b, v5.16b	/* leave the last tweak used in v4 */
+	b		.Lxtsdecout
+#else
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
+	next_tweak	v5, v4, v7, v8
+	eor		v0.16b, v0.16b, v4.16b
+	next_tweak	v6, v5, v7, v8
+	eor		v1.16b, v1.16b, v5.16b
+	eor		v2.16b, v2.16b, v6.16b
+	next_tweak	v7, v6, v7, v8	/* 4th tweak replaces the constant in v7 */
+	eor		v3.16b, v3.16b, v7.16b
+	do_decrypt_block4x
+	eor		v3.16b, v3.16b, v7.16b
+	eor		v0.16b, v0.16b, v4.16b
+	eor		v1.16b, v1.16b, v5.16b
+	eor		v2.16b, v2.16b, v6.16b
+	st1		{v0.16b-v3.16b}, [x0], #64
+	mov		v4.16b, v7.16b	/* leave the last tweak used in v4 */
+	cbz		w4, .Lxtsdecout
+	b		.LxtsdecloopNx
+#endif
+.Lxtsdec1x:
+	adds		w4, w4, #INTERLEAVE	/* undo the subtraction above */
+	beq		.Lxtsdecout		/* nothing left over */
+#endif
+.Lxtsdecloop:
+	ld1		{v1.16b}, [x1], #16
+	eor		v0.16b, v1.16b, v4.16b
+	decrypt_block	v0, w3, x2, x6, w7
+	eor		v0.16b, v0.16b, v4.16b
+	st1		{v0.16b}, [x0], #16
+	subs		w4, w4, #1
+	beq		.Lxtsdecout	/* exit before advancing the tweak */
+	next_tweak	v4, v4, v7, v8
+	b		.Lxtsdecloop
+.Lxtsdecout:
+	FRAME_POP
+	ret
+AES_ENDPROC(aes_xts_decrypt)
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
new file mode 100644
index 000000000000..b93170e1cc93
--- /dev/null
+++ b/arch/arm64/crypto/aes-neon.S
@@ -0,0 +1,382 @@
+/*
+ * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#define AES_ENTRY(func)		ENTRY(neon_ ## func)
+#define AES_ENDPROC(func)	ENDPROC(neon_ ## func)
+
+	/* multiply by polynomial 'x' in GF(2^8) */
+	.macro		mul_by_x, out, in, temp, const
+	sshr		\temp, \in, #7
+	add		\out, \in, \in
+	and		\temp, \temp, \const
+	eor		\out, \out, \temp
+	.endm
+
+	/* preload the entire Sbox */
+	.macro		prepare, sbox, shiftrows, temp
+	adr		\temp, \sbox			/* \temp walks the 256-byte table */
+	movi		v12.16b, #0x40			/* index step between 64-byte tbl/tbx chunks */
+	ldr		q13, \shiftrows			/* permutation vector for the ShiftRows tbl */
+	movi		v14.16b, #0x1b			/* constant handed to mul_by_x */
+	ld1		{v16.16b-v19.16b}, [\temp], #64	/* table bytes 0x00-0x3f */
+	ld1		{v20.16b-v23.16b}, [\temp], #64	/* table bytes 0x40-0x7f */
+	ld1		{v24.16b-v27.16b}, [\temp], #64	/* table bytes 0x80-0xbf */
+	ld1		{v28.16b-v31.16b}, [\temp]	/* table bytes 0xc0-0xff */
+	.endm
+
+	/* do preload for encryption */
+	.macro		enc_prepare, ignore0, ignore1, temp
+	prepare		.LForward_Sbox, .LForward_ShiftRows, \temp
+	.endm
+
+	.macro		enc_switch_key, ignore0, ignore1, temp
+	/* do nothing */
+	.endm
+
+	/* do preload for decryption */
+	.macro		dec_prepare, ignore0, ignore1, temp
+	prepare		.LReverse_Sbox, .LReverse_ShiftRows, \temp
+	.endm
+
+	/* apply SubBytes transformation using the preloaded Sbox */
+	.macro		sub_bytes, in
+	sub		v9.16b, \in\().16b, v12.16b
+	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
+	sub		v10.16b, v9.16b, v12.16b
+	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
+	sub		v11.16b, v10.16b, v12.16b
+	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
+	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
+	.endm
+
+	/* apply MixColumns transformation */
+	.macro		mix_columns, in
+	mul_by_x	v10.16b, \in\().16b, v9.16b, v14.16b
+	rev32		v8.8h, \in\().8h
+	eor		\in\().16b, v10.16b, \in\().16b
+	shl		v9.4s, v8.4s, #24
+	shl		v11.4s, \in\().4s, #24
+	sri		v9.4s, v8.4s, #8
+	sri		v11.4s, \in\().4s, #8
+	eor		v9.16b, v9.16b, v8.16b
+	eor		v10.16b, v10.16b, v9.16b
+	eor		\in\().16b, v10.16b, v11.16b
+	.endm
+
+	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
+	.macro		inv_mix_columns, in
+	mul_by_x	v11.16b, \in\().16b, v10.16b, v14.16b
+	mul_by_x	v11.16b, v11.16b, v10.16b, v14.16b
+	eor		\in\().16b, \in\().16b, v11.16b
+	rev32		v11.8h, v11.8h
+	eor		\in\().16b, \in\().16b, v11.16b
+	mix_columns	\in
+	.endm
+
+	.macro		do_block, enc, in, rounds, rk, rkp, i
+	ld1		{v15.16b}, [\rk]
+	add		\rkp, \rk, #16
+	mov		\i, \rounds
+1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
+	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
+	sub_bytes	\in
+	ld1		{v15.16b}, [\rkp], #16
+	subs		\i, \i, #1
+	beq		2222f
+	.if		\enc == 1
+	mix_columns	\in
+	.else
+	inv_mix_columns	\in
+	.endif
+	b		1111b
+2222:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
+	.endm
+
+	.macro		encrypt_block, in, rounds, rk, rkp, i
+	do_block	1, \in, \rounds, \rk, \rkp, \i
+	.endm
+
+	.macro		decrypt_block, in, rounds, rk, rkp, i
+	do_block	0, \in, \rounds, \rk, \rkp, \i
+	.endm
+
+	/*
+	 * Interleaved versions: functionally equivalent to the
+	 * ones above, but applied to 2 or 4 AES states in parallel.
+	 */
+
+	.macro		sub_bytes_2x, in0, in1
+	sub		v8.16b, \in0\().16b, v12.16b
+	sub		v9.16b, \in1\().16b, v12.16b
+	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
+	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
+	sub		v10.16b, v8.16b, v12.16b
+	sub		v11.16b, v9.16b, v12.16b
+	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
+	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
+	sub		v8.16b, v10.16b, v12.16b
+	sub		v9.16b, v11.16b, v12.16b
+	tbx		\in0\().16b, {v24.16b-v27.16b}, v10.16b
+	tbx		\in1\().16b, {v24.16b-v27.16b}, v11.16b
+	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
+	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
+	.endm
+
+	.macro		sub_bytes_4x, in0, in1, in2, in3
+	sub		v8.16b, \in0\().16b, v12.16b
+	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
+	sub		v9.16b, \in1\().16b, v12.16b
+	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
+	sub		v10.16b, \in2\().16b, v12.16b
+	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
+	sub		v11.16b, \in3\().16b, v12.16b
+	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
+	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
+	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
+	sub		v8.16b, v8.16b, v12.16b
+	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
+	sub		v9.16b, v9.16b, v12.16b
+	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
+	sub		v10.16b, v10.16b, v12.16b
+	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
+	sub		v11.16b, v11.16b, v12.16b
+	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
+	sub		v8.16b, v8.16b, v12.16b
+	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
+	sub		v9.16b, v9.16b, v12.16b
+	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
+	sub		v10.16b, v10.16b, v12.16b
+	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
+	sub		v11.16b, v11.16b, v12.16b
+	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
+	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
+	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
+	.endm
+
+	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
+	sshr		\tmp0\().16b, \in0\().16b,  #7
+	add		\out0\().16b, \in0\().16b,  \in0\().16b
+	sshr		\tmp1\().16b, \in1\().16b,  #7
+	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
+	add		\out1\().16b, \in1\().16b,  \in1\().16b
+	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
+	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
+	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
+	.endm
+
+	.macro		mix_columns_2x, in0, in1
+	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v14
+	rev32		v10.8h, \in0\().8h
+	rev32		v11.8h, \in1\().8h
+	eor		\in0\().16b, v8.16b, \in0\().16b
+	eor		\in1\().16b, v9.16b, \in1\().16b
+	shl		v12.4s, v10.4s, #24
+	shl		v13.4s, v11.4s, #24
+	eor		v8.16b, v8.16b, v10.16b
+	sri		v12.4s, v10.4s, #8
+	shl		v10.4s, \in0\().4s, #24
+	eor		v9.16b, v9.16b, v11.16b
+	sri		v13.4s, v11.4s, #8
+	shl		v11.4s, \in1\().4s, #24
+	sri		v10.4s, \in0\().4s, #8
+	eor		\in0\().16b, v8.16b, v12.16b
+	sri		v11.4s, \in1\().4s, #8
+	eor		\in1\().16b, v9.16b, v13.16b
+	eor		\in0\().16b, v10.16b, \in0\().16b
+	eor		\in1\().16b, v11.16b, \in1\().16b
+	.endm
+
+	.macro		inv_mix_cols_2x, in0, in1
+	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v14
+	mul_by_x_2x	v8, v9, v8, v9, v10, v11, v14
+	eor		\in0\().16b, \in0\().16b, v8.16b
+	eor		\in1\().16b, \in1\().16b, v9.16b
+	rev32		v8.8h, v8.8h
+	rev32		v9.8h, v9.8h
+	eor		\in0\().16b, \in0\().16b, v8.16b
+	eor		\in1\().16b, \in1\().16b, v9.16b
+	mix_columns_2x	\in0, \in1
+	.endm
+
+	.macro		inv_mix_cols_4x, in0, in1, in2, in3
+	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v14
+	mul_by_x_2x	v10, v11, \in2, \in3, v12, v13, v14
+	mul_by_x_2x	v8, v9, v8, v9, v12, v13, v14
+	mul_by_x_2x	v10, v11, v10, v11, v12, v13, v14
+	eor		\in0\().16b, \in0\().16b, v8.16b
+	eor		\in1\().16b, \in1\().16b, v9.16b
+	eor		\in2\().16b, \in2\().16b, v10.16b
+	eor		\in3\().16b, \in3\().16b, v11.16b
+	rev32		v8.8h, v8.8h
+	rev32		v9.8h, v9.8h
+	rev32		v10.8h, v10.8h
+	rev32		v11.8h, v11.8h
+	eor		\in0\().16b, \in0\().16b, v8.16b
+	eor		\in1\().16b, \in1\().16b, v9.16b
+	eor		\in2\().16b, \in2\().16b, v10.16b
+	eor		\in3\().16b, \in3\().16b, v11.16b
+	mix_columns_2x	\in0, \in1
+	mix_columns_2x	\in2, \in3
+	.endm
+
+	.macro		do_block_2x, enc, in0, in1, rounds, rk, rkp, i
+	ld1		{v15.16b}, [\rk]			/* first round key */
+	add		\rkp, \rk, #16
+	mov		\i, \rounds
+1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
+	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
+	sub_bytes_2x	\in0, \in1
+	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
+	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
+	ld1		{v15.16b}, [\rkp], #16			/* next round key */
+	subs		\i, \i, #1
+	beq		2222f					/* last round: no MixColumns */
+	.if		\enc == 1
+	mix_columns_2x	\in0, \in1
+	ldr		q13, .LForward_ShiftRows		/* v13 was scratch in mix_columns_2x */
+	.else
+	inv_mix_cols_2x	\in0, \in1
+	ldr		q13, .LReverse_ShiftRows		/* v13 was scratch in mix_columns_2x */
+	.endif
+	movi		v12.16b, #0x40				/* v12 was scratch too; sub_bytes_2x needs it */
+	b		1111b
+2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
+	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
+	.endm
+
+	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
+	ld1		{v15.16b}, [\rk]
+	add		\rkp, \rk, #16
+	mov		\i, \rounds
+1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
+	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
+	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
+	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
+	sub_bytes_4x	\in0, \in1, \in2, \in3
+	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
+	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
+	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
+	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
+	ld1		{v15.16b}, [\rkp], #16
+	subs		\i, \i, #1
+	beq		2222f
+	.if		\enc == 1
+	mix_columns_2x	\in0, \in1
+	mix_columns_2x	\in2, \in3
+	ldr		q13, .LForward_ShiftRows
+	.else
+	inv_mix_cols_4x	\in0, \in1, \in2, \in3
+	ldr		q13, .LReverse_ShiftRows
+	.endif
+	movi		v12.16b, #0x40
+	b		1111b
+2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
+	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
+	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
+	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
+	.endm
+
+	.macro		encrypt_block2x, in0, in1, rounds, rk, rkp, i
+	do_block_2x	1, \in0, \in1, \rounds, \rk, \rkp, \i
+	.endm
+
+	.macro		decrypt_block2x, in0, in1, rounds, rk, rkp, i
+	do_block_2x	0, \in0, \in1, \rounds, \rk, \rkp, \i
+	.endm
+
+	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
+	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
+	.endm
+
+	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
+	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
+	.endm
+
+#include "aes-modes.S"
+
+	.text
+	.align		4
+.LForward_ShiftRows:
+	.byte		0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
+	.byte		0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb
+
+.LReverse_ShiftRows:
+	.byte		0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
+	.byte		0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3
+
+.LForward_Sbox:
+	.byte		0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
+	.byte		0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+	.byte		0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+	.byte		0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+	.byte		0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+	.byte		0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+	.byte		0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+	.byte		0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+	.byte		0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+	.byte		0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+	.byte		0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+	.byte		0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+	.byte		0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+	.byte		0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+	.byte		0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+	.byte		0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+	.byte		0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+	.byte		0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+	.byte		0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+	.byte		0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+	.byte		0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+	.byte		0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+	.byte		0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+	.byte		0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+	.byte		0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+	.byte		0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+	.byte		0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+	.byte		0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+	.byte		0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+	.byte		0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+	.byte		0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+	.byte		0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+
+.LReverse_Sbox:
+	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d

From 0680e92ef3a88e4168aee4cf1b95130373f25b25 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 16 Jun 2014 11:02:15 +0100
Subject: [PATCH 0203/1185] arm64/crypto: fix data corruption bug in GHASH
 algorithm

This fixes a bug in the GHASH algorithm that causes the calculated hash to be
incorrect if the input is presented in chunks whose size is not a multiple of
16 bytes.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Fixes: fdd2389457b2 ("arm64/crypto: GHASH secure hash using ARMv8 Crypto Extensions")
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/crypto/ghash-ce-glue.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index b92baf3f68c7..ef6aa69c4e0c 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -72,6 +72,7 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
 				   partial ? ctx->buf : NULL);
 		kernel_neon_end();
 		src += blocks * GHASH_BLOCK_SIZE;
+		partial = 0;
 	}
 	if (len)
 		memcpy(ctx->buf + partial, src, len);

From 945ce0a7bdc63b712f332848c51d5505841d50e0 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 16 Jun 2014 11:02:16 +0100
Subject: [PATCH 0204/1185] arm64/crypto: improve performance of GHASH
 algorithm

This patch modifies the GHASH secure hash implementation to switch to a
faster, polynomial multiplication based reduction instead of one that uses
shifts and rotates.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/crypto/ghash-ce-core.S | 96 +++++++++++++------------------
 arch/arm64/crypto/ghash-ce-glue.c |  4 +-
 2 files changed, 42 insertions(+), 58 deletions(-)

diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index b9e6eaf41c9b..dc457015884e 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -3,14 +3,6 @@
  *
  * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
- * Based on arch/x86/crypto/ghash-pmullni-intel_asm.S
- *
- * Copyright (c) 2009 Intel Corp.
- *   Author: Huang Ying <ying.huang@intel.com>
- *           Vinodh Gopal
- *           Erdinc Ozturk
- *           Deniz Karakoyunlu
- *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
  * by the Free Software Foundation.
@@ -19,13 +11,15 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-	DATA	.req	v0
-	SHASH	.req	v1
-	IN1	.req	v2
+	SHASH	.req	v0
+	SHASH2	.req	v1
 	T1	.req	v2
 	T2	.req	v3
-	T3	.req	v4
-	VZR	.req	v5
+	MASK	.req	v4
+	XL	.req	v5
+	XM	.req	v6
+	XH	.req	v7
+	IN1	.req	v7
 
 	.text
 	.arch		armv8-a+crypto
@@ -35,61 +29,51 @@
 	 *			   struct ghash_key const *k, const char *head)
 	 */
 ENTRY(pmull_ghash_update)
-	ld1		{DATA.16b}, [x1]
 	ld1		{SHASH.16b}, [x3]
-	eor		VZR.16b, VZR.16b, VZR.16b
+	ld1		{XL.16b}, [x1]
+	movi		MASK.16b, #0xe1
+	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
+	shl		MASK.2d, MASK.2d, #57
+	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
 
 	/* do the head block first, if supplied */
 	cbz		x4, 0f
-	ld1		{IN1.2d}, [x4]
+	ld1		{T1.2d}, [x4]
 	b		1f
 
-0:	ld1		{IN1.2d}, [x2], #16
+0:	ld1		{T1.2d}, [x2], #16
 	sub		w0, w0, #1
-1:	ext		IN1.16b, IN1.16b, IN1.16b, #8
-CPU_LE(	rev64		IN1.16b, IN1.16b	)
-	eor		DATA.16b, DATA.16b, IN1.16b
 
-	/* multiply DATA by SHASH in GF(2^128) */
-	ext		T2.16b, DATA.16b, DATA.16b, #8
-	ext		T3.16b, SHASH.16b, SHASH.16b, #8
-	eor		T2.16b, T2.16b, DATA.16b
-	eor		T3.16b, T3.16b, SHASH.16b
+1:	/* multiply XL by SHASH in GF(2^128) */
+CPU_LE(	rev64		T1.16b, T1.16b	)
 
-	pmull2		T1.1q, SHASH.2d, DATA.2d	// a1 * b1
-	pmull		DATA.1q, SHASH.1d, DATA.1d	// a0 * b0
-	pmull		T2.1q, T2.1d, T3.1d		// (a1 + a0)(b1 + b0)
-	eor		T2.16b, T2.16b, T1.16b		// (a0 * b1) + (a1 * b0)
-	eor		T2.16b, T2.16b, DATA.16b
-
-	ext		T3.16b, VZR.16b, T2.16b, #8
-	ext		T2.16b, T2.16b, VZR.16b, #8
-	eor		DATA.16b, DATA.16b, T3.16b
-	eor		T1.16b, T1.16b, T2.16b	// <T1:DATA> is result of
-						// carry-less multiplication
-
-	/* first phase of the reduction */
-	shl		T3.2d, DATA.2d, #1
-	eor		T3.16b, T3.16b, DATA.16b
-	shl		T3.2d, T3.2d, #5
-	eor		T3.16b, T3.16b, DATA.16b
-	shl		T3.2d, T3.2d, #57
-	ext		T2.16b, VZR.16b, T3.16b, #8
-	ext		T3.16b, T3.16b, VZR.16b, #8
-	eor		DATA.16b, DATA.16b, T2.16b
-	eor		T1.16b, T1.16b, T3.16b
-
-	/* second phase of the reduction */
-	ushr		T2.2d, DATA.2d, #5
-	eor		T2.16b, T2.16b, DATA.16b
-	ushr		T2.2d, T2.2d, #1
-	eor		T2.16b, T2.16b, DATA.16b
-	ushr		T2.2d, T2.2d, #1
+	ext		T2.16b, XL.16b, XL.16b, #8
+	ext		IN1.16b, T1.16b, T1.16b, #8
 	eor		T1.16b, T1.16b, T2.16b
-	eor		DATA.16b, DATA.16b, T1.16b
+	eor		XL.16b, XL.16b, IN1.16b
+
+	pmull2		XH.1q, SHASH.2d, XL.2d		// a1 * b1
+	eor		T1.16b, T1.16b, XL.16b
+	pmull		XL.1q, SHASH.1d, XL.1d		// a0 * b0
+	pmull		XM.1q, SHASH2.1d, T1.1d		// (a1 + a0)(b1 + b0)
+
+	ext		T1.16b, XL.16b, XH.16b, #8
+	eor		T2.16b, XL.16b, XH.16b
+	eor		XM.16b, XM.16b, T1.16b
+	eor		XM.16b, XM.16b, T2.16b
+	pmull		T2.1q, XL.1d, MASK.1d
+
+	mov		XH.d[0], XM.d[1]
+	mov		XM.d[1], XL.d[0]
+
+	eor		XL.16b, XM.16b, T2.16b
+	ext		T2.16b, XL.16b, XL.16b, #8
+	pmull		XL.1q, XL.1d, MASK.1d
+	eor		T2.16b, T2.16b, XH.16b
+	eor		XL.16b, XL.16b, T2.16b
 
 	cbnz		w0, 0b
 
-	st1		{DATA.16b}, [x1]
+	st1		{XL.16b}, [x1]
 	ret
 ENDPROC(pmull_ghash_update)
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index ef6aa69c4e0c..833ec1e3f3e9 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -67,7 +67,7 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
 		blocks = len / GHASH_BLOCK_SIZE;
 		len %= GHASH_BLOCK_SIZE;
 
-		kernel_neon_begin_partial(6);
+		kernel_neon_begin_partial(8);
 		pmull_ghash_update(blocks, ctx->digest, src, key,
 				   partial ? ctx->buf : NULL);
 		kernel_neon_end();
@@ -89,7 +89,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 
 		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
 
-		kernel_neon_begin_partial(6);
+		kernel_neon_begin_partial(8);
 		pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
 		kernel_neon_end();
 	}

From 181dbc7cc2ba7135f08aa8647b7817cc37b8ffa7 Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@suse.de>
Date: Thu, 24 Jul 2014 17:03:26 +0100
Subject: [PATCH 0205/1185] arm64/crypto: fix makefile rule for aes-glue-%.o

This fixes the following build failure when building with CONFIG_MODVERSIONS
enabled:

  CC [M]  arch/arm64/crypto/aes-glue-ce.o
ld: cannot find arch/arm64/crypto/aes-glue-ce.o: No such file or directory
make[1]: *** [arch/arm64/crypto/aes-ce-blk.o] Error 1
make: *** [arch/arm64/crypto] Error 2

The $(obj)/aes-glue-%.o rule only creates $(obj)/.tmp_aes-glue-ce.o, it
should use if_changed_rule instead of if_changed_dep.

Signed-off-by: Andreas Schwab <schwab@suse.de>
[ardb: mention CONFIG_MODVERSIONS in commit log]
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/crypto/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 2070a56ecc46..a3f935fde975 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -35,4 +35,4 @@ AFLAGS_aes-neon.o	:= -DINTERLEAVE=4
 CFLAGS_aes-glue-ce.o	:= -DUSE_V8_CRYPTO_EXTENSIONS
 
 $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
-	$(call if_changed_dep,cc_o_c)
+	$(call if_changed_rule,cc_o_c)

From be64e76c02d51b5323f228da3575b92ae03e25ce Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 25 Jul 2014 19:40:20 -0400
Subject: [PATCH 0206/1185] crypto: arm64-aes - fix encryption of unaligned
 data

cryptsetup fails on arm64 when using kernel encryption via AF_ALG socket.
See https://bugzilla.redhat.com/show_bug.cgi?id=1122937

The bug is caused by incorrect handling of unaligned data in
arch/arm64/crypto/aes-glue.c. Cryptsetup creates a buffer that is aligned
on 8 bytes, but not on 16 bytes. It opens AF_ALG socket and uses the
socket to encrypt data in the buffer. The arm64 crypto accelerator causes
data corruption or crashes in the scatterwalk_pagedone.

This patch fixes the bug by passing the residue bytes that were not
processed as the last parameter to blkcipher_walk_done.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/aes-glue.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 60f2f4c12256..79cd911ef88c 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -106,7 +106,7 @@ static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key_enc, rounds, blocks, first);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
@@ -128,7 +128,7 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key_dec, rounds, blocks, first);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
@@ -151,7 +151,7 @@ static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
 				first);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
@@ -174,7 +174,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key_dec, rounds, blocks, walk.iv,
 				first);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
@@ -243,7 +243,7 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key1.key_enc, rounds, blocks,
 				(u8 *)ctx->key2.key_enc, walk.iv, first);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 
@@ -267,7 +267,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key1.key_dec, rounds, blocks,
 				(u8 *)ctx->key2.key_enc, walk.iv, first);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 

From 2f5acd7615cca9aa638cb54d69706bb4ac21fa94 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 20 Sep 2013 09:55:40 +0200
Subject: [PATCH 0207/1185] crypto: create generic version of ablk_helper

Create a generic version of ablk_helper so it can be reused
by other architectures.

Acked-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig               |   4 +
 crypto/Makefile              |   1 +
 crypto/ablk_helper.c         | 150 +++++++++++++++++++++++++++++++++++
 include/asm-generic/simd.h   |  14 ++++
 include/crypto/ablk_helper.h |  31 ++++++++
 5 files changed, 200 insertions(+)
 create mode 100644 crypto/ablk_helper.c
 create mode 100644 include/asm-generic/simd.h
 create mode 100644 include/crypto/ablk_helper.h

diff --git a/crypto/Kconfig b/crypto/Kconfig
index bf8148e74e73..a1eba1845367 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -179,6 +179,10 @@ config CRYPTO_ABLK_HELPER_X86
 	depends on X86
 	select CRYPTO_CRYPTD
 
+config CRYPTO_ABLK_HELPER
+	tristate
+	select CRYPTO_CRYPTD
+
 config CRYPTO_GLUE_HELPER_X86
 	tristate
 	depends on X86
diff --git a/crypto/Makefile b/crypto/Makefile
index a8e9b0fefbe9..5d0b869b173f 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -101,3 +101,4 @@ obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
 obj-$(CONFIG_XOR_BLOCKS) += xor.o
 obj-$(CONFIG_ASYNC_CORE) += async_tx/
 obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/
+obj-$(CONFIG_CRYPTO_ABLK_HELPER) += ablk_helper.o
diff --git a/crypto/ablk_helper.c b/crypto/ablk_helper.c
new file mode 100644
index 000000000000..62568b1fc885
--- /dev/null
+++ b/crypto/ablk_helper.c
@@ -0,0 +1,150 @@
+/*
+ * Shared async block cipher helpers
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * Based on aesni-intel_glue.c by:
+ *  Copyright (C) 2008, Intel Corp.
+ *    Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <crypto/algapi.h>
+#include <crypto/cryptd.h>
+#include <crypto/ablk_helper.h>
+#include <asm/simd.h>
+
+int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
+		 unsigned int key_len)
+{
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
+	int err;
+
+	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
+				    & CRYPTO_TFM_REQ_MASK);
+	err = crypto_ablkcipher_setkey(child, key, key_len);
+	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
+				    & CRYPTO_TFM_RES_MASK);
+	return err;
+}
+EXPORT_SYMBOL_GPL(ablk_set_key);
+
+int __ablk_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct blkcipher_desc desc;
+
+	desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+	desc.info = req->info;
+	desc.flags = 0;
+
+	return crypto_blkcipher_crt(desc.tfm)->encrypt(
+		&desc, req->dst, req->src, req->nbytes);
+}
+EXPORT_SYMBOL_GPL(__ablk_encrypt);
+
+int ablk_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (!may_use_simd()) {
+		struct ablkcipher_request *cryptd_req =
+			ablkcipher_request_ctx(req);
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+
+		return crypto_ablkcipher_encrypt(cryptd_req);
+	} else {
+		return __ablk_encrypt(req);
+	}
+}
+EXPORT_SYMBOL_GPL(ablk_encrypt);
+
+int ablk_decrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (!may_use_simd()) {
+		struct ablkcipher_request *cryptd_req =
+			ablkcipher_request_ctx(req);
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+
+		return crypto_ablkcipher_decrypt(cryptd_req);
+	} else {
+		struct blkcipher_desc desc;
+
+		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+		desc.info = req->info;
+		desc.flags = 0;
+
+		return crypto_blkcipher_crt(desc.tfm)->decrypt(
+			&desc, req->dst, req->src, req->nbytes);
+	}
+}
+EXPORT_SYMBOL_GPL(ablk_decrypt);
+
+void ablk_exit(struct crypto_tfm *tfm)
+{
+	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cryptd_free_ablkcipher(ctx->cryptd_tfm);
+}
+EXPORT_SYMBOL_GPL(ablk_exit);
+
+int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
+{
+	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct cryptd_ablkcipher *cryptd_tfm;
+
+	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+
+	ctx->cryptd_tfm = cryptd_tfm;
+	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
+		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ablk_init_common);
+
+int ablk_init(struct crypto_tfm *tfm)
+{
+	char drv_name[CRYPTO_MAX_ALG_NAME];
+
+	snprintf(drv_name, sizeof(drv_name), "__driver-%s",
+					crypto_tfm_alg_driver_name(tfm));
+
+	return ablk_init_common(tfm, drv_name);
+}
+EXPORT_SYMBOL_GPL(ablk_init);
+
+MODULE_LICENSE("GPL");
diff --git a/include/asm-generic/simd.h b/include/asm-generic/simd.h
new file mode 100644
index 000000000000..f57eb7b5c23b
--- /dev/null
+++ b/include/asm-generic/simd.h
@@ -0,0 +1,14 @@
+
+#include <linux/hardirq.h>
+
+/*
+ * may_use_simd - whether it is allowable at this time to issue SIMD
+ *                instructions or access the SIMD register file
+ *
+ * As architectures typically don't preserve the SIMD register file when
+ * taking an interrupt, !in_interrupt() should be a reasonable default.
+ */
+static __must_check inline bool may_use_simd(void)
+{
+	return !in_interrupt();
+}
diff --git a/include/crypto/ablk_helper.h b/include/crypto/ablk_helper.h
new file mode 100644
index 000000000000..4f93df50c23e
--- /dev/null
+++ b/include/crypto/ablk_helper.h
@@ -0,0 +1,31 @@
+/*
+ * Shared async block cipher helpers
+ */
+
+#ifndef _CRYPTO_ABLK_HELPER_H
+#define _CRYPTO_ABLK_HELPER_H
+
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <crypto/cryptd.h>
+
+struct async_helper_ctx {
+	struct cryptd_ablkcipher *cryptd_tfm;
+};
+
+extern int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
+			unsigned int key_len);
+
+extern int __ablk_encrypt(struct ablkcipher_request *req);
+
+extern int ablk_encrypt(struct ablkcipher_request *req);
+
+extern int ablk_decrypt(struct ablkcipher_request *req);
+
+extern void ablk_exit(struct crypto_tfm *tfm);
+
+extern int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name);
+
+extern int ablk_init(struct crypto_tfm *tfm);
+
+#endif /* _CRYPTO_ABLK_HELPER_H */

From 59472af5848e3063c149baa92a35882ce3f6f291 Mon Sep 17 00:00:00 2001
From: kbuild test robot <fengguang.wu@intel.com>
Date: Tue, 24 Sep 2013 08:21:29 +0800
Subject: [PATCH 0208/1185] crypto: ablk_helper - Replace memcpy with struct
 assignment

tree:   git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master
head:   48e6dc1b2a1ad8186d48968d5018912bdacac744
commit: a62b01cd6cc1feb5e80d64d6937c291473ed82cb [20/24] crypto: create generic version of ablk_helper

coccinelle warnings: (new ones prefixed by >>)

>> crypto/ablk_helper.c:97:2-8: Replace memcpy with struct assignment
>> crypto/ablk_helper.c:78:2-8: Replace memcpy with struct assignment

Please consider folding the attached diff :-)

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/ablk_helper.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crypto/ablk_helper.c b/crypto/ablk_helper.c
index 62568b1fc885..ffe7278d4bd8 100644
--- a/crypto/ablk_helper.c
+++ b/crypto/ablk_helper.c
@@ -75,7 +75,7 @@ int ablk_encrypt(struct ablkcipher_request *req)
 		struct ablkcipher_request *cryptd_req =
 			ablkcipher_request_ctx(req);
 
-		memcpy(cryptd_req, req, sizeof(*req));
+		*cryptd_req = *req;
 		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
 
 		return crypto_ablkcipher_encrypt(cryptd_req);
@@ -94,7 +94,7 @@ int ablk_decrypt(struct ablkcipher_request *req)
 		struct ablkcipher_request *cryptd_req =
 			ablkcipher_request_ctx(req);
 
-		memcpy(cryptd_req, req, sizeof(*req));
+		*cryptd_req = *req;
 		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
 
 		return crypto_ablkcipher_decrypt(cryptd_req);

From 197028d295db718f4e5cf8af9038cfeb4823b4ef Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Sat, 8 Feb 2014 13:34:09 +0100
Subject: [PATCH 0209/1185] cpu: add generic support for CPU feature based
 module autoloading

This patch adds support for advertising optional CPU features over udev
using the modalias, and for declaring compatibility with/dependency upon
such a feature in a module.

The mapping between feature numbers and actual features should be provided
by the architecture in a file called <asm/cpufeature.h> which exports the
following functions/macros:
- cpu_feature(FEAT), a preprocessor macro that maps token FEAT to a
  numeric index;
- bool cpu_have_feature(n), returning whether this CPU has support for
  feature #n;
- MAX_CPU_FEATURES, an upper bound for 'n' in the previous function.

The feature can then be enabled by setting CONFIG_GENERIC_CPU_AUTOPROBE
for the architecture.

For instance, a module that registers its module init function using

  module_cpu_feature_match(FEAT_X, module_init_function)

will be probed automatically when the CPU's support for the 'FEAT_X'
feature is advertised over udev, and can only be loaded by hand if the
'FEAT_X' feature is supported.

Change-Id: Icae8e3ff347235fc72a5b41279f0afdb34fb161a
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/Kconfig              |  8 +++++
 drivers/base/cpu.c                | 48 ++++++++++++++++++++++---
 include/linux/cpufeature.h        | 60 +++++++++++++++++++++++++++++++
 include/linux/mod_devicetable.h   |  9 +++++
 scripts/mod/devicetable-offsets.c |  3 ++
 scripts/mod/file2alias.c          | 10 ++++++
 6 files changed, 134 insertions(+), 4 deletions(-)
 create mode 100644 include/linux/cpufeature.h

diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 07abd9d76f7f..9634800e800f 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -187,6 +187,14 @@ config GENERIC_CPU_DEVICES
 	bool
 	default n
 
+config HAVE_CPU_AUTOPROBE
+	def_bool ARCH_HAS_CPU_AUTOPROBE
+
+config GENERIC_CPU_AUTOPROBE
+	bool
+	depends on !ARCH_HAS_CPU_AUTOPROBE
+	select HAVE_CPU_AUTOPROBE
+
 config SOC_BUS
 	bool
 
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 3d48fc887ef4..b5668501e3d3 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -13,6 +13,7 @@
 #include <linux/gfp.h>
 #include <linux/slab.h>
 #include <linux/percpu.h>
+#include <linux/cpufeature.h>
 
 #include "base.h"
 
@@ -260,6 +261,45 @@ static void cpu_device_release(struct device *dev)
 	 */
 }
 
+#ifdef CONFIG_HAVE_CPU_AUTOPROBE
+#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
+static ssize_t print_cpu_modalias(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	ssize_t n;
+	u32 i;
+
+	n = sprintf(buf, "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:",
+		    CPU_FEATURE_TYPEVAL);
+
+	for (i = 0; i < MAX_CPU_FEATURES; i++)
+		if (cpu_have_feature(i)) {
+			if (PAGE_SIZE < n + sizeof(",XXXX\n")) {
+				WARN(1, "CPU features overflow page\n");
+				break;
+			}
+			n += sprintf(&buf[n], ",%04X", i);
+		}
+	buf[n++] = '\n';
+	return n;
+}
+#else
+#define print_cpu_modalias	arch_print_cpu_modalias
+#endif
+
+static int cpu_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (buf) {
+		print_cpu_modalias(NULL, NULL, buf);
+		add_uevent_var(env, "MODALIAS=%s", buf);
+		kfree(buf);
+	}
+	return 0;
+}
+#endif
+
 /*
  * register_cpu - Setup a sysfs device for a CPU.
  * @cpu - cpu->hotpluggable field set to 1 will generate a control file in
@@ -278,7 +318,7 @@ int __cpuinit register_cpu(struct cpu *cpu, int num)
 	cpu->dev.bus = &cpu_subsys;
 	cpu->dev.release = cpu_device_release;
 #ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE
-	cpu->dev.bus->uevent = arch_cpu_uevent;
+	cpu->dev.bus->uevent = cpu_uevent;
 #endif
 	error = device_register(&cpu->dev);
 	if (!error && cpu->hotpluggable)
@@ -307,8 +347,8 @@ struct device *get_cpu_device(unsigned cpu)
 }
 EXPORT_SYMBOL_GPL(get_cpu_device);
 
-#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE
-static DEVICE_ATTR(modalias, 0444, arch_print_cpu_modalias, NULL);
+#ifdef CONFIG_HAVE_CPU_AUTOPROBE
+static DEVICE_ATTR(modalias, 0444, print_cpu_modalias, NULL);
 #endif
 
 static struct attribute *cpu_root_attrs[] = {
@@ -321,7 +361,7 @@ static struct attribute *cpu_root_attrs[] = {
 	&cpu_attrs[2].attr.attr,
 	&dev_attr_kernel_max.attr,
 	&dev_attr_offline.attr,
-#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE
+#ifdef CONFIG_HAVE_CPU_AUTOPROBE
 	&dev_attr_modalias.attr,
 #endif
 	NULL
diff --git a/include/linux/cpufeature.h b/include/linux/cpufeature.h
new file mode 100644
index 000000000000..c4d4eb8ac9fe
--- /dev/null
+++ b/include/linux/cpufeature.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_CPUFEATURE_H
+#define __LINUX_CPUFEATURE_H
+
+#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
+
+#include <linux/mod_devicetable.h>
+#include <asm/cpufeature.h>
+
+/*
+ * Macros imported from <asm/cpufeature.h>:
+ * - cpu_feature(x)		ordinal value of feature called 'x'
+ * - cpu_have_feature(u32 n)	whether feature #n is available
+ * - MAX_CPU_FEATURES		upper bound for feature ordinal values
+ * Optional:
+ * - CPU_FEATURE_TYPEFMT	format string fragment for printing the cpu type
+ * - CPU_FEATURE_TYPEVAL	set of values matching the format string above
+ */
+
+#ifndef CPU_FEATURE_TYPEFMT
+#define CPU_FEATURE_TYPEFMT	"%s"
+#endif
+
+#ifndef CPU_FEATURE_TYPEVAL
+#define CPU_FEATURE_TYPEVAL	ELF_PLATFORM
+#endif
+
+/*
+ * Use module_cpu_feature_match(feature, module_init_function) to
+ * declare that
+ * a) the module shall be probed upon discovery of CPU feature 'feature'
+ *    (typically at boot time using udev)
+ * b) the module must not be loaded if CPU feature 'feature' is not present
+ *    (not even by manual insmod).
+ *
+ * For a list of legal values for 'feature', please consult the file
+ * 'asm/cpufeature.h' of your favorite architecture.
+ */
+#define module_cpu_feature_match(x, __init)			\
+static struct cpu_feature const cpu_feature_match_ ## x[] =	\
+	{ { .feature = cpu_feature(x) }, { } };			\
+MODULE_DEVICE_TABLE(cpu, cpu_feature_match_ ## x);		\
+								\
+static int cpu_feature_match_ ## x ## _init(void)		\
+{								\
+	if (!cpu_have_feature(cpu_feature(x)))			\
+		return -ENODEV;					\
+	return __init();					\
+}								\
+module_init(cpu_feature_match_ ## x ## _init)
+
+#endif
+#endif
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index b508016fb76d..e2c55f297d0b 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -561,6 +561,15 @@ struct x86_cpu_id {
 #define X86_MODEL_ANY  0
 #define X86_FEATURE_ANY 0	/* Same as FPU, you can't test for that */
 
+/*
+ * Generic table type for matching CPU features.
+ * @feature:	the bit number of the feature (0 - 65535)
+ */
+
+struct cpu_feature {
+	__u16	feature;
+};
+
 #define IPACK_ANY_FORMAT 0xff
 #define IPACK_ANY_ID (~0)
 struct ipack_device_id {
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index e66d4d258e1a..5e74595f2e85 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -174,6 +174,9 @@ int main(void)
 	DEVID_FIELD(x86_cpu_id, model);
 	DEVID_FIELD(x86_cpu_id, vendor);
 
+	DEVID(cpu_feature);
+	DEVID_FIELD(cpu_feature, feature);
+
 	DEVID(mei_cl_device_id);
 	DEVID_FIELD(mei_cl_device_id, name);
 
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 45f9a3377dcd..8a9612f17cb0 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -1133,6 +1133,16 @@ static int do_x86cpu_entry(const char *filename, void *symval,
 }
 ADD_TO_DEVTABLE("x86cpu", x86_cpu_id, do_x86cpu_entry);
 
+/* LOOKS like cpu:type:*:feature:*FEAT* */
+static int do_cpu_entry(const char *filename, void *symval, char *alias)
+{
+	DEF_FIELD(symval, cpu_feature, feature);
+
+	sprintf(alias, "cpu:type:*:feature:*%04X*", feature);
+	return 1;
+}
+ADD_TO_DEVTABLE("cpu", cpu_feature, do_cpu_entry);
+
 /* Looks like: mei:S */
 static int do_mei_entry(const char *filename, void *symval,
 			char *alias)

From f11a3d1bc1528e0a01dde578c2ab88c70b3fbeff Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Feb 2014 15:26:27 +0100
Subject: [PATCH 0210/1185] arm64: add abstractions for FPSIMD state
 manipulation

There are two tacit assumptions in the FPSIMD handling code that will no longer
hold after the next patch that optimizes away some FPSIMD state restores:
. the FPSIMD registers of this CPU contain the userland FPSIMD state of
  task 'current';
. when switching to a task, its FPSIMD state will always be restored from
  memory.

This patch adds the following functions to abstract away from straight FPSIMD
register file saves and restores:
- fpsimd_preserve_current_state -> ensure current's FPSIMD state is saved
- fpsimd_update_current_state -> replace current's FPSIMD state

Where necessary, the signal handling and fork code are updated to use the above
wrappers instead of poking into the FPSIMD registers directly.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

Conflicts:
	arch/arm64/kernel/fpsimd.c

Change-Id: I53ae7082427cb1c5cc32e1f2ddbd4218115601ba
---
 arch/arm64/include/asm/fpsimd.h |  3 ++
 arch/arm64/kernel/fpsimd.c      | 80 +++++++++++++++++++++++++++++++++
 arch/arm64/kernel/process.c     |  2 +-
 arch/arm64/kernel/signal.c      |  9 ++--
 arch/arm64/kernel/signal32.c    |  9 ++--
 5 files changed, 90 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index c43b4ac13008..f4e524b67e91 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -58,6 +58,9 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
 extern void fpsimd_thread_switch(struct task_struct *next);
 extern void fpsimd_flush_thread(void);
 
+extern void fpsimd_preserve_current_state(void);
+extern void fpsimd_update_current_state(struct fpsimd_state *state);
+
 #endif
 
 #endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index e8b8357aedb4..135cfb88d2aa 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -83,6 +83,86 @@ void fpsimd_flush_thread(void)
 	fpsimd_load_state(&current->thread.fpsimd_state);
 }
 
+/*
+ * Save the userland FPSIMD state of 'current' to memory
+ */
+void fpsimd_preserve_current_state(void)
+{
+	preempt_disable();
+	fpsimd_save_state(&current->thread.fpsimd_state);
+	preempt_enable();
+}
+
+/*
+ * Load an updated userland FPSIMD state for 'current' from memory
+ */
+void fpsimd_update_current_state(struct fpsimd_state *state)
+{
+	preempt_disable();
+	fpsimd_load_state(state);
+	preempt_enable();
+}
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+/*
+ * Kernel-side NEON support functions
+ */
+void kernel_neon_begin(void)
+{
+	/* Avoid using the NEON in interrupt context */
+	BUG_ON(in_interrupt());
+	preempt_disable();
+
+	if (current->mm)
+		fpsimd_save_state(&current->thread.fpsimd_state);
+}
+EXPORT_SYMBOL(kernel_neon_begin);
+
+void kernel_neon_end(void)
+{
+	if (current->mm)
+		fpsimd_load_state(&current->thread.fpsimd_state);
+
+	preempt_enable();
+}
+EXPORT_SYMBOL(kernel_neon_end);
+
+#endif /* CONFIG_KERNEL_MODE_NEON */
+
+#ifdef CONFIG_CPU_PM
+static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
+				  unsigned long cmd, void *v)
+{
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		if (current->mm)
+			fpsimd_save_state(&current->thread.fpsimd_state);
+		break;
+	case CPU_PM_EXIT:
+		if (current->mm)
+			fpsimd_load_state(&current->thread.fpsimd_state);
+		break;
+	case CPU_PM_ENTER_FAILED:
+	default:
+		return NOTIFY_DONE;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block fpsimd_cpu_pm_notifier_block = {
+	.notifier_call = fpsimd_cpu_pm_notifier,
+};
+
+static void fpsimd_pm_init(void)
+{
+	cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block);
+}
+
+#else
+static inline void fpsimd_pm_init(void) { }
+#endif /* CONFIG_CPU_PM */
+
 /*
  * FP/SIMD support code initialisation.
  */
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 46f02c3b5015..e2eb9453d3a1 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -184,7 +184,7 @@ void release_thread(struct task_struct *dead_task)
 
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
-	fpsimd_save_state(&current->thread.fpsimd_state);
+	fpsimd_preserve_current_state();
 	*dst = *src;
 	return 0;
 }
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 890a591f75dd..06448a77ff53 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -51,7 +51,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
 	int err;
 
 	/* dump the hardware registers to the fpsimd_state structure */
-	fpsimd_save_state(fpsimd);
+	fpsimd_preserve_current_state();
 
 	/* copy the FP and status/control registers */
 	err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs));
@@ -86,11 +86,8 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
 	__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
 
 	/* load the hardware registers from the fpsimd_state structure */
-	if (!err) {
-		preempt_disable();
-		fpsimd_load_state(&fpsimd);
-		preempt_enable();
-	}
+	if (!err)
+		fpsimd_update_current_state(&fpsimd);
 
 	return err ? -EFAULT : 0;
 }
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index e393174fe859..31fbeaf4dd62 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -247,7 +247,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
 	 * Note that this also saves V16-31, which aren't visible
 	 * in AArch32.
 	 */
-	fpsimd_save_state(fpsimd);
+	fpsimd_preserve_current_state();
 
 	/* Place structure header on the stack */
 	__put_user_error(magic, &frame->magic, err);
@@ -310,11 +310,8 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
 	 * We don't need to touch the exception register, so
 	 * reload the hardware state.
 	 */
-	if (!err) {
-		preempt_disable();
-		fpsimd_load_state(&fpsimd);
-		preempt_enable();
-	}
+	if (!err)
+		fpsimd_update_current_state(&fpsimd);
 
 	return err ? -EFAULT : 0;
 }

From 74780f86ec6464edee0a87309da342f6237a6138 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 8 May 2014 11:20:23 +0200
Subject: [PATCH 0211/1185] arm64: defer reloading a task's FPSIMD state to
 userland resume

If a task gets scheduled out and back in again and nothing has touched
its FPSIMD state in the meantime, there is really no reason to reload
it from memory. Similarly, repeated calls to kernel_neon_begin() and
kernel_neon_end() will preserve and restore the FPSIMD state every time.

This patch defers the FPSIMD state restore to the last possible moment,
i.e., right before the task returns to userland. If a task does not return to
userland at all (for any reason), the existing FPSIMD state is preserved
and may be reused by the owning task if it gets scheduled in again on the
same CPU.

This patch adds two more functions to abstract away from straight FPSIMD
register file saves and restores:
- fpsimd_restore_current_state -> ensure current's FPSIMD state is loaded
- fpsimd_flush_task_state -> invalidate live copies of a task's FPSIMD state

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

Conflicts:
	arch/arm64/kernel/fpsimd.c

Change-Id: Ib1c0d8d0afb3c248cd4d060eb35877530dd92fdc
---
 arch/arm64/include/asm/fpsimd.h      |   5 +
 arch/arm64/include/asm/thread_info.h |   4 +-
 arch/arm64/kernel/entry.S            |   2 +-
 arch/arm64/kernel/fpsimd.c           | 142 ++++++++++++++++++++++++---
 arch/arm64/kernel/ptrace.c           |   2 +
 arch/arm64/kernel/signal.c           |   4 +
 6 files changed, 143 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index f4e524b67e91..7a900142dbc8 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -37,6 +37,8 @@ struct fpsimd_state {
 			u32 fpcr;
 		};
 	};
+	/* the id of the last cpu to have restored this state */
+	unsigned int cpu;
 };
 
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
@@ -59,8 +61,11 @@ extern void fpsimd_thread_switch(struct task_struct *next);
 extern void fpsimd_flush_thread(void);
 
 extern void fpsimd_preserve_current_state(void);
+extern void fpsimd_restore_current_state(void);
 extern void fpsimd_update_current_state(struct fpsimd_state *state);
 
+extern void fpsimd_flush_task_state(struct task_struct *target);
+
 #endif
 
 #endif
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 3659e460071d..5e95a6ce074a 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -106,6 +106,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_SIGPENDING		0
 #define TIF_NEED_RESCHED	1
 #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
+#define TIF_FOREIGN_FPSTATE	3	/* CPU's FP state is not current's */
 #define TIF_SYSCALL_TRACE	8
 #define TIF_POLLING_NRFLAG	16
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
@@ -118,10 +119,11 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
+#define _TIF_FOREIGN_FPSTATE	(1 << TIF_FOREIGN_FPSTATE)
 #define _TIF_32BIT		(1 << TIF_32BIT)
 
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
-				 _TIF_NOTIFY_RESUME)
+				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 1146e6f40a6b..0b65510230bb 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -587,7 +587,7 @@ fast_work_pending:
 	str	x0, [sp, #S_X0]			// returned x0
 work_pending:
 	tbnz	x1, #TIF_NEED_RESCHED, work_resched
-	/* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */
+	/* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
 	ldr	x2, [sp, #S_PSTATE]
 	mov	x0, sp				// 'regs'
 	tst	x2, #PSR_MODE_MASK		// user mode regs?
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 135cfb88d2aa..52eb7d7782fe 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -32,6 +32,60 @@
 #define FPEXC_IXF	(1 << 4)
 #define FPEXC_IDF	(1 << 7)
 
+/*
+ * In order to reduce the number of times the FPSIMD state is needlessly saved
+ * and restored, we need to keep track of two things:
+ * (a) for each task, we need to remember which CPU was the last one to have
+ *     the task's FPSIMD state loaded into its FPSIMD registers;
+ * (b) for each CPU, we need to remember which task's userland FPSIMD state has
+ *     been loaded into its FPSIMD registers most recently, or whether it has
+ *     been used to perform kernel mode NEON in the meantime.
+ *
+ * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to
+ * the id of the current CPU every time the state is loaded onto a CPU. For (b),
+ * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
+ * address of the userland FPSIMD state of the task that was loaded onto the CPU
+ * the most recently, or NULL if kernel mode NEON has been performed after that.
+ *
+ * With this in place, we no longer have to restore the next FPSIMD state right
+ * when switching between tasks. Instead, we can defer this check to userland
+ * resume, at which time we verify whether the CPU's fpsimd_last_state and the
+ * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we
+ * can omit the FPSIMD restore.
+ *
+ * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
+ * indicate whether or not the userland FPSIMD state of the current task is
+ * present in the registers. The flag is set unless the FPSIMD registers of this
+ * CPU currently contain the most recent userland FPSIMD state of the current
+ * task.
+ *
+ * For a certain task, the sequence may look something like this:
+ * - the task gets scheduled in; if both the task's fpsimd_state.cpu field
+ *   contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
+ *   variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is
+ *   cleared, otherwise it is set;
+ *
+ * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
+ *   userland FPSIMD state is copied from memory to the registers, the task's
+ *   fpsimd_state.cpu field is set to the id of the current CPU, the current
+ *   CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
+ *   TIF_FOREIGN_FPSTATE flag is cleared;
+ *
+ * - the task executes an ordinary syscall; upon return to userland, the
+ *   TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is
+ *   restored;
+ *
+ * - the task executes a syscall which executes some NEON instructions; this is
+ *   preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD
+ *   register contents to memory, clears the fpsimd_last_state per-cpu variable
+ *   and sets the TIF_FOREIGN_FPSTATE flag;
+ *
+ * - the task gets preempted after kernel_neon_end() is called; as we have not
+ *   returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
+ *   whatever is in the FPSIMD registers is not saved to memory, but discarded.
+ */
+static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
+
 /*
  * Trapped FP/ASIMD access.
  */
@@ -70,39 +124,96 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
 
 void fpsimd_thread_switch(struct task_struct *next)
 {
-	/* check if not kernel threads */
-	if (current->mm)
+	/*
+	 * Save the current FPSIMD state to memory, but only if whatever is in
+	 * the registers is in fact the most recent userland FPSIMD state of
+	 * 'current'.
+	 */
+	if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
 		fpsimd_save_state(&current->thread.fpsimd_state);
-	if (next->mm)
-		fpsimd_load_state(&next->thread.fpsimd_state);
+
+	if (next->mm) {
+		/*
+		 * If we are switching to a task whose most recent userland
+		 * FPSIMD state is already in the registers of *this* cpu,
+		 * we can skip loading the state from memory. Otherwise, set
+		 * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
+		 * upon the next return to userland.
+		 */
+		struct fpsimd_state *st = &next->thread.fpsimd_state;
+
+		if (__this_cpu_read(fpsimd_last_state) == st
+		    && st->cpu == smp_processor_id())
+			clear_ti_thread_flag(task_thread_info(next),
+					     TIF_FOREIGN_FPSTATE);
+		else
+			set_ti_thread_flag(task_thread_info(next),
+					   TIF_FOREIGN_FPSTATE);
+	}
 }
 
 void fpsimd_flush_thread(void)
 {
 	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
-	fpsimd_load_state(&current->thread.fpsimd_state);
+	set_thread_flag(TIF_FOREIGN_FPSTATE);
 }
 
 /*
- * Save the userland FPSIMD state of 'current' to memory
+ * Save the userland FPSIMD state of 'current' to memory, but only if the state
+ * currently held in the registers does in fact belong to 'current'
  */
 void fpsimd_preserve_current_state(void)
 {
 	preempt_disable();
-	fpsimd_save_state(&current->thread.fpsimd_state);
+	if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
+		fpsimd_save_state(&current->thread.fpsimd_state);
 	preempt_enable();
 }
 
 /*
- * Load an updated userland FPSIMD state for 'current' from memory
+ * Load the userland FPSIMD state of 'current' from memory, but only if the
+ * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
+ * state of 'current'
+ */
+void fpsimd_restore_current_state(void)
+{
+	preempt_disable();
+	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
+		struct fpsimd_state *st = &current->thread.fpsimd_state;
+
+		fpsimd_load_state(st);
+		this_cpu_write(fpsimd_last_state, st);
+		st->cpu = smp_processor_id();
+	}
+	preempt_enable();
+}
+
+/*
+ * Load an updated userland FPSIMD state for 'current' from memory and set the
+ * flag that indicates that the FPSIMD register contents are the most recent
+ * FPSIMD state of 'current'
  */
 void fpsimd_update_current_state(struct fpsimd_state *state)
 {
 	preempt_disable();
 	fpsimd_load_state(state);
+	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
+		struct fpsimd_state *st = &current->thread.fpsimd_state;
+
+		this_cpu_write(fpsimd_last_state, st);
+		st->cpu = smp_processor_id();
+	}
 	preempt_enable();
 }
 
+/*
+ * Invalidate live CPU copies of task t's FPSIMD state
+ */
+void fpsimd_flush_task_state(struct task_struct *t)
+{
+	t->thread.fpsimd_state.cpu = NR_CPUS;
+}
+
 #ifdef CONFIG_KERNEL_MODE_NEON
 
 /*
@@ -114,16 +225,19 @@ void kernel_neon_begin(void)
 	BUG_ON(in_interrupt());
 	preempt_disable();
 
-	if (current->mm)
+	/*
+	 * Save the userland FPSIMD state if we have one and if we haven't done
+	 * so already. Clear fpsimd_last_state to indicate that there is no
+	 * longer userland FPSIMD state in the registers.
+	 */
+	if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
 		fpsimd_save_state(&current->thread.fpsimd_state);
+	this_cpu_write(fpsimd_last_state, NULL);
 }
 EXPORT_SYMBOL(kernel_neon_begin);
 
 void kernel_neon_end(void)
 {
-	if (current->mm)
-		fpsimd_load_state(&current->thread.fpsimd_state);
-
 	preempt_enable();
 }
 EXPORT_SYMBOL(kernel_neon_end);
@@ -136,12 +250,12 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
 {
 	switch (cmd) {
 	case CPU_PM_ENTER:
-		if (current->mm)
+		if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
 			fpsimd_save_state(&current->thread.fpsimd_state);
 		break;
 	case CPU_PM_EXIT:
 		if (current->mm)
-			fpsimd_load_state(&current->thread.fpsimd_state);
+			set_thread_flag(TIF_FOREIGN_FPSTATE);
 		break;
 	case CPU_PM_ENTER_FAILED:
 	default:
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 6e1e77f1831c..aebfc1519e8e 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -541,6 +541,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
 		return ret;
 
 	target->thread.fpsimd_state.user_fpsimd = newstate;
+	fpsimd_flush_task_state(target);
 	return ret;
 }
 
@@ -790,6 +791,7 @@ static int compat_vfp_set(struct task_struct *target,
 		uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
 	}
 
+	fpsimd_flush_task_state(target);
 	return ret;
 }
 
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 06448a77ff53..882f01774365 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -413,4 +413,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
 	}
+
+	if (thread_flags & _TIF_FOREIGN_FPSTATE)
+		fpsimd_restore_current_state();
+
 }

From 9ed72656162f38017af94acf2e986fdb04fecd05 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 24 Feb 2014 15:26:29 +0100
Subject: [PATCH 0212/1185] arm64: add support for kernel mode NEON in
 interrupt context

This patch modifies kernel_neon_begin() and kernel_neon_end(), so
they may be called from any context. To address the case where only
a couple of registers are needed, kernel_neon_begin_partial(u32) is
introduced, which takes as a parameter the number 'n' of bottom NEON
q-registers required. To mark the end of such a partial section, the
regular kernel_neon_end() should be used.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

Conflicts:
	arch/arm64/include/asm/neon.h

Change-Id: Ifc7c6aa77e2ab8dd98bb9975cccab54e09693ab7
---
 arch/arm64/include/asm/fpsimd.h       | 15 +++++++++
 arch/arm64/include/asm/fpsimdmacros.h | 35 +++++++++++++++++++++
 arch/arm64/include/asm/neon.h         | 18 +++++++++++
 arch/arm64/kernel/entry-fpsimd.S      | 24 +++++++++++++++
 arch/arm64/kernel/fpsimd.c            | 44 ++++++++++++++++++---------
 5 files changed, 122 insertions(+), 14 deletions(-)
 create mode 100644 arch/arm64/include/asm/neon.h

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 7a900142dbc8..50f559f574fe 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -41,6 +41,17 @@ struct fpsimd_state {
 	unsigned int cpu;
 };
 
+/*
+ * Struct for stacking the bottom 'n' FP/SIMD registers.
+ */
+struct fpsimd_partial_state {
+	u32		fpsr;
+	u32		fpcr;
+	u32		num_regs;
+	__uint128_t	vregs[32];
+};
+
+
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /* Masks for extracting the FPSR and FPCR from the FPSCR */
 #define VFP_FPSCR_STAT_MASK	0xf800009f
@@ -66,6 +77,10 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state);
 
 extern void fpsimd_flush_task_state(struct task_struct *target);
 
+extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
+				      u32 num_regs);
+extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
+
 #endif
 
 #endif
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index bbec599c96bd..768414d55e64 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -62,3 +62,38 @@
 	ldr	w\tmpnr, [\state, #16 * 2 + 4]
 	msr	fpcr, x\tmpnr
 .endm
+
+.altmacro
+.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
+	mrs	x\tmpnr1, fpsr
+	str	w\numnr, [\state, #8]
+	mrs	x\tmpnr2, fpcr
+	stp	w\tmpnr1, w\tmpnr2, [\state]
+	adr	x\tmpnr1, 0f
+	add	\state, \state, x\numnr, lsl #4
+	sub	x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
+	br	x\tmpnr1
+	.irp	qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
+	.irp	qb, %(qa + 1)
+	stp	q\qa, q\qb, [\state, # -16 * \qa - 16]
+	.endr
+	.endr
+0:
+.endm
+
+.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
+	ldp	w\tmpnr1, w\tmpnr2, [\state]
+	msr	fpsr, x\tmpnr1
+	msr	fpcr, x\tmpnr2
+	adr	x\tmpnr1, 0f
+	ldr	w\tmpnr2, [\state, #8]
+	add	\state, \state, x\tmpnr2, lsl #4
+	sub	x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
+	br	x\tmpnr1
+	.irp	qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
+	.irp	qb, %(qa + 1)
+	ldp	q\qa, q\qb, [\state, # -16 * \qa - 16]
+	.endr
+	.endr
+0:
+.endm
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
new file mode 100644
index 000000000000..13ce4cc18e26
--- /dev/null
+++ b/arch/arm64/include/asm/neon.h
@@ -0,0 +1,18 @@
+/*
+ * linux/arch/arm64/include/asm/neon.h
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+
+#define cpu_has_neon()		(1)
+
+#define kernel_neon_begin()	kernel_neon_begin_partial(32)
+
+void kernel_neon_begin_partial(u32 num_regs);
+void kernel_neon_end(void);
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 6a27cd6dbfa6..d358ccacfc00 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state)
 	fpsimd_restore x0, 8
 	ret
 ENDPROC(fpsimd_load_state)
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+/*
+ * Save the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_save_partial_state)
+	fpsimd_save_partial x0, 1, 8, 9	// numnr in w1; x8/x9 are scratch
+	ret
+ENDPROC(fpsimd_save_partial_state)
+
+/*
+ * Load the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_load_partial_state)
+	fpsimd_restore_partial x0, 8, 9
+	ret
+ENDPROC(fpsimd_load_partial_state)
+
+#endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 52eb7d7782fe..0f27578a7039 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -216,29 +216,45 @@ void fpsimd_flush_task_state(struct task_struct *t)
 
 #ifdef CONFIG_KERNEL_MODE_NEON
 
+static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
+static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
+
 /*
  * Kernel-side NEON support functions
  */
-void kernel_neon_begin(void)
+void kernel_neon_begin_partial(u32 num_regs)
 {
-	/* Avoid using the NEON in interrupt context */
-	BUG_ON(in_interrupt());
-	preempt_disable();
+	if (in_interrupt()) {
+		struct fpsimd_partial_state *s = this_cpu_ptr(
+			in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
 
-	/*
-	 * Save the userland FPSIMD state if we have one and if we haven't done
-	 * so already. Clear fpsimd_last_state to indicate that there is no
-	 * longer userland FPSIMD state in the registers.
-	 */
-	if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
-		fpsimd_save_state(&current->thread.fpsimd_state);
-	this_cpu_write(fpsimd_last_state, NULL);
+		BUG_ON(num_regs > 32);
+		fpsimd_save_partial_state(s, roundup(num_regs, 2));
+	} else {
+		/*
+		 * Save the userland FPSIMD state if we have one and if we
+		 * haven't done so already. Clear fpsimd_last_state to indicate
+		 * that there is no longer userland FPSIMD state in the
+		 * registers.
+		 */
+		preempt_disable();
+		if (current->mm &&
+		    !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
+			fpsimd_save_state(&current->thread.fpsimd_state);
+		this_cpu_write(fpsimd_last_state, NULL);
+	}
 }
-EXPORT_SYMBOL(kernel_neon_begin);
+EXPORT_SYMBOL(kernel_neon_begin_partial);
 
 void kernel_neon_end(void)
 {
-	preempt_enable();
+	if (in_interrupt()) {
+		struct fpsimd_partial_state *s = this_cpu_ptr(
+			in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
+		fpsimd_load_partial_state(s);
+	} else {
+		preempt_enable();
+	}
 }
 EXPORT_SYMBOL(kernel_neon_end);
 

From 29c0c76ffbf9208b6348cef882dd053a6190bb66 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 4 Mar 2014 13:28:38 +0800
Subject: [PATCH 0213/1185] crypto: remove direct blkcipher_walk dependency on
 transform

In order to allow other uses of the blkcipher walk API than the blkcipher
algos themselves, this patch copies some of the transform data members to the
walk struct so the transform is only accessed at walk init time.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/blkcipher.c      | 67 ++++++++++++++++++++---------------------
 include/crypto/algapi.h |  5 ++-
 2 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
index a79e7e9ab86e..46fdab5e9cc7 100644
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
@@ -70,14 +70,12 @@ static inline u8 *blkcipher_get_spot(u8 *start, unsigned int len)
 	return max(start, end_page);
 }
 
-static inline unsigned int blkcipher_done_slow(struct crypto_blkcipher *tfm,
-					       struct blkcipher_walk *walk,
+static inline unsigned int blkcipher_done_slow(struct blkcipher_walk *walk,
 					       unsigned int bsize)
 {
 	u8 *addr;
-	unsigned int alignmask = crypto_blkcipher_alignmask(tfm);
 
-	addr = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1);
+	addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1);
 	addr = blkcipher_get_spot(addr, bsize);
 	scatterwalk_copychunks(addr, &walk->out, bsize, 1);
 	return bsize;
@@ -105,7 +103,6 @@ static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk,
 int blkcipher_walk_done(struct blkcipher_desc *desc,
 			struct blkcipher_walk *walk, int err)
 {
-	struct crypto_blkcipher *tfm = desc->tfm;
 	unsigned int nbytes = 0;
 
 	if (likely(err >= 0)) {
@@ -117,7 +114,7 @@ int blkcipher_walk_done(struct blkcipher_desc *desc,
 			err = -EINVAL;
 			goto err;
 		} else
-			n = blkcipher_done_slow(tfm, walk, n);
+			n = blkcipher_done_slow(walk, n);
 
 		nbytes = walk->total - n;
 		err = 0;
@@ -136,7 +133,7 @@ int blkcipher_walk_done(struct blkcipher_desc *desc,
 	}
 
 	if (walk->iv != desc->info)
-		memcpy(desc->info, walk->iv, crypto_blkcipher_ivsize(tfm));
+		memcpy(desc->info, walk->iv, walk->ivsize);
 	if (walk->buffer != walk->page)
 		kfree(walk->buffer);
 	if (walk->page)
@@ -226,22 +223,20 @@ static inline int blkcipher_next_fast(struct blkcipher_desc *desc,
 static int blkcipher_walk_next(struct blkcipher_desc *desc,
 			       struct blkcipher_walk *walk)
 {
-	struct crypto_blkcipher *tfm = desc->tfm;
-	unsigned int alignmask = crypto_blkcipher_alignmask(tfm);
 	unsigned int bsize;
 	unsigned int n;
 	int err;
 
 	n = walk->total;
-	if (unlikely(n < crypto_blkcipher_blocksize(tfm))) {
+	if (unlikely(n < walk->cipher_blocksize)) {
 		desc->flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
 		return blkcipher_walk_done(desc, walk, -EINVAL);
 	}
 
 	walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY |
 			 BLKCIPHER_WALK_DIFF);
-	if (!scatterwalk_aligned(&walk->in, alignmask) ||
-	    !scatterwalk_aligned(&walk->out, alignmask)) {
+	if (!scatterwalk_aligned(&walk->in, walk->alignmask) ||
+	    !scatterwalk_aligned(&walk->out, walk->alignmask)) {
 		walk->flags |= BLKCIPHER_WALK_COPY;
 		if (!walk->page) {
 			walk->page = (void *)__get_free_page(GFP_ATOMIC);
@@ -250,12 +245,12 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc,
 		}
 	}
 
-	bsize = min(walk->blocksize, n);
+	bsize = min(walk->walk_blocksize, n);
 	n = scatterwalk_clamp(&walk->in, n);
 	n = scatterwalk_clamp(&walk->out, n);
 
 	if (unlikely(n < bsize)) {
-		err = blkcipher_next_slow(desc, walk, bsize, alignmask);
+		err = blkcipher_next_slow(desc, walk, bsize, walk->alignmask);
 		goto set_phys_lowmem;
 	}
 
@@ -277,28 +272,26 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc,
 	return err;
 }
 
-static inline int blkcipher_copy_iv(struct blkcipher_walk *walk,
-				    struct crypto_blkcipher *tfm,
-				    unsigned int alignmask)
+static inline int blkcipher_copy_iv(struct blkcipher_walk *walk)
 {
-	unsigned bs = walk->blocksize;
-	unsigned int ivsize = crypto_blkcipher_ivsize(tfm);
-	unsigned aligned_bs = ALIGN(bs, alignmask + 1);
-	unsigned int size = aligned_bs * 2 + ivsize + max(aligned_bs, ivsize) -
-			    (alignmask + 1);
+	unsigned bs = walk->walk_blocksize;
+	unsigned aligned_bs = ALIGN(bs, walk->alignmask + 1);
+	unsigned int size = aligned_bs * 2 +
+			    walk->ivsize + max(aligned_bs, walk->ivsize) -
+			    (walk->alignmask + 1);
 	u8 *iv;
 
-	size += alignmask & ~(crypto_tfm_ctx_alignment() - 1);
+	size += walk->alignmask & ~(crypto_tfm_ctx_alignment() - 1);
 	walk->buffer = kmalloc(size, GFP_ATOMIC);
 	if (!walk->buffer)
 		return -ENOMEM;
 
-	iv = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1);
+	iv = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1);
 	iv = blkcipher_get_spot(iv, bs) + aligned_bs;
 	iv = blkcipher_get_spot(iv, bs) + aligned_bs;
-	iv = blkcipher_get_spot(iv, ivsize);
+	iv = blkcipher_get_spot(iv, walk->ivsize);
 
-	walk->iv = memcpy(iv, walk->iv, ivsize);
+	walk->iv = memcpy(iv, walk->iv, walk->ivsize);
 	return 0;
 }
 
@@ -306,7 +299,10 @@ int blkcipher_walk_virt(struct blkcipher_desc *desc,
 			struct blkcipher_walk *walk)
 {
 	walk->flags &= ~BLKCIPHER_WALK_PHYS;
-	walk->blocksize = crypto_blkcipher_blocksize(desc->tfm);
+	walk->walk_blocksize = crypto_blkcipher_blocksize(desc->tfm);
+	walk->cipher_blocksize = walk->walk_blocksize;
+	walk->ivsize = crypto_blkcipher_ivsize(desc->tfm);
+	walk->alignmask = crypto_blkcipher_alignmask(desc->tfm);
 	return blkcipher_walk_first(desc, walk);
 }
 EXPORT_SYMBOL_GPL(blkcipher_walk_virt);
@@ -315,7 +311,10 @@ int blkcipher_walk_phys(struct blkcipher_desc *desc,
 			struct blkcipher_walk *walk)
 {
 	walk->flags |= BLKCIPHER_WALK_PHYS;
-	walk->blocksize = crypto_blkcipher_blocksize(desc->tfm);
+	walk->walk_blocksize = crypto_blkcipher_blocksize(desc->tfm);
+	walk->cipher_blocksize = walk->walk_blocksize;
+	walk->ivsize = crypto_blkcipher_ivsize(desc->tfm);
+	walk->alignmask = crypto_blkcipher_alignmask(desc->tfm);
 	return blkcipher_walk_first(desc, walk);
 }
 EXPORT_SYMBOL_GPL(blkcipher_walk_phys);
@@ -323,9 +322,6 @@ EXPORT_SYMBOL_GPL(blkcipher_walk_phys);
 static int blkcipher_walk_first(struct blkcipher_desc *desc,
 				struct blkcipher_walk *walk)
 {
-	struct crypto_blkcipher *tfm = desc->tfm;
-	unsigned int alignmask = crypto_blkcipher_alignmask(tfm);
-
 	if (WARN_ON_ONCE(in_irq()))
 		return -EDEADLK;
 
@@ -335,8 +331,8 @@ static int blkcipher_walk_first(struct blkcipher_desc *desc,
 
 	walk->buffer = NULL;
 	walk->iv = desc->info;
-	if (unlikely(((unsigned long)walk->iv & alignmask))) {
-		int err = blkcipher_copy_iv(walk, tfm, alignmask);
+	if (unlikely(((unsigned long)walk->iv & walk->alignmask))) {
+		int err = blkcipher_copy_iv(walk);
 		if (err)
 			return err;
 	}
@@ -353,7 +349,10 @@ int blkcipher_walk_virt_block(struct blkcipher_desc *desc,
 			      unsigned int blocksize)
 {
 	walk->flags &= ~BLKCIPHER_WALK_PHYS;
-	walk->blocksize = blocksize;
+	walk->walk_blocksize = blocksize;
+	walk->cipher_blocksize = crypto_blkcipher_blocksize(desc->tfm);
+	walk->ivsize = crypto_blkcipher_ivsize(desc->tfm);
+	walk->alignmask = crypto_blkcipher_alignmask(desc->tfm);
 	return blkcipher_walk_first(desc, walk);
 }
 EXPORT_SYMBOL_GPL(blkcipher_walk_virt_block);
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 418d270e1806..6c4d916d739d 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -100,9 +100,12 @@ struct blkcipher_walk {
 	void *page;
 	u8 *buffer;
 	u8 *iv;
+	unsigned int ivsize;
 
 	int flags;
-	unsigned int blocksize;
+	unsigned int walk_blocksize;
+	unsigned int cipher_blocksize;
+	unsigned int alignmask;
 };
 
 struct ablkcipher_walk {

From 15aaa954da86024fa1b4e1c26b162fb7be84d2df Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 4 Mar 2014 13:28:39 +0800
Subject: [PATCH 0214/1185] crypto: allow blkcipher walks over AEAD data

This adds the function blkcipher_aead_walk_virt_block, which allows the caller
to use the blkcipher walk API to handle the input and output scatterlists.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/blkcipher.c      | 14 ++++++++++++++
 include/crypto/algapi.h |  4 ++++
 2 files changed, 18 insertions(+)

diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
index 46fdab5e9cc7..0122bec38564 100644
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
@@ -357,6 +357,20 @@ int blkcipher_walk_virt_block(struct blkcipher_desc *desc,
 }
 EXPORT_SYMBOL_GPL(blkcipher_walk_virt_block);
 
+int blkcipher_aead_walk_virt_block(struct blkcipher_desc *desc,
+				   struct blkcipher_walk *walk,
+				   struct crypto_aead *tfm,
+				   unsigned int blocksize)
+{
+	walk->flags &= ~BLKCIPHER_WALK_PHYS;
+	walk->walk_blocksize = blocksize;
+	walk->cipher_blocksize = crypto_aead_blocksize(tfm);
+	walk->ivsize = crypto_aead_ivsize(tfm);
+	walk->alignmask = crypto_aead_alignmask(tfm);
+	return blkcipher_walk_first(desc, walk);
+}
+EXPORT_SYMBOL_GPL(blkcipher_aead_walk_virt_block);
+
 static int setkey_unaligned(struct crypto_tfm *tfm, const u8 *key,
 			    unsigned int keylen)
 {
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 6c4d916d739d..063f8ef49301 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -195,6 +195,10 @@ int blkcipher_walk_phys(struct blkcipher_desc *desc,
 int blkcipher_walk_virt_block(struct blkcipher_desc *desc,
 			      struct blkcipher_walk *walk,
 			      unsigned int blocksize);
+int blkcipher_aead_walk_virt_block(struct blkcipher_desc *desc,
+				   struct blkcipher_walk *walk,
+				   struct crypto_aead *tfm,
+				   unsigned int blocksize);
 
 int ablkcipher_walk_done(struct ablkcipher_request *req,
 			 struct ablkcipher_walk *walk, int err);

From 84d391c5bf867c5e0a615cfa277eb8eb5b6d6453 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 4 Mar 2014 01:10:04 +0000
Subject: [PATCH 0215/1185] arm64: enable generic CPU feature modalias matching
 for this architecture

This enables support for the generic CPU feature modalias implementation that
wires up optional CPU features to udev based module autoprobing.

A file <asm/cpufeature.h> is provided that maps CPU feature numbers to
elf_hwcap bits, which is the standard way on arm64 to advertise optional CPU
features both internally and to user space.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[catalin.marinas@arm.com: removed unnecessary "!!"]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

Conflicts:
	arch/arm64/Kconfig

Change-Id: Ief16b3197cd0564d8cf8aa82e9614bcda6399fe5
---
 arch/arm64/Kconfig                  |  2 ++
 arch/arm64/include/asm/cpufeature.h | 29 +++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)
 create mode 100644 arch/arm64/include/asm/cpufeature.h

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8832ac02ffef..3ee27d34a020 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -10,6 +10,8 @@ config ARM64
 	select CLONE_BACKWARDS
 	select COMMON_CLK
 	select GENERIC_CLOCKEVENTS
+	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_IOMAP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
new file mode 100644
index 000000000000..cd4ac0516488
--- /dev/null
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_CPUFEATURE_H
+#define __ASM_CPUFEATURE_H
+
+#include <asm/hwcap.h>
+
+/*
+ * In the arm64 world (as in the ARM world), elf_hwcap is used both internally
+ * in the kernel and for user space to keep track of which optional features
+ * are supported by the current system. So let's map feature 'x' to HWCAP_x.
+ * Note that HWCAP_x constants are bit fields so we need to take the log.
+ */
+
+#define MAX_CPU_FEATURES	(8 * sizeof(elf_hwcap))
+#define cpu_feature(x)		ilog2(HWCAP_ ## x)
+
+static inline bool cpu_have_feature(unsigned int num)
+{
+	return elf_hwcap & (1UL << num);
+}
+
+#endif

From 820140883b83b548cd09151732ae783abc2976a0 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 24 Sep 2013 09:28:03 +0200
Subject: [PATCH 0216/1185] arm64: pull in <asm/simd.h> from asm-generic

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/include/asm/Kbuild | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 79a642d199f2..fb6e0c4d46a3 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -36,6 +36,7 @@ generic-y += segment.h
 generic-y += sembuf.h
 generic-y += serial.h
 generic-y += shmbuf.h
+generic-y += simd.h
 generic-y += sizes.h
 generic-y += socket.h
 generic-y += sockios.h

From 4d856c4e9fa9d9ce997e8482c8e878d157462f8c Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 30 Apr 2014 19:48:28 -0500
Subject: [PATCH 0217/1185] tty/serial: add back missing
 setup_early_serial8250_console

Commit d2fd6810a823bcd (tty/serial: convert 8250 to generic earlycon)
removed setup_early_serial8250_console, but there are still 2 callers
in:

arch/mips/mti-malta/malta-init.c
drivers/firmware/pcdp.c

Add back the function implemented as a wrapper to setup_earlycon.

Reported-by: Yinghai Lu <yinghai@kernel.org>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: linux-serial@vger.kernel.org

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit fe1cf8af918af3ff0dd58ce92e5a5da117cb1d92)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
(cherry picked from commit bd3132993d81a28b3a165ef6cc1abe0c4a0c6ac6)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/tty/serial/8250/8250_early.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c
index e83c9db3300c..cfef801a49d4 100644
--- a/drivers/tty/serial/8250/8250_early.c
+++ b/drivers/tty/serial/8250/8250_early.c
@@ -156,6 +156,16 @@ static int __init early_serial8250_setup(struct earlycon_device *device,
 EARLYCON_DECLARE(uart8250, early_serial8250_setup);
 EARLYCON_DECLARE(uart, early_serial8250_setup);
 
+int __init setup_early_serial8250_console(char *cmdline)
+{
+	char match[] = "uart8250";
+
+	if (cmdline && cmdline[4] == ',')
+		match[4] = '\0';
+
+	return setup_earlycon(cmdline, match, early_serial8250_setup);
+}
+
 int serial8250_find_port_for_earlycon(void)
 {
 	struct earlycon_device *device = early_device;

From 2f56ce0e13c04d5342807755115fe842a1caa6c4 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 12 Jun 2014 12:52:44 -0500
Subject: [PATCH 0218/1185] tty/serial: fix 8250 early console option passing
 to regular console

In the conversion to generic early console, the passing of options from
the early 8250 console to the regular ttyS console was broken. This
resulted in the baud rate changing when switching consoles during boot.

This feature allows specifying a single console option on the kernel
command line rather than both an early console and regular serial tty
console. It would be nice to generalize this feature. However, it only
works if the correct baud rate can be probed early which is not the
case on many platforms which have non-standard UART clock rates. So for
now, this is left as an 8250 specific feature.

Reported-and-tested-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Rob Herring <robh@kernel.org>
Cc: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 60efcf0414be5876d81276e3c1fd12680ba2ce71)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
(cherry picked from commit 5eb6ba4882c9c0b3b83486ab66b0c9e68189de4b)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/tty/serial/8250/8250_early.c | 5 ++++-
 drivers/tty/serial/earlycon.c        | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c
index cfef801a49d4..4858b8a99d3b 100644
--- a/drivers/tty/serial/8250/8250_early.c
+++ b/drivers/tty/serial/8250/8250_early.c
@@ -144,8 +144,11 @@ static int __init early_serial8250_setup(struct earlycon_device *device,
 	if (!(device->port.membase || device->port.iobase))
 		return 0;
 
-	if (!device->baud)
+	if (!device->baud) {
 		device->baud = probe_baud(&device->port);
+		snprintf(device->options, sizeof(device->options), "%u",
+			 device->baud);
+	}
 
 	init_port(device);
 
diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c
index 73bf1e21aae0..4f27f788ac6f 100644
--- a/drivers/tty/serial/earlycon.c
+++ b/drivers/tty/serial/earlycon.c
@@ -23,7 +23,7 @@
 #include <asm/serial.h>
 
 static struct console early_con = {
-	.name =		"earlycon",
+	.name =		"uart", /* 8250 console switch requires this name */
 	.flags =	CON_PRINTBUFFER | CON_BOOT,
 	.index =	-1,
 };

From bf485e6f51d505bbe4ab5eaafcfe7789ec83e7ee Mon Sep 17 00:00:00 2001
From: Greg Hackmann <ghackmann@google.com>
Date: Thu, 28 Aug 2014 14:00:10 -0700
Subject: [PATCH 0219/1185] arm64: check for upper PAGE_SHIFT bits in
 pfn_valid()

pfn_valid() returns a false positive when the lower (64 - PAGE_SHIFT)
bits match a valid pfn but some of the upper bits are set.  This caused
a kernel panic in kpageflags_read() when a userspace utility parsed
/proc/*/pagemap, neglected to discard the upper flag bits, and tried to
lseek()+read() from the corresponding offset in /proc/kpageflags.

A valid pfn will never have the upper PAGE_SHIFT bits set, so simply
check for this before passing the pfn to memblock_is_memory().

Change-Id: Ief5d8cd4dd93cbecd545a634a8d5885865cb5970
Signed-off-by: Greg Hackmann <ghackmann@google.com>
---
 arch/arm64/mm/init.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index f497ca77925a..61599b516c66 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -109,9 +109,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 }
 
 #ifdef CONFIG_HAVE_ARCH_PFN_VALID
+#define PFN_MASK ((1UL << (64 - PAGE_SHIFT)) - 1)
+
 int pfn_valid(unsigned long pfn)
 {
-	return memblock_is_memory(pfn << PAGE_SHIFT);
+	return (pfn & PFN_MASK) == pfn && memblock_is_memory(pfn << PAGE_SHIFT);
 }
 EXPORT_SYMBOL(pfn_valid);
 #endif

From 02273a4cd3ebada77f088ae13229813ca918af44 Mon Sep 17 00:00:00 2001
From: Jean Pihet <jean.pihet@linaro.org>
Date: Mon, 3 Feb 2014 19:18:29 +0100
Subject: [PATCH 0220/1185] ARM64: perf: support dwarf unwinding in compat mode

Add support for unwinding using the dwarf information in compat
mode. Using the correct user stack pointer allows perf to record
the frames correctly in the native and compat modes.

Note that although the dwarf frame unwinding works ok using
libunwind in native mode (on ARMv7 & ARMv8), some changes are
required to the libunwind code for the compat mode. Those changes
are posted separately on the libunwind mailing list.

Tested on ARMv8 platform with v8 and compat v7 binaries, the latter
are statically built.

Signed-off-by: Jean Pihet <jean.pihet@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/compat.h | 2 +-
 arch/arm64/include/asm/ptrace.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 899af807ef0f..7058eec269ab 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -214,7 +214,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
 	return (u32)(unsigned long)uptr;
 }
 
-#define compat_user_stack_pointer() (current_pt_regs()->compat_sp)
+#define compat_user_stack_pointer() (user_stack_pointer(current_pt_regs()))
 
 static inline void __user *arch_compat_alloc_user_space(long len)
 {
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 41a71ee4c3df..c2a199727e4f 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -131,7 +131,7 @@ struct pt_regs {
 	(!((regs)->pstate & PSR_F_BIT))
 
 #define user_stack_pointer(regs) \
-	((regs)->sp)
+	(!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp)
 
 /*
  * Are the current registers suitable for user mode? (used to maintain

From 8db0b1851e5e636d61c9d0d15fc4f5de4af0857c Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 10 Jul 2014 11:37:40 +0100
Subject: [PATCH 0221/1185] arm64: Cast KSTK_(EIP|ESP) to unsigned long

This is for similarity with thread_saved_(pc|sp) and to avoid some
compiler warnings in the audit code.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/processor.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index ab239b2c456f..db3112886968 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -131,8 +131,8 @@ extern struct task_struct *cpu_switch_to(struct task_struct *prev,
 #define task_pt_regs(p) \
 	((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
 
-#define KSTK_EIP(tsk)	task_pt_regs(tsk)->pc
-#define KSTK_ESP(tsk)	task_pt_regs(tsk)->sp
+#define KSTK_EIP(tsk)	((unsigned long)task_pt_regs(tsk)->pc)
+#define KSTK_ESP(tsk)	((unsigned long)task_pt_regs(tsk)->sp)
 
 /*
  * Prefetching support

From dc52724b7222f368c49b578787b2fd876c796fc7 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 29 Aug 2014 16:11:10 +0100
Subject: [PATCH 0222/1185] arm64: report correct stack pointer in KSTK_ESP for
 compat tasks

The KSTK_ESP macro is used to determine the user stack pointer for a
given task. In particular, this is used to report the '[stack]' VMA
in /proc/self/maps, which is used by Android to determine the stack
location for children of the main thread.

This patch fixes the macro to use user_stack_pointer instead of directly
returning sp. This means that we report w13 instead of sp, since the
former is used as the stack pointer when executing in AArch32 state.

Cc: <stable@vger.kernel.org>
Reported-by: Serban Constantinescu <Serban.Constantinescu@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/processor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index db3112886968..3b7bb031f98f 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -132,7 +132,7 @@ extern struct task_struct *cpu_switch_to(struct task_struct *prev,
 	((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
 
 #define KSTK_EIP(tsk)	((unsigned long)task_pt_regs(tsk)->pc)
-#define KSTK_ESP(tsk)	((unsigned long)task_pt_regs(tsk)->sp)
+#define KSTK_ESP(tsk)	user_stack_pointer(task_pt_regs(tsk))
 
 /*
  * Prefetching support

From 597ff3a0f3ba6e471ce4dc93bf0b41abcebe9f8c Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 29 Aug 2014 16:08:02 +0100
Subject: [PATCH 0223/1185] arm64: Add brackets around user_stack_pointer()

Commit 5f888a1d33 (ARM64: perf: support dwarf unwinding in compat mode)
changes user_stack_pointer() to return the compat SP for 32-bit tasks
but without brackets around the whole definition, with possible issues
on the call sites (noticed with a subsequent fix for KSTK_ESP).

Fixes: 5f888a1d33c4 (ARM64: perf: support dwarf unwinding in compat mode)
Reported-by: Sudeep Holla <sudeep.holla@arm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/ptrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index c2a199727e4f..7304fa2fd9fa 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -131,7 +131,7 @@ struct pt_regs {
 	(!((regs)->pstate & PSR_F_BIT))
 
 #define user_stack_pointer(regs) \
-	(!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp)
+	(!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp)
 
 /*
  * Are the current registers suitable for user mode? (used to maintain

From c7ca25a4ad7498bba8c20ede3c5f4a3b7931887a Mon Sep 17 00:00:00 2001
From: Leo Yan <leoy@marvell.com>
Date: Mon, 1 Sep 2014 11:09:51 +0800
Subject: [PATCH 0224/1185] arm64: fix bug for reloading FPSIMD state after cpu
 power off

Now arm64 defers reloading FPSIMD state, but this optimization also
introduces the bug after cpu resume back from low power mode.

The reason is after the cpu has been powered off, s/w need set the
cpu's fpsimd_last_state to NULL so that it will force to reload
FPSIMD state for the thread, otherwise there has the chance to meet
the condition for both the task's fpsimd_state.cpu field contains the
id of the current cpu, and the cpu's fpsimd_last_state per-cpu variable
points to the task's fpsimd_state, so finally kernel will skip to reload
the context during it return back to userland.

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Leo Yan <leoy@marvell.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/fpsimd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 0f27578a7039..093bee27e527 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -268,6 +268,7 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
 	case CPU_PM_ENTER:
 		if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
 			fpsimd_save_state(&current->thread.fpsimd_state);
+		this_cpu_write(fpsimd_last_state, NULL);
 		break;
 	case CPU_PM_EXIT:
 		if (current->mm)

From f50be83d236b9b0169b85c47b8878f61bb22f448 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Mon, 6 May 2013 03:10:35 +0100
Subject: [PATCH 0225/1185] apparmor: no need to delay vfree()

vfree() can be called from interrupt contexts now

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: John Johansen <john.johansen@canonical.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 security/apparmor/lib.c | 24 +++---------------------
 1 file changed, 3 insertions(+), 21 deletions(-)

diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 7430298116d6..5b62af7254ca 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -104,19 +104,6 @@ void *kvmalloc(size_t size)
 	return buffer;
 }
 
-/**
- * do_vfree - workqueue routine for freeing vmalloced memory
- * @work: data to be freed
- *
- * The work_struct is overlaid to the data being freed, as at the point
- * the work is scheduled the data is no longer valid, be its freeing
- * needs to be delayed until safe.
- */
-static void do_vfree(struct work_struct *work)
-{
-	vfree(work);
-}
-
 /**
  * kvfree - free an allocation do by kvmalloc
  * @buffer: buffer to free (MAYBE_NULL)
@@ -125,13 +112,8 @@ static void do_vfree(struct work_struct *work)
  */
 void kvfree(void *buffer)
 {
-	if (is_vmalloc_addr(buffer)) {
-		/* Data is no longer valid so just use the allocated space
-		 * as the work_struct
-		 */
-		struct work_struct *work = (struct work_struct *) buffer;
-		INIT_WORK(work, do_vfree);
-		schedule_work(work);
-	} else
+	if (is_vmalloc_addr(buffer))
+		vfree(buffer);
+	else
 		kfree(buffer);
 }

From 40ef955d6541361f2e22044a2362ee13f0fde89b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 6 May 2014 14:02:53 -0400
Subject: [PATCH 0226/1185] nick kvfree() from apparmor

too many places open-code it

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Conflicts:
	mm/util.c
	security/apparmor/include/apparmor.h

Change-Id: Ie8602e0199282dc462921cb7217158d1998853b0
---
 include/linux/mm.h                   |  2 ++
 mm/util.c                            | 10 ++++++++++
 security/apparmor/include/apparmor.h |  1 -
 security/apparmor/lib.c              | 14 --------------
 4 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 01eb01df9225..ff7f6375f33f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -324,6 +324,8 @@ static inline int is_vmalloc_or_module_addr(const void *x)
 }
 #endif
 
+extern void kvfree(const void *addr);
+
 static inline void compound_lock(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/mm/util.c b/mm/util.c
index ab1424dbe2e6..36aa4815e3dd 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -7,6 +7,7 @@
 #include <linux/security.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/vmalloc.h>
 #include <asm/uaccess.h>
 
 #include "internal.h"
@@ -384,6 +385,15 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
 }
 EXPORT_SYMBOL(vm_mmap);
 
+void kvfree(const void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		kfree(addr);
+}
+EXPORT_SYMBOL(kvfree);
+
 struct address_space *page_mapping(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index 40aedd9f73ea..4a8cbfeef8b3 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -65,7 +65,6 @@ extern int apparmor_initialized __initdata;
 char *aa_split_fqname(char *args, char **ns_name);
 void aa_info_message(const char *str);
 void *kvmalloc(size_t size);
-void kvfree(void *buffer);
 
 
 /**
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 5b62af7254ca..ce8d9a84ab2d 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -103,17 +103,3 @@ void *kvmalloc(size_t size)
 	}
 	return buffer;
 }
-
-/**
- * kvfree - free an allocation do by kvmalloc
- * @buffer: buffer to free (MAYBE_NULL)
- *
- * Free a buffer allocated by kvmalloc
- */
-void kvfree(void *buffer)
-{
-	if (is_vmalloc_addr(buffer))
-		vfree(buffer);
-	else
-		kfree(buffer);
-}

From 8b9ca310b11458aeeb14861e34f896d67c1333ff Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 2 Jul 2014 15:22:37 -0700
Subject: [PATCH 0227/1185] fs/seq_file: fallback to vmalloc allocation

There are a couple of seq_files which use the single_open() interface.
This interface requires that the whole output must fit into a single
buffer.

E.g.  for /proc/stat allocation failures have been observed because an
order-4 memory allocation failed due to memory fragmentation.  In such
situations reading /proc/stat is not possible anymore.

Therefore change the seq_file code to fallback to vmalloc allocations
which will usually result in a couple of order-0 allocations and hence
also work if memory is fragmented.

For reference a call trace where reading from /proc/stat failed:

  sadc: page allocation failure: order:4, mode:0x1040d0
  CPU: 1 PID: 192063 Comm: sadc Not tainted 3.10.0-123.el7.s390x #1
  [...]
  Call Trace:
    show_stack+0x6c/0xe8
    warn_alloc_failed+0xd6/0x138
    __alloc_pages_nodemask+0x9da/0xb68
    __get_free_pages+0x2e/0x58
    kmalloc_order_trace+0x44/0xc0
    stat_open+0x5a/0xd8
    proc_reg_open+0x8a/0x140
    do_dentry_open+0x1bc/0x2c8
    finish_open+0x46/0x60
    do_last+0x382/0x10d0
    path_openat+0xc8/0x4f8
    do_filp_open+0x46/0xa8
    do_sys_open+0x114/0x1f0
    sysc_tracego+0x14/0x1a

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Tested-by: David Rientjes <rientjes@google.com>
Cc: Ian Kent <raven@themaw.net>
Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Thorsten Diehl <thorsten.diehl@de.ibm.com>
Cc: Andrea Righi <andrea@betterlinux.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Stefan Bader <stefan.bader@canonical.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Conflicts:
	fs/seq_file.c

Change-Id: I009080dd017b020ffd5e812e5b472bdb8349217a
---
 fs/seq_file.c | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 774c1eb7f1c9..749b84104014 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -8,8 +8,10 @@
 #include <linux/fs.h>
 #include <linux/export.h>
 #include <linux/seq_file.h>
+#include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <linux/cred.h>
+#include <linux/mm.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -30,6 +32,16 @@ static void seq_set_overflow(struct seq_file *m)
 	m->count = m->size;
 }
 
+static void *seq_buf_alloc(unsigned long size)
+{
+	void *buf;
+
+	buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
+	if (!buf && size > PAGE_SIZE)
+		buf = vmalloc(size);
+	return buf;
+}
+
 /**
  *	seq_open -	initialize sequential file
  *	@file: file we initialize
@@ -96,7 +108,7 @@ static int traverse(struct seq_file *m, loff_t offset)
 		return 0;
 	}
 	if (!m->buf) {
-		m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
+		m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
 		if (!m->buf)
 			return -ENOMEM;
 	}
@@ -135,8 +147,8 @@ static int traverse(struct seq_file *m, loff_t offset)
 
 Eoverflow:
 	m->op->stop(m, p);
-	kfree(m->buf);
-	m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
+	kvfree(m->buf);
+	m->buf = seq_buf_alloc(m->size <<= 1);
 	return !m->buf ? -ENOMEM : -EAGAIN;
 }
 
@@ -191,7 +203,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
 
 	/* grab buffer if we didn't have one */
 	if (!m->buf) {
-		m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
+		m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
 		if (!m->buf)
 			goto Enomem;
 	}
@@ -231,8 +243,8 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
 		if (m->count < m->size)
 			goto Fill;
 		m->op->stop(m, p);
-		kfree(m->buf);
-		m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
+		kvfree(m->buf);
+		m->buf = seq_buf_alloc(m->size <<= 1);
 		if (!m->buf)
 			goto Enomem;
 		m->count = 0;
@@ -347,7 +359,7 @@ EXPORT_SYMBOL(seq_lseek);
 int seq_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *m = file->private_data;
-	kfree(m->buf);
+	kvfree(m->buf);
 	kfree(m);
 	return 0;
 }
@@ -602,13 +614,13 @@ EXPORT_SYMBOL(single_open);
 int single_open_size(struct file *file, int (*show)(struct seq_file *, void *),
 		void *data, size_t size)
 {
-	char *buf = kmalloc(size, GFP_KERNEL);
+	char *buf = seq_buf_alloc(size);
 	int ret;
 	if (!buf)
 		return -ENOMEM;
 	ret = single_open(file, show, data);
 	if (ret) {
-		kfree(buf);
+		kvfree(buf);
 		return ret;
 	}
 	((struct seq_file *)file->private_data)->buf = buf;

From 2d8da662dc9dc79d52847beddf0ae1a6e559bbca Mon Sep 17 00:00:00 2001
From: JP Abgrall <jpa@google.com>
Date: Wed, 3 Sep 2014 17:36:44 -0700
Subject: [PATCH 0228/1185] android: base-cfg: enforce the needed
 XFRM_MODE_TUNNEL (for VPN)

Change-Id: I587023d56877d32806079676790751155c768982
Signed-off-by: JP Abgrall <jpa@google.com>
---
 android/configs/android-base.cfg | 1 +
 1 file changed, 1 insertion(+)

diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg
index 5b888487ede1..bdc4749a195f 100644
--- a/android/configs/android-base.cfg
+++ b/android/configs/android-base.cfg
@@ -24,6 +24,7 @@ CONFIG_INET6_ESP=y
 CONFIG_INET6_IPCOMP=y
 CONFIG_INET=y
 CONFIG_INET_ESP=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
 CONFIG_IP6_NF_FILTER=y
 CONFIG_IP6_NF_IPTABLES=y
 CONFIG_IP6_NF_MANGLE=y

From 38d467b3effd465be24a20e2d861b9049d4f333f Mon Sep 17 00:00:00 2001
From: Dave Chiluk <chiluk@canonical.com>
Date: Tue, 24 Jun 2014 10:11:26 -0500
Subject: [PATCH 0229/1185] stable_kernel_rules: Add pointer to netdev-FAQ for
 network patches

commit b76fc285337b6b256e9ba20a40cfd043f70c27af upstream.

Stable_kernel_rules should point submitters of network stable patches to the
netdev_FAQ.txt as requests for stable network patches should go to netdev
first.

Signed-off-by: Dave Chiluk <chiluk@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/stable_kernel_rules.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/stable_kernel_rules.txt b/Documentation/stable_kernel_rules.txt
index b0714d8f678a..8dfb6a5f427d 100644
--- a/Documentation/stable_kernel_rules.txt
+++ b/Documentation/stable_kernel_rules.txt
@@ -29,6 +29,9 @@ Rules on what kind of patches are accepted, and which ones are not, into the
 
 Procedure for submitting patches to the -stable tree:
 
+ - If the patch covers files in net/ or drivers/net please follow netdev stable
+   submission guidelines as described in
+   Documentation/networking/netdev-FAQ.txt
  - Send the patch, after verifying that it follows the above rules, to
    stable@vger.kernel.org.  You must note the upstream commit ID in the
    changelog of your submission, as well as the kernel version you wish

From 4292001d4de0681e2f1eb59d13511012369324e0 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 21 Aug 2014 09:57:17 -0500
Subject: [PATCH 0230/1185] HID: logitech: perform bounds checking on device_id
 early enough

commit ad3e14d7c5268c2e24477c6ef54bbdf88add5d36 upstream.

device_index is a char type and the size of paired_dj_devices is 7
elements, therefore proper bounds checking has to be applied to
device_index before it is used.

We are currently performing the bounds checking in
logi_dj_recv_add_djhid_device(), which is too late, as malicious device
could send REPORT_TYPE_NOTIF_DEVICE_UNPAIRED early enough and trigger the
problem in one of the report forwarding functions called from
logi_dj_raw_event().

Fix this by performing the check at the earliest possible occasion in
logi_dj_raw_event().

Reported-by: Ben Hawkes <hawkes@google.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/hid-logitech-dj.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index 1be9156a3950..51b1a5088c0d 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -237,13 +237,6 @@ static void logi_dj_recv_add_djhid_device(struct dj_receiver_dev *djrcv_dev,
 		return;
 	}
 
-	if ((dj_report->device_index < DJ_DEVICE_INDEX_MIN) ||
-	    (dj_report->device_index > DJ_DEVICE_INDEX_MAX)) {
-		dev_err(&djrcv_hdev->dev, "%s: invalid device index:%d\n",
-			__func__, dj_report->device_index);
-		return;
-	}
-
 	if (djrcv_dev->paired_dj_devices[dj_report->device_index]) {
 		/* The device is already known. No need to reallocate it. */
 		dbg_hid("%s: device is already known\n", __func__);
@@ -713,6 +706,12 @@ static int logi_dj_raw_event(struct hid_device *hdev,
 	 * device (via hid_input_report() ) and return 1 so hid-core does not do
 	 * anything else with it.
 	 */
+	if ((dj_report->device_index < DJ_DEVICE_INDEX_MIN) ||
+	    (dj_report->device_index > DJ_DEVICE_INDEX_MAX)) {
+		dev_err(&hdev->dev, "%s: invalid device index:%d\n",
+				__func__, dj_report->device_index);
+		return false;
+	}
 
 	spin_lock_irqsave(&djrcv_dev->lock, flags);
 	if (dj_report->report_id == REPORT_ID_DJ_SHORT) {

From 0c9fdd4c5af24ea49424903296cb1f7420505e9e Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 21 Aug 2014 09:57:48 -0500
Subject: [PATCH 0231/1185] HID: fix a couple of off-by-ones

commit 4ab25786c87eb20857bbb715c3ae34ec8fd6a214 upstream.

There are a few very theoretical off-by-one bugs in report descriptor size
checking when performing a pre-parsing fixup. Fix those.

Reported-by: Ben Hawkes <hawkes@google.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/hid-cherry.c   | 2 +-
 drivers/hid/hid-kye.c      | 2 +-
 drivers/hid/hid-lg.c       | 4 ++--
 drivers/hid/hid-monterey.c | 2 +-
 drivers/hid/hid-petalynx.c | 2 +-
 drivers/hid/hid-sunplus.c  | 2 +-
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/hid/hid-cherry.c b/drivers/hid/hid-cherry.c
index 1bdcccc54a1d..f745d2c1325e 100644
--- a/drivers/hid/hid-cherry.c
+++ b/drivers/hid/hid-cherry.c
@@ -28,7 +28,7 @@
 static __u8 *ch_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 		unsigned int *rsize)
 {
-	if (*rsize >= 17 && rdesc[11] == 0x3c && rdesc[12] == 0x02) {
+	if (*rsize >= 18 && rdesc[11] == 0x3c && rdesc[12] == 0x02) {
 		hid_info(hdev, "fixing up Cherry Cymotion report descriptor\n");
 		rdesc[11] = rdesc[16] = 0xff;
 		rdesc[12] = rdesc[17] = 0x03;
diff --git a/drivers/hid/hid-kye.c b/drivers/hid/hid-kye.c
index 6af90dbdc3d4..843f2dd55200 100644
--- a/drivers/hid/hid-kye.c
+++ b/drivers/hid/hid-kye.c
@@ -280,7 +280,7 @@ static __u8 *kye_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 		 *   - change the button usage range to 4-7 for the extra
 		 *     buttons
 		 */
-		if (*rsize >= 74 &&
+		if (*rsize >= 75 &&
 			rdesc[61] == 0x05 && rdesc[62] == 0x08 &&
 			rdesc[63] == 0x19 && rdesc[64] == 0x08 &&
 			rdesc[65] == 0x29 && rdesc[66] == 0x0f &&
diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c
index 06eb45fa6331..12fc48c968e6 100644
--- a/drivers/hid/hid-lg.c
+++ b/drivers/hid/hid-lg.c
@@ -345,14 +345,14 @@ static __u8 *lg_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 	struct usb_device_descriptor *udesc;
 	__u16 bcdDevice, rev_maj, rev_min;
 
-	if ((drv_data->quirks & LG_RDESC) && *rsize >= 90 && rdesc[83] == 0x26 &&
+	if ((drv_data->quirks & LG_RDESC) && *rsize >= 91 && rdesc[83] == 0x26 &&
 			rdesc[84] == 0x8c && rdesc[85] == 0x02) {
 		hid_info(hdev,
 			 "fixing up Logitech keyboard report descriptor\n");
 		rdesc[84] = rdesc[89] = 0x4d;
 		rdesc[85] = rdesc[90] = 0x10;
 	}
-	if ((drv_data->quirks & LG_RDESC_REL_ABS) && *rsize >= 50 &&
+	if ((drv_data->quirks & LG_RDESC_REL_ABS) && *rsize >= 51 &&
 			rdesc[32] == 0x81 && rdesc[33] == 0x06 &&
 			rdesc[49] == 0x81 && rdesc[50] == 0x06) {
 		hid_info(hdev,
diff --git a/drivers/hid/hid-monterey.c b/drivers/hid/hid-monterey.c
index 9e14c00eb1b6..25daf28b26bd 100644
--- a/drivers/hid/hid-monterey.c
+++ b/drivers/hid/hid-monterey.c
@@ -24,7 +24,7 @@
 static __u8 *mr_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 		unsigned int *rsize)
 {
-	if (*rsize >= 30 && rdesc[29] == 0x05 && rdesc[30] == 0x09) {
+	if (*rsize >= 31 && rdesc[29] == 0x05 && rdesc[30] == 0x09) {
 		hid_info(hdev, "fixing up button/consumer in HID report descriptor\n");
 		rdesc[30] = 0x0c;
 	}
diff --git a/drivers/hid/hid-petalynx.c b/drivers/hid/hid-petalynx.c
index 736b2502df4f..6aca4f2554bf 100644
--- a/drivers/hid/hid-petalynx.c
+++ b/drivers/hid/hid-petalynx.c
@@ -25,7 +25,7 @@
 static __u8 *pl_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 		unsigned int *rsize)
 {
-	if (*rsize >= 60 && rdesc[39] == 0x2a && rdesc[40] == 0xf5 &&
+	if (*rsize >= 62 && rdesc[39] == 0x2a && rdesc[40] == 0xf5 &&
 			rdesc[41] == 0x00 && rdesc[59] == 0x26 &&
 			rdesc[60] == 0xf9 && rdesc[61] == 0x00) {
 		hid_info(hdev, "fixing up Petalynx Maxter Remote report descriptor\n");
diff --git a/drivers/hid/hid-sunplus.c b/drivers/hid/hid-sunplus.c
index 87fc91e1c8de..91072fa54663 100644
--- a/drivers/hid/hid-sunplus.c
+++ b/drivers/hid/hid-sunplus.c
@@ -24,7 +24,7 @@
 static __u8 *sp_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 		unsigned int *rsize)
 {
-	if (*rsize >= 107 && rdesc[104] == 0x26 && rdesc[105] == 0x80 &&
+	if (*rsize >= 112 && rdesc[104] == 0x26 && rdesc[105] == 0x80 &&
 			rdesc[106] == 0x03) {
 		hid_info(hdev, "fixing up Sunplus Wireless Desktop report descriptor\n");
 		rdesc[105] = rdesc[110] = 0x03;

From d4be3e07222e7572df4af6c4dd91e4b569a3ce20 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Sun, 17 Aug 2014 11:49:57 +0200
Subject: [PATCH 0232/1185] isofs: Fix unbounded recursion when processing
 relocated directories

commit 410dd3cf4c9b36f27ed4542ee18b1af5e68645a4 upstream.

We did not check relocated directory in any way when processing Rock
Ridge 'CL' tag. Thus a corrupted isofs image can possibly have a CL
entry pointing to another CL entry leading to possibly unbounded
recursion in kernel code and thus stack overflow or deadlocks (if there
is a loop created from CL entries).

Fix the problem by not allowing CL entry to point to a directory entry
with CL entry (such use makes no good sense anyway) and by checking
whether CL entry doesn't point to itself.

Reported-by: Chris Evans <cevans@google.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/isofs/inode.c | 15 ++++++++-------
 fs/isofs/isofs.h | 23 +++++++++++++++++++----
 fs/isofs/rock.c  | 39 ++++++++++++++++++++++++++++-----------
 3 files changed, 55 insertions(+), 22 deletions(-)

diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index d3705490ff9c..10489bbd40fc 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -69,7 +69,7 @@ static void isofs_put_super(struct super_block *sb)
 	return;
 }
 
-static int isofs_read_inode(struct inode *);
+static int isofs_read_inode(struct inode *, int relocated);
 static int isofs_statfs (struct dentry *, struct kstatfs *);
 
 static struct kmem_cache *isofs_inode_cachep;
@@ -1274,7 +1274,7 @@ static int isofs_read_level3_size(struct inode *inode)
 	goto out;
 }
 
-static int isofs_read_inode(struct inode *inode)
+static int isofs_read_inode(struct inode *inode, int relocated)
 {
 	struct super_block *sb = inode->i_sb;
 	struct isofs_sb_info *sbi = ISOFS_SB(sb);
@@ -1419,7 +1419,7 @@ static int isofs_read_inode(struct inode *inode)
 	 */
 
 	if (!high_sierra) {
-		parse_rock_ridge_inode(de, inode);
+		parse_rock_ridge_inode(de, inode, relocated);
 		/* if we want uid/gid set, override the rock ridge setting */
 		if (sbi->s_uid_set)
 			inode->i_uid = sbi->s_uid;
@@ -1498,9 +1498,10 @@ static int isofs_iget5_set(struct inode *ino, void *data)
  * offset that point to the underlying meta-data for the inode.  The
  * code below is otherwise similar to the iget() code in
  * include/linux/fs.h */
-struct inode *isofs_iget(struct super_block *sb,
-			 unsigned long block,
-			 unsigned long offset)
+struct inode *__isofs_iget(struct super_block *sb,
+			   unsigned long block,
+			   unsigned long offset,
+			   int relocated)
 {
 	unsigned long hashval;
 	struct inode *inode;
@@ -1522,7 +1523,7 @@ struct inode *isofs_iget(struct super_block *sb,
 		return ERR_PTR(-ENOMEM);
 
 	if (inode->i_state & I_NEW) {
-		ret = isofs_read_inode(inode);
+		ret = isofs_read_inode(inode, relocated);
 		if (ret < 0) {
 			iget_failed(inode);
 			inode = ERR_PTR(ret);
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 99167238518d..0ac4c1f73fbd 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -107,7 +107,7 @@ extern int iso_date(char *, int);
 
 struct inode;		/* To make gcc happy */
 
-extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *);
+extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *, int relocated);
 extern int get_rock_ridge_filename(struct iso_directory_record *, char *, struct inode *);
 extern int isofs_name_translate(struct iso_directory_record *, char *, struct inode *);
 
@@ -118,9 +118,24 @@ extern struct dentry *isofs_lookup(struct inode *, struct dentry *, unsigned int
 extern struct buffer_head *isofs_bread(struct inode *, sector_t);
 extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long);
 
-extern struct inode *isofs_iget(struct super_block *sb,
-                                unsigned long block,
-                                unsigned long offset);
+struct inode *__isofs_iget(struct super_block *sb,
+			   unsigned long block,
+			   unsigned long offset,
+			   int relocated);
+
+static inline struct inode *isofs_iget(struct super_block *sb,
+				       unsigned long block,
+				       unsigned long offset)
+{
+	return __isofs_iget(sb, block, offset, 0);
+}
+
+static inline struct inode *isofs_iget_reloc(struct super_block *sb,
+					     unsigned long block,
+					     unsigned long offset)
+{
+	return __isofs_iget(sb, block, offset, 1);
+}
 
 /* Because the inode number is no longer relevant to finding the
  * underlying meta-data for an inode, we are free to choose a more
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index c0bf42472e40..f488bbae541a 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -288,12 +288,16 @@ int get_rock_ridge_filename(struct iso_directory_record *de,
 	goto out;
 }
 
+#define RR_REGARD_XA 1
+#define RR_RELOC_DE 2
+
 static int
 parse_rock_ridge_inode_internal(struct iso_directory_record *de,
-				struct inode *inode, int regard_xa)
+				struct inode *inode, int flags)
 {
 	int symlink_len = 0;
 	int cnt, sig;
+	unsigned int reloc_block;
 	struct inode *reloc;
 	struct rock_ridge *rr;
 	int rootflag;
@@ -305,7 +309,7 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
 
 	init_rock_state(&rs, inode);
 	setup_rock_ridge(de, inode, &rs);
-	if (regard_xa) {
+	if (flags & RR_REGARD_XA) {
 		rs.chr += 14;
 		rs.len -= 14;
 		if (rs.len < 0)
@@ -485,12 +489,22 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
 					"relocated directory\n");
 			goto out;
 		case SIG('C', 'L'):
-			ISOFS_I(inode)->i_first_extent =
-			    isonum_733(rr->u.CL.location);
-			reloc =
-			    isofs_iget(inode->i_sb,
-				       ISOFS_I(inode)->i_first_extent,
-				       0);
+			if (flags & RR_RELOC_DE) {
+				printk(KERN_ERR
+				       "ISOFS: Recursive directory relocation "
+				       "is not supported\n");
+				goto eio;
+			}
+			reloc_block = isonum_733(rr->u.CL.location);
+			if (reloc_block == ISOFS_I(inode)->i_iget5_block &&
+			    ISOFS_I(inode)->i_iget5_offset == 0) {
+				printk(KERN_ERR
+				       "ISOFS: Directory relocation points to "
+				       "itself\n");
+				goto eio;
+			}
+			ISOFS_I(inode)->i_first_extent = reloc_block;
+			reloc = isofs_iget_reloc(inode->i_sb, reloc_block, 0);
 			if (IS_ERR(reloc)) {
 				ret = PTR_ERR(reloc);
 				goto out;
@@ -637,9 +651,11 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
 	return rpnt;
 }
 
-int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode)
+int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode,
+			   int relocated)
 {
-	int result = parse_rock_ridge_inode_internal(de, inode, 0);
+	int flags = relocated ? RR_RELOC_DE : 0;
+	int result = parse_rock_ridge_inode_internal(de, inode, flags);
 
 	/*
 	 * if rockridge flag was reset and we didn't look for attributes
@@ -647,7 +663,8 @@ int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode)
 	 */
 	if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1)
 	    && (ISOFS_SB(inode->i_sb)->s_rock == 2)) {
-		result = parse_rock_ridge_inode_internal(de, inode, 14);
+		result = parse_rock_ridge_inode_internal(de, inode,
+							 flags | RR_REGARD_XA);
 	}
 	return result;
 }

From e7b094f88420d840c65a7b5499e3000d2a4c00ec Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 17 Jul 2014 16:34:29 -0400
Subject: [PATCH 0233/1185] USB: OHCI: don't lose track of EDs when a
 controller dies

commit 977dcfdc60311e7aa571cabf6f39c36dde13339e upstream.

This patch fixes a bug in ohci-hcd.  When an URB is unlinked, the
corresponding Endpoint Descriptor is added to the ed_rm_list and taken
off the hardware schedule.  Once the ED is no longer visible to the
hardware, finish_unlinks() handles the URBs that were unlinked or have
completed.  If any URBs remain attached to the ED, the ED is added
back to the hardware schedule -- but only if the controller is
running.

This fails when a controller dies.  A non-empty ED does not get added
back to the hardware schedule and does not remain on the ed_rm_list;
ohci-hcd loses track of it.  The remaining URBs cannot be unlinked,
which causes the USB stack to hang.

The patch changes finish_unlinks() so that non-empty EDs remain on
the ed_rm_list if the controller isn't running.  This requires moving
some of the existing code around, to avoid modifying the ED's hardware
fields more than once.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ohci-q.c | 46 ++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c
index 37dc8373200a..1e1563da1812 100644
--- a/drivers/usb/host/ohci-q.c
+++ b/drivers/usb/host/ohci-q.c
@@ -314,8 +314,7 @@ static void periodic_unlink (struct ohci_hcd *ohci, struct ed *ed)
  *  - ED_OPER: when there's any request queued, the ED gets rescheduled
  *    immediately.  HC should be working on them.
  *
- *  - ED_IDLE:  when there's no TD queue. there's no reason for the HC
- *    to care about this ED; safe to disable the endpoint.
+ *  - ED_IDLE: when there's no TD queue or the HC isn't running.
  *
  * When finish_unlinks() runs later, after SOF interrupt, it will often
  * complete one or more URB unlinks before making that state change.
@@ -928,6 +927,10 @@ finish_unlinks (struct ohci_hcd *ohci, u16 tick)
 		int			completed, modified;
 		__hc32			*prev;
 
+		/* Is this ED already invisible to the hardware? */
+		if (ed->state == ED_IDLE)
+			goto ed_idle;
+
 		/* only take off EDs that the HC isn't using, accounting for
 		 * frame counter wraps and EDs with partially retired TDs
 		 */
@@ -957,12 +960,20 @@ finish_unlinks (struct ohci_hcd *ohci, u16 tick)
 			}
 		}
 
+		/* ED's now officially unlinked, hc doesn't see */
+		ed->state = ED_IDLE;
+		if (quirk_zfmicro(ohci) && ed->type == PIPE_INTERRUPT)
+			ohci->eds_scheduled--;
+		ed->hwHeadP &= ~cpu_to_hc32(ohci, ED_H);
+		ed->hwNextED = 0;
+		wmb();
+		ed->hwINFO &= ~cpu_to_hc32(ohci, ED_SKIP | ED_DEQUEUE);
+ed_idle:
+
 		/* reentrancy:  if we drop the schedule lock, someone might
 		 * have modified this list.  normally it's just prepending
 		 * entries (which we'd ignore), but paranoia won't hurt.
 		 */
-		*last = ed->ed_next;
-		ed->ed_next = NULL;
 		modified = 0;
 
 		/* unlink urbs as requested, but rescan the list after
@@ -1020,19 +1031,20 @@ finish_unlinks (struct ohci_hcd *ohci, u16 tick)
 		if (completed && !list_empty (&ed->td_list))
 			goto rescan_this;
 
-		/* ED's now officially unlinked, hc doesn't see */
-		ed->state = ED_IDLE;
-		if (quirk_zfmicro(ohci) && ed->type == PIPE_INTERRUPT)
-			ohci->eds_scheduled--;
-		ed->hwHeadP &= ~cpu_to_hc32(ohci, ED_H);
-		ed->hwNextED = 0;
-		wmb ();
-		ed->hwINFO &= ~cpu_to_hc32 (ohci, ED_SKIP | ED_DEQUEUE);
-
-		/* but if there's work queued, reschedule */
-		if (!list_empty (&ed->td_list)) {
-			if (ohci->rh_state == OHCI_RH_RUNNING)
-				ed_schedule (ohci, ed);
+		/*
+		 * If no TDs are queued, take ED off the ed_rm_list.
+		 * Otherwise, if the HC is running, reschedule.
+		 * If not, leave it on the list for further dequeues.
+		 */
+		if (list_empty(&ed->td_list)) {
+			*last = ed->ed_next;
+			ed->ed_next = NULL;
+		} else if (ohci->rh_state == OHCI_RH_RUNNING) {
+			*last = ed->ed_next;
+			ed->ed_next = NULL;
+			ed_schedule(ohci, ed);
+		} else {
+			last = &ed->ed_next;
 		}
 
 		if (modified)

From fe0d903cb746d5caf776e40412418fa23b6efa97 Mon Sep 17 00:00:00 2001
From: Patrick Riphagen <patrick.riphagen@xsens.com>
Date: Thu, 24 Jul 2014 09:12:52 +0200
Subject: [PATCH 0234/1185] USB: serial: ftdi_sio: Annotate the current Xsens
 PID assignments

commit 9273b8a270878906540349422ab24558b9d65716 upstream.

The converters are used in specific products. It can be useful to know
which they are exactly.

Signed-off-by: Patrick Riphagen <patrick.riphagen@xsens.com>
Signed-off-by: Frans Klaver <frans.klaver@xsens.com>
Cc: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/ftdi_sio_ids.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index c4777bc6aee0..3fc789701e45 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -140,12 +140,12 @@
 /*
  * Xsens Technologies BV products (http://www.xsens.com).
  */
-#define XSENS_CONVERTER_0_PID	0xD388
-#define XSENS_CONVERTER_1_PID	0xD389
+#define XSENS_CONVERTER_0_PID	0xD388	/* Xsens USB converter */
+#define XSENS_CONVERTER_1_PID	0xD389	/* Xsens Wireless Receiver */
 #define XSENS_CONVERTER_2_PID	0xD38A
-#define XSENS_CONVERTER_3_PID	0xD38B
-#define XSENS_CONVERTER_4_PID	0xD38C
-#define XSENS_CONVERTER_5_PID	0xD38D
+#define XSENS_CONVERTER_3_PID	0xD38B	/* Xsens USB-serial converter */
+#define XSENS_CONVERTER_4_PID	0xD38C	/* Xsens Wireless Receiver */
+#define XSENS_CONVERTER_5_PID	0xD38D	/* Xsens Awinda Station */
 #define XSENS_CONVERTER_6_PID	0xD38E
 #define XSENS_CONVERTER_7_PID	0xD38F
 

From e57bd1dc6328d63bd268b91ab01f742fd6994db2 Mon Sep 17 00:00:00 2001
From: Patrick Riphagen <patrick.riphagen@xsens.com>
Date: Thu, 24 Jul 2014 09:09:50 +0200
Subject: [PATCH 0235/1185] USB: serial: ftdi_sio: Add support for new Xsens
 devices

commit 4bdcde358b4bda74e356841d351945ca3f2245dd upstream.

This adds support for new Xsens devices, using Xsens' own Vendor ID.

Signed-off-by: Patrick Riphagen <patrick.riphagen@xsens.com>
Signed-off-by: Frans Klaver <frans.klaver@xsens.com>
Cc: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/ftdi_sio.c     | 2 ++
 drivers/usb/serial/ftdi_sio_ids.h | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 9e75e3eaea4f..bc27998c3a2d 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -676,6 +676,8 @@ static struct usb_device_id id_table_combined [] = {
 	{ USB_DEVICE(FTDI_VID, XSENS_CONVERTER_5_PID) },
 	{ USB_DEVICE(FTDI_VID, XSENS_CONVERTER_6_PID) },
 	{ USB_DEVICE(FTDI_VID, XSENS_CONVERTER_7_PID) },
+	{ USB_DEVICE(XSENS_VID, XSENS_CONVERTER_PID) },
+	{ USB_DEVICE(XSENS_VID, XSENS_MTW_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_OMNI1509) },
 	{ USB_DEVICE(MOBILITY_VID, MOBILITY_USB_SERIAL_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_ACTIVE_ROBOTS_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 3fc789701e45..1e58d90a0b6c 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -140,6 +140,9 @@
 /*
  * Xsens Technologies BV products (http://www.xsens.com).
  */
+#define XSENS_VID		0x2639
+#define XSENS_CONVERTER_PID	0xD00D	/* Xsens USB-serial converter */
+#define XSENS_MTW_PID		0x0200	/* Xsens MTw */
 #define XSENS_CONVERTER_0_PID	0xD388	/* Xsens USB converter */
 #define XSENS_CONVERTER_1_PID	0xD389	/* Xsens Wireless Receiver */
 #define XSENS_CONVERTER_2_PID	0xD38A

From f788fb41375b8b174551289bfef17680ecf2416b Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@intel.com>
Date: Wed, 2 Jul 2014 01:58:18 -0700
Subject: [PATCH 0236/1185] USB: ehci-pci: USB host controller support for
 Intel Quark X1000

commit 6e693739e9b603b3ca9ce0d4f4178f0633458465 upstream.

The EHCI packet buffer in/out threshold is programmable for Intel Quark X1000
USB host controller, and the default value is 0x20 dwords. The in/out threshold
can be programmed to 0x80 dwords (512 Bytes) to maximize the performance,
but only when isochronous/interrupt transactions are not initiated by the USB
host controller. This patch is to reconfigure the packet buffer in/out
threshold as maximal as possible to maximize the performance, and 0x7F dwords
(508 Bytes) should be used because the USB host controller initiates
isochronous/interrupt transactions.

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@intel.com>
Signed-off-by: Alvin (Weike) Chen <alvin.chen@intel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Reviewed-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ehci-pci.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c
index 8fe401c7d152..fe131565d090 100644
--- a/drivers/usb/host/ehci-pci.c
+++ b/drivers/usb/host/ehci-pci.c
@@ -35,6 +35,21 @@ static const char hcd_name[] = "ehci-pci";
 #define PCI_DEVICE_ID_INTEL_CE4100_USB	0x2e70
 
 /*-------------------------------------------------------------------------*/
+#define PCI_DEVICE_ID_INTEL_QUARK_X1000_SOC		0x0939
+static inline bool is_intel_quark_x1000(struct pci_dev *pdev)
+{
+	return pdev->vendor == PCI_VENDOR_ID_INTEL &&
+		pdev->device == PCI_DEVICE_ID_INTEL_QUARK_X1000_SOC;
+}
+
+/*
+ * 0x84 is the offset of in/out threshold register,
+ * and it is the same offset as the register of 'hostpc'.
+ */
+#define	intel_quark_x1000_insnreg01	hostpc
+
+/* Maximum usable threshold value is 0x7f dwords for both IN and OUT */
+#define INTEL_QUARK_X1000_EHCI_MAX_THRESHOLD	0x007f007f
 
 /* called after powerup, by probe or system-pm "wakeup" */
 static int ehci_pci_reinit(struct ehci_hcd *ehci, struct pci_dev *pdev)
@@ -50,6 +65,16 @@ static int ehci_pci_reinit(struct ehci_hcd *ehci, struct pci_dev *pdev)
 	if (!retval)
 		ehci_dbg(ehci, "MWI active\n");
 
+	/* Reset the threshold limit */
+	if (is_intel_quark_x1000(pdev)) {
+		/*
+		 * For the Intel QUARK X1000, raise the I/O threshold to the
+		 * maximum usable value in order to improve performance.
+		 */
+		ehci_writel(ehci, INTEL_QUARK_X1000_EHCI_MAX_THRESHOLD,
+			ehci->regs->intel_quark_x1000_insnreg01);
+	}
+
 	return 0;
 }
 

From eee49a52d540423742ee28afb95a3610c7e954d1 Mon Sep 17 00:00:00 2001
From: Pratyush Anand <pratyush.anand@st.com>
Date: Fri, 18 Jul 2014 12:37:10 +0530
Subject: [PATCH 0237/1185] USB: Fix persist resume of some SS USB devices

commit a40178b2fa6ad87670fb1e5fa4024db00c149629 upstream.

Problem Summary: Problem has been observed generally with PM states
where VBUS goes off during suspend. There are some SS USB devices which
take longer time for link training compared to many others.  Such
devices fail to reconnect with same old address which was associated
with it before suspend.

When system resumes, at some point of time (dpm_run_callback->
usb_dev_resume->usb_resume->usb_resume_both->usb_resume_device->
usb_port_resume) SW reads hub status. If device is present,
then it finishes port resume and re-enumerates device with same
address. If device is not present then, SW thinks that device was
removed during suspend and therefore does logical disconnection
and removes all the resource allocated for this device.

Now, if I put sufficient delay just before root hub status read in
usb_resume_device then, SW sees always that device is present. In normal
course(without any delay) SW sees that no device is present and then SW
removes all resource associated with the device at this port.  In the
latter case, after sometime, device says that hey I am here, now host
enumerates it, but with new address.

Problem had been reproduced when I connect verbatim USB3.0 hard disc
with my STiH407 XHCI host running with 3.10 kernel.

I see that similar problem has been reported here.
https://bugzilla.kernel.org/show_bug.cgi?id=53211
Reading above it seems that bug was not in 3.6.6 and was present in 3.8
and again it was not present for some in 3.12.6, while it was present
for few others. I tested with 3.13-FC19 running at i686 desktop, problem
was still there. However, I failed to reproduce it with 3.16-RC4
running at same i686 machine. I would say it is just a random
observation. Problem for few devices is always there, as I am unable to
find a proper fix for the issue.

So, now question is what should be the amount of delay so that host is
always able to recognize suspended device after resume.

XHCI specs 4.19.4 says that when Link training is successful, port sets
CSC bit to 1. So if SW reads port status before successful link
training, then it will not find device to be present.  USB Analyzer log
with such buggy devices show that in some cases device switch on the
RX termination after long delay of host enabling the VBUS. In few other
cases it has been seen that device fails to negotiate link training in
first attempt. It has been reported till now that few devices take as
long as 2000 ms to train the link after host enabling its VBUS and
RX termination. This patch implements a 2000 ms timeout for CSC bit to set
ie for link training. If in a case link trains before timeout, loop will
exit earlier.

This patch implements above delay, but only for SS device and when
persist is enabled.

So, for a good device the overhead is almost none, while for a bad
device the penalty could be the time it takes for link training.
But if a device was connected before suspend and was removed
while the system was asleep, then the penalty would be the timeout,
i.e. 2000 ms.

Results:

Verbatim USB SS hard disk connected with STiH407 USB host running 3.10
Kernel resumes in 461 msecs without this patch, but hard disk is
assigned a new device address. Same system resumes in 790 msecs with
this patch, but with old device address.

Signed-off-by: Pratyush Anand <pratyush.anand@st.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 63c217053668..7e90d146d7dd 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3165,6 +3165,43 @@ static int finish_port_resume(struct usb_device *udev)
 	return status;
 }
 
+/*
+ * There are some SS USB devices which take longer time for link training.
+ * XHCI specs 4.19.4 says that when Link training is successful, port
+ * sets CSC bit to 1. So if SW reads port status before successful link
+ * training, then it will not find device to be present.
+ * USB Analyzer log with such buggy devices show that in some cases
+ * device switch on the RX termination after long delay of host enabling
+ * the VBUS. In few other cases it has been seen that device fails to
+ * negotiate link training in first attempt. It has been
+ * reported till now that few devices take as long as 2000 ms to train
+ * the link after host enabling its VBUS and termination. Following
+ * routine implements a 2000 ms timeout for link training. If in a case
+ * link trains before timeout, loop will exit earlier.
+ *
+ * FIXME: If a device was connected before suspend, but was removed
+ * while system was asleep, then the loop in the following routine will
+ * only exit at timeout.
+ *
+ * This routine should only be called when persist is enabled for a SS
+ * device.
+ */
+static int wait_for_ss_port_enable(struct usb_device *udev,
+		struct usb_hub *hub, int *port1,
+		u16 *portchange, u16 *portstatus)
+{
+	int status = 0, delay_ms = 0;
+
+	while (delay_ms < 2000) {
+		if (status || *portstatus & USB_PORT_STAT_CONNECTION)
+			break;
+		msleep(20);
+		delay_ms += 20;
+		status = hub_port_status(hub, *port1, portstatus, portchange);
+	}
+	return status;
+}
+
 /*
  * usb_port_resume - re-activate a suspended usb device's upstream port
  * @udev: device to re-activate, not a root hub
@@ -3267,6 +3304,10 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg)
 
 	clear_bit(port1, hub->busy_bits);
 
+	if (udev->persist_enabled && hub_is_superspeed(hub->hdev))
+		status = wait_for_ss_port_enable(udev, hub, &port1, &portchange,
+				&portstatus);
+
 	status = check_port_resume_type(udev,
 			hub, port1, status, portchange, portstatus);
 	if (status == 0)

From 159902f39c3b38b626a50ce46874b13ea8d68aa9 Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Wed, 30 Jul 2014 11:11:48 +0800
Subject: [PATCH 0238/1185] ALSA: hda - fix an external mic jack problem on a
 HP machine

commit 7440850c20b69658f322119d20a94dc914127cc7 upstream.

On the machine, two pin complexes (0xb and 0xe) are both routed to
the same external right-side mic jack, which prevents the jack from working.

To fix this problem, set the 0xe to "not connected".

BugLink: https://bugs.launchpad.net/bugs/1350148
Tested-by: Franz Hsieh <franz.hsieh@canonical.com>
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_sigmatel.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 0c521b7752b2..132b4c802a47 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -84,6 +84,7 @@ enum {
 	STAC_DELL_EQ,
 	STAC_ALIENWARE_M17X,
 	STAC_92HD89XX_HP_FRONT_JACK,
+	STAC_92HD89XX_HP_Z1_G2_RIGHT_MIC_JACK,
 	STAC_92HD73XX_MODELS
 };
 
@@ -1783,6 +1784,11 @@ static const struct hda_pintbl stac92hd89xx_hp_front_jack_pin_configs[] = {
 	{}
 };
 
+static const struct hda_pintbl stac92hd89xx_hp_z1_g2_right_mic_jack_pin_configs[] = {
+	{ 0x0e, 0x400000f0 },
+	{}
+};
+
 static void stac92hd73xx_fixup_ref(struct hda_codec *codec,
 				   const struct hda_fixup *fix, int action)
 {
@@ -1905,6 +1911,10 @@ static const struct hda_fixup stac92hd73xx_fixups[] = {
 	[STAC_92HD89XX_HP_FRONT_JACK] = {
 		.type = HDA_FIXUP_PINS,
 		.v.pins = stac92hd89xx_hp_front_jack_pin_configs,
+	},
+	[STAC_92HD89XX_HP_Z1_G2_RIGHT_MIC_JACK] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = stac92hd89xx_hp_z1_g2_right_mic_jack_pin_configs,
 	}
 };
 
@@ -1965,6 +1975,8 @@ static const struct snd_pci_quirk stac92hd73xx_fixup_tbl[] = {
 		      "Alienware M17x", STAC_ALIENWARE_M17X),
 	SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0490,
 		      "Alienware M17x R3", STAC_DELL_EQ),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1927,
+				"HP Z1 G2", STAC_92HD89XX_HP_Z1_G2_RIGHT_MIC_JACK),
 	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2b17,
 				"unknown HP", STAC_92HD89XX_HP_FRONT_JACK),
 	{} /* terminator */

From 07a0ed1d0e6b32747e7f4723966c54e2652e3c74 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Mon, 4 Aug 2014 15:17:55 +0200
Subject: [PATCH 0239/1185] ALSA: virtuoso: add Xonar Essence STX II support

commit f42bb22243d2ae264d721b055f836059fe35321f upstream.

Just add the PCI ID for the STX II.  It appears to work the same as the
STX, except for the addition of the not-yet-supported daughterboard.

Tested-by: Mario <fugazzi99@gmail.com>
Tested-by: corubba <corubba@gmx.de>
Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/sound/alsa/ALSA-Configuration.txt |  4 ++--
 sound/pci/Kconfig                               |  4 ++--
 sound/pci/oxygen/virtuoso.c                     |  1 +
 sound/pci/oxygen/xonar_pcm179x.c                | 12 ++++++++++--
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 95731a08f257..8f08b2a71791 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -2026,8 +2026,8 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
   -------------------
 
     Module for sound cards based on the Asus AV66/AV100/AV200 chips,
-    i.e., Xonar D1, DX, D2, D2X, DS, Essence ST (Deluxe), Essence STX,
-    HDAV1.3 (Deluxe), and HDAV1.3 Slim.
+    i.e., Xonar D1, DX, D2, D2X, DS, DSX, Essence ST (Deluxe),
+    Essence STX (II), HDAV1.3 (Deluxe), and HDAV1.3 Slim.
 
     This module supports autoprobe and multiple cards.
 
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index daac7c7ebe9e..3397ddbdfc0c 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -856,8 +856,8 @@ config SND_VIRTUOSO
 	select SND_JACK if INPUT=y || INPUT=SND
 	help
 	  Say Y here to include support for sound cards based on the
-	  Asus AV66/AV100/AV200 chips, i.e., Xonar D1, DX, D2, D2X, DS,
-	  Essence ST (Deluxe), and Essence STX.
+	  Asus AV66/AV100/AV200 chips, i.e., Xonar D1, DX, D2, D2X, DS, DSX,
+	  Essence ST (Deluxe), and Essence STX (II).
 	  Support for the HDAV1.3 (Deluxe) and HDAV1.3 Slim is experimental;
 	  for the Xense, missing.
 
diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c
index 64b9fda5f04a..dbbbacfd535e 100644
--- a/sound/pci/oxygen/virtuoso.c
+++ b/sound/pci/oxygen/virtuoso.c
@@ -53,6 +53,7 @@ static DEFINE_PCI_DEVICE_TABLE(xonar_ids) = {
 	{ OXYGEN_PCI_SUBID(0x1043, 0x835e) },
 	{ OXYGEN_PCI_SUBID(0x1043, 0x838e) },
 	{ OXYGEN_PCI_SUBID(0x1043, 0x8522) },
+	{ OXYGEN_PCI_SUBID(0x1043, 0x85f4) },
 	{ OXYGEN_PCI_SUBID_BROKEN_EEPROM },
 	{ }
 };
diff --git a/sound/pci/oxygen/xonar_pcm179x.c b/sound/pci/oxygen/xonar_pcm179x.c
index c8c7f2c9b355..e02605931669 100644
--- a/sound/pci/oxygen/xonar_pcm179x.c
+++ b/sound/pci/oxygen/xonar_pcm179x.c
@@ -100,8 +100,8 @@
  */
 
 /*
- * Xonar Essence ST (Deluxe)/STX
- * -----------------------------
+ * Xonar Essence ST (Deluxe)/STX (II)
+ * ----------------------------------
  *
  * CMI8788:
  *
@@ -1138,6 +1138,14 @@ int get_xonar_pcm179x_model(struct oxygen *chip,
 		chip->model.resume = xonar_stx_resume;
 		chip->model.set_dac_params = set_pcm1796_params;
 		break;
+	case 0x85f4:
+		chip->model = model_xonar_st;
+		/* TODO: daughterboard support */
+		chip->model.shortname = "Xonar STX II";
+		chip->model.init = xonar_stx_init;
+		chip->model.resume = xonar_stx_resume;
+		chip->model.set_dac_params = set_pcm1796_params;
+		break;
 	default:
 		return -EINVAL;
 	}

From 65d6bdd5e4438a64d7806c22ba85bfa5f3a03f89 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 10 Aug 2014 13:30:08 +0200
Subject: [PATCH 0240/1185] ALSA: hda/ca0132 - Don't try loading firmware at
 resume when already failed

commit e24aa0a4c5ac92a171d9dd74a8d3dbf652990d36 upstream.

CA0132 driver tries to reload the firmware at resume.  Usually this
works since the firmware loader core caches the firmware contents by
itself.  However, if the driver failed to load the firmwares
(e.g. missing files), reloading the firmware at resume goes through
the actual file loading code path, and triggers a kernel WARNING like:

 WARNING: CPU: 10 PID:11371 at drivers/base/firmware_class.c:1105 _request_firmware+0x9ab/0x9d0()

To avoid this situation, this patch makes CA0132 skip the f/w
loading at resume when it failed at probe time.

Reported-and-tested-by: Janek Kozicki <cosurgi@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_ca0132.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index 01fefbe29e4a..4126f3d9edb6 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -4379,6 +4379,9 @@ static void ca0132_download_dsp(struct hda_codec *codec)
 	return; /* NOP */
 #endif
 
+	if (spec->dsp_state == DSP_DOWNLOAD_FAILED)
+		return; /* don't retry failures */
+
 	chipio_enable_clocks(codec);
 	spec->dsp_state = DSP_DOWNLOADING;
 	if (!ca0132_download_dsp_images(codec))
@@ -4555,7 +4558,8 @@ static int ca0132_init(struct hda_codec *codec)
 	struct auto_pin_cfg *cfg = &spec->autocfg;
 	int i;
 
-	spec->dsp_state = DSP_DOWNLOAD_INIT;
+	if (spec->dsp_state != DSP_DOWNLOAD_FAILED)
+		spec->dsp_state = DSP_DOWNLOAD_INIT;
 	spec->curr_chip_addx = INVALID_CHIP_ADDRESS;
 
 	snd_hda_power_up(codec);
@@ -4666,6 +4670,7 @@ static int patch_ca0132(struct hda_codec *codec)
 	codec->spec = spec;
 	spec->codec = codec;
 
+	spec->dsp_state = DSP_DOWNLOAD_INIT;
 	spec->num_mixers = 1;
 	spec->mixers[0] = ca0132_mixer;
 

From 8666dec8958fc91913fe398178d26f33c1ea5745 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 15 Aug 2014 17:35:00 +0200
Subject: [PATCH 0241/1185] ALSA: hda/realtek - Avoid setting wrong COEF on
 ALC269 & co

commit f3ee07d8b6e061bf34a7167c3f564e8da4360a99 upstream.

ALC269 & co have many vendor-specific setups with COEF verbs.
However, some verbs seem specific to some codec versions and they
result in the codec stalling.  Typically, such a case can be avoided
by checking the return value from reading a COEF.  If the return value
is -1, it implies that the COEF is invalid, thus it shouldn't be
written.

This patch adds the invalid COEF checks in appropriate places
accessing ALC269 and its variants.  The patch actually fixes the
resume problem on Acer AO725 laptop.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=52181
Tested-by: Francesco Muzio <muziofg@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 0b85e857f1c7..593090e5bd77 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -175,6 +175,8 @@ static void alc_fix_pll(struct hda_codec *codec)
 			    spec->pll_coef_idx);
 	val = snd_hda_codec_read(codec, spec->pll_nid, 0,
 				 AC_VERB_GET_PROC_COEF, 0);
+	if (val == -1)
+		return;
 	snd_hda_codec_write(codec, spec->pll_nid, 0, AC_VERB_SET_COEF_INDEX,
 			    spec->pll_coef_idx);
 	snd_hda_codec_write(codec, spec->pll_nid, 0, AC_VERB_SET_PROC_COEF,
@@ -2677,6 +2679,8 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
 static void alc269vb_toggle_power_output(struct hda_codec *codec, int power_up)
 {
 	int val = alc_read_coef_idx(codec, 0x04);
+	if (val == -1)
+		return;
 	if (power_up)
 		val |= 1 << 11;
 	else
@@ -3822,27 +3826,30 @@ static void alc269_fill_coef(struct hda_codec *codec)
 	if ((alc_get_coef0(codec) & 0x00ff) == 0x017) {
 		val = alc_read_coef_idx(codec, 0x04);
 		/* Power up output pin */
-		alc_write_coef_idx(codec, 0x04, val | (1<<11));
+		if (val != -1)
+			alc_write_coef_idx(codec, 0x04, val | (1<<11));
 	}
 
 	if ((alc_get_coef0(codec) & 0x00ff) == 0x018) {
 		val = alc_read_coef_idx(codec, 0xd);
-		if ((val & 0x0c00) >> 10 != 0x1) {
+		if (val != -1 && (val & 0x0c00) >> 10 != 0x1) {
 			/* Capless ramp up clock control */
 			alc_write_coef_idx(codec, 0xd, val | (1<<10));
 		}
 		val = alc_read_coef_idx(codec, 0x17);
-		if ((val & 0x01c0) >> 6 != 0x4) {
+		if (val != -1 && (val & 0x01c0) >> 6 != 0x4) {
 			/* Class D power on reset */
 			alc_write_coef_idx(codec, 0x17, val | (1<<7));
 		}
 	}
 
 	val = alc_read_coef_idx(codec, 0xd); /* Class D */
-	alc_write_coef_idx(codec, 0xd, val | (1<<14));
+	if (val != -1)
+		alc_write_coef_idx(codec, 0xd, val | (1<<14));
 
 	val = alc_read_coef_idx(codec, 0x4); /* HP */
-	alc_write_coef_idx(codec, 0x4, val | (1<<11));
+	if (val != -1)
+		alc_write_coef_idx(codec, 0x4, val | (1<<11));
 }
 
 /*

From 2cffa7238a408b7ff5ce9a4352485ff035fb7b19 Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Thu, 17 Jul 2014 10:53:35 +0300
Subject: [PATCH 0242/1185] mei: start disconnect request timer consistently

commit 22b987a325701223f9a37db700c6eb20b9924c6f upstream.

The link must be reset in case the fw doesn't
respond to a client disconnect request.
Previously the timer was armed only in the irq path,
from mei_cl_irq_close, and not in mei_cl_disconnect.

Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/client.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index 0bb2aa2c6fb0..da2385bb3536 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -405,6 +405,7 @@ int mei_cl_disconnect(struct mei_cl *cl)
 			dev_err(&dev->pdev->dev, "failed to disconnect.\n");
 			goto free;
 		}
+		cl->timer_count = MEI_CONNECT_TIMEOUT;
 		mdelay(10); /* Wait for hardware disconnection ready */
 		list_add_tail(&cb->list, &dev->ctrl_rd_list.list);
 	} else {

From 324b23e38db599cd1eb610fb92b0142d5c0be4a8 Mon Sep 17 00:00:00 2001
From: Jeremy Vial <jvial@adeneo-embedded.com>
Date: Thu, 31 Jul 2014 15:10:33 +0200
Subject: [PATCH 0243/1185] ARM: OMAP3: Fix choice of omap3_restore_es function
 in OMAP34XX rev3.1.2 case.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 9b5f7428f8b16bd8980213f2b70baf1dd0b9e36c upstream.

According to the comment "restore_es3: applies to 34xx >= ES3.0" in
"arch/arm/mach-omap2/sleep34xx.S", omap3_restore_es3 should be used
if the revision of an OMAP34xx is ES3.1.2.

Signed-off-by: Jeremy Vial <jvial@adeneo-embedded.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/control.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/control.c b/arch/arm/mach-omap2/control.c
index 2adb2683f074..6124da1a07d4 100644
--- a/arch/arm/mach-omap2/control.c
+++ b/arch/arm/mach-omap2/control.c
@@ -323,7 +323,8 @@ void omap3_save_scratchpad_contents(void)
 		scratchpad_contents.public_restore_ptr =
 			virt_to_phys(omap3_restore_3630);
 	else if (omap_rev() != OMAP3430_REV_ES3_0 &&
-					omap_rev() != OMAP3430_REV_ES3_1)
+					omap_rev() != OMAP3430_REV_ES3_1 &&
+					omap_rev() != OMAP3430_REV_ES3_1_2)
 		scratchpad_contents.public_restore_ptr =
 			virt_to_phys(omap3_restore);
 	else

From 61d2b2bea78efae53eee7d47c20a0637a1099c26 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 12 Jul 2014 10:53:41 +0100
Subject: [PATCH 0244/1185] drm: omapdrm: fix compiler errors

commit 2d31ca3ad7d5d44c8adc7f253c96ce33f3a2e931 upstream.

Regular randconfig nightly testing has detected problems with omapdrm.

omapdrm fails to build when the kernel is built to support 64-bit DMA
addresses and/or 64-bit physical addresses due to an assumption about
the width of these types.

Use %pad to print DMA addresses, rather than %x or %Zx (which is even
more wrong than %x).  Avoid passing a uint32_t pointer into a function
which expects dma_addr_t pointer.

drivers/gpu/drm/omapdrm/omap_plane.c: In function 'omap_plane_pre_apply':
drivers/gpu/drm/omapdrm/omap_plane.c:145:2: error: format '%x' expects argument of type 'unsigned int', but argument 5 has type 'dma_addr_t' [-Werror=format]
drivers/gpu/drm/omapdrm/omap_plane.c:145:2: error: format '%x' expects argument of type 'unsigned int', but argument 6 has type 'dma_addr_t' [-Werror=format]
make[5]: *** [drivers/gpu/drm/omapdrm/omap_plane.o] Error 1
drivers/gpu/drm/omapdrm/omap_gem.c: In function 'omap_gem_get_paddr':
drivers/gpu/drm/omapdrm/omap_gem.c:794:4: error: format '%x' expects argument of type 'unsigned int', but argument 3 has type 'dma_addr_t' [-Werror=format]
drivers/gpu/drm/omapdrm/omap_gem.c: In function 'omap_gem_describe':
drivers/gpu/drm/omapdrm/omap_gem.c:991:4: error: format '%Zx' expects argument of type 'size_t', but argument 7 has type 'dma_addr_t' [-Werror=format]
drivers/gpu/drm/omapdrm/omap_gem.c: In function 'omap_gem_init':
drivers/gpu/drm/omapdrm/omap_gem.c:1470:4: error: format '%x' expects argument of type 'unsigned int', but argument 7 has type 'dma_addr_t' [-Werror=format]
make[5]: *** [drivers/gpu/drm/omapdrm/omap_gem.o] Error 1
drivers/gpu/drm/omapdrm/omap_dmm_tiler.c: In function 'dmm_txn_append':
drivers/gpu/drm/omapdrm/omap_dmm_tiler.c:226:2: error: passing argument 3 of 'alloc_dma' from incompatible pointer type [-Werror]
make[5]: *** [drivers/gpu/drm/omapdrm/omap_dmm_tiler.o] Error 1
make[5]: Target `__build' not remade because of errors.
make[4]: *** [drivers/gpu/drm/omapdrm] Error 2

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/omapdrm/omap_dmm_tiler.c |  6 ++++--
 drivers/gpu/drm/omapdrm/omap_gem.c       | 10 +++++-----
 drivers/gpu/drm/omapdrm/omap_plane.c     |  4 ++--
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
index 9b794c933c81..b5df614660a8 100644
--- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
+++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
@@ -199,7 +199,7 @@ static struct dmm_txn *dmm_txn_init(struct dmm *dmm, struct tcm *tcm)
 static void dmm_txn_append(struct dmm_txn *txn, struct pat_area *area,
 		struct page **pages, uint32_t npages, uint32_t roll)
 {
-	dma_addr_t pat_pa = 0;
+	dma_addr_t pat_pa = 0, data_pa = 0;
 	uint32_t *data;
 	struct pat *pat;
 	struct refill_engine *engine = txn->engine_handle;
@@ -223,7 +223,9 @@ static void dmm_txn_append(struct dmm_txn *txn, struct pat_area *area,
 			.lut_id = engine->tcm->lut_id,
 		};
 
-	data = alloc_dma(txn, 4*i, &pat->data_pa);
+	data = alloc_dma(txn, 4*i, &data_pa);
+	/* FIXME: what if data_pa is more than 32-bit ? */
+	pat->data_pa = data_pa;
 
 	while (i--) {
 		int n = i + roll;
diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c
index ebbdf4132e9c..2272c66f1842 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem.c
@@ -806,7 +806,7 @@ int omap_gem_get_paddr(struct drm_gem_object *obj,
 			omap_obj->paddr = tiler_ssptr(block);
 			omap_obj->block = block;
 
-			DBG("got paddr: %08x", omap_obj->paddr);
+			DBG("got paddr: %pad", &omap_obj->paddr);
 		}
 
 		omap_obj->paddr_cnt++;
@@ -1004,9 +1004,9 @@ void omap_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
 	if (obj->map_list.map)
 		off = (uint64_t)obj->map_list.hash.key;
 
-	seq_printf(m, "%08x: %2d (%2d) %08llx %08Zx (%2d) %p %4d",
+	seq_printf(m, "%08x: %2d (%2d) %08llx %pad (%2d) %p %4d",
 			omap_obj->flags, obj->name, obj->refcount.refcount.counter,
-			off, omap_obj->paddr, omap_obj->paddr_cnt,
+			off, &omap_obj->paddr, omap_obj->paddr_cnt,
 			omap_obj->vaddr, omap_obj->roll);
 
 	if (omap_obj->flags & OMAP_BO_TILED) {
@@ -1489,8 +1489,8 @@ void omap_gem_init(struct drm_device *dev)
 			entry->paddr = tiler_ssptr(block);
 			entry->block = block;
 
-			DBG("%d:%d: %dx%d: paddr=%08x stride=%d", i, j, w, h,
-					entry->paddr,
+			DBG("%d:%d: %dx%d: paddr=%pad stride=%d", i, j, w, h,
+					&entry->paddr,
 					usergart[i].stride_pfn << PAGE_SHIFT);
 		}
 	}
diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c
index 8d225d7ff4e3..6d01c2ad8428 100644
--- a/drivers/gpu/drm/omapdrm/omap_plane.c
+++ b/drivers/gpu/drm/omapdrm/omap_plane.c
@@ -146,8 +146,8 @@ static void omap_plane_pre_apply(struct omap_drm_apply *apply)
 	DBG("%dx%d -> %dx%d (%d)", info->width, info->height,
 			info->out_width, info->out_height,
 			info->screen_width);
-	DBG("%d,%d %08x %08x", info->pos_x, info->pos_y,
-			info->paddr, info->p_uv_addr);
+	DBG("%d,%d %pad %pad", info->pos_x, info->pos_y,
+			&info->paddr, &info->p_uv_addr);
 
 	/* TODO: */
 	ilace = false;

From 5fafb69d9854a1b38ff0fe1b0058544b94702871 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Thu, 31 Jul 2014 22:27:04 +0800
Subject: [PATCH 0245/1185] hwmon: (sis5595) Prevent overflow problem when
 writing large limits

commit cc336546ddca8c22de83720632431c16a5f9fe9a upstream.

On platforms with sizeof(int) < sizeof(long), writing a temperature
limit larger than MAXINT will result in unpredictable limit values
written to the chip. Avoid auto-conversion from long to int to fix
the problem.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/sis5595.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/sis5595.c b/drivers/hwmon/sis5595.c
index 72a889702f0d..9ec7d2e2542c 100644
--- a/drivers/hwmon/sis5595.c
+++ b/drivers/hwmon/sis5595.c
@@ -159,7 +159,7 @@ static inline int TEMP_FROM_REG(s8 val)
 {
 	return val * 830 + 52120;
 }
-static inline s8 TEMP_TO_REG(int val)
+static inline s8 TEMP_TO_REG(long val)
 {
 	int nval = clamp_val(val, -54120, 157530) ;
 	return nval < 0 ? (nval - 5212 - 415) / 830 : (nval - 5212 + 415) / 830;

From 070d6526cc0b0402c44868b31957d39b5d40cdd2 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 29 Jul 2014 20:48:59 -0700
Subject: [PATCH 0246/1185] hwmon: (lm78) Fix overflow problems seen when
 writing large temperature limits

commit 1074d683a51f1aded3562add9ef313e75d557327 upstream.

On platforms with sizeof(int) < sizeof(long), writing a temperature
limit larger than MAXINT will result in unpredictable limit values
written to the chip. Avoid auto-conversion from long to int to fix
the problem.

Cc: Axel Lin <axel.lin@ingics.com>
Reviewed-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/lm78.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/lm78.c b/drivers/hwmon/lm78.c
index a2f3b4a365e4..b879427e9a46 100644
--- a/drivers/hwmon/lm78.c
+++ b/drivers/hwmon/lm78.c
@@ -108,7 +108,7 @@ static inline int FAN_FROM_REG(u8 val, int div)
  * TEMP: mC (-128C to +127C)
  * REG: 1C/bit, two's complement
  */
-static inline s8 TEMP_TO_REG(int val)
+static inline s8 TEMP_TO_REG(long val)
 {
 	int nval = clamp_val(val, -128000, 127000) ;
 	return nval < 0 ? (nval - 500) / 1000 : (nval + 500) / 1000;

From 6dbbe154751f97308e0cd029f0b15f7685897ecf Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sat, 2 Aug 2014 13:36:38 +0800
Subject: [PATCH 0247/1185] hwmon: (gpio-fan) Prevent overflow problem when
 writing large limits

commit 2565fb05d1e9fc0831f7b1c083bcfcb1cba1f020 upstream.

On platforms with sizeof(int) < sizeof(unsigned long), writing a rpm value
larger than MAXINT will result in unpredictable limit values written to the
chip. Avoid auto-conversion from unsigned long to int to fix the problem.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/gpio-fan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/gpio-fan.c b/drivers/hwmon/gpio-fan.c
index 3104149795c5..ce1d82762ba6 100644
--- a/drivers/hwmon/gpio-fan.c
+++ b/drivers/hwmon/gpio-fan.c
@@ -172,7 +172,7 @@ static int get_fan_speed_index(struct gpio_fan_data *fan_data)
 	return -EINVAL;
 }
 
-static int rpm_to_speed_index(struct gpio_fan_data *fan_data, int rpm)
+static int rpm_to_speed_index(struct gpio_fan_data *fan_data, unsigned long rpm)
 {
 	struct gpio_fan_speed *speed = fan_data->speed;
 	int i;

From 06f770aa658cef847c8022bdff436885bde3f7bf Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Wed, 30 Jul 2014 11:13:52 +0800
Subject: [PATCH 0248/1185] hwmon: (ads1015) Fix off-by-one for valid channel
 index checking

commit 56de1377ad92f72ee4e5cb0faf7a9b6048fdf0bf upstream.

Current code uses channel as array index, so the valid channel value is
0 .. ADS1015_CHANNELS - 1.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/ads1015.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c
index 2798246ad814..c581602e1e07 100644
--- a/drivers/hwmon/ads1015.c
+++ b/drivers/hwmon/ads1015.c
@@ -184,7 +184,7 @@ static int ads1015_get_channels_config_of(struct i2c_client *client)
 		}
 
 		channel = be32_to_cpup(property);
-		if (channel > ADS1015_CHANNELS) {
+		if (channel >= ADS1015_CHANNELS) {
 			dev_err(&client->dev,
 				"invalid channel index %d on %s\n",
 				channel, node->full_name);

From 53f281f2b4288f7b5ebfff3d76629e96bdd07f28 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 29 Jul 2014 22:23:12 -0700
Subject: [PATCH 0249/1185] hwmon: (lm85) Fix various errors on attribute
 writes

commit 3248c3b771ddd9d31695da17ba350eb6e1b80a53 upstream.

Temperature limit register writes did not account for negative numbers.
As a result, writing -127000 resulted in -126000 written into the
temperature limit register. This problem affected temp[1-3]_min,
temp[1-3]_max, temp[1-3]_auto_temp_crit, and temp[1-3]_auto_temp_min.

When writing pwm[1-3]_freq, a long variable was auto-converted into an int
without range check. Writing values larger than MAXINT resulted in unexpected
register values.

When writing temp[1-3]_auto_temp_max, an unsigned long variable was
auto-converted into an int without range check. Writing values larger than
MAXINT resulted in unexpected register values.

vrm is an u8, so the written value needs to be limited to [0, 255].

Cc: Axel Lin <axel.lin@ingics.com>
Reviewed-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/lm85.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index 3894c408fda3..b9d6e7d0ba37 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -158,7 +158,7 @@ static inline u16 FAN_TO_REG(unsigned long val)
 
 /* Temperature is reported in .001 degC increments */
 #define TEMP_TO_REG(val)	\
-		clamp_val(SCALE(val, 1000, 1), -127, 127)
+		DIV_ROUND_CLOSEST(clamp_val((val), -127000, 127000), 1000)
 #define TEMPEXT_FROM_REG(val, ext)	\
 		SCALE(((val) << 4) + (ext), 16, 1000)
 #define TEMP_FROM_REG(val)	((val) * 1000)
@@ -192,7 +192,7 @@ static const int lm85_range_map[] = {
 	13300, 16000, 20000, 26600, 32000, 40000, 53300, 80000
 };
 
-static int RANGE_TO_REG(int range)
+static int RANGE_TO_REG(long range)
 {
 	int i;
 
@@ -214,7 +214,7 @@ static const int adm1027_freq_map[8] = { /* 1 Hz */
 	11, 15, 22, 29, 35, 44, 59, 88
 };
 
-static int FREQ_TO_REG(const int *map, int freq)
+static int FREQ_TO_REG(const int *map, unsigned long freq)
 {
 	int i;
 
@@ -463,6 +463,9 @@ static ssize_t store_vrm_reg(struct device *dev, struct device_attribute *attr,
 	if (err)
 		return err;
 
+	if (val > 255)
+		return -EINVAL;
+
 	data->vrm = val;
 	return count;
 }

From f93978fdb50ffde874bd40d012e54ce0ad275ff3 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Tue, 5 Aug 2014 09:59:49 +0800
Subject: [PATCH 0250/1185] hwmon: (ads1015) Fix out-of-bounds array access

commit e981429557cbe10c780fab1c1a237cb832757652 upstream.

Current code uses data_rate as array index in ads1015_read_adc() and uses pga
as array index in ads1015_reg_to_mv, so we must make sure both data_rate and
pga settings are in valid value range.
Return -EINVAL if the setting is out-of-range.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/ads1015.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c
index c581602e1e07..3930a7e7a56d 100644
--- a/drivers/hwmon/ads1015.c
+++ b/drivers/hwmon/ads1015.c
@@ -198,6 +198,7 @@ static int ads1015_get_channels_config_of(struct i2c_client *client)
 				dev_err(&client->dev,
 					"invalid gain on %s\n",
 					node->full_name);
+				return -EINVAL;
 			}
 		}
 
@@ -208,6 +209,7 @@ static int ads1015_get_channels_config_of(struct i2c_client *client)
 				dev_err(&client->dev,
 					"invalid data_rate on %s\n",
 					node->full_name);
+				return -EINVAL;
 			}
 		}
 

From 30b72362ba1daff9c7f3b7c19a9c93ba03c4172d Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Wed, 6 Aug 2014 08:02:44 +0800
Subject: [PATCH 0251/1185] hwmon: (dme1737) Prevent overflow problem when
 writing large limits

commit d58e47d787c09fe5c61af3c6ce7d784762f29c3d upstream.

On platforms with sizeof(int) < sizeof(long), writing a temperature
limit larger than MAXINT will result in unpredictable limit values
written to the chip. Avoid auto-conversion from long to int to fix
the problem.

Voltage limits, fan minimum speed, pwm frequency, pwm ramp rate, and
other attributes have the same problem, fix them as well.

Zone temperature limits are signed, but were cached as u8, causing
unexpected values to be reported for negative temperatures. Cache as
s8 to fix the problem.

vrm is an u8, so the written value needs to be limited to [0, 255].

Signed-off-by: Axel Lin <axel.lin@ingics.com>
[Guenter Roeck: Fix zone temperature cache]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/dme1737.c | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/drivers/hwmon/dme1737.c b/drivers/hwmon/dme1737.c
index 4ae3fff13f44..bea0a344fab5 100644
--- a/drivers/hwmon/dme1737.c
+++ b/drivers/hwmon/dme1737.c
@@ -247,8 +247,8 @@ struct dme1737_data {
 	u8  pwm_acz[3];
 	u8  pwm_freq[6];
 	u8  pwm_rr[2];
-	u8  zone_low[3];
-	u8  zone_abs[3];
+	s8  zone_low[3];
+	s8  zone_abs[3];
 	u8  zone_hyst[2];
 	u32 alarms;
 };
@@ -277,7 +277,7 @@ static inline int IN_FROM_REG(int reg, int nominal, int res)
 	return (reg * nominal + (3 << (res - 3))) / (3 << (res - 2));
 }
 
-static inline int IN_TO_REG(int val, int nominal)
+static inline int IN_TO_REG(long val, int nominal)
 {
 	return clamp_val((val * 192 + nominal / 2) / nominal, 0, 255);
 }
@@ -293,7 +293,7 @@ static inline int TEMP_FROM_REG(int reg, int res)
 	return (reg * 1000) >> (res - 8);
 }
 
-static inline int TEMP_TO_REG(int val)
+static inline int TEMP_TO_REG(long val)
 {
 	return clamp_val((val < 0 ? val - 500 : val + 500) / 1000, -128, 127);
 }
@@ -308,7 +308,7 @@ static inline int TEMP_RANGE_FROM_REG(int reg)
 	return TEMP_RANGE[(reg >> 4) & 0x0f];
 }
 
-static int TEMP_RANGE_TO_REG(int val, int reg)
+static int TEMP_RANGE_TO_REG(long val, int reg)
 {
 	int i;
 
@@ -331,7 +331,7 @@ static inline int TEMP_HYST_FROM_REG(int reg, int ix)
 	return (((ix == 1) ? reg : reg >> 4) & 0x0f) * 1000;
 }
 
-static inline int TEMP_HYST_TO_REG(int val, int ix, int reg)
+static inline int TEMP_HYST_TO_REG(long val, int ix, int reg)
 {
 	int hyst = clamp_val((val + 500) / 1000, 0, 15);
 
@@ -347,7 +347,7 @@ static inline int FAN_FROM_REG(int reg, int tpc)
 		return (reg == 0 || reg == 0xffff) ? 0 : 90000 * 60 / reg;
 }
 
-static inline int FAN_TO_REG(int val, int tpc)
+static inline int FAN_TO_REG(long val, int tpc)
 {
 	if (tpc) {
 		return clamp_val(val / tpc, 0, 0xffff);
@@ -379,7 +379,7 @@ static inline int FAN_TYPE_FROM_REG(int reg)
 	return (edge > 0) ? 1 << (edge - 1) : 0;
 }
 
-static inline int FAN_TYPE_TO_REG(int val, int reg)
+static inline int FAN_TYPE_TO_REG(long val, int reg)
 {
 	int edge = (val == 4) ? 3 : val;
 
@@ -402,7 +402,7 @@ static int FAN_MAX_FROM_REG(int reg)
 	return 1000 + i * 500;
 }
 
-static int FAN_MAX_TO_REG(int val)
+static int FAN_MAX_TO_REG(long val)
 {
 	int i;
 
@@ -460,7 +460,7 @@ static inline int PWM_ACZ_FROM_REG(int reg)
 	return acz[(reg >> 5) & 0x07];
 }
 
-static inline int PWM_ACZ_TO_REG(int val, int reg)
+static inline int PWM_ACZ_TO_REG(long val, int reg)
 {
 	int acz = (val == 4) ? 2 : val - 1;
 
@@ -476,7 +476,7 @@ static inline int PWM_FREQ_FROM_REG(int reg)
 	return PWM_FREQ[reg & 0x0f];
 }
 
-static int PWM_FREQ_TO_REG(int val, int reg)
+static int PWM_FREQ_TO_REG(long val, int reg)
 {
 	int i;
 
@@ -510,7 +510,7 @@ static inline int PWM_RR_FROM_REG(int reg, int ix)
 	return (rr & 0x08) ? PWM_RR[rr & 0x07] : 0;
 }
 
-static int PWM_RR_TO_REG(int val, int ix, int reg)
+static int PWM_RR_TO_REG(long val, int ix, int reg)
 {
 	int i;
 
@@ -528,7 +528,7 @@ static inline int PWM_RR_EN_FROM_REG(int reg, int ix)
 	return PWM_RR_FROM_REG(reg, ix) ? 1 : 0;
 }
 
-static inline int PWM_RR_EN_TO_REG(int val, int ix, int reg)
+static inline int PWM_RR_EN_TO_REG(long val, int ix, int reg)
 {
 	int en = (ix == 1) ? 0x80 : 0x08;
 
@@ -1481,13 +1481,16 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
 		       const char *buf, size_t count)
 {
 	struct dme1737_data *data = dev_get_drvdata(dev);
-	long val;
+	unsigned long val;
 	int err;
 
-	err = kstrtol(buf, 10, &val);
+	err = kstrtoul(buf, 10, &val);
 	if (err)
 		return err;
 
+	if (val > 255)
+		return -EINVAL;
+
 	data->vrm = val;
 	return count;
 }

From b3a80775fa94e2896ecbd6e591bb3c5ff79003c4 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Mon, 21 Jul 2014 11:42:03 +0200
Subject: [PATCH 0252/1185] drivers/i2c/busses: use correct type for
 dma_map/unmap

commit 28772ac8711e4d7268c06e765887dd8cb6924f98 upstream.

dma_{un}map_* uses 'enum dma_data_direction' not 'enum dma_transfer_direction'.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Ludovic Desroches <ludovic.desroches@atmel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-at91.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c
index 6bb839b688be..f35ea236497b 100644
--- a/drivers/i2c/busses/i2c-at91.c
+++ b/drivers/i2c/busses/i2c-at91.c
@@ -211,7 +211,7 @@ static void at91_twi_write_data_dma_callback(void *data)
 	struct at91_twi_dev *dev = (struct at91_twi_dev *)data;
 
 	dma_unmap_single(dev->dev, sg_dma_address(&dev->dma.sg),
-			 dev->buf_len, DMA_MEM_TO_DEV);
+			 dev->buf_len, DMA_TO_DEVICE);
 
 	at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
 }
@@ -290,7 +290,7 @@ static void at91_twi_read_data_dma_callback(void *data)
 	struct at91_twi_dev *dev = (struct at91_twi_dev *)data;
 
 	dma_unmap_single(dev->dev, sg_dma_address(&dev->dma.sg),
-			 dev->buf_len, DMA_DEV_TO_MEM);
+			 dev->buf_len, DMA_FROM_DEVICE);
 
 	/* The last two bytes have to be read without using dma */
 	dev->buf += dev->buf_len - 2;

From 0ec5ac16b3d5b70cd0b34249addea6fb104e4305 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 30 Jul 2014 22:17:17 -0400
Subject: [PATCH 0253/1185] ext4: fix ext4_discard_allocated_blocks() if we
 can't allocate the pa struct

commit 86f0afd463215fc3e58020493482faa4ac3a4d69 upstream.

If there is a failure while allocating the preallocation structure, a
number of blocks can end up getting marked in the in-memory buddy
bitmap, and then not getting released.  This can result in the
following corruption getting reported by the kernel:

EXT4-fs error (device sda3): ext4_mb_generate_buddy:758: group 1126,
12793 clusters in bitmap, 12729 in gd

In that case, we need to release the blocks using mb_free_blocks().

Tested: fs smoke test; also demonstrated that with injected errors,
	the file system is no longer getting corrupted

Google-Bug-Id: 16657874

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/mballoc.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 16bb6591561b..35220866fbbf 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3177,8 +3177,27 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
 static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
 {
 	struct ext4_prealloc_space *pa = ac->ac_pa;
+	struct ext4_buddy e4b;
+	int err;
 
-	if (pa && pa->pa_type == MB_INODE_PA)
+	if (pa == NULL) {
+		err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
+		if (err) {
+			/*
+			 * This should never happen since we pin the
+			 * pages in the ext4_allocation_context so
+			 * ext4_mb_load_buddy() should never fail.
+			 */
+			WARN(1, "mb_load_buddy failed (%d)", err);
+			return;
+		}
+		ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
+		mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
+			       ac->ac_f_ex.fe_len);
+		ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
+		return;
+	}
+	if (pa->pa_type == MB_INODE_PA)
 		pa->pa_free += ac->ac_b_ex.fe_len;
 }
 

From fca04198d5b8e48a535df52ada1b6b384474cacc Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Wed, 9 Jul 2014 09:21:14 -0400
Subject: [PATCH 0254/1185] serial: core: Preserve termios c_cflag for console
 resume

commit ae84db9661cafc63d179e1d985a2c5b841ff0ac4 upstream.

When a tty is opened for the serial console, the termios c_cflag
settings are inherited from the console line settings.
However, if the tty is subsequently closed, the termios settings
are lost. This results in a garbled console if the console is later
suspended and resumed.

Preserve the termios c_cflag for the serial console when the tty
is shutdown; this reflects the most recent line settings.

Fixes: Bugzilla #69751, 'serial console does not wake from S3'
Reported-by: Valerio Vanni <valerio.vanni@inwind.it>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/serial_core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index f87dbfd32770..0f1cc2c8c22a 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -241,6 +241,9 @@ static void uart_shutdown(struct tty_struct *tty, struct uart_state *state)
 		/*
 		 * Turn off DTR and RTS early.
 		 */
+		if (uart_console(uport) && tty)
+			uport->cons->cflag = tty->termios.c_cflag;
+
 		if (!tty || (tty->termios.c_cflag & HUPCL))
 			uart_clear_mctrl(uport, TIOCM_DTR | TIOCM_RTS);
 

From 68344064b7c978712e32448d03935ad1ac6d75cd Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 26 Jun 2014 13:43:02 +0200
Subject: [PATCH 0255/1185] crypto: ux500 - make interrupt mode plausible

commit e1f8859ee265fc89bd21b4dca79e8e983a044892 upstream.

The interrupt handler in the ux500 crypto driver has an obviously
incorrect way to access the data buffer, which for a while has
caused this build warning:

../ux500/cryp/cryp_core.c: In function 'cryp_interrupt_handler':
../ux500/cryp/cryp_core.c:234:5: warning: passing argument 1 of '__fswab32' makes integer from pointer without a cast [enabled by default]
     writel_relaxed(ctx->indata,
     ^
In file included from ../include/linux/swab.h:4:0,
                 from ../include/uapi/linux/byteorder/big_endian.h:12,
                 from ../include/linux/byteorder/big_endian.h:4,
                 from ../arch/arm/include/uapi/asm/byteorder.h:19,
                 from ../include/asm-generic/bitops/le.h:5,
                 from ../arch/arm/include/asm/bitops.h:340,
                 from ../include/linux/bitops.h:33,
                 from ../include/linux/kernel.h:10,
                 from ../include/linux/clk.h:16,
                 from ../drivers/crypto/ux500/cryp/cryp_core.c:12:
../include/uapi/linux/swab.h:57:119: note: expected '__u32' but argument is of type 'const u8 *'
 static inline __attribute_const__ __u32 __fswab32(__u32 val)

There are at least two, possibly three problems here:
a) when writing into the FIFO, we copy the pointer rather than the
   actual data we want to give to the hardware
b) the data pointer is an array of 8-bit values, while the FIFO
   is 32-bit wide, so both the read and write access fail to do
   a proper type conversion
c) This seems incorrect for big-endian kernels, on which we need to
   byte-swap any register access, but not normally FIFO accesses,
   at least the DMA case doesn't do it either.

This converts the bogus loop to use the same readsl/writesl pair
that we use for the two other modes (DMA and polling). This is
more efficient and consistent, and probably correct for endianness.

The bug has existed since the driver was first merged, and was
probably never detected because nobody tried to use interrupt mode.
It might make sense to backport this fix to stable kernels, depending
on how the crypto maintainers feel about that.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: linux-crypto@vger.kernel.org
Cc: Fabio Baltieri <fabio.baltieri@linaro.org>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/ux500/cryp/cryp_core.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c
index 32f480622b97..3833bd71cc5d 100644
--- a/drivers/crypto/ux500/cryp/cryp_core.c
+++ b/drivers/crypto/ux500/cryp/cryp_core.c
@@ -190,7 +190,7 @@ static void add_session_id(struct cryp_ctx *ctx)
 static irqreturn_t cryp_interrupt_handler(int irq, void *param)
 {
 	struct cryp_ctx *ctx;
-	int i;
+	int count;
 	struct cryp_device_data *device_data;
 
 	if (param == NULL) {
@@ -215,12 +215,11 @@ static irqreturn_t cryp_interrupt_handler(int irq, void *param)
 	if (cryp_pending_irq_src(device_data,
 				 CRYP_IRQ_SRC_OUTPUT_FIFO)) {
 		if (ctx->outlen / ctx->blocksize > 0) {
-			for (i = 0; i < ctx->blocksize / 4; i++) {
-				*(ctx->outdata) = readl_relaxed(
-						&device_data->base->dout);
-				ctx->outdata += 4;
-				ctx->outlen -= 4;
-			}
+			count = ctx->blocksize / 4;
+
+			readsl(&device_data->base->dout, ctx->outdata, count);
+			ctx->outdata += count;
+			ctx->outlen -= count;
 
 			if (ctx->outlen == 0) {
 				cryp_disable_irq_src(device_data,
@@ -230,12 +229,12 @@ static irqreturn_t cryp_interrupt_handler(int irq, void *param)
 	} else if (cryp_pending_irq_src(device_data,
 					CRYP_IRQ_SRC_INPUT_FIFO)) {
 		if (ctx->datalen / ctx->blocksize > 0) {
-			for (i = 0 ; i < ctx->blocksize / 4; i++) {
-				writel_relaxed(ctx->indata,
-						&device_data->base->din);
-				ctx->indata += 4;
-				ctx->datalen -= 4;
-			}
+			count = ctx->blocksize / 4;
+
+			writesl(&device_data->base->din, ctx->indata, count);
+
+			ctx->indata += count;
+			ctx->datalen -= count;
 
 			if (ctx->datalen == 0)
 				cryp_disable_irq_src(device_data,

From 8277c1d67e4c273858e5a423a9d091074056cc06 Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@cs.technion.ac.il>
Date: Sun, 15 Jun 2014 16:12:59 +0300
Subject: [PATCH 0256/1185] KVM: x86: Inter-privilege level ret emulation is
 not implemented

commit 9e8919ae793f4edfaa29694a70f71a515ae9942a upstream.

Return unhandlable error on inter-privilege level ret instruction.  This is
since the current emulation does not check the privilege level correctly when
loading the CS, and does not pop RSP/SS as needed.

Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/emulate.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5484d54582ca..fb3fddc322f8 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2209,6 +2209,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
 	unsigned long cs;
+	int cpl = ctxt->ops->cpl(ctxt);
 
 	rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
@@ -2218,6 +2219,9 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
+	/* Outer-privilege level return is not implemented */
+	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
+		return X86EMUL_UNHANDLEABLE;
 	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
 	return rc;
 }

From 1933d1c5482b928599eabd7032672b35a15df066 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 30 Jul 2014 18:07:24 +0200
Subject: [PATCH 0257/1185] KVM: x86: always exit on EOIs for interrupts listed
 in the IOAPIC redir table

commit 0f6c0a740b7d3e1f3697395922d674000f83d060 upstream.

Currently, the EOI exit bitmap (used for APICv) does not include
interrupts that are masked.  However, this can cause a bug that manifests
as an interrupt storm inside the guest.  Alex Williamson reported the
bug and is the one who really debugged this; I only wrote the patch. :)

The scenario involves a multi-function PCI device with OHCI and EHCI
USB functions and an audio function, all assigned to the guest, where
both USB functions use legacy INTx interrupts.

As soon as the guest boots, interrupts for these devices turn into an
interrupt storm in the guest; the host does not see the interrupt storm.
Basically the EOI path does not work, and the guest continues to see the
interrupt over and over, even after it attempts to mask it at the APIC.
The bug is only visible with older kernels (RHEL6.5, based on 2.6.32
with not many changes in the area of APIC/IOAPIC handling).

Alex then tried forcing bit 59 (corresponding to the USB functions' IRQ)
on in the eoi_exit_bitmap and TMR, and things then work.  What happens
is that VFIO asserts IRQ11, then KVM recomputes the EOI exit bitmap.
It does not have set bit 59 because the RTE was masked, so the IOAPIC
never sees the EOI and the interrupt continues to fire in the guest.

My guess was that the guest is masking the interrupt in the redirection
table in the interrupt routine, i.e. while the interrupt is set in a
LAPIC's ISR.  The simplest fix is to ignore the masking state; we would
rather have an unnecessary exit than a missed IRQ ACK, and anyway
IOAPIC interrupts are not as performance-sensitive as for example MSIs.
Alex tested this patch and it fixed his bug.

[Thanks to Alex for his precise description of the problem
 and initial debugging effort.  A lot of the text above is
 based on emails exchanged with him.]

Reported-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 virt/kvm/ioapic.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 39dc5bc742e0..5eaf18f90e83 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -203,10 +203,9 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
 	spin_lock(&ioapic->lock);
 	for (index = 0; index < IOAPIC_NUM_PINS; index++) {
 		e = &ioapic->redirtbl[index];
-		if (!e->fields.mask &&
-			(e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
-			 kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC,
-				 index) || index == RTC_GSI)) {
+		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
+		    kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) ||
+		    index == RTC_GSI) {
 			if (kvm_apic_match_dest(vcpu, NULL, 0,
 				e->fields.dest_id, e->fields.dest_mode)) {
 				__set_bit(e->fields.vector,

From d175e30c03decba32c5373379f5bab065267a074 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@linux.intel.com>
Date: Tue, 5 Aug 2014 12:42:24 +0800
Subject: [PATCH 0258/1185] KVM: nVMX: fix "acknowledge interrupt on exit" when
 APICv is in use

commit 56cc2406d68c0f09505c389e276f27a99f495cbd upstream.

After commit 77b0f5d (KVM: nVMX: Ack and write vector info to intr_info
if L1 asks us to), "Acknowledge interrupt on exit" behavior can be
emulated. To do so, KVM will ask the APIC for the interrupt vector
during a nested vmexit if VM_EXIT_ACK_INTR_ON_EXIT is set.  With APICv,
kvm_get_apic_interrupt would return -1 and give the following WARNING:

Call Trace:
 [<ffffffff81493563>] dump_stack+0x49/0x5e
 [<ffffffff8103f0eb>] warn_slowpath_common+0x7c/0x96
 [<ffffffffa059709a>] ? nested_vmx_vmexit+0xa4/0x233 [kvm_intel]
 [<ffffffff8103f11a>] warn_slowpath_null+0x15/0x17
 [<ffffffffa059709a>] nested_vmx_vmexit+0xa4/0x233 [kvm_intel]
 [<ffffffffa0594295>] ? nested_vmx_exit_handled+0x6a/0x39e [kvm_intel]
 [<ffffffffa0537931>] ? kvm_apic_has_interrupt+0x80/0xd5 [kvm]
 [<ffffffffa05972ec>] vmx_check_nested_events+0xc3/0xd3 [kvm_intel]
 [<ffffffffa051ebe9>] inject_pending_event+0xd0/0x16e [kvm]
 [<ffffffffa051efa0>] vcpu_enter_guest+0x319/0x704 [kvm]

To fix this, we cannot rely on the processor's virtual interrupt delivery,
because "acknowledge interrupt on exit" must only update the virtual
ISR/PPR/IRR registers (and SVI, which is just a cache of the virtual ISR)
but it should not deliver the interrupt through the IDT.  Thus, KVM has
to deliver the interrupt "by hand", similar to the treatment of EOI in
commit fc57ac2c9ca8 (KVM: lapic: sync highest ISR to hardware apic on
EOI, 2014-05-14).

The patch modifies kvm_cpu_get_interrupt to always acknowledge an
interrupt; there are only two callers, and the other is not affected
because it is never reached with kvm_apic_vid_enabled() == true.  Then it
modifies apic_set_isr and apic_clear_irr to update SVI and RVI in addition
to the registers.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Suggested-by: "Zhang, Yang Z" <yang.z.zhang@intel.com>
Tested-by: Liu, RongrongX <rongrongx.liu@intel.com>
Tested-by: Felipe Reyes <freyes@suse.com>
Fixes: 77b0f5d67ff2781f36831cba79674c3e97bd7acf
Signed-off-by: Wanpeng Li <wanpeng.li@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/irq.c   |  2 +-
 arch/x86/kvm/lapic.c | 52 +++++++++++++++++++++++++++++++++-----------
 2 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 484bc874688b..3ec38cb56bd5 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -108,7 +108,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 
 	vector = kvm_cpu_get_extint(v);
 
-	if (kvm_apic_vid_enabled(v->kvm) || vector != -1)
+	if (vector != -1)
 		return vector;			/* PIC */
 
 	return kvm_get_apic_interrupt(v);	/* APIC */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 279d093524b4..681e4e251f00 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -362,25 +362,46 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 
 static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 {
-	apic->irr_pending = false;
+	struct kvm_vcpu *vcpu;
+
+	vcpu = apic->vcpu;
+
 	apic_clear_vector(vec, apic->regs + APIC_IRR);
-	if (apic_search_irr(apic) != -1)
-		apic->irr_pending = true;
+	if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+		/* try to update RVI */
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+	else {
+		vec = apic_search_irr(apic);
+		apic->irr_pending = (vec != -1);
+	}
 }
 
 static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
 {
-	/* Note that we never get here with APIC virtualization enabled.  */
+	struct kvm_vcpu *vcpu;
+
+	if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
+		return;
+
+	vcpu = apic->vcpu;
 
-	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
-		++apic->isr_count;
-	BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
 	/*
-	 * ISR (in service register) bit is set when injecting an interrupt.
-	 * The highest vector is injected. Thus the latest bit set matches
-	 * the highest bit in ISR.
+	 * With APIC virtualization enabled, all caching is disabled
+	 * because the processor can modify ISR under the hood.  Instead
+	 * just set SVI.
 	 */
-	apic->highest_isr_cache = vec;
+	if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+		kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec);
+	else {
+		++apic->isr_count;
+		BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
+		/*
+		 * ISR (in service register) bit is set when injecting an interrupt.
+		 * The highest vector is injected. Thus the latest bit set matches
+		 * the highest bit in ISR.
+		 */
+		apic->highest_isr_cache = vec;
+	}
 }
 
 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
@@ -1641,11 +1662,16 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 	int vector = kvm_apic_has_interrupt(vcpu);
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	/* Note that we never get here with APIC virtualization enabled.  */
-
 	if (vector == -1)
 		return -1;
 
+	/*
+	 * We get here even with APIC virtualization enabled, if doing
+	 * nested virtualization and L1 runs with the "acknowledge interrupt
+	 * on exit" mode.  Then we cannot inject the interrupt via RVI,
+	 * because the process would deliver it through the IDT.
+	 */
+
 	apic_set_isr(vector, apic);
 	apic_update_ppr(apic);
 	apic_clear_irr(vector, apic);

From d3cf5ab75bba12328cc3a3960ee1b2dff623960c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 18 Aug 2014 16:39:48 +0200
Subject: [PATCH 0259/1185] Revert "KVM: x86: Increase the number of fixed MTRR
 regs to 10"

commit 0d234daf7e0a3290a3a20c8087eefbd6335a5bd4 upstream.

This reverts commit 682367c494869008eb89ef733f196e99415ae862,
which causes 32-bit SMP Windows 7 guests to panic.

SeaBIOS has a limit on the number of MTRRs that it can handle,
and this patch exceeded the limit.  Better revert it.
Thanks to Nadav Amit for debugging the cause.

Reported-by: Wanpeng Li <wanpeng.li@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8b320722de7a..f7f20f7fac3c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -92,7 +92,7 @@
 #define KVM_REFILL_PAGES 25
 #define KVM_MAX_CPUID_ENTRIES 80
 #define KVM_NR_FIXED_MTRR_REGION 88
-#define KVM_NR_VAR_MTRR 10
+#define KVM_NR_VAR_MTRR 8
 
 #define ASYNC_PF_PER_VCPU 64
 

From 6e0db2f1e545f8848220e8692e4d3485c845c9cb Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 19 Aug 2014 19:14:50 +0800
Subject: [PATCH 0260/1185] kvm: iommu: fix the third parameter of
 kvm_iommu_put_pages (CVE-2014-3601)

commit 350b8bdd689cd2ab2c67c8a86a0be86cfa0751a7 upstream.

The third parameter of kvm_iommu_put_pages is wrong;
it should be 'gfn - slot->base_gfn'.

By making gfn very large, malicious guest or userspace can cause kvm to
go to this error path, and subsequently to pass a huge value as size.
Alternatively if gfn is small, then pages would be pinned but never
unpinned, causing host memory leak and local DOS.

Passing a reasonable but large value could be the most dangerous case,
because it would unpin a page that should have stayed pinned, and thus
allow the device to DMA into arbitrary memory.  However, this cannot
happen because of the condition that can trigger the error:

- out of memory (where you can't allocate even a single page)
  should not be possible for the attacker to trigger

- when exceeding the iommu's address space, guest pages after gfn
  will also exceed the iommu's address space, and inside
  kvm_iommu_put_pages() the iommu_iova_to_phys() will fail.  The
  page thus would not be unpinned at all.

Reported-by: Jack Morgenstein <jackm@mellanox.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 virt/kvm/iommu.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index c329c8fc57f4..dec997188dfb 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -61,6 +61,14 @@ static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
 	return pfn;
 }
 
+static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
+{
+	unsigned long i;
+
+	for (i = 0; i < npages; ++i)
+		kvm_release_pfn_clean(pfn + i);
+}
+
 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
 	gfn_t gfn, end_gfn;
@@ -123,6 +131,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
 			       "iommu failed to map pfn=%llx\n", pfn);
+			kvm_unpin_pages(kvm, pfn, page_size);
 			goto unmap_pages;
 		}
 
@@ -134,7 +143,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 	return 0;
 
 unmap_pages:
-	kvm_iommu_put_pages(kvm, slot->base_gfn, gfn);
+	kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn);
 	return r;
 }
 
@@ -272,14 +281,6 @@ int kvm_iommu_map_guest(struct kvm *kvm)
 	return r;
 }
 
-static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
-{
-	unsigned long i;
-
-	for (i = 0; i < npages; ++i)
-		kvm_release_pfn_clean(pfn + i);
-}
-
 static void kvm_iommu_put_pages(struct kvm *kvm,
 				gfn_t base_gfn, unsigned long npages)
 {

From ccdbe7da071912c422eb71fbbb873f16fd666db8 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 23 Aug 2014 17:47:28 -0400
Subject: [PATCH 0261/1185] ext4: fix BUG_ON in mb_free_blocks()

commit c99d1e6e83b06744c75d9f5e491ed495a7086b7b upstream.

If we suffer a block allocation failure (for example due to a memory
allocation failure), it's possible that we will call
ext4_discard_allocated_blocks() before we've actually allocated any
blocks.  In that case, fe_len and fe_start in ac->ac_f_ex will still
be zero, and this will result in mb_free_blocks(inode, e4b, 0, 0)
triggering the BUG_ON on mb_free_blocks():

	BUG_ON(last >= (sb->s_blocksize << 3));

Fix this by bailing out of ext4_discard_allocated_blocks() if fe_len
is zero.

Also fix a missing ext4_mb_unload_buddy() call in
ext4_discard_allocated_blocks().

Google-Bug-Id: 16844242

Fixes: 86f0afd463215fc3e58020493482faa4ac3a4d69
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/mballoc.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 35220866fbbf..162b80d527a0 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1396,6 +1396,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
 	int last = first + count - 1;
 	struct super_block *sb = e4b->bd_sb;
 
+	if (WARN_ON(count == 0))
+		return;
 	BUG_ON(last >= (sb->s_blocksize << 3));
 	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
 	mb_check_buddy(e4b);
@@ -3181,6 +3183,8 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
 	int err;
 
 	if (pa == NULL) {
+		if (ac->ac_f_ex.fe_len == 0)
+			return;
 		err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
 		if (err) {
 			/*
@@ -3195,6 +3199,7 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
 		mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
 			       ac->ac_f_ex.fe_len);
 		ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
+		ext4_mb_unload_buddy(&e4b);
 		return;
 	}
 	if (pa->pa_type == MB_INODE_PA)

From db58c6f5ec4c48a6073c90a25495d290229ae17c Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 21 Aug 2014 10:55:07 -0400
Subject: [PATCH 0262/1185] drm/radeon: add additional SI pci ids

commit 37dbeab788a8f23fd946c0be083e5484d6f929a1 upstream.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/drm/drm_pciids.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index ecaef57f9f6c..3a1cff56ef20 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -143,8 +143,11 @@
 	{0x1002, 0x6601, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6602, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6603, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6604, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6605, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6606, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6607, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6608, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6610, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6611, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6613, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \
@@ -256,6 +259,7 @@
 	{0x1002, 0x6829, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x682A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x682B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x682C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x682D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x682F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \

From 6dc6da0cc9a64f26304f2589fbd262e5198e207d Mon Sep 17 00:00:00 2001
From: Christoph Schulz <develop@kristov.de>
Date: Wed, 16 Jul 2014 10:00:57 +0200
Subject: [PATCH 0263/1185] x86: don't exclude low BIOS area when allocating
 address space for non-PCI cards

commit cbace46a9710a480cae51e4611697df5de41713e upstream.

Commit 30919b0bf356 ("x86: avoid low BIOS area when allocating address
space") moved the test for resource allocations that fall within the first
1MB of address space from the PCI-specific path to a generic path, such
that all resource allocations will avoid this area.  However, this breaks
ISA cards which need to allocate a memory region within the first 1MB.  An
example is the i82365 PCMCIA controller and derivatives like the Ricoh
RF5C296/396 which map part of the PCMCIA socket memory address space into
the first 1MB of system memory address space.  They do not work anymore as
no usable memory region exists due to this change:

  Intel ISA PCIC probe: Ricoh RF5C296/396 ISA-to-PCMCIA at port 0x3e0 ofs 0x00, 2 sockets
  host opts [0]: none
  host opts [1]: none
  ISA irqs (scanned) = 3,4,5,9,10 status change on irq 10
  pcmcia_socket pcmcia_socket1: pccard: PCMCIA card inserted into slot 1
  pcmcia_socket pcmcia_socket0: cs: IO port probe 0xc00-0xcff: excluding 0xcf8-0xcff
  pcmcia_socket pcmcia_socket0: cs: IO port probe 0xa00-0xaff: clean.
  pcmcia_socket pcmcia_socket0: cs: IO port probe 0x100-0x3ff: excluding 0x170-0x177 0x1f0-0x1f7 0x2f8-0x2ff 0x370-0x37f 0x3c0-0x3e7 0x3f0-0x3ff
  pcmcia_socket pcmcia_socket0: cs: memory probe 0x0a0000-0x0affff: excluding 0xa0000-0xaffff
  pcmcia_socket pcmcia_socket0: cs: memory probe 0x0b0000-0x0bffff: excluding 0xb0000-0xbffff
  pcmcia_socket pcmcia_socket0: cs: memory probe 0x0c0000-0x0cffff: excluding 0xc0000-0xcbfff
  pcmcia_socket pcmcia_socket0: cs: memory probe 0x0d0000-0x0dffff: clean.
  pcmcia_socket pcmcia_socket0: cs: memory probe 0x0e0000-0x0effff: clean.
  pcmcia_socket pcmcia_socket0: cs: memory probe 0x60000000-0x60ffffff: clean.
  pcmcia_socket pcmcia_socket0: cs: memory probe 0xa0000000-0xa0ffffff: clean.
  pcmcia_socket pcmcia_socket1: cs: IO port probe 0xc00-0xcff: excluding 0xcf8-0xcff
  pcmcia_socket pcmcia_socket1: cs: IO port probe 0xa00-0xaff: clean.
  pcmcia_socket pcmcia_socket1: cs: IO port probe 0x100-0x3ff: excluding 0x170-0x177 0x1f0-0x1f7 0x2f8-0x2ff 0x370-0x37f 0x3c0-0x3e7 0x3f0-0x3ff
  pcmcia_socket pcmcia_socket1: cs: memory probe 0x0a0000-0x0affff: excluding 0xa0000-0xaffff
  pcmcia_socket pcmcia_socket1: cs: memory probe 0x0b0000-0x0bffff: excluding 0xb0000-0xbffff
  pcmcia_socket pcmcia_socket1: cs: memory probe 0x0c0000-0x0cffff: excluding 0xc0000-0xcbfff
  pcmcia_socket pcmcia_socket1: cs: memory probe 0x0d0000-0x0dffff: clean.
  pcmcia_socket pcmcia_socket1: cs: memory probe 0x0e0000-0x0effff: clean.
  pcmcia_socket pcmcia_socket1: cs: memory probe 0x60000000-0x60ffffff: clean.
  pcmcia_socket pcmcia_socket1: cs: memory probe 0xa0000000-0xa0ffffff: clean.
  pcmcia_socket pcmcia_socket1: cs: memory probe 0x0cc000-0x0effff: excluding 0xe0000-0xeffff
  pcmcia_socket pcmcia_socket1: cs: unable to map card memory!

If filtering out the first 1MB is reverted, everything works as expected.

Tested-by: Robert Resch <fli4l@robert.reschpara.de>
Signed-off-by: Christoph Schulz <develop@kristov.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/resource.c | 8 +++++---
 arch/x86/pci/i386.c        | 4 ++++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
index 2a26819bb6a8..80eab01c1a68 100644
--- a/arch/x86/kernel/resource.c
+++ b/arch/x86/kernel/resource.c
@@ -37,10 +37,12 @@ static void remove_e820_regions(struct resource *avail)
 
 void arch_remove_reservations(struct resource *avail)
 {
-	/* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */
+	/*
+	 * Trim out BIOS area (high 2MB) and E820 regions. We do not remove
+	 * the low 1MB unconditionally, as this area is needed for some ISA
+	 * cards requiring a memory range, e.g. the i82365 PCMCIA controller.
+	 */
 	if (avail->flags & IORESOURCE_MEM) {
-		if (avail->start < BIOS_END)
-			avail->start = BIOS_END;
 		resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END);
 
 		remove_e820_regions(avail);
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 94919e307f8e..2883f0840201 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -162,6 +162,10 @@ pcibios_align_resource(void *data, const struct resource *res,
 			return start;
 		if (start & 0x300)
 			start = (start + 0x3ff) & ~0x3ff;
+	} else if (res->flags & IORESOURCE_MEM) {
+		/* The low 1MB range is reserved for ISA cards */
+		if (start < BIOS_END)
+			start = BIOS_END;
 	}
 	return start;
 }

From 27cca923cd5ddcbb57262216b96ffb89ef7372e4 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Fri, 25 Jul 2014 16:30:27 -0700
Subject: [PATCH 0264/1185] x86_64/vsyscall: Fix warn_bad_vsyscall log output

commit 53b884ac3745353de220d92ef792515c3ae692f0 upstream.

This commit in Linux 3.6:

    commit c767a54ba0657e52e6edaa97cbe0b0a8bf1c1655
    Author: Joe Perches <joe@perches.com>
    Date:   Mon May 21 19:50:07 2012 -0700

        x86/debug: Add KERN_<LEVEL> to bare printks, convert printks to pr_<level>

caused warn_bad_vsyscall to output garbage in the middle of the
line.  Revert the bad part of it.

The printk in question isn't actually bare; the level is "%s".

The bug this fixes is purely cosmetic; backports are optional.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/03eac1f24110bbe496ecc12a4df467e0d88466d4.1406330947.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/vsyscall_64.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 9a907a67be8f..c52c07efe970 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -125,10 +125,10 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
 	if (!show_unhandled_signals)
 		return;
 
-	pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
-			      level, current->comm, task_pid_nr(current),
-			      message, regs->ip, regs->cs,
-			      regs->sp, regs->ax, regs->si, regs->di);
+	printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
+			   level, current->comm, task_pid_nr(current),
+			   message, regs->ip, regs->cs,
+			   regs->sp, regs->ax, regs->si, regs->di);
 }
 
 static int addr_to_vsyscall_nr(unsigned long addr)

From 29571876956efa104778957519c23792c66d3140 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Fri, 11 Jul 2014 08:45:25 +0100
Subject: [PATCH 0265/1185] x86/efi: Enforce CONFIG_RELOCATABLE for EFI boot
 stub
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 7b2a583afb4ab894f78bc0f8bd136e96b6499a7e upstream.

Without CONFIG_RELOCATABLE the early boot code will decompress the
kernel to LOAD_PHYSICAL_ADDR. While this may have been fine in the BIOS
days, that isn't going to fly with UEFI since parts of the firmware
code/data may be located at LOAD_PHYSICAL_ADDR.

Straying outside of the bounds of the regions we've explicitly requested
from the firmware will cause all sorts of trouble. Bruno reports that
his machine resets while trying to decompress the kernel image.

We already go to great pains to ensure the kernel is loaded into a
suitably aligned buffer, it's just that the address isn't necessarily
LOAD_PHYSICAL_ADDR, because we can't guarantee that address isn't in-use
by the firmware.

Explicitly enforce CONFIG_RELOCATABLE for the EFI boot stub, so that we
can load the kernel at any address with the correct alignment.

Reported-by: Bruno Prémont <bonbons@linux-vserver.org>
Tested-by: Bruno Prémont <bonbons@linux-vserver.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a649cb686692..4e5b80d883c8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1578,6 +1578,7 @@ config EFI
 config EFI_STUB
        bool "EFI stub support"
        depends on EFI
+       select RELOCATABLE
        ---help---
           This kernel feature allows a bzImage to be loaded directly
 	  by EFI firmware without the use of a bootloader.

From 1c2cdf1f81de70a91bca8f0c5c4a6ae0f852490d Mon Sep 17 00:00:00 2001
From: "Stephen M. Cameron" <scameron@beardog.cce.hp.com>
Date: Thu, 3 Jul 2014 10:18:03 -0500
Subject: [PATCH 0266/1185] hpsa: fix bad -ENOMEM return value in
 hpsa_big_passthru_ioctl

commit 0758f4f732b08b6ef07f2e5f735655cf69fea477 upstream.

When copy_from_user fails, return -EFAULT, not -ENOMEM

Signed-off-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Reported-by: Robert Elliott <elliott@hp.com>
Reviewed-by: Joe Handzik <joseph.t.handzik@hp.com>
Reviewed-by: Scott Teel <scott.teel@hp.com>
Reviewed-by: Mike Miller <michael.miller@canonical.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/hpsa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 0353d7f2172b..287667c20c6a 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -3118,7 +3118,7 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 		}
 		if (ioc->Request.Type.Direction == XFER_WRITE) {
 			if (copy_from_user(buff[sg_used], data_ptr, sz)) {
-				status = -ENOMEM;
+				status = -EFAULT;
 				goto cleanup1;
 			}
 		} else

From a9c37c8a72a50312a38bd846f7a944ea1a46a4f1 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Sat, 9 Aug 2014 21:22:27 +0100
Subject: [PATCH 0267/1185] Btrfs: fix csum tree corruption, duplicate and
 outdated checksums

commit 27b9a8122ff71a8cadfbffb9c4f0694300464f3b upstream.

Under rare circumstances we can end up leaving 2 versions of a checksum
for the same file extent range.

The reason for this is that after calling btrfs_next_leaf we process
slot 0 of the leaf it returns, instead of processing the slot set in
path->slots[0]. Most of the time (by far) path->slots[0] is 0, but after
btrfs_next_leaf() releases the path and before it searches for the next
leaf, another task might cause a split of the next leaf, which migrates
some of its keys to the leaf we were processing before calling
btrfs_next_leaf(). In this case btrfs_next_leaf() returns again the
same leaf but with path->slots[0] having a slot number corresponding
to the first new key it got, that is, a slot number that didn't exist
before calling btrfs_next_leaf(), as the leaf now has more keys than
it had before. So we must really process the returned leaf starting at
path->slots[0] always, as it isn't always 0, and the key at slot 0 can
have an offset much lower than our search offset/bytenr.

For example, consider the following scenario, where we have:

sums->bytenr: 40157184, sums->len: 16384, sums end: 40173568
four 4kb file data blocks with offsets 40157184, 40161280, 40165376, 40169472

  Leaf N:

    slot = 0                           slot = btrfs_header_nritems() - 1
  |-------------------------------------------------------------------|
  | [(CSUM CSUM 39239680), size 8] ... [(CSUM CSUM 40116224), size 4] |
  |-------------------------------------------------------------------|

  Leaf N + 1:

      slot = 0                          slot = btrfs_header_nritems() - 1
  |--------------------------------------------------------------------|
  | [(CSUM CSUM 40161280), size 32] ... [(CSUM CSUM 40615936), size 8] |
  |--------------------------------------------------------------------|

Because we are at the last slot of leaf N, we call btrfs_next_leaf() to
find the next highest key, which releases the current path and then searches
for that next key. However after releasing the path and before finding that
next key, the item at slot 0 of leaf N + 1 gets moved to leaf N, due to a call
to ctree.c:push_leaf_left() (via ctree.c:split_leaf()), and therefore
btrfs_next_leaf() will return us a path again with leaf N but with the slot
pointing to its new last key (CSUM CSUM 40161280). This new version of leaf N
is then:

    slot = 0                        slot = btrfs_header_nritems() - 2  slot = btrfs_header_nritems() - 1
  |----------------------------------------------------------------------------------------------------|
  | [(CSUM CSUM 39239680), size 8] ... [(CSUM CSUM 40116224), size 4]  [(CSUM CSUM 40161280), size 32] |
  |----------------------------------------------------------------------------------------------------|

And incorrectly using slot 0 makes us set next_offset to 39239680 and we jump
into the "insert:" label, which will set tmp to:

    tmp = min((sums->len - total_bytes) >> blocksize_bits,
        (next_offset - file_key.offset) >> blocksize_bits) =
    min((16384 - 0) >> 12, (39239680 - 40157184) >> 12) =
    min(4, (u64)-917504 = 18446744073708634112 >> 12) = 4

and

   ins_size = csum_size * tmp = 4 * 4 = 16 bytes.

In other words, we insert a new csum item in the tree with key
(CSUM_OBJECTID CSUM_KEY 40157184 = sums->bytenr) that contains the checksums
for all the data (4 blocks of 4096 bytes each = sums->len). Which is wrong,
because the item with key (CSUM CSUM 40161280) (the one that was moved from
leaf N + 1 to the end of leaf N) contains the old checksums of the last 12288
bytes of our data and won't get those old checksums removed.

So this leaves us 2 different checksums for 3 4kb blocks of data in the tree,
and breaks the logical rule:

   Key_N+1.offset >= Key_N.offset + length_of_data_its_checksums_cover

An obvious bad effect of this is that a subsequent csum tree lookup to get
the checksum of any of the blocks with logical offset of 40161280, 40165376
or 40169472 (the last 3 4kb blocks of file data), will get the old checksums.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/file-item.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index b193bf324a41..3af77aa051d2 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -754,7 +754,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 				found_next = 1;
 			if (ret != 0)
 				goto insert;
-			slot = 0;
+			slot = path->slots[0];
 		}
 		btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
 		if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||

From 5935bef5cd35378a1e58333b6472b687c1a8b9cb Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Tue, 12 Aug 2014 18:07:56 +0300
Subject: [PATCH 0268/1185] mei: reset client state on queued connect request

commit 73ab4232388b7a08f17c8d08141ff2099fa0b161 upstream.

If a connect request is queued (e.g. the device is in pg), set the client
state to initializing, thus avoiding a premature exit from the wait if the
current state is disconnected.

This is regression from:

commit e4d8270e604c3202131bac607969605ac397b893
Author: Alexander Usyskin <alexander.usyskin@intel.com>
mei: set connecting state just upon connection request is sent to the fw

Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/client.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index da2385bb3536..07ed4b5b1659 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -512,6 +512,7 @@ int mei_cl_connect(struct mei_cl *cl, struct file *file)
 		cl->timer_count = MEI_CONNECT_TIMEOUT;
 		list_add_tail(&cb->list, &dev->ctrl_rd_list.list);
 	} else {
+		cl->state = MEI_FILE_INITIALIZING;
 		list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
 	}
 

From d9fab037c6646f853cc71c0d5c740bd2981c48a2 Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Tue, 12 Aug 2014 18:07:57 +0300
Subject: [PATCH 0269/1185] mei: nfc: fix memory leak in error path

commit 8e8248b1369c97c7bb6f8bcaee1f05deeabab8ef upstream.

NFC will leak buffer if send failed.
Use single exit point that does the freeing

Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/nfc.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/misc/mei/nfc.c b/drivers/misc/mei/nfc.c
index 994ca4aff1a3..4b7ea3fb143c 100644
--- a/drivers/misc/mei/nfc.c
+++ b/drivers/misc/mei/nfc.c
@@ -342,9 +342,10 @@ static int mei_nfc_send(struct mei_cl_device *cldev, u8 *buf, size_t length)
 	ndev = (struct mei_nfc_dev *) cldev->priv_data;
 	dev = ndev->cl->dev;
 
+	err = -ENOMEM;
 	mei_buf = kzalloc(length + MEI_NFC_HEADER_SIZE, GFP_KERNEL);
 	if (!mei_buf)
-		return -ENOMEM;
+		goto out;
 
 	hdr = (struct mei_nfc_hci_hdr *) mei_buf;
 	hdr->cmd = MEI_NFC_CMD_HCI_SEND;
@@ -354,12 +355,9 @@ static int mei_nfc_send(struct mei_cl_device *cldev, u8 *buf, size_t length)
 	hdr->data_size = length;
 
 	memcpy(mei_buf + MEI_NFC_HEADER_SIZE, buf, length);
-
 	err = __mei_cl_send(ndev->cl, mei_buf, length + MEI_NFC_HEADER_SIZE);
 	if (err < 0)
-		return err;
-
-	kfree(mei_buf);
+		goto out;
 
 	if (!wait_event_interruptible_timeout(ndev->send_wq,
 				ndev->recv_req_id == ndev->req_id, HZ)) {
@@ -368,7 +366,8 @@ static int mei_nfc_send(struct mei_cl_device *cldev, u8 *buf, size_t length)
 	} else {
 		ndev->req_id++;
 	}
-
+out:
+	kfree(mei_buf);
 	return err;
 }
 

From 666cec8db793a67bf9071b9f0fd96c8af424a9b9 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 27 Aug 2014 18:40:05 -0400
Subject: [PATCH 0270/1185] jbd2: fix infinite loop when recovering corrupt
 journal blocks

commit 022eaa7517017efe4f6538750c2b59a804dc7df7 upstream.

When recovering the journal, don't fall into an infinite loop if we
encounter a corrupt journal block.  Instead, just skip the block and
return an error, which fails the mount and thus forces the user to run
a full filesystem fsck.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/jbd2/recovery.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 626846bac32f..3bacc1909ddb 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -427,6 +427,7 @@ static int do_one_pass(journal_t *journal,
 	int			tag_bytes = journal_tag_bytes(journal);
 	__u32			crc32_sum = ~0; /* Transactional Checksums */
 	int			descr_csum_size = 0;
+	int			block_error = 0;
 
 	/*
 	 * First thing is to establish what we expect to find in the log
@@ -599,7 +600,8 @@ static int do_one_pass(journal_t *journal,
 						       "checksum recovering "
 						       "block %llu in log\n",
 						       blocknr);
-						continue;
+						block_error = 1;
+						goto skip_write;
 					}
 
 					/* Find a buffer for the new
@@ -798,7 +800,8 @@ static int do_one_pass(journal_t *journal,
 				success = -EIO;
 		}
 	}
-
+	if (block_error && success == 0)
+		success = -EIO;
 	return success;
 
  failed:

From a4e85832884d8aaf26f0b7cfe152f144f92eb80c Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 19 May 2014 01:03:06 +0100
Subject: [PATCH 0271/1185] Staging: speakup: Update
 __speakup_paste_selection() tty (ab)usage to match vt

commit 28a821c306889b9f2c3fff49abedc9b2c743eb73 upstream.

This function is largely a duplicate of paste_selection() in
drivers/tty/vt/selection.c, but with its own selection state.  The
speakup selection mechanism should really be merged with vt.

For now, apply the changes from 'TTY: vt, fix paste_selection ldisc
handling', 'tty: Make ldisc input flow control concurrency-friendly',
and 'tty: Fix unsafe vt paste_selection()'.

References: https://bugs.debian.org/735202
References: https://bugs.debian.org/744015
Reported-by: Paul Gevers <elbrus@debian.org>
Reported-and-tested-by: Jarek Czekalski <jarekczek@poczta.onet.pl>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
[bwh: Backported to 3.10:
 - Only apply the changes from 'TTY: vt, fix paste_selection ldisc handling'
 - Add the same FIXME comment as vt's paste_selection() has in this version]
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/speakup/selection.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/speakup/selection.c b/drivers/staging/speakup/selection.c
index f67941e78e4a..b9359753784e 100644
--- a/drivers/staging/speakup/selection.c
+++ b/drivers/staging/speakup/selection.c
@@ -5,6 +5,7 @@
 #include <linux/device.h> /* for dev_warn */
 #include <linux/selection.h>
 #include <linux/workqueue.h>
+#include <linux/tty.h>
 #include <asm/cmpxchg.h>
 
 #include "speakup.h"
@@ -135,8 +136,12 @@ static void __speakup_paste_selection(struct work_struct *work)
 	struct tty_struct *tty = xchg(&spw->tty, NULL);
 	struct vc_data *vc = (struct vc_data *) tty->driver_data;
 	int pasted = 0, count;
+	struct tty_ldisc *ld;
 	DECLARE_WAITQUEUE(wait, current);
 
+	ld = tty_ldisc_ref_wait(tty);
+
+	/* FIXME: this is completely unsafe */
 	add_wait_queue(&vc->paste_wait, &wait);
 	while (sel_buffer && sel_buffer_lth > pasted) {
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -146,12 +151,13 @@ static void __speakup_paste_selection(struct work_struct *work)
 		}
 		count = sel_buffer_lth - pasted;
 		count = min_t(int, count, tty->receive_room);
-		tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted,
-			NULL, count);
+		ld->ops->receive_buf(tty, sel_buffer + pasted, NULL, count);
 		pasted += count;
 	}
 	remove_wait_queue(&vc->paste_wait, &wait);
 	current->state = TASK_RUNNING;
+
+	tty_ldisc_deref(ld);
 	tty_kref_put(tty);
 }
 

From 512c454e2639148b2385468c894244d9d91570a5 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 19 Aug 2014 15:17:56 +0300
Subject: [PATCH 0272/1185] xhci: Treat not finding the event_seg on COMP_STOP
 the same as COMP_STOP_INVAL

commit 9a54886342e227433aebc9d374f8ae268a836475 upstream.

When using a Renesas uPD720231 chipset usb-3 uas to sata bridge with a 120G
Crucial M500 ssd, model string: Crucial_ CT120M500SSD1, together with
the integrated Intel xhci controller on a Haswell laptop:

00:14.0 USB controller [0c03]: Intel Corporation 8 Series USB xHCI HC [8086:9c31] (rev 04)

The following error gets logged to dmesg:

xhci error: Transfer event TRB DMA ptr not part of current TD

Treating COMP_STOP the same as COMP_STOP_INVAL when no event_seg gets found
fixes this.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index fe42cae6d1ef..df5834bd71f3 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2532,7 +2532,8 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 		 * last TRB of the previous TD. The command completion handle
 		 * will take care the rest.
 		 */
-		if (!event_seg && trb_comp_code == COMP_STOP_INVAL) {
+		if (!event_seg && (trb_comp_code == COMP_STOP ||
+				   trb_comp_code == COMP_STOP_INVAL)) {
 			ret = 0;
 			goto cleanup;
 		}

From 607a00ad38bb2bc7fe3e3aebc4e6a0fe17d8351e Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Tue, 19 Aug 2014 15:17:57 +0300
Subject: [PATCH 0273/1185] usb: xhci: amd chipset also needs short TX quirk

commit 2597fe99bb0259387111d0431691f5daac84f5a5 upstream.

AMD xHC also needs the short TX quirk, as confirmed by testing on most
chipset generations. It shows the same incorrect behavior as the
Fresco Logic host. See the messages below, logged with a USB webcam
attached to the xHC host:

[  139.262944] xhci_hcd 0000:00:10.0: WARN Successful completion on short TX: needs XHCI_TRUST_TX_LENGTH quirk?
[  139.266934] xhci_hcd 0000:00:10.0: WARN Successful completion on short TX: needs XHCI_TRUST_TX_LENGTH quirk?
[  139.270913] xhci_hcd 0000:00:10.0: WARN Successful completion on short TX: needs XHCI_TRUST_TX_LENGTH quirk?
[  139.274937] xhci_hcd 0000:00:10.0: WARN Successful completion on short TX: needs XHCI_TRUST_TX_LENGTH quirk?
[  139.278914] xhci_hcd 0000:00:10.0: WARN Successful completion on short TX: needs XHCI_TRUST_TX_LENGTH quirk?
[  139.282936] xhci_hcd 0000:00:10.0: WARN Successful completion on short TX: needs XHCI_TRUST_TX_LENGTH quirk?
[  139.286915] xhci_hcd 0000:00:10.0: WARN Successful completion on short TX: needs XHCI_TRUST_TX_LENGTH quirk?
[  139.290938] xhci_hcd 0000:00:10.0: WARN Successful completion on short TX: needs XHCI_TRUST_TX_LENGTH quirk?
[  139.294913] xhci_hcd 0000:00:10.0: WARN Successful completion on short TX: needs XHCI_TRUST_TX_LENGTH quirk?
[  139.298917] xhci_hcd 0000:00:10.0: WARN Successful completion on short TX: needs XHCI_TRUST_TX_LENGTH quirk?

Reported-by: Arindam Nath <arindam.nath@amd.com>
Tested-by: Shriraj-Rai P <shriraj-rai.p@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-pci.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index a736d82695cb..0e57bcb8e3f7 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -87,6 +87,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 	/* AMD PLL quirk */
 	if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info())
 		xhci->quirks |= XHCI_AMD_PLL_FIX;
+
+	if (pdev->vendor == PCI_VENDOR_ID_AMD)
+		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
+
 	if (pdev->vendor == PCI_VENDOR_ID_INTEL) {
 		xhci->quirks |= XHCI_LPM_SUPPORT;
 		xhci->quirks |= XHCI_INTEL_HOST;

From 4268973202c86b58348366fdf81117b04adcadfd Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 25 Aug 2014 16:15:35 -0700
Subject: [PATCH 0274/1185] ARM: OMAP2+: hwmod: Rearm wake-up interrupts for DT
 when MUSB is idled

commit cc824534d4fef0e46e4486d5c1e10d3c6b1ebadc upstream.

Looks like MUSB cable removal can cause wake-up interrupts to
stop working for device tree based booting at least for UART3
even as nothing is dynamically remuxed. This can be fixed by
calling reconfigure_io_chain() for device tree based booting
in hwmod code. Note that we already do that for legacy booting
if the legacy mux is configured.

My guess is that this is related to UART3 and MUSB ULPI
hsusb0_data0 and hsusb0_data1 support for Carkit mode that
somehow affect the configured IO chain for UART3 and require
rearming the wake-up interrupts.

In general, for device tree based booting, pinctrl-single
calls the rearm hook that in turn calls reconfigure_io_chain
so calling reconfigure_io_chain should not be needed from the
hwmod code for other events.

So let's limit the hwmod rearming of iochain only to
HWMOD_FORCE_MSTANDBY where MUSB is currently the only user
of it. If we see other devices needing similar changes we can
add more checks for it.

Cc: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/omap_hwmod.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 44c609a1ec5d..62e40a9fffa9 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -2177,6 +2177,8 @@ static int _enable(struct omap_hwmod *oh)
 			 oh->mux->pads_dynamic))) {
 		omap_hwmod_mux(oh->mux, _HWMOD_STATE_ENABLED);
 		_reconfigure_io_chain();
+	} else if (oh->flags & HWMOD_FORCE_MSTANDBY) {
+		_reconfigure_io_chain();
 	}
 
 	_add_initiator_dep(oh, mpu_oh);
@@ -2283,6 +2285,8 @@ static int _idle(struct omap_hwmod *oh)
 	if (oh->mux && oh->mux->pads_dynamic) {
 		omap_hwmod_mux(oh->mux, _HWMOD_STATE_IDLE);
 		_reconfigure_io_chain();
+	} else if (oh->flags & HWMOD_FORCE_MSTANDBY) {
+		_reconfigure_io_chain();
 	}
 
 	oh->_state = _HWMOD_STATE_IDLE;

From 22de64f496ee7a1fc6460d5131f6fc506a2f7305 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 13 Aug 2014 17:56:52 +0200
Subject: [PATCH 0275/1185] USB: ftdi_sio: add Basic Micro ATOM Nano USB2Serial
 PID

commit 6552cc7f09261db2aeaae389aa2c05a74b3a93b4 upstream.

Add device id for Basic Micro ATOM Nano USB2Serial adapters.

Reported-by: Nicolas Alt <n.alt@mytum.de>
Tested-by: Nicolas Alt <n.alt@mytum.de>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/ftdi_sio.c     | 1 +
 drivers/usb/serial/ftdi_sio_ids.h | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index bc27998c3a2d..538a70d7e7c7 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -154,6 +154,7 @@ static struct usb_device_id id_table_combined [] = {
 	{ USB_DEVICE(FTDI_VID, FTDI_AMC232_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_CANUSB_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_CANDAPTER_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_BM_ATOM_NANO_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_NXTCAM_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_EV3CON_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_0_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 1e58d90a0b6c..3168a0191973 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -42,6 +42,8 @@
 /* www.candapter.com Ewert Energy Systems CANdapter device */
 #define FTDI_CANDAPTER_PID 0x9F80 /* Product Id */
 
+#define FTDI_BM_ATOM_NANO_PID	0xa559	/* Basic Micro ATOM Nano USB2Serial */
+
 /*
  * Texas Instruments XDS100v2 JTAG / BeagleBone A3
  * http://processors.wiki.ti.com/index.php/XDS100

From 17912b6285ac739e9c461a3601a4a7c7eda7a5a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ja=C5=A1a=20Bartelj?= <jasa.bartelj@gmail.com>
Date: Sat, 16 Aug 2014 12:44:27 +0200
Subject: [PATCH 0276/1185] USB: ftdi_sio: Added PID for new ekey device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 646907f5bfb0782c731ae9ff6fb63471a3566132 upstream.

Added support to the ftdi_sio driver for ekey Converter USB which
uses an FT232BM chip.

Signed-off-by: Jaša Bartelj <jasa.bartelj@gmail.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/ftdi_sio.c     | 2 ++
 drivers/usb/serial/ftdi_sio_ids.h | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 538a70d7e7c7..120fff399c10 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -951,6 +951,8 @@ static struct usb_device_id id_table_combined [] = {
 	{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_2_PID) },
 	{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_3_PID) },
 	{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_4_PID) },
+	/* ekey Devices */
+	{ USB_DEVICE(FTDI_VID, FTDI_EKEY_CONV_USB_PID) },
 	/* Infineon Devices */
 	{ USB_DEVICE_INTERFACE_NUMBER(INFINEON_VID, INFINEON_TRIBOARD_PID, 1) },
 	{ },					/* Optional parameter entry */
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 3168a0191973..70b0b1d88ae9 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -1380,3 +1380,8 @@
 #define BRAINBOXES_US_160_6_PID		0x9006 /* US-160 16xRS232 1Mbaud Port 11 and 12 */
 #define BRAINBOXES_US_160_7_PID		0x9007 /* US-160 16xRS232 1Mbaud Port 13 and 14 */
 #define BRAINBOXES_US_160_8_PID		0x9008 /* US-160 16xRS232 1Mbaud Port 15 and 16 */
+
+/*
+ * ekey biometric systems GmbH (http://ekey.net/)
+ */
+#define FTDI_EKEY_CONV_USB_PID		0xCB08	/* Converter USB */

From d0e6e29e2c9820d39b83fa275bf68c7c8bc7935e Mon Sep 17 00:00:00 2001
From: James Forshaw <forshaw@google.com>
Date: Sat, 23 Aug 2014 14:39:48 -0700
Subject: [PATCH 0277/1185] USB: whiteheat: Added bounds checking for bulk
 command response

commit 6817ae225cd650fb1c3295d769298c38b1eba818 upstream.

This patch fixes a potential security issue in the whiteheat USB driver
which might allow a local attacker to cause kernel memory corruption. This
is due to an unchecked memcpy into a fixed size buffer (of 64 bytes). On
EHCI and XHCI busses it's possible to craft responses greater than 64
bytes leading to a buffer overflow.

Signed-off-by: James Forshaw <forshaw@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/whiteheat.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c
index 347caad47a12..5e3dd9f87ff5 100644
--- a/drivers/usb/serial/whiteheat.c
+++ b/drivers/usb/serial/whiteheat.c
@@ -521,6 +521,10 @@ static void command_port_read_callback(struct urb *urb)
 		dev_dbg(&urb->dev->dev, "%s - command_info is NULL, exiting.\n", __func__);
 		return;
 	}
+	if (!urb->actual_length) {
+		dev_dbg(&urb->dev->dev, "%s - empty response, exiting.\n", __func__);
+		return;
+	}
 	if (status) {
 		dev_dbg(&urb->dev->dev, "%s - nonzero urb status: %d\n", __func__, status);
 		if (status != -ENOENT)
@@ -541,7 +545,8 @@ static void command_port_read_callback(struct urb *urb)
 		/* These are unsolicited reports from the firmware, hence no
 		   waiting command to wakeup */
 		dev_dbg(&urb->dev->dev, "%s - event received\n", __func__);
-	} else if (data[0] == WHITEHEAT_GET_DTR_RTS) {
+	} else if ((data[0] == WHITEHEAT_GET_DTR_RTS) &&
+		(urb->actual_length - 1 <= sizeof(command_info->result_buffer))) {
 		memcpy(command_info->result_buffer, &data[1],
 						urb->actual_length - 1);
 		command_info->command_finished = WHITEHEAT_CMD_COMPLETE;

From 6a5335b46b463c469ca03ddb706c0562e881d2ed Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Mon, 4 Aug 2014 12:44:46 +0300
Subject: [PATCH 0278/1185] usb: hub: Prevent hub autosuspend if
 usbcore.autosuspend is -1

commit bdd405d2a5287bdb9b04670ea255e1f122138e66 upstream.

If user specifies that USB autosuspend must be disabled by module
parameter "usbcore.autosuspend=-1" then we must prevent
autosuspend of USB hub devices as well.

commit 596d789a211d introduced in v3.8 changed the original behaviour
and stopped respecting the usbcore.autosuspend parameter for hubs.

Fixes: 596d789a211d "USB: set hub's default autosuspend delay as 0"

Signed-off-by: Roger Quadros <rogerq@ti.com>
Tested-by: Michael Welling <mwelling@emacinc.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 7e90d146d7dd..813697d72466 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1699,8 +1699,12 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id)
 	 * - Change autosuspend delay of hub can avoid unnecessary auto
 	 *   suspend timer for hub, also may decrease power consumption
 	 *   of USB bus.
+	 *
+	 * - If user has indicated to prevent autosuspend by passing
+	 *   usbcore.autosuspend = -1 then keep autosuspend disabled.
 	 */
-	pm_runtime_set_autosuspend_delay(&hdev->dev, 0);
+	if (hdev->dev.power.autosuspend_delay >= 0)
+		pm_runtime_set_autosuspend_delay(&hdev->dev, 0);
 
 	/*
 	 * Hubs have proper suspend/resume support, except for root hubs

From caacbac7bf646a29049bec3d9f5fcc20c846b3b2 Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Wed, 30 Jul 2014 21:26:05 +0800
Subject: [PATCH 0279/1185] NFSD: Decrease nfsd_users in nfsd_startup_generic
 fail

commit d9499a95716db0d4bc9b67e88fd162133e7d6b08 upstream.

A memory allocation failure could cause nfsd_startup_generic to fail, in
which case nfsd_users would be incorrectly left elevated.

After nfsd restarts nfsd_startup_generic will then succeed without doing
anything--the first consequence is likely nfs4_start_net finding a bad
laundry_wq and crashing.

Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Fixes: 4539f14981ce "nfsd: replace boolean nfsd_up flag by users counter"
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfsd/nfssvc.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 262df5ccbf59..8016892f3f05 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -220,7 +220,8 @@ static int nfsd_startup_generic(int nrservs)
 	 */
 	ret = nfsd_racache_init(2*nrservs);
 	if (ret)
-		return ret;
+		goto dec_users;
+
 	ret = nfs4_state_start();
 	if (ret)
 		goto out_racache;
@@ -228,6 +229,8 @@ static int nfsd_startup_generic(int nrservs)
 
 out_racache:
 	nfsd_racache_shutdown();
+dec_users:
+	nfsd_users--;
 	return ret;
 }
 

From c73df6f73c8167b9ed68d653d1a5c761c209d2b5 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 16 Jul 2014 15:38:32 -0400
Subject: [PATCH 0280/1185] svcrdma: Select NFSv4.1 backchannel transport based
 on forward channel

commit 3c45ddf823d679a820adddd53b52c6699c9a05ac upstream.

The current code always selects XPRT_TRANSPORT_BC_TCP for the back
channel, even when the forward channel was not TCP (eg, RDMA). When
a 4.1 mount is attempted with RDMA, the server panics in the TCP BC
code when trying to send CB_NULL.

Instead, construct the transport protocol number from the forward
channel transport or'd with XPRT_TRANSPORT_BC. Transports that do
not support bi-directional RPC will not have registered a "BC"
transport, causing create_backchannel_client() to fail immediately.

Fixes: https://bugzilla.linux-nfs.org/show_bug.cgi?id=265
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfsd/nfs4callback.c                   | 3 ++-
 include/linux/sunrpc/svc_xprt.h          | 1 +
 net/sunrpc/svcsock.c                     | 2 ++
 net/sunrpc/xprt.c                        | 2 +-
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 +
 5 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 3eaa6e30a2dc..cc8c5b32043c 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -672,7 +672,8 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
 		clp->cl_cb_session = ses;
 		args.bc_xprt = conn->cb_xprt;
 		args.prognumber = clp->cl_cb_session->se_cb_prog;
-		args.protocol = XPRT_TRANSPORT_BC_TCP;
+		args.protocol = conn->cb_xprt->xpt_class->xcl_ident |
+				XPRT_TRANSPORT_BC;
 		args.authflavor = ses->se_cb_sec.flavor;
 	}
 	/* Create RPC client */
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index b05963f09ebf..f5bfb1a80abe 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -32,6 +32,7 @@ struct svc_xprt_class {
 	struct svc_xprt_ops	*xcl_ops;
 	struct list_head	xcl_list;
 	u32			xcl_max_payload;
+	int			xcl_ident;
 };
 
 /*
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 422759bae60d..5c62c5e89b46 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -683,6 +683,7 @@ static struct svc_xprt_class svc_udp_class = {
 	.xcl_owner = THIS_MODULE,
 	.xcl_ops = &svc_udp_ops,
 	.xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
+	.xcl_ident = XPRT_TRANSPORT_UDP,
 };
 
 static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
@@ -1275,6 +1276,7 @@ static struct svc_xprt_class svc_tcp_class = {
 	.xcl_owner = THIS_MODULE,
 	.xcl_ops = &svc_tcp_ops,
 	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
+	.xcl_ident = XPRT_TRANSPORT_TCP,
 };
 
 void svc_init_xprt_sock(void)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 095363eee764..42ce6bfc729d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1290,7 +1290,7 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
 		}
 	}
 	spin_unlock(&xprt_list_lock);
-	printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident);
+	dprintk("RPC: transport (%d) not supported\n", args->ident);
 	return ERR_PTR(-EIO);
 
 found:
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 62e4f9bcc387..ed36cb52cd86 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -89,6 +89,7 @@ struct svc_xprt_class svc_rdma_class = {
 	.xcl_owner = THIS_MODULE,
 	.xcl_ops = &svc_rdma_ops,
 	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
+	.xcl_ident = XPRT_TRANSPORT_RDMA,
 };
 
 struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)

From b6f70b7027f157fbba45091518f34faf40ad81b8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 24 Aug 2014 14:46:48 -0400
Subject: [PATCH 0281/1185] NFSv3: Fix another acl regression

commit f87d928f6d98644d39809a013a22f981d39017cf upstream.

When creating a new object on the NFS server, we should not be sending
posix setacl requests unless the preceding posix_acl_create returned a
non-trivial acl. Doing so, causes Solaris servers in particular to
return an EINVAL.

Fixes: 013cdf1088d72 (nfs: use generic posix ACL infrastructure,,,)
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1132786
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/nfs3acl.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 4a1aafba6a20..8c34f57a9aef 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -305,7 +305,10 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
 		.rpc_argp	= &args,
 		.rpc_resp	= &fattr,
 	};
-	int status;
+	int status = 0;
+
+	if (acl == NULL && (!S_ISDIR(inode->i_mode) || dfacl == NULL))
+		goto out;
 
 	status = -EOPNOTSUPP;
 	if (!nfs_server_capable(inode, NFS_CAP_ACLS))

From 569ae35a436502bf1aaaa7391b94e1d04b61ffca Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Mon, 25 Aug 2014 22:33:12 -0400
Subject: [PATCH 0282/1185] NFSv4: Fix problems with close in the presence of a
 delegation

commit aee7af356e151494d5014f57b33460b162f181b5 upstream.

In the presence of delegations, we can no longer assume that the
state->n_rdwr, state->n_rdonly, state->n_wronly reflect the open
stateid share mode, and so we need to calculate the initial value
for calldata->arg.fmode using the state->flags.

Reported-by: James Drews <drews@engr.wisc.edu>
Fixes: 88069f77e1ac5 (NFSv41: Fix a potential state leakage when...)
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/nfs4proc.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1ae7dd5956c5..25559821771b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2287,6 +2287,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 	struct nfs4_closedata *calldata = data;
 	struct nfs4_state *state = calldata->state;
 	struct inode *inode = calldata->inode;
+	bool is_rdonly, is_wronly, is_rdwr;
 	int call_close = 0;
 
 	dprintk("%s: begin!\n", __func__);
@@ -2294,18 +2295,24 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 		goto out_wait;
 
 	task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
-	calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
 	spin_lock(&state->owner->so_lock);
+	is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
+	is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
+	is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
+	/* Calculate the current open share mode */
+	calldata->arg.fmode = 0;
+	if (is_rdonly || is_rdwr)
+		calldata->arg.fmode |= FMODE_READ;
+	if (is_wronly || is_rdwr)
+		calldata->arg.fmode |= FMODE_WRITE;
 	/* Calculate the change in open mode */
 	if (state->n_rdwr == 0) {
 		if (state->n_rdonly == 0) {
-			call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
-			call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
+			call_close |= is_rdonly || is_rdwr;
 			calldata->arg.fmode &= ~FMODE_READ;
 		}
 		if (state->n_wronly == 0) {
-			call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
-			call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
+			call_close |= is_wronly || is_rdwr;
 			calldata->arg.fmode &= ~FMODE_WRITE;
 		}
 	}

From 3d81c4733b6b25c0b99c3c3c16cfd93183c27b03 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 27 Aug 2014 16:55:29 -0700
Subject: [PATCH 0283/1185] USB: fix build error with CONFIG_PM_RUNTIME
 disabled

commit a9ef803d740bfadf5e505fbc57efa57692e27025 upstream.

commit bdd405d2a528 ("usb: hub: Prevent hub autosuspend if
usbcore.autosuspend is -1") causes a build error if CONFIG_PM_RUNTIME is
disabled.  Fix that by doing a simple #ifdef guard around it.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Cc: Roger Quadros <rogerq@ti.com>
Cc: Michael Welling <mwelling@emacinc.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 813697d72466..a5631849017f 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1703,8 +1703,10 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id)
 	 * - If user has indicated to prevent autosuspend by passing
 	 *   usbcore.autosuspend = -1 then keep autosuspend disabled.
 	 */
+#ifdef CONFIG_PM_RUNTIME
 	if (hdev->dev.power.autosuspend_delay >= 0)
 		pm_runtime_set_autosuspend_delay(&hdev->dev, 0);
+#endif
 
 	/*
 	 * Hubs have proper suspend/resume support, except for root hubs

From a8d97b1bd0c91fbc1be54d068b5f051b4f70b4f7 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 5 Sep 2014 16:32:00 -0700
Subject: [PATCH 0284/1185] Linux 3.10.54

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 2ac415a7e937..9429aa5e89de 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 10
-SUBLEVEL = 53
+SUBLEVEL = 54
 EXTRAVERSION =
 NAME = TOSSUG Baby Fish
 

From 42df511db0372b4878b322d812b975e4632a2928 Mon Sep 17 00:00:00 2001
From: Todd Poynor <toddpoynor@google.com>
Date: Fri, 5 Sep 2014 18:27:38 -0700
Subject: [PATCH 0285/1185] cpufreq: interactive: make common_tunables static

From: Cylen Yao <cylen.yao@mediatek.com>

common_tunables should be static.

Change-Id: I502ee3062bece5082fea7861eff2f6237e25cede
Signed-off-by: Todd Poynor <toddpoynor@google.com>
---
 drivers/cpufreq/cpufreq_interactive.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c
index 437aaed57057..72563be3537c 100644
--- a/drivers/cpufreq/cpufreq_interactive.c
+++ b/drivers/cpufreq/cpufreq_interactive.c
@@ -117,7 +117,7 @@ struct cpufreq_interactive_tunables {
 };
 
 /* For cases where we have single governor instance for system */
-struct cpufreq_interactive_tunables *common_tunables;
+static struct cpufreq_interactive_tunables *common_tunables;
 
 static struct attribute_group *get_sysfs_attr(void);
 

From 89563feb836a95316305bab9fe75f85c64170175 Mon Sep 17 00:00:00 2001
From: Greg Hackmann <ghackmann@google.com>
Date: Tue, 9 Sep 2014 17:36:05 -0700
Subject: [PATCH 0286/1185] arm64: process: dump memory around registers when
 displaying regs

A port of 8608d7c4418c75841c562a90cddd9beae5798a48 to ARM64.  Both the
original code and this port are limited to dumping kernel addresses, so
don't bother if the registers are from a userspace process.

Change-Id: Idc76804c54efaaeb70311cbb500c54db6dac4525
Signed-off-by: Greg Hackmann <ghackmann@google.com>
---
 arch/arm64/kernel/process.c | 66 +++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index e2eb9453d3a1..7c11c74f7f54 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -141,6 +141,70 @@ void machine_restart(char *cmd)
 	while (1);
 }
 
+/*
+ * dump a block of kernel memory from around the given address
+ */
+static void show_data(unsigned long addr, int nbytes, const char *name)
+{
+	int	i, j;
+	int	nlines;
+	u32	*p;
+
+	/*
+	 * don't attempt to dump non-kernel addresses or
+	 * values that are probably just small negative numbers
+	 */
+	if (addr < PAGE_OFFSET || addr > -256UL)
+		return;
+
+	printk("\n%s: %#lx:\n", name, addr);
+
+	/*
+	 * round address down to a 32 bit boundary
+	 * and always dump a multiple of 32 bytes
+	 */
+	p = (u32 *)(addr & ~(sizeof(u32) - 1));
+	nbytes += (addr & (sizeof(u32) - 1));
+	nlines = (nbytes + 31) / 32;
+
+
+	for (i = 0; i < nlines; i++) {
+		/*
+		 * just display low 16 bits of address to keep
+		 * each line of the dump < 80 characters
+		 */
+		printk("%04lx ", (unsigned long)p & 0xffff);
+		for (j = 0; j < 8; j++) {
+			u32	data;
+			if (probe_kernel_address(p, data)) {
+				printk(" ********");
+			} else {
+				printk(" %08x", data);
+			}
+			++p;
+		}
+		printk("\n");
+	}
+}
+
+static void show_extra_register_data(struct pt_regs *regs, int nbytes)
+{
+	mm_segment_t fs;
+	unsigned int i;
+
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+	show_data(regs->pc - nbytes, nbytes * 2, "PC");
+	show_data(regs->regs[30] - nbytes, nbytes * 2, "LR");
+	show_data(regs->sp - nbytes, nbytes * 2, "SP");
+	for (i = 0; i < 30; i++) {
+		char name[4];
+		snprintf(name, sizeof(name), "X%u", i);
+		show_data(regs->regs[i] - nbytes, nbytes * 2, name);
+	}
+	set_fs(fs);
+}
+
 void __show_regs(struct pt_regs *regs)
 {
 	int i;
@@ -156,6 +220,8 @@ void __show_regs(struct pt_regs *regs)
 		if (i % 2 == 0)
 			printk("\n");
 	}
+	if (!user_mode(regs))
+		show_extra_register_data(regs, 128);
 	printk("\n");
 }
 

From c23e1b19b17aff981414a77eaca235e1592b0852 Mon Sep 17 00:00:00 2001
From: Greg Hackmann <ghackmann@google.com>
Date: Tue, 9 Sep 2014 12:14:40 -0700
Subject: [PATCH 0287/1185] arm64: add HWCAP_EVTSTRM and associated hwcap
 refactoring

Take the hwcaps changes from 46efe547aca8498d51b64460c02366ae4032ca32 to
facilitate cherry-picking later hwcaps changes, while skipping the timer
changes that actually enable event streams for now.  The timer changes
depend on some non-trivial changes made after 3.10, and can safely be
dropped: the kernel will just continue reporting that HWCAP_EVTSTRM is
not available.

Bug: 17431179

Change-Id: I41548846f8cd7ae8147a2b115cc0f84708e29552
Signed-off-by: Greg Hackmann <ghackmann@google.com>
---
 arch/arm64/include/asm/hwcap.h      | 11 ++++++-----
 arch/arm64/include/uapi/asm/hwcap.h |  1 +
 arch/arm64/kernel/setup.c           | 11 +++++++++++
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 6d4482fa35bc..3a48433dfb57 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -30,6 +30,7 @@
 #define COMPAT_HWCAP_IDIVA	(1 << 17)
 #define COMPAT_HWCAP_IDIVT	(1 << 18)
 #define COMPAT_HWCAP_IDIV	(COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT)
+#define COMPAT_HWCAP_EVTSTRM	(1 << 21)
 
 #ifndef __ASSEMBLY__
 /*
@@ -37,11 +38,11 @@
  * instruction set this cpu supports.
  */
 #define ELF_HWCAP		(elf_hwcap)
-#define COMPAT_ELF_HWCAP	(COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
-				 COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
-				 COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
-				 COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
-				 COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV)
+
+#ifdef CONFIG_COMPAT
+#define COMPAT_ELF_HWCAP	(compat_elf_hwcap)
+extern unsigned int compat_elf_hwcap;
+#endif
 
 extern unsigned int elf_hwcap;
 #endif
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index eea497578b87..9b12476e9c85 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -21,6 +21,7 @@
  */
 #define HWCAP_FP		(1 << 0)
 #define HWCAP_ASIMD		(1 << 1)
+#define HWCAP_EVTSTRM		(1 << 2)
 
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index add6ea616843..57cb0063d74c 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -60,6 +60,16 @@ EXPORT_SYMBOL(processor_id);
 unsigned int elf_hwcap __read_mostly;
 EXPORT_SYMBOL_GPL(elf_hwcap);
 
+#ifdef CONFIG_COMPAT
+#define COMPAT_ELF_HWCAP_DEFAULT	\
+				(COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
+				 COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
+				 COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
+				 COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
+				 COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV)
+unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
+#endif
+
 static const char *cpu_name;
 static const char *machine_name;
 phys_addr_t __fdt_pointer __initdata;
@@ -309,6 +319,7 @@ subsys_initcall(topology_init);
 static const char *hwcap_str[] = {
 	"fp",
 	"asimd",
+	"evtstrm",
 	NULL
 };
 

From d648cd8603215f0c5518780869016cde67084ba5 Mon Sep 17 00:00:00 2001
From: Steve Capper <Steve.Capper@arm.com>
Date: Wed, 18 Sep 2013 16:14:28 +0100
Subject: [PATCH 0288/1185] arm64: Widen hwcap to be 64 bit

Under arm64 elf_hwcap is a 32 bit quantity, but it is stored in
a 64 bit auxiliary ELF field and glibc reads hwcap as 64 bit.

This patch widens elf_hwcap to be 64 bit.

Signed-off-by: Steve Capper <steve.capper@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/hwcap.h | 2 +-
 arch/arm64/kernel/setup.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 3a48433dfb57..6cddbb0c9f54 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -44,6 +44,6 @@
 extern unsigned int compat_elf_hwcap;
 #endif
 
-extern unsigned int elf_hwcap;
+extern unsigned long elf_hwcap;
 #endif
 #endif
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 57cb0063d74c..d6c340eb45cc 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -57,7 +57,7 @@
 unsigned int processor_id;
 EXPORT_SYMBOL(processor_id);
 
-unsigned int elf_hwcap __read_mostly;
+unsigned long elf_hwcap __read_mostly;
 EXPORT_SYMBOL_GPL(elf_hwcap);
 
 #ifdef CONFIG_COMPAT

From 500cee98c8e2b2abdedbbe81bc1f9871c5ec8a97 Mon Sep 17 00:00:00 2001
From: Steve Capper <steve.capper@linaro.org>
Date: Mon, 16 Dec 2013 21:04:36 +0000
Subject: [PATCH 0289/1185] arm64: Add hwcaps for crypto and CRC32 extensions.

Advertise the optional cryptographic and CRC32 instructions to
user space where present. Several hwcap bits [3-7] are allocated.

Signed-off-by: Steve Capper <steve.capper@linaro.org>
[bit 2 is taken now so use bits 3-7 instead]
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/uapi/asm/hwcap.h |  6 ++++-
 arch/arm64/kernel/setup.c           | 37 +++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 9b12476e9c85..73cf0f54d57c 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -22,6 +22,10 @@
 #define HWCAP_FP		(1 << 0)
 #define HWCAP_ASIMD		(1 << 1)
 #define HWCAP_EVTSTRM		(1 << 2)
-
+#define HWCAP_AES		(1 << 3)
+#define HWCAP_PMULL		(1 << 4)
+#define HWCAP_SHA1		(1 << 5)
+#define HWCAP_SHA2		(1 << 6)
+#define HWCAP_CRC32		(1 << 7)
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index d6c340eb45cc..a480eae24ec9 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -110,6 +110,7 @@ void __init early_print(const char *str, ...)
 static void __init setup_processor(void)
 {
 	struct cpu_info *cpu_info;
+	u64 features, block;
 
 	/*
 	 * locate processor in the list of supported processor
@@ -130,6 +131,37 @@ static void __init setup_processor(void)
 
 	sprintf(init_utsname()->machine, "aarch64");
 	elf_hwcap = 0;
+
+	/*
+	 * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks.
+	 * The blocks we test below represent incremental functionality
+	 * for non-negative values. Negative values are reserved.
+	 */
+	features = read_cpuid(ID_AA64ISAR0_EL1);
+	block = (features >> 4) & 0xf;
+	if (!(block & 0x8)) {
+		switch (block) {
+		default:
+		case 2:
+			elf_hwcap |= HWCAP_PMULL;
+		case 1:
+			elf_hwcap |= HWCAP_AES;
+		case 0:
+			break;
+		}
+	}
+
+	block = (features >> 8) & 0xf;
+	if (block && !(block & 0x8))
+		elf_hwcap |= HWCAP_SHA1;
+
+	block = (features >> 12) & 0xf;
+	if (block && !(block & 0x8))
+		elf_hwcap |= HWCAP_SHA2;
+
+	block = (features >> 16) & 0xf;
+	if (block && !(block & 0x8))
+		elf_hwcap |= HWCAP_CRC32;
 }
 
 static void __init setup_machine_fdt(phys_addr_t dt_phys)
@@ -320,6 +352,11 @@ static const char *hwcap_str[] = {
 	"fp",
 	"asimd",
 	"evtstrm",
+	"aes",
+	"pmull",
+	"sha1",
+	"sha2",
+	"crc32",
 	NULL
 };
 

From 1ffb7f631abe362e99dd64e566cc62255f4423c3 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 3 Mar 2014 07:34:44 +0000
Subject: [PATCH 0290/1185] binfmt_elf: add ELF_HWCAP2 to compat auxv entries

Add ELF_HWCAP2 to the set of auxv entries that is passed to
a 32-bit ELF program running in 32-bit compat mode under a
64-bit kernel.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 fs/compat_binfmt_elf.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index a81147e2e4ef..4d24d17bcfc1 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -88,6 +88,11 @@ static void cputime_to_compat_timeval(const cputime_t cputime,
 #define	ELF_HWCAP		COMPAT_ELF_HWCAP
 #endif
 
+#ifdef	COMPAT_ELF_HWCAP2
+#undef	ELF_HWCAP2
+#define	ELF_HWCAP2		COMPAT_ELF_HWCAP2
+#endif
+
 #ifdef	COMPAT_ARCH_DLINFO
 #undef	ARCH_DLINFO
 #define	ARCH_DLINFO		COMPAT_ARCH_DLINFO

From a6483864467b280cb3d9e916823d3bd9a78f3ded Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 3 Mar 2014 07:34:45 +0000
Subject: [PATCH 0291/1185] arm64: add AT_HWCAP2 support for 32-bit compat

Add support for the ELF auxv entry AT_HWCAP2 when running 32-bit
ELF binaries in compat mode.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/hwcap.h | 3 ++-
 arch/arm64/kernel/setup.c      | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 6cddbb0c9f54..9a4cbd60c88e 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -41,7 +41,8 @@
 
 #ifdef CONFIG_COMPAT
 #define COMPAT_ELF_HWCAP	(compat_elf_hwcap)
-extern unsigned int compat_elf_hwcap;
+#define COMPAT_ELF_HWCAP2	(compat_elf_hwcap2)
+extern unsigned int compat_elf_hwcap, compat_elf_hwcap2;
 #endif
 
 extern unsigned long elf_hwcap;
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index a480eae24ec9..f97c5d26e446 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -68,6 +68,7 @@ EXPORT_SYMBOL_GPL(elf_hwcap);
 				 COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
 				 COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV)
 unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
+unsigned int compat_elf_hwcap2 __read_mostly;
 #endif
 
 static const char *cpu_name;

From 2c15311ffaad6ff6aa7f0a735694d4d2c7e4b3c5 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 3 Mar 2014 07:34:46 +0000
Subject: [PATCH 0292/1185] arm64: advertise ARMv8 extensions to 32-bit compat
 ELF binaries

This adds support for advertising the presence of ARMv8 Crypto
Extensions in the AArch32 execution state to 32-bit ELF binaries
running in 32-bit compat mode under the arm64 kernel.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/hwcap.h |  6 ++++++
 arch/arm64/kernel/setup.c      | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 9a4cbd60c88e..024c46183c3c 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -32,6 +32,12 @@
 #define COMPAT_HWCAP_IDIV	(COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT)
 #define COMPAT_HWCAP_EVTSTRM	(1 << 21)
 
+#define COMPAT_HWCAP2_AES	(1 << 0)
+#define COMPAT_HWCAP2_PMULL	(1 << 1)
+#define COMPAT_HWCAP2_SHA1	(1 << 2)
+#define COMPAT_HWCAP2_SHA2	(1 << 3)
+#define COMPAT_HWCAP2_CRC32	(1 << 4)
+
 #ifndef __ASSEMBLY__
 /*
  * This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index f97c5d26e446..3a06f1aa0902 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -163,6 +163,38 @@ static void __init setup_processor(void)
 	block = (features >> 16) & 0xf;
 	if (block && !(block & 0x8))
 		elf_hwcap |= HWCAP_CRC32;
+
+#ifdef CONFIG_COMPAT
+	/*
+	 * ID_ISAR5_EL1 carries similar information as above, but pertaining to
+	 * the Aarch32 32-bit execution state.
+	 */
+	features = read_cpuid(ID_ISAR5_EL1);
+	block = (features >> 4) & 0xf;
+	if (!(block & 0x8)) {
+		switch (block) {
+		default:
+		case 2:
+			compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL;
+		case 1:
+			compat_elf_hwcap2 |= COMPAT_HWCAP2_AES;
+		case 0:
+			break;
+		}
+	}
+
+	block = (features >> 8) & 0xf;
+	if (block && !(block & 0x8))
+		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1;
+
+	block = (features >> 12) & 0xf;
+	if (block && !(block & 0x8))
+		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2;
+
+	block = (features >> 16) & 0xf;
+	if (block && !(block & 0x8))
+		compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32;
+#endif
 }
 
 static void __init setup_machine_fdt(phys_addr_t dt_phys)

From d7592de8d09e5834d5ef54c842ffae6fccf92a25 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Sat, 16 Mar 2013 08:48:13 +0000
Subject: [PATCH 0293/1185] arm64: debug: consolidate software breakpoint
 handlers

The software breakpoint handlers are hooked in directly from ptrace,
which makes it difficult to add additional handlers for things like
kprobes and kgdb.

This patch moves the handling code into debug-monitors.c, where we can
dispatch to different debug subsystems more easily.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/debug-monitors.h |  9 ++++
 arch/arm64/include/asm/ptrace.h         |  2 -
 arch/arm64/kernel/debug-monitors.c      | 66 ++++++++++++++++++++++++-
 arch/arm64/kernel/ptrace.c              | 59 ----------------------
 arch/arm64/kernel/traps.c               |  5 +-
 5 files changed, 75 insertions(+), 66 deletions(-)

diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 7eaa0b302493..ef8235c68c09 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -83,6 +83,15 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs)
 }
 #endif
 
+#ifdef CONFIG_COMPAT
+int aarch32_break_handler(struct pt_regs *regs);
+#else
+static int aarch32_break_handler(struct pt_regs *regs)
+{
+	return -EFAULT;
+}
+#endif
+
 #endif	/* __ASSEMBLY */
 #endif	/* __KERNEL__ */
 #endif	/* __ASM_DEBUG_MONITORS_H */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 7304fa2fd9fa..41e59e2459ff 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -171,7 +171,5 @@ extern unsigned long profile_pc(struct pt_regs *regs);
 #define profile_pc(regs) instruction_pointer(regs)
 #endif
 
-extern int aarch32_break_trap(struct pt_regs *regs);
-
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index f4726dc054b3..08018e3df580 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -24,6 +24,7 @@
 #include <linux/init.h>
 #include <linux/ptrace.h>
 #include <linux/stat.h>
+#include <linux/uaccess.h>
 
 #include <asm/debug-monitors.h>
 #include <asm/local.h>
@@ -226,13 +227,74 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
 	return 0;
 }
 
-static int __init single_step_init(void)
+static int brk_handler(unsigned long addr, unsigned int esr,
+		       struct pt_regs *regs)
+{
+	siginfo_t info;
+
+	if (!user_mode(regs))
+		return -EFAULT;
+
+	info = (siginfo_t) {
+		.si_signo = SIGTRAP,
+		.si_errno = 0,
+		.si_code  = TRAP_BRKPT,
+		.si_addr  = (void __user *)instruction_pointer(regs),
+	};
+
+	force_sig_info(SIGTRAP, &info, current);
+	return 0;
+}
+
+int aarch32_break_handler(struct pt_regs *regs)
+{
+	siginfo_t info;
+	unsigned int instr;
+	bool bp = false;
+	void __user *pc = (void __user *)instruction_pointer(regs);
+
+	if (!compat_user_mode(regs))
+		return -EFAULT;
+
+	if (compat_thumb_mode(regs)) {
+		/* get 16-bit Thumb instruction */
+		get_user(instr, (u16 __user *)pc);
+		if (instr == AARCH32_BREAK_THUMB2_LO) {
+			/* get second half of 32-bit Thumb-2 instruction */
+			get_user(instr, (u16 __user *)(pc + 2));
+			bp = instr == AARCH32_BREAK_THUMB2_HI;
+		} else {
+			bp = instr == AARCH32_BREAK_THUMB;
+		}
+	} else {
+		/* 32-bit ARM instruction */
+		get_user(instr, (u32 __user *)pc);
+		bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM;
+	}
+
+	if (!bp)
+		return -EFAULT;
+
+	info = (siginfo_t) {
+		.si_signo = SIGTRAP,
+		.si_errno = 0,
+		.si_code  = TRAP_BRKPT,
+		.si_addr  = pc,
+	};
+
+	force_sig_info(SIGTRAP, &info, current);
+	return 0;
+}
+
+static int __init debug_traps_init(void)
 {
 	hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
 			      TRAP_HWBKPT, "single-step handler");
+	hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
+			      TRAP_BRKPT, "ptrace BRK handler");
 	return 0;
 }
-arch_initcall(single_step_init);
+arch_initcall(debug_traps_init);
 
 /* Re-enable single step for syscall restarting. */
 void user_rewind_single_step(struct task_struct *task)
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index aebfc1519e8e..7190a6544cab 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -53,28 +53,6 @@ void ptrace_disable(struct task_struct *child)
 {
 }
 
-/*
- * Handle hitting a breakpoint.
- */
-static int ptrace_break(struct pt_regs *regs)
-{
-	siginfo_t info = {
-		.si_signo = SIGTRAP,
-		.si_errno = 0,
-		.si_code  = TRAP_BRKPT,
-		.si_addr  = (void __user *)instruction_pointer(regs),
-	};
-
-	force_sig_info(SIGTRAP, &info, current);
-	return 0;
-}
-
-static int arm64_break_trap(unsigned long addr, unsigned int esr,
-			    struct pt_regs *regs)
-{
-	return ptrace_break(regs);
-}
-
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 /*
  * Handle hitting a HW-breakpoint.
@@ -819,33 +797,6 @@ static const struct user_regset_view user_aarch32_view = {
 	.regsets = aarch32_regsets, .n = ARRAY_SIZE(aarch32_regsets)
 };
 
-int aarch32_break_trap(struct pt_regs *regs)
-{
-	unsigned int instr;
-	bool bp = false;
-	void __user *pc = (void __user *)instruction_pointer(regs);
-
-	if (compat_thumb_mode(regs)) {
-		/* get 16-bit Thumb instruction */
-		get_user(instr, (u16 __user *)pc);
-		if (instr == AARCH32_BREAK_THUMB2_LO) {
-			/* get second half of 32-bit Thumb-2 instruction */
-			get_user(instr, (u16 __user *)(pc + 2));
-			bp = instr == AARCH32_BREAK_THUMB2_HI;
-		} else {
-			bp = instr == AARCH32_BREAK_THUMB;
-		}
-	} else {
-		/* 32-bit ARM instruction */
-		get_user(instr, (u32 __user *)pc);
-		bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM;
-	}
-
-	if (bp)
-		return ptrace_break(regs);
-	return 1;
-}
-
 static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off,
 				   compat_ulong_t __user *ret)
 {
@@ -1113,16 +1064,6 @@ long arch_ptrace(struct task_struct *child, long request,
 	return ptrace_request(child, request, addr, data);
 }
 
-
-static int __init ptrace_break_init(void)
-{
-	hook_debug_fault_code(DBG_ESR_EVT_BRK, arm64_break_trap, SIGTRAP,
-			      TRAP_BRKPT, "ptrace BRK handler");
-	return 0;
-}
-core_initcall(ptrace_break_init);
-
-
 asmlinkage int syscall_trace(int dir, struct pt_regs *regs)
 {
 	unsigned long saved_reg;
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index f30852d28590..7ffadddb645d 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -32,6 +32,7 @@
 #include <linux/syscalls.h>
 
 #include <asm/atomic.h>
+#include <asm/debug-monitors.h>
 #include <asm/traps.h>
 #include <asm/stacktrace.h>
 #include <asm/exception.h>
@@ -261,11 +262,9 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
 	siginfo_t info;
 	void __user *pc = (void __user *)instruction_pointer(regs);
 
-#ifdef CONFIG_COMPAT
 	/* check for AArch32 breakpoint instructions */
-	if (compat_user_mode(regs) && aarch32_break_trap(regs) == 0)
+	if (!aarch32_break_handler(regs))
 		return;
-#endif
 
 	if (show_unhandled_signals && unhandled_signal(current, SIGILL) &&
 	    printk_ratelimit()) {

From 51ee4b18842045289a9557c9d5f817f1c16a8bd8 Mon Sep 17 00:00:00 2001
From: Alex Van Brunt <avanbrunt@nvidia.com>
Date: Thu, 30 Jan 2014 15:07:34 -0800
Subject: [PATCH 0294/1185] arm64: copy conditional instruction tests from arm

Copy the code from arch/arm that determines whether a conditional
instruction would be executed.

This code is needed to support A32 instruction emulation in the
kernel.

Change-Id: I0bab7537efd8cc317bd20995cd36961cf95165aa
Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
Reviewed-on: http://git-master/r/362154
Reviewed-on: http://git-master/r/365061
Reviewed-by: Richard Wiley <rwiley@nvidia.com>
Tested-by: Oskari Jaaskelainen <oskarij@nvidia.com>
---
 arch/arm64/include/asm/opcodes.h | 231 +++++++++++++++++++++++++++++++
 arch/arm64/kernel/Makefile       |   4 +-
 arch/arm64/kernel/opcodes.c      |  72 ++++++++++
 3 files changed, 305 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/include/asm/opcodes.h
 create mode 100644 arch/arm64/kernel/opcodes.c

diff --git a/arch/arm64/include/asm/opcodes.h b/arch/arm64/include/asm/opcodes.h
new file mode 100644
index 000000000000..fd189a522aee
--- /dev/null
+++ b/arch/arm64/include/asm/opcodes.h
@@ -0,0 +1,231 @@
+/*
+ *  Copied from arch/arm/include/asm/opcodes.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_ARM_OPCODES_H
+#define __ASM_ARM_OPCODES_H
+
+#ifndef __ASSEMBLY__
+#include <linux/linkage.h>
+extern asmlinkage unsigned int arm_check_condition(u32 opcode, u64 psr);
+#endif
+
+#define ARM_OPCODE_CONDTEST_FAIL   0
+#define ARM_OPCODE_CONDTEST_PASS   1
+#define ARM_OPCODE_CONDTEST_UNCOND 2
+
+
+/*
+ * Assembler opcode byteswap helpers.
+ * These are only intended for use by this header: don't use them directly,
+ * because they will be suboptimal in most cases.
+ */
+#define ___asm_opcode_swab32(x) (	\
+	  (((x) << 24) & 0xFF000000)	\
+	| (((x) <<  8) & 0x00FF0000)	\
+	| (((x) >>  8) & 0x0000FF00)	\
+	| (((x) >> 24) & 0x000000FF)	\
+)
+#define ___asm_opcode_swab16(x) (	\
+	  (((x) << 8) & 0xFF00)		\
+	| (((x) >> 8) & 0x00FF)		\
+)
+#define ___asm_opcode_swahb32(x) (	\
+	  (((x) << 8) & 0xFF00FF00)	\
+	| (((x) >> 8) & 0x00FF00FF)	\
+)
+#define ___asm_opcode_swahw32(x) (	\
+	  (((x) << 16) & 0xFFFF0000)	\
+	| (((x) >> 16) & 0x0000FFFF)	\
+)
+#define ___asm_opcode_identity32(x) ((x) & 0xFFFFFFFF)
+#define ___asm_opcode_identity16(x) ((x) & 0xFFFF)
+
+
+/*
+ * Opcode byteswap helpers
+ *
+ * These macros help with converting instructions between a canonical integer
+ * format and in-memory representation, in an endianness-agnostic manner.
+ *
+ * __mem_to_opcode_*() convert from in-memory representation to canonical form.
+ * __opcode_to_mem_*() convert from canonical form to in-memory representation.
+ *
+ *
+ * Canonical instruction representation:
+ *
+ *	ARM:		0xKKLLMMNN
+ *	Thumb 16-bit:	0x0000KKLL, where KK < 0xE8
+ *	Thumb 32-bit:	0xKKLLMMNN, where KK >= 0xE8
+ *
+ * There is no way to distinguish an ARM instruction in canonical representation
+ * from a Thumb instruction (just as these cannot be distinguished in memory).
+ * Where this distinction is important, it needs to be tracked separately.
+ *
+ * Note that values in the range 0x0000E800..0xE7FFFFFF intentionally do not
+ * represent any valid Thumb-2 instruction.  For this range,
+ * __opcode_is_thumb32() and __opcode_is_thumb16() will both be false.
+ *
+ * The ___asm variants are intended only for use by this header, in situations
+ * involving inline assembler.  For .S files, the normal __opcode_*() macros
+ * should do the right thing.
+ */
+#ifdef __ASSEMBLY__
+
+#define ___opcode_swab32(x) ___asm_opcode_swab32(x)
+#define ___opcode_swab16(x) ___asm_opcode_swab16(x)
+#define ___opcode_swahb32(x) ___asm_opcode_swahb32(x)
+#define ___opcode_swahw32(x) ___asm_opcode_swahw32(x)
+#define ___opcode_identity32(x) ___asm_opcode_identity32(x)
+#define ___opcode_identity16(x) ___asm_opcode_identity16(x)
+
+#else /* ! __ASSEMBLY__ */
+
+#include <linux/types.h>
+#include <linux/swab.h>
+
+#define ___opcode_swab32(x) swab32(x)
+#define ___opcode_swab16(x) swab16(x)
+#define ___opcode_swahb32(x) swahb32(x)
+#define ___opcode_swahw32(x) swahw32(x)
+#define ___opcode_identity32(x) ((u32)(x))
+#define ___opcode_identity16(x) ((u16)(x))
+
+#endif /* ! __ASSEMBLY__ */
+
+
+#ifdef CONFIG_CPU_ENDIAN_BE8
+
+#define __opcode_to_mem_arm(x) ___opcode_swab32(x)
+#define __opcode_to_mem_thumb16(x) ___opcode_swab16(x)
+#define __opcode_to_mem_thumb32(x) ___opcode_swahb32(x)
+#define ___asm_opcode_to_mem_arm(x) ___asm_opcode_swab32(x)
+#define ___asm_opcode_to_mem_thumb16(x) ___asm_opcode_swab16(x)
+#define ___asm_opcode_to_mem_thumb32(x) ___asm_opcode_swahb32(x)
+
+#else /* ! CONFIG_CPU_ENDIAN_BE8 */
+
+#define __opcode_to_mem_arm(x) ___opcode_identity32(x)
+#define __opcode_to_mem_thumb16(x) ___opcode_identity16(x)
+#define ___asm_opcode_to_mem_arm(x) ___asm_opcode_identity32(x)
+#define ___asm_opcode_to_mem_thumb16(x) ___asm_opcode_identity16(x)
+#ifndef CONFIG_CPU_ENDIAN_BE32
+/*
+ * On BE32 systems, using 32-bit accesses to store Thumb instructions will not
+ * work in all cases, due to alignment constraints.  For now, a correct
+ * version is not provided for BE32.
+ */
+#define __opcode_to_mem_thumb32(x) ___opcode_swahw32(x)
+#define ___asm_opcode_to_mem_thumb32(x) ___asm_opcode_swahw32(x)
+#endif
+
+#endif /* ! CONFIG_CPU_ENDIAN_BE8 */
+
+#define __mem_to_opcode_arm(x) __opcode_to_mem_arm(x)
+#define __mem_to_opcode_thumb16(x) __opcode_to_mem_thumb16(x)
+#ifndef CONFIG_CPU_ENDIAN_BE32
+#define __mem_to_opcode_thumb32(x) __opcode_to_mem_thumb32(x)
+#endif
+
+/* Operations specific to Thumb opcodes */
+
+/* Instruction size checks: */
+#define __opcode_is_thumb32(x) (		\
+	   ((x) & 0xF8000000) == 0xE8000000	\
+	|| ((x) & 0xF0000000) == 0xF0000000	\
+)
+#define __opcode_is_thumb16(x) (					\
+	   ((x) & 0xFFFF0000) == 0					\
+	&& !(((x) & 0xF800) == 0xE800 || ((x) & 0xF000) == 0xF000)	\
+)
+
+/* Operations to construct or split 32-bit Thumb instructions: */
+#define __opcode_thumb32_first(x) (___opcode_identity16((x) >> 16))
+#define __opcode_thumb32_second(x) (___opcode_identity16(x))
+#define __opcode_thumb32_compose(first, second) (			\
+	  (___opcode_identity32(___opcode_identity16(first)) << 16)	\
+	| ___opcode_identity32(___opcode_identity16(second))		\
+)
+#define ___asm_opcode_thumb32_first(x) (___asm_opcode_identity16((x) >> 16))
+#define ___asm_opcode_thumb32_second(x) (___asm_opcode_identity16(x))
+#define ___asm_opcode_thumb32_compose(first, second) (			    \
+	  (___asm_opcode_identity32(___asm_opcode_identity16(first)) << 16) \
+	| ___asm_opcode_identity32(___asm_opcode_identity16(second))	    \
+)
+
+/*
+ * Opcode injection helpers
+ *
+ * In rare cases it is necessary to assemble an opcode which the
+ * assembler does not support directly, or which would normally be
+ * rejected because of the CFLAGS or AFLAGS used to build the affected
+ * file.
+ *
+ * Before using these macros, consider carefully whether it is feasible
+ * instead to change the build flags for your file, or whether it really
+ * makes sense to support old assembler versions when building that
+ * particular kernel feature.
+ *
+ * The macros defined here should only be used where there is no viable
+ * alternative.
+ *
+ *
+ * __inst_arm(x): emit the specified ARM opcode
+ * __inst_thumb16(x): emit the specified 16-bit Thumb opcode
+ * __inst_thumb32(x): emit the specified 32-bit Thumb opcode
+ *
+ * __inst_arm_thumb16(arm, thumb): emit either the specified arm or
+ *	16-bit Thumb opcode, depending on whether an ARM or Thumb-2
+ *	kernel is being built
+ *
+ * __inst_arm_thumb32(arm, thumb): emit either the specified arm or
+ *	32-bit Thumb opcode, depending on whether an ARM or Thumb-2
+ *	kernel is being built
+ *
+ *
+ * Note that using these macros directly is poor practice.  Instead, you
+ * should use them to define human-readable wrapper macros to encode the
+ * instructions that you care about.  In code which might run on ARMv7 or
+ * above, you can usually use the __inst_arm_thumb{16,32} macros to
+ * specify the ARM and Thumb alternatives at the same time.  This ensures
+ * that the correct opcode gets emitted depending on the instruction set
+ * used for the kernel build.
+ *
+ * Look at opcodes-virt.h for an example of how to use these macros.
+ */
+#include <linux/stringify.h>
+
+#define __inst_arm(x) ___inst_arm(___asm_opcode_to_mem_arm(x))
+#define __inst_thumb32(x) ___inst_thumb32(				\
+	___asm_opcode_to_mem_thumb16(___asm_opcode_thumb32_first(x)),	\
+	___asm_opcode_to_mem_thumb16(___asm_opcode_thumb32_second(x))	\
+)
+#define __inst_thumb16(x) ___inst_thumb16(___asm_opcode_to_mem_thumb16(x))
+
+#ifdef CONFIG_THUMB2_KERNEL
+#define __inst_arm_thumb16(arm_opcode, thumb_opcode) \
+	__inst_thumb16(thumb_opcode)
+#define __inst_arm_thumb32(arm_opcode, thumb_opcode) \
+	__inst_thumb32(thumb_opcode)
+#else
+#define __inst_arm_thumb16(arm_opcode, thumb_opcode) __inst_arm(arm_opcode)
+#define __inst_arm_thumb32(arm_opcode, thumb_opcode) __inst_arm(arm_opcode)
+#endif
+
+/* Helpers for the helpers.  Don't use these directly. */
+#ifdef __ASSEMBLY__
+#define ___inst_arm(x) .long x
+#define ___inst_thumb16(x) .short x
+#define ___inst_thumb32(first, second) .short first, second
+#else
+#define ___inst_arm(x) ".long " __stringify(x) "\n\t"
+#define ___inst_thumb16(x) ".short " __stringify(x) "\n\t"
+#define ___inst_thumb32(first, second) \
+	".short " __stringify(first) ", " __stringify(second) "\n\t"
+#endif
+
+#endif /* __ASM_ARM_OPCODES_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7b4b564961d4..d7ff1913c506 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -9,9 +9,9 @@ AFLAGS_head.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 arm64-obj-y		:= cputable.o debug-monitors.o entry.o irq.o fpsimd.o	\
 			   entry-fpsimd.o process.o ptrace.o setup.o signal.o	\
 			   sys.o stacktrace.o time.o traps.o io.o vdso.o	\
-			   hyp-stub.o psci.o
+			   hyp-stub.o psci.o opcodes.o
 
-arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o 	\
+arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o	\
 					   sys_compat.o
 arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o
 arm64-obj-$(CONFIG_SMP)			+= smp.o smp_spin_table.o smp_psci.o
diff --git a/arch/arm64/kernel/opcodes.c b/arch/arm64/kernel/opcodes.c
new file mode 100644
index 000000000000..ceb5a04a1e12
--- /dev/null
+++ b/arch/arm64/kernel/opcodes.c
@@ -0,0 +1,72 @@
+/*
+ *  Copied from linux/arch/arm/kernel/opcodes.c
+ *
+ *  A32 condition code lookup feature moved from nwfpe/fpopcode.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <asm/opcodes.h>
+
+#define ARM_OPCODE_CONDITION_UNCOND 0xf
+
+/*
+ * condition code lookup table
+ * index into the table is test code: EQ, NE, ... LT, GT, AL, NV
+ *
+ * bit position in short is condition code: NZCV
+ */
+static const unsigned short cc_map[16] = {
+	0xF0F0,			/* EQ == Z set            */
+	0x0F0F,			/* NE                     */
+	0xCCCC,			/* CS == C set            */
+	0x3333,			/* CC                     */
+	0xFF00,			/* MI == N set            */
+	0x00FF,			/* PL                     */
+	0xAAAA,			/* VS == V set            */
+	0x5555,			/* VC                     */
+	0x0C0C,			/* HI == C set && Z clear */
+	0xF3F3,			/* LS == C clear || Z set */
+	0xAA55,			/* GE == (N==V)           */
+	0x55AA,			/* LT == (N!=V)           */
+	0x0A05,			/* GT == (!Z && (N==V))   */
+	0xF5FA,			/* LE == (Z || (N!=V))    */
+	0xFFFF,			/* AL always              */
+	0			/* NV                     */
+};
+
+/*
+ * Returns:
+ * ARM_OPCODE_CONDTEST_FAIL   - if condition fails
+ * ARM_OPCODE_CONDTEST_PASS   - if condition passes (including AL)
+ * ARM_OPCODE_CONDTEST_UNCOND - if NV condition, or separate unconditional
+ *                              opcode space from v5 onwards
+ *
+ * Code that tests whether a conditional instruction would pass its condition
+ * check should check that return value == ARM_OPCODE_CONDTEST_PASS.
+ *
+ * Code that tests if a condition means that the instruction would be executed
+ * (regardless of conditional or unconditional) should instead check that the
+ * return value != ARM_OPCODE_CONDTEST_FAIL.
+ */
+asmlinkage unsigned int arm_check_condition(u32 opcode, u64 psr)
+{
+	u32 cc_bits  = opcode >> 28;
+	u32 psr_cond = (u32)(psr & 0xffffffff) >> 28;
+	unsigned int ret;
+
+	if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) {
+		if ((cc_map[cc_bits] >> (psr_cond)) & 1)
+			ret = ARM_OPCODE_CONDTEST_PASS;
+		else
+			ret = ARM_OPCODE_CONDTEST_FAIL;
+	} else {
+		ret = ARM_OPCODE_CONDTEST_UNCOND;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(arm_check_condition);

From 99738c2dc1ad034c0b71318b245b063479ed423a Mon Sep 17 00:00:00 2001
From: Alex Van Brunt <avanbrunt@nvidia.com>
Date: Wed, 29 Jan 2014 13:45:20 -0800
Subject: [PATCH 0295/1185] arm64: ptrace: add is_wide_instruction() macro

Add the is_wide_instruction() macro. This was copied from the arm
architecture.

Change-Id: I28f83b47f5c587fe778dc2846df77673f8dd918b
Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
Reviewed-on: http://git-master/r/361737
Reviewed-by: Peng Du <pdu@nvidia.com>
Reviewed-on: http://git-master/r/365060
Reviewed-by: Richard Wiley <rwiley@nvidia.com>
Tested-by: Oskari Jaaskelainen <oskarij@nvidia.com>
---
 arch/arm64/include/asm/ptrace.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 41e59e2459ff..fff28950e660 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -171,5 +171,13 @@ extern unsigned long profile_pc(struct pt_regs *regs);
 #define profile_pc(regs) instruction_pointer(regs)
 #endif
 
+/*
+ * True if instr is a 32-bit thumb instruction. This works if instr
+ * is the first or only half-word of a thumb instruction. It also works
+ * when instr holds all 32-bits of a wide thumb instruction if stored
+ * in the form (first_half<<16)|(second_half)
+ */
+#define is_wide_instruction(instr)	((unsigned)(instr) >= 0xe800)
+
 #endif /* __ASSEMBLY__ */
 #endif

From bc97ba4de74958ead0d40d44d1b4516e7f3de8dd Mon Sep 17 00:00:00 2001
From: Alex Van Brunt <avanbrunt@nvidia.com>
Date: Wed, 29 Jan 2014 13:41:01 -0800
Subject: [PATCH 0296/1185] arm64: add undefined instruction handler hooks

Add undefined instruction handler hooks similar to the system in the
arm architecture. One difference is that hooks can only be added at
boot time and they can never be removed. This removes the need for
the spinlock in the handler.

Change-Id: I4684937f5209ca2a64ee63947bb2ab6411ae14f7
Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
Reviewed-on: http://git-master/r/361736
Reviewed-on: http://git-master/r/365059
Reviewed-by: Richard Wiley <rwiley@nvidia.com>
Tested-by: Oskari Jaaskelainen <oskarij@nvidia.com>
---
 arch/arm64/include/asm/traps.h | 13 +++++++++++
 arch/arm64/kernel/traps.c      | 40 ++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 10ca8ff93cc2..75f51eadb9eb 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -18,6 +18,19 @@
 #ifndef __ASM_TRAP_H
 #define __ASM_TRAP_H
 
+#include <linux/list.h>
+
+struct undef_hook {
+	struct list_head node;
+	u32 instr_mask;
+	u32 instr_val;
+	u32 pstate_mask;
+	u32 pstate_val;
+	int (*fn)(struct pt_regs *regs, unsigned int instr);
+};
+
+void register_undef_hook(struct undef_hook *hook);
+
 static inline int in_exception_text(unsigned long ptr)
 {
 	extern char __exception_text_start[];
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 7ffadddb645d..4f8bf320ec99 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 1995-2009 Russell King
  * Copyright (C) 2012 ARM Ltd.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -257,15 +258,54 @@ void arm64_notify_die(const char *str, struct pt_regs *regs,
 		die(str, regs, err);
 }
 
+static LIST_HEAD(undef_hook);
+
+void register_undef_hook(struct undef_hook *hook)
+{
+	list_add(&hook->node, &undef_hook);
+}
+
+static int call_undef_hook(struct pt_regs *regs, unsigned int instr)
+{
+	struct undef_hook *hook;
+	int (*fn)(struct pt_regs *regs, unsigned int instr) = NULL;
+
+	list_for_each_entry(hook, &undef_hook, node)
+		if ((instr & hook->instr_mask) == hook->instr_val &&
+		    (regs->pstate & hook->pstate_mask) == hook->pstate_val)
+			fn = hook->fn;
+
+	return fn ? fn(regs, instr) : 1;
+}
+
 asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
 {
+	u32 instr;
 	siginfo_t info;
 	void __user *pc = (void __user *)instruction_pointer(regs);
 
 	/* check for AArch32 breakpoint instructions */
 	if (!aarch32_break_handler(regs))
 		return;
+	if (compat_thumb_mode(regs)) {
+		if (get_user(instr, (u16 __user *)pc))
+			goto die_sig;
+		if (is_wide_instruction(instr)) {
+			u32 instr2;
+			if (get_user(instr2, (u16 __user *)pc+1))
+				goto die_sig;
+			instr <<= 16;
+			instr |= instr2;
+		}
 
+	} else if ((get_user(instr, (u32 __user *)pc))) {
+		goto die_sig;
+	}
+
+	if (call_undef_hook(regs, instr) == 0)
+		return;
+
+die_sig:
 	if (show_unhandled_signals && unhandled_signal(current, SIGILL) &&
 	    printk_ratelimit()) {
 		pr_info("%s[%d]: undefined instruction: pc=%p\n",

From b3987a9c4664d18b8c8af6d43e6ae1904c8febce Mon Sep 17 00:00:00 2001
From: Peng Du <pdu@nvidia.com>
Date: Wed, 23 Jul 2014 11:40:33 -0700
Subject: [PATCH 0297/1185] arm64: kernel: check mode for get_user in
 undefinstr

get_user() should be called only for user_mode undef instruction.

Change-Id: Ia654783de0cf72abac6847ac9630236f9f0d6ebb
Signed-off-by: Peng Du <pdu@nvidia.com>
Reviewed-on: http://git-master/r/441348
Reviewed-by: Thomas Cherry <tcherry@nvidia.com>
Reviewed-by: Bo Yan <byan@nvidia.com>
---
 arch/arm64/kernel/traps.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 4f8bf320ec99..0da47699510b 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -287,19 +287,23 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
 	/* check for AArch32 breakpoint instructions */
 	if (!aarch32_break_handler(regs))
 		return;
-	if (compat_thumb_mode(regs)) {
-		if (get_user(instr, (u16 __user *)pc))
-			goto die_sig;
-		if (is_wide_instruction(instr)) {
-			u32 instr2;
-			if (get_user(instr2, (u16 __user *)pc+1))
+	if (user_mode(regs)) {
+		if (compat_thumb_mode(regs)) {
+			if (get_user(instr, (u16 __user *)pc))
 				goto die_sig;
-			instr <<= 16;
-			instr |= instr2;
+			if (is_wide_instruction(instr)) {
+				u32 instr2;
+				if (get_user(instr2, (u16 __user *)pc+1))
+					goto die_sig;
+				instr <<= 16;
+				instr |= instr2;
+			}
+		} else if (get_user(instr, (u32 __user *)pc)) {
+			goto die_sig;
 		}
-
-	} else if ((get_user(instr, (u32 __user *)pc))) {
-		goto die_sig;
+	} else {
+		/* kernel mode */
+		instr = *((u32 *)pc);
 	}
 
 	if (call_undef_hook(regs, instr) == 0)

From 75079f8dfc7d4b32844a24e30bafa317e0af97ee Mon Sep 17 00:00:00 2001
From: Alex Van Brunt <avanbrunt@nvidia.com>
Date: Tue, 11 Feb 2014 10:08:51 -0800
Subject: [PATCH 0298/1185] arm64: a backwards compatible config option

Create a config option that when selected configures the kernel to be
as backwards compatible with kernels that ran on an ARMv7 processor
as possible.

Change-Id: I7cd67e6d4174335f9a67aba2a39dfd993f240c27
Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
Reviewed-on: http://git-master/r/366094
Reviewed-by: Richard Wiley <rwiley@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
Tested-by: Oskari Jaaskelainen <oskarij@nvidia.com>
---
 arch/arm64/Kconfig | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3ee27d34a020..437dd6cf5ac8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -182,6 +182,16 @@ config HW_PERF_EVENTS
 	  Enable hardware performance counter support for perf events. If
 	  disabled, perf events will use software events only.
 
+config ARMV7_COMPAT
+	bool "Kernel support for ARMv7 applications"
+	depends on COMPAT
+	help
+	 This option enables features that allow that ran on an ARMv7 or older
+	 processor to continue functioning.
+
+	 If you want to execute ARMv7 applications, say Y
+
+
 source "mm/Kconfig"
 
 endmenu

From ecff782253197463f33d719a4fed4bd39c8de469 Mon Sep 17 00:00:00 2001
From: Alex Van Brunt <avanbrunt@nvidia.com>
Date: Thu, 30 Jan 2014 15:10:39 -0800
Subject: [PATCH 0299/1185] arm64: emulate the swp/swpb instruction

The swp and swpb instructions were deprecated in ARMv6. ARMv8
obsoleted the instruction. Despite this, many applications rely on
these instructions.

This patch starts with the version present in the arm architecture.
However, it uses the ldx*()/stx*() functions to implement the handler
in C code. It also removes a lot of code that is not needed.

Change-Id: I6882fbe5f71bfa8f9e9a75d067b2111188c6f2fa
Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
Reviewed-on: http://git-master/r/366097
Reviewed-by: Richard Wiley <rwiley@nvidia.com>
Tested-by: Oskari Jaaskelainen <oskarij@nvidia.com>

Conflicts:
	arch/arm64/Kconfig
	arch/arm64/kernel/Makefile
---
 arch/arm64/Kconfig              |  26 ++++++
 arch/arm64/kernel/Makefile      |   3 +
 arch/arm64/kernel/swp_emulate.c | 155 ++++++++++++++++++++++++++++++++
 3 files changed, 184 insertions(+)
 create mode 100644 arch/arm64/kernel/swp_emulate.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 437dd6cf5ac8..787cee384e1c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -152,6 +152,31 @@ config NR_CPUS
 	depends on SMP
 	default "4"
 
+config SWP_EMULATE
+	bool "Emulate SWP/SWPB instructions"
+	help
+	  ARMv6 architecture deprecates use of the SWP/SWPB instructions. ARMv8
+	  oblosetes the use of SWP/SWPB instructions. ARMv7 multiprocessing
+	  extensions introduce the ability to disable these instructions,
+	  triggering an undefined instruction exception when executed. Say Y
+	  here to enable software emulation of these instructions for userspace
+	  (not kernel) using LDREX/STREX. Also creates /proc/cpu/swp_emulation
+	  for statistics.
+
+	  In some older versions of glibc [<=2.8] SWP is used during futex
+	  trylock() operations with the assumption that the code will not
+	  be preempted. This invalid assumption may be more likely to fail
+	  with SWP emulation enabled, leading to deadlock of the user
+	  application.
+
+	  NOTE: when accessing uncached shared regions, LDREX/STREX rely
+	  on an external transaction monitoring block called a global
+	  monitor to maintain update atomicity. If your system does not
+	  implement a global monitor, this option can cause programs that
+	  perform SWP operations to uncached memory to deadlock.
+
+	  If unsure, say Y.
+
 source kernel/Kconfig.preempt
 
 config HZ
@@ -185,6 +210,7 @@ config HW_PERF_EVENTS
 config ARMV7_COMPAT
 	bool "Kernel support for ARMv7 applications"
 	depends on COMPAT
+	select SWP_EMULATE
 	help
 	 This option enables features that allow that ran on an ARMv7 or older
 	 processor to continue functioning.
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index d7ff1913c506..3e6706630ae5 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -18,6 +18,9 @@ arm64-obj-$(CONFIG_SMP)			+= smp.o smp_spin_table.o smp_psci.o
 arm64-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o
 arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
 arm64-obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
+arm64-obj-$(CONFIG_ARM_CPU_SUSPEND)	+= sleep.o suspend.o
+
+obj-$(CONFIG_SWP_EMULATE)	+= swp_emulate.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/swp_emulate.c b/arch/arm64/kernel/swp_emulate.c
new file mode 100644
index 000000000000..05ffe59b5bc4
--- /dev/null
+++ b/arch/arm64/kernel/swp_emulate.c
@@ -0,0 +1,155 @@
+/*
+ *  Derived from from linux/arch/arm/kernel/swp_emulate.c
+ *
+ *  Copyright (C) 2009 ARM Limited
+ *  Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Implements emulation of the SWP/SWPB instructions using load-exclusive and
+ *  store-exclusive for processors that have them disabled (or future ones that
+ *  might not implement them).
+ *
+ *  Syntax of SWP{B} instruction: SWP{B}<c> <Rt>, <Rt2>, [<Rn>]
+ *  Where: Rt  = destination
+ *	   Rt2 = source
+ *	   Rn  = address
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/sched.h>
+#include <linux/syscalls.h>
+#include <linux/perf_event.h>
+
+#include <asm/opcodes.h>
+#include <asm/traps.h>
+#include <asm/uaccess.h>
+#include <asm/system_misc.h>
+#include <linux/debugfs.h>
+
+/*
+ * Macros/defines for extracting register numbers from instruction.
+ */
+#define EXTRACT_REG_NUM(instruction, offset) \
+	(((instruction) & (0xf << (offset))) >> (offset))
+#define RN_OFFSET  16
+#define RT_OFFSET  12
+#define RT2_OFFSET  0
+/*
+ * Bit 22 of the instruction encoding distinguishes between
+ * the SWP and SWPB variants (bit set means SWPB).
+ */
+#define TYPE_SWPB (1 << 22)
+
+static pid_t previous_pid;
+
+u64 swpb_count = 0;
+u64 swp_count = 0;
+
+/*
+ * swp_handler logs the id of calling process, dissects the instruction, sanity
+ * checks the memory location, calls emulate_swpX for the actual operation and
+ * deals with fixup/error handling before returning
+ */
+static int swp_handler(struct pt_regs *regs, unsigned int instr)
+{
+	u32 address_reg, destreg, data, type;
+	uintptr_t address;
+	unsigned int res = 0;
+	u32 temp32;
+	u8 temp8;
+
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
+
+	res = arm_check_condition(instr, regs->pstate);
+	switch (res) {
+	case ARM_OPCODE_CONDTEST_PASS:
+		break;
+	case ARM_OPCODE_CONDTEST_FAIL:
+		/* Condition failed - return to next instruction */
+		regs->pc += 4;
+		return 0;
+	case ARM_OPCODE_CONDTEST_UNCOND:
+		/* If unconditional encoding - not a SWP, undef */
+		return -EFAULT;
+	default:
+		return -EINVAL;
+	}
+
+	if (current->pid != previous_pid) {
+		pr_warn("\"%s\" (%ld) uses obsolete SWP{B} instruction\n",
+			 current->comm, (unsigned long)current->pid);
+		previous_pid = current->pid;
+	}
+
+	address = regs->regs[EXTRACT_REG_NUM(instr, RN_OFFSET)] & 0xffffffff;
+	data = regs->regs[EXTRACT_REG_NUM(instr, RT2_OFFSET)];
+	destreg = EXTRACT_REG_NUM(instr, RT_OFFSET);
+
+	type = instr & TYPE_SWPB;
+
+	/* Check access in reasonable access range for both SWP and SWPB */
+	if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) {
+		pr_debug("SWP{B} emulation: access to %p not allowed!\n",
+			 (void *)address);
+		res = -EFAULT;
+	}
+	if (type == TYPE_SWPB) {
+		do {
+			temp8 = ldax8((u8 *) address);
+		} while (stx8((u8 *) address, (u8) data));
+		regs->regs[destreg] = temp8;
+		regs->pc += 4;
+		swpb_count++;
+	} else if (address & 0x3) {
+		/* SWP to unaligned address not permitted */
+		pr_debug("SWP instruction on unaligned pointer!\n");
+		return -EFAULT;
+	} else {
+		do {
+			temp32 = ldax32((u32 *) address);
+		} while (stlx32((u32 *) address, (u32) data));
+		regs->regs[destreg] = temp32;
+		regs->pc += 4;
+		swp_count++;
+	}
+
+	return 0;
+}
+
+/*
+ * Only emulate SWP/SWPB executed in ARM state/User mode.
+ * The kernel must be SWP free and SWP{B} does not exist in Thumb/ThumbEE.
+ */
+static struct undef_hook swp_hook = {
+	.instr_mask	= 0x0fb00ff0,
+	.instr_val	= 0x01000090,
+	.pstate_mask	= COMPAT_PSR_MODE_MASK | COMPAT_PSR_T_BIT,
+	.pstate_val	= COMPAT_PSR_MODE_USR,
+	.fn		= swp_handler
+};
+
+/*
+ * Register handler and create status file in /proc/cpu
+ * Invoked as late_initcall, since not needed before init spawned.
+ */
+static int __init swp_emulation_init(void)
+{
+	struct dentry *dir;
+	dir = debugfs_create_dir("swp_emulate", NULL);
+	debugfs_create_u64("swp_count", S_IRUGO | S_IWUSR, dir, &swp_count);
+	debugfs_create_u64("swpb_count", S_IRUGO | S_IWUSR, dir, &swpb_count);
+
+	pr_notice("Registering SWP/SWPB emulation handler\n");
+	register_undef_hook(&swp_hook);
+
+
+	return 0;
+}
+
+late_initcall(swp_emulation_init);

From d0c5e1444d3efbe361bb5e9fc0d8dd0aeb6ae2fb Mon Sep 17 00:00:00 2001
From: Alex Van Brunt <avanbrunt@nvidia.com>
Date: Tue, 18 Feb 2014 17:50:57 -0800
Subject: [PATCH 0300/1185] arm64: fix a warning and a typo in SWP emulation

The store-release-exclusive is missing the "L" that makes it a
release rather than a normal store-exclusive.

Remove a variable that is not used and causes a compiler warning.

Change-Id: I91633a352b805ed9af450b632c9ee394235637c4
Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
Reviewed-on: http://git-master/r/369076
Reviewed-by: Richard Wiley <rwiley@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
---
 arch/arm64/kernel/swp_emulate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/swp_emulate.c b/arch/arm64/kernel/swp_emulate.c
index 05ffe59b5bc4..1dd824e56eb4 100644
--- a/arch/arm64/kernel/swp_emulate.c
+++ b/arch/arm64/kernel/swp_emulate.c
@@ -58,7 +58,7 @@ u64 swp_count = 0;
  */
 static int swp_handler(struct pt_regs *regs, unsigned int instr)
 {
-	u32 address_reg, destreg, data, type;
+	u32 destreg, data, type;
 	uintptr_t address;
 	unsigned int res = 0;
 	u32 temp32;
@@ -102,7 +102,7 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr)
 	if (type == TYPE_SWPB) {
 		do {
 			temp8 = ldax8((u8 *) address);
-		} while (stx8((u8 *) address, (u8) data));
+		} while (stlx8((u8 *) address, (u8) data));
 		regs->regs[destreg] = temp8;
 		regs->pc += 4;
 		swpb_count++;

From 70f16b592ee11f74eb0bc2664202b87ee3582bd8 Mon Sep 17 00:00:00 2001
From: Alex Van Brunt <avanbrunt@nvidia.com>
Date: Thu, 20 Feb 2014 18:18:53 -0800
Subject: [PATCH 0301/1185] arm64: add fault handling to SWP emulation

Add exception table and fixup for SWP/SWPB instruction emulation.
This prevents the kernel from panicking when emulating a SWP/SWPB
instruction that accesses unmapped memory.

Change-Id: I4a9ca34fa161a0f306cdb663827d9bee39cec733
Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
Reviewed-on: http://git-master/r/370278
---
 arch/arm64/kernel/swp_emulate.c | 78 ++++++++++++++++++++++++++++++---
 1 file changed, 72 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/swp_emulate.c b/arch/arm64/kernel/swp_emulate.c
index 1dd824e56eb4..b1a176bdede3 100644
--- a/arch/arm64/kernel/swp_emulate.c
+++ b/arch/arm64/kernel/swp_emulate.c
@@ -32,6 +32,71 @@
 #include <asm/system_misc.h>
 #include <linux/debugfs.h>
 
+/*
+ * Error-checking SWP macros implemented using ldrex{b}/strex{b}
+ */
+
+static int swpb(u8 in, u8 *out, u8 *addr)
+{
+	u8 _out;
+	int res;
+	int err = 0;
+
+	do {
+		__asm__ __volatile__(
+		"0:	ldxrb	%w1, %4\n"
+		"1:	stxrb	%w0, %w3, %4\n"
+		"2:\n"
+		"	.section	 .fixup,\"ax\"\n"
+		"	.align		2\n"
+		"3:	mov	%w2, %5\n"
+		"	b	2b\n"
+		"	.previous\n"
+		"	.section	 __ex_table,\"a\"\n"
+		"	.align		3\n"
+		"	.quad		0b, 3b\n"
+		"	.quad		1b, 3b\n"
+		"	.previous"
+		: "=&r" (res), "=r" (_out), "=r" (err)
+		: "r" (in), "Q" (addr), "i" (-EFAULT)
+		: "cc", "memory");
+	} while (err == 0 && res != 0);
+
+	if (err == 0)
+		*out = _out;
+	return err;
+}
+
+static int swp(u32 in, u32 *out, u32 *addr)
+{
+	u32 _out;
+	int res;
+	int err = 0;
+
+	do {
+		__asm__ __volatile__(
+		"0:	ldxr	%w1, %4\n"
+		"1:	stxr	%w0, %w3, %4\n"
+		"2:\n"
+		"	.section	 .fixup,\"ax\"\n"
+		"	.align		2\n"
+		"3:	mov	%w2, %5\n"
+		"	b	2b\n"
+		"	.previous\n"
+		"	.section	 __ex_table,\"a\"\n"
+		"	.align		3\n"
+		"	.quad		0b, 3b\n"
+		"	.quad		1b, 3b\n"
+		"	.previous"
+		: "=&r" (res), "=r" (_out), "=r" (err)
+		: "r" (in), "Q" (addr), "i" (-EFAULT)
+		: "cc", "memory");
+	} while (err == 0 && res != 0);
+
+	if (err == 0)
+		*out = _out;
+	return err;
+}
 /*
  * Macros/defines for extracting register numbers from instruction.
  */
@@ -61,6 +126,7 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr)
 	u32 destreg, data, type;
 	uintptr_t address;
 	unsigned int res = 0;
+	int err;
 	u32 temp32;
 	u8 temp8;
 
@@ -100,9 +166,9 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr)
 		res = -EFAULT;
 	}
 	if (type == TYPE_SWPB) {
-		do {
-			temp8 = ldax8((u8 *) address);
-		} while (stlx8((u8 *) address, (u8) data));
+		err = swpb((u8) data, &temp8, (u8 *) address);
+		if (err)
+			return err;
 		regs->regs[destreg] = temp8;
 		regs->pc += 4;
 		swpb_count++;
@@ -111,9 +177,9 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr)
 		pr_debug("SWP instruction on unaligned pointer!\n");
 		return -EFAULT;
 	} else {
-		do {
-			temp32 = ldax32((u32 *) address);
-		} while (stlx32((u32 *) address, (u32) data));
+		err = swp((u32) data, &temp32, (u32 *) address);
+		if (err)
+			return err;
 		regs->regs[destreg] = temp32;
 		regs->pc += 4;
 		swp_count++;

From ff5878c3916bdfc1229eeade86728814c4dd4e4f Mon Sep 17 00:00:00 2001
From: Rich Wiley <rwiley@nvidia.com>
Date: Mon, 10 Mar 2014 14:01:06 -0700
Subject: [PATCH 0302/1185] arm64: fix SWP instruction emulation

initial variable values may get overwritten
if they're listed as an output in ASM, even if
they're not explicitly written to.

Change-Id: I2a239e1819850a2a7005a46e83d82deac4ca303b
Signed-off-by: Rich Wiley <rwiley@nvidia.com>
Reviewed-on: http://git-master/r/379646
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Li Li (SW-TEGRA) <lli5@nvidia.com>
Tested-by: Li Li (SW-TEGRA) <lli5@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alexander Van Brunt <avanbrunt@nvidia.com>
---
 arch/arm64/kernel/swp_emulate.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/swp_emulate.c b/arch/arm64/kernel/swp_emulate.c
index b1a176bdede3..508fd2edb8ab 100644
--- a/arch/arm64/kernel/swp_emulate.c
+++ b/arch/arm64/kernel/swp_emulate.c
@@ -40,12 +40,13 @@ static int swpb(u8 in, u8 *out, u8 *addr)
 {
 	u8 _out;
 	int res;
-	int err = 0;
+	int err;
 
 	do {
 		__asm__ __volatile__(
 		"0:	ldxrb	%w1, %4\n"
 		"1:	stxrb	%w0, %w3, %4\n"
+		"	mov	%w2, #0\n"
 		"2:\n"
 		"	.section	 .fixup,\"ax\"\n"
 		"	.align		2\n"
@@ -58,7 +59,7 @@ static int swpb(u8 in, u8 *out, u8 *addr)
 		"	.quad		1b, 3b\n"
 		"	.previous"
 		: "=&r" (res), "=r" (_out), "=r" (err)
-		: "r" (in), "Q" (addr), "i" (-EFAULT)
+		: "r" (in), "Q" (*addr), "i" (-EFAULT)
 		: "cc", "memory");
 	} while (err == 0 && res != 0);
 
@@ -77,6 +78,7 @@ static int swp(u32 in, u32 *out, u32 *addr)
 		__asm__ __volatile__(
 		"0:	ldxr	%w1, %4\n"
 		"1:	stxr	%w0, %w3, %4\n"
+		"	mov	%w2, #0\n"
 		"2:\n"
 		"	.section	 .fixup,\"ax\"\n"
 		"	.align		2\n"
@@ -89,7 +91,7 @@ static int swp(u32 in, u32 *out, u32 *addr)
 		"	.quad		1b, 3b\n"
 		"	.previous"
 		: "=&r" (res), "=r" (_out), "=r" (err)
-		: "r" (in), "Q" (addr), "i" (-EFAULT)
+		: "r" (in), "Q" (*addr), "i" (-EFAULT)
 		: "cc", "memory");
 	} while (err == 0 && res != 0);
 

From 80cb26c175627cb9633aeae13adc8455450bf77a Mon Sep 17 00:00:00 2001
From: Alex Van Brunt <avanbrunt@nvidia.com>
Date: Tue, 28 Jan 2014 12:40:10 -0800
Subject: [PATCH 0303/1185] arm64: optionally set CP15BEN in SCTLR

Setting CP15BEN allows legacy applications running in AArch32 mode
that use CP15 DMB and similar instructions to continue running.

Change-Id: If76d3c6ee12865ff8c4b4e7aed01146bead87773
Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
Reviewed-on: http://git-master/r/366096
Reviewed-by: Richard Wiley <rwiley@nvidia.com>
Tested-by: Oskari Jaaskelainen <oskarij@nvidia.com>
---
 arch/arm64/Kconfig   | 11 +++++++++++
 arch/arm64/mm/proc.S | 15 +++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 787cee384e1c..0df94f10cda6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -218,6 +218,17 @@ config ARMV7_COMPAT
 	 If you want to execute ARMv7 applications, say Y
 
 
+config ARMV7_COMPAT_CP15_BARRIER
+	bool "Allow applications to use the CP15 barrier operations"
+	depends on ARMV7_COMPAT
+	default y
+	help
+	 This option allows applications to use deprecated CP15 barrier
+	 instructions. This is useful because this was the only way to create
+	 a barrier on older ARM processors.
+
+	 If you want to execute ARMv7 applications, say Y
+
 source "mm/Kconfig"
 
 endmenu
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 9428de8a8f37..48fffb27a7bf 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -160,6 +160,20 @@ ENTRY(__cpu_setup)
 	ret					// return to head.S
 ENDPROC(__cpu_setup)
 
+#ifdef CONFIG_ARMV7_COMPAT_CP15_BARRIER
+	/*
+	 *                 n n            T
+	 *       U E      WT T UD     US IHBS
+	 *       CE0      XWHW CZ     ME TEEA S
+	 * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM
+	 * 0011 0... 1101 ..0. ..0. 10.. .... .... < hardware reserved
+	 * .... .100 .... 01.1 11.1 ..01 0011 1101 < software settings
+	 */
+	.type	crval, #object
+crval:
+	.word	0x030802e2			// clear
+	.word	0x0405d13d			// set
+#else
 	/*
 	 *                 n n            T
 	 *       U E      WT T UD     US IHBS
@@ -172,3 +186,4 @@ ENDPROC(__cpu_setup)
 crval:
 	.word	0x030802e2			// clear
 	.word	0x0405d11d			// set
+#endif

From bad15588d39c24ecb76593f632a0ab5d71ace7ed Mon Sep 17 00:00:00 2001
From: Rich Wiley <rwiley@nvidia.com>
Date: Wed, 4 Jun 2014 11:41:53 -0700
Subject: [PATCH 0304/1185] arm64: make SCTLR compat config depend on
 CONFIG_ARMV7_COMPAT

Conflicts:
	arch/arm64/mm/proc.S

Change-Id: I76e0067839c96e3082b42c80d3fc670cf3d371b5
Signed-off-by: Rich Wiley <rwiley@nvidia.com>
Reviewed-on: http://git-master/r/422173
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alexander Van Brunt <avanbrunt@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
Tested-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 arch/arm64/Kconfig   | 11 -----------
 arch/arm64/mm/proc.S |  2 +-
 2 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0df94f10cda6..787cee384e1c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -218,17 +218,6 @@ config ARMV7_COMPAT
 	 If you want to execute ARMv7 applications, say Y
 
 
-config ARMV7_COMPAT_CP15_BARRIER
-	bool "Allow applications to use the CP15 barrier operations"
-	depends on ARMV7_COMPAT
-	default y
-	help
-	 This option allows applications to use deprecated CP15 barrier
-	 instructions. This is useful because this was the only way to create
-	 a barrier on older ARM processors.
-
-	 If you want to execute ARMv7 applications, say Y
-
 source "mm/Kconfig"
 
 endmenu
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 48fffb27a7bf..9796b33a708b 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -160,7 +160,7 @@ ENTRY(__cpu_setup)
 	ret					// return to head.S
 ENDPROC(__cpu_setup)
 
-#ifdef CONFIG_ARMV7_COMPAT_CP15_BARRIER
+#ifdef CONFIG_ARMV7_COMPAT
 	/*
 	 *                 n n            T
 	 *       U E      WT T UD     US IHBS

From 2e0602939baf22b8f9057f7626c189248383d4ae Mon Sep 17 00:00:00 2001
From: Rich Wiley <rwiley@nvidia.com>
Date: Wed, 4 Jun 2014 11:44:03 -0700
Subject: [PATCH 0305/1185] arm64: enable deprecated SETEND instruction in
 SCTLR compat config

Change-Id: I703d4843f8aab2ec63324f04cc13aaabae88e163
Signed-off-by: Rich Wiley <rwiley@nvidia.com>
Reviewed-on: http://git-master/r/422174
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alexander Van Brunt <avanbrunt@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
Tested-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 arch/arm64/mm/proc.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 9796b33a708b..3e5dcd9897db 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -172,7 +172,7 @@ ENDPROC(__cpu_setup)
 	.type	crval, #object
 crval:
 	.word	0x030802e2			// clear
-	.word	0x0405d13d			// set
+	.word	0x0405d03d			// set
 #else
 	/*
 	 *                 n n            T

From cba0c6b2913c0d075a7434025f5dc29cd813707f Mon Sep 17 00:00:00 2001
From: Alex Van Brunt <avanbrunt@nvidia.com>
Date: Thu, 9 Jan 2014 12:51:05 -0800
Subject: [PATCH 0306/1185] arm64: cpuinfo: ARMv7 compatable cpuinfo option

To be backwards compatible with the output of cpuinfo on an ARMv7,
print the features that were optional in ARMv7 but are required in
ARMv8.

Change-Id: Ic728f71be4a971adc79ef552f25cfbf95a4dac29
Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
Reviewed-on: http://git-master/r/366095
Reviewed-by: Richard Wiley <rwiley@nvidia.com>
Tested-by: Oskari Jaaskelainen <oskarij@nvidia.com>
---
 arch/arm64/Kconfig        | 10 ++++++++++
 arch/arm64/kernel/setup.c |  7 ++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 787cee384e1c..71c2a070ace4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -217,6 +217,16 @@ config ARMV7_COMPAT
 
 	 If you want to execute ARMv7 applications, say Y
 
+config ARMV7_COMPAT_CPUINFO
+	bool "Report backwards compatible cpu features in /proc/cpuinfo"
+	depends on ARMV7_COMPAT
+	default y
+	help
+	 This option makes /proc/cpuinfo list CPU features that an ARMv7 or
+	 earlier kernel would report, but are not optional on an ARMv8 or later
+	 processor.
+
+	 If you want to execute ARMv7 applications, say Y
 
 source "mm/Kconfig"
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 3a06f1aa0902..efe4850da9d0 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -420,9 +420,14 @@ static int c_show(struct seq_file *m, void *v)
 	for (i = 0; hwcap_str[i]; i++)
 		if (elf_hwcap & (1 << i))
 			seq_printf(m, "%s ", hwcap_str[i]);
+#ifdef CONFIG_ARMV7_COMPAT_CPUINFO
+	/* Print out the non-optional ARMv8 HW capabilities */
+	seq_printf(m, "wp half thumb fastmult vfp edsp neon vfpv3d16 tlsi ");
+	seq_printf(m, "vfpv4 idiva idivt ");
+#endif
 
 	seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24);
-	seq_printf(m, "CPU architecture: AArch64\n");
+	seq_printf(m, "CPU architecture: 8\n");
 	seq_printf(m, "CPU variant\t: 0x%x\n", (read_cpuid_id() >> 20) & 15);
 	seq_printf(m, "CPU part\t: 0x%03x\n", (read_cpuid_id() >> 4) & 0xfff);
 	seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15);

From 779a6cf48cf313582b8d88757a798866a29195d0 Mon Sep 17 00:00:00 2001
From: Alex Van Brunt <avanbrunt@nvidia.com>
Date: Thu, 20 Feb 2014 10:46:21 -0800
Subject: [PATCH 0307/1185] arm64: report vfpv3 instead of vfpv3d16

vfpv3 is the correct version for an ARMv8 processor and it is the
version reported by an A15.

Change-Id: I486f3af21a352c27775888cca332a48d7e0c59ce
Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
Reviewed-on: http://git-master/r/370076
---
 arch/arm64/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index efe4850da9d0..18fcb7ca9b95 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -422,7 +422,7 @@ static int c_show(struct seq_file *m, void *v)
 			seq_printf(m, "%s ", hwcap_str[i]);
 #ifdef CONFIG_ARMV7_COMPAT_CPUINFO
 	/* Print out the non-optional ARMv8 HW capabilities */
-	seq_printf(m, "wp half thumb fastmult vfp edsp neon vfpv3d16 tlsi ");
+	seq_printf(m, "wp half thumb fastmult vfp edsp neon vfpv3 tlsi ");
 	seq_printf(m, "vfpv4 idiva idivt ");
 #endif
 

From 3868e7f8d47992922756d1aa6590f0d556c669b8 Mon Sep 17 00:00:00 2001
From: Greg Hackmann <ghackmann@google.com>
Date: Tue, 5 Aug 2014 16:14:27 -0700
Subject: [PATCH 0308/1185] arm64: restrict effects of ARMV7_COMPAT_CPUINFO to
 ARMv7 tasks

Since ARMV7_COMPAT_CPUINFO only exists to support existing ARMv7
binaries, restrict its effects to compat tasks

Bug: 16819658

Change-Id: I1092de596c7822d23f5f3f8a05b417a3cb49f593
Signed-off-by: Greg Hackmann <ghackmann@google.com>
---
 arch/arm64/kernel/setup.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 18fcb7ca9b95..06aeec407b20 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -421,13 +421,19 @@ static int c_show(struct seq_file *m, void *v)
 		if (elf_hwcap & (1 << i))
 			seq_printf(m, "%s ", hwcap_str[i]);
 #ifdef CONFIG_ARMV7_COMPAT_CPUINFO
-	/* Print out the non-optional ARMv8 HW capabilities */
-	seq_printf(m, "wp half thumb fastmult vfp edsp neon vfpv3 tlsi ");
-	seq_printf(m, "vfpv4 idiva idivt ");
+	if (is_compat_task()) {
+		/* Print out the non-optional ARMv8 HW capabilities */
+		seq_printf(m, "wp half thumb fastmult vfp edsp neon vfpv3 tlsi ");
+		seq_printf(m, "vfpv4 idiva idivt ");
+	}
 #endif
 
 	seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24);
-	seq_printf(m, "CPU architecture: 8\n");
+	seq_printf(m, "CPU architecture: %s\n",
+#if IS_ENABLED(CONFIG_ARMV7_COMPAT_CPUINFO)
+			is_compat_task() ? "8" :
+#endif
+			"AArch64");
 	seq_printf(m, "CPU variant\t: 0x%x\n", (read_cpuid_id() >> 20) & 15);
 	seq_printf(m, "CPU part\t: 0x%03x\n", (read_cpuid_id() >> 4) & 0xfff);
 	seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15);

From 91e8ffa66ead9f85ff86bdcb03042ada441acf78 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Fri, 28 Feb 2014 14:42:55 +0100
Subject: [PATCH 0309/1185] arm64: add support for reserved memory defined by
 device tree

Enable reserved memory initialization from device tree.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Grant Likely <grant.likely@linaro.org>
(cherry picked from commit 9bf14b7c540ae9ca7747af3a0c0d8470ef77b6ce)
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 arch/arm64/Kconfig   | 1 +
 arch/arm64/mm/init.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 21201a0cb3f7..43068cf44c2d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -41,6 +41,7 @@ config ARM64
 	select NO_BOOTMEM
 	select OF
 	select OF_EARLY_FLATTREE
+	select OF_RESERVED_MEM
 	select PERF_USE_VMALLOC
 	select POWER_RESET
 	select POWER_SUPPLY
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 52806427e15d..81bdd29df3a4 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -170,6 +170,7 @@ void __init arm64_memblock_init(void)
 		memblock_reserve(base, size);
 	}
 
+	early_init_fdt_scan_reserved_mem();
 	dma_contiguous_reserve(0);
 
 	memblock_allow_resize();

From 39fdc30ff59d36904e8f25361b07d6c88e04b078 Mon Sep 17 00:00:00 2001
From: Greg Hackmann <ghackmann@google.com>
Date: Mon, 15 Sep 2014 15:34:55 -0700
Subject: [PATCH 0310/1185] android: base-cfg: enable ARMV7_COMPAT

Enables backwards-compatibility features on arm64, and has no effect
(does not exist) on other architectures

Change-Id: I6fc2f6567437750a0032f8a39a9cde1fb92d4ef4
Signed-off-by: Greg Hackmann <ghackmann@google.com>
---
 android/configs/android-base.cfg | 1 +
 1 file changed, 1 insertion(+)

diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg
index bdc4749a195f..d8503e450957 100644
--- a/android/configs/android-base.cfg
+++ b/android/configs/android-base.cfg
@@ -6,6 +6,7 @@ CONFIG_ANDROID=y
 CONFIG_ANDROID_BINDER_IPC=y
 CONFIG_ANDROID_INTF_ALARM_DEV=y
 CONFIG_ANDROID_LOW_MEMORY_KILLER=y
+CONFIG_ARMV7_COMPAT=y
 CONFIG_ASHMEM=y
 CONFIG_BLK_DEV_DM=y
 CONFIG_BLK_DEV_INITRD=y

From ce1c89d450af950db7d381df8ba8025736a44c8b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <m.chehab@samsung.com>
Date: Mon, 21 Jul 2014 14:21:18 -0300
Subject: [PATCH 0311/1185] media: xc5000: Fix get_frequency()

commit a3eec916cbc17dc1aaa3ddf120836cd5200eb4ef upstream.

The programmed frequency on xc5000 is not the middle
frequency, but the initial frequency on the bandwidth range.
However, the DVB API works with the middle frequency.

Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/tuners/xc5000.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/media/tuners/xc5000.c b/drivers/media/tuners/xc5000.c
index 5cd09a681b6a..b2d9e9cb97f7 100644
--- a/drivers/media/tuners/xc5000.c
+++ b/drivers/media/tuners/xc5000.c
@@ -55,7 +55,7 @@ struct xc5000_priv {
 
 	u32 if_khz;
 	u16 xtal_khz;
-	u32 freq_hz;
+	u32 freq_hz, freq_offset;
 	u32 bandwidth;
 	u8  video_standard;
 	u8  rf_mode;
@@ -755,13 +755,13 @@ static int xc5000_set_params(struct dvb_frontend *fe)
 	case SYS_ATSC:
 		dprintk(1, "%s() VSB modulation\n", __func__);
 		priv->rf_mode = XC_RF_MODE_AIR;
-		priv->freq_hz = freq - 1750000;
+		priv->freq_offset = 1750000;
 		priv->video_standard = DTV6;
 		break;
 	case SYS_DVBC_ANNEX_B:
 		dprintk(1, "%s() QAM modulation\n", __func__);
 		priv->rf_mode = XC_RF_MODE_CABLE;
-		priv->freq_hz = freq - 1750000;
+		priv->freq_offset = 1750000;
 		priv->video_standard = DTV6;
 		break;
 	case SYS_ISDBT:
@@ -776,15 +776,15 @@ static int xc5000_set_params(struct dvb_frontend *fe)
 		switch (bw) {
 		case 6000000:
 			priv->video_standard = DTV6;
-			priv->freq_hz = freq - 1750000;
+			priv->freq_offset = 1750000;
 			break;
 		case 7000000:
 			priv->video_standard = DTV7;
-			priv->freq_hz = freq - 2250000;
+			priv->freq_offset = 2250000;
 			break;
 		case 8000000:
 			priv->video_standard = DTV8;
-			priv->freq_hz = freq - 2750000;
+			priv->freq_offset = 2750000;
 			break;
 		default:
 			printk(KERN_ERR "xc5000 bandwidth not set!\n");
@@ -798,15 +798,15 @@ static int xc5000_set_params(struct dvb_frontend *fe)
 		priv->rf_mode = XC_RF_MODE_CABLE;
 		if (bw <= 6000000) {
 			priv->video_standard = DTV6;
-			priv->freq_hz = freq - 1750000;
+			priv->freq_offset = 1750000;
 			b = 6;
 		} else if (bw <= 7000000) {
 			priv->video_standard = DTV7;
-			priv->freq_hz = freq - 2250000;
+			priv->freq_offset = 2250000;
 			b = 7;
 		} else {
 			priv->video_standard = DTV7_8;
-			priv->freq_hz = freq - 2750000;
+			priv->freq_offset = 2750000;
 			b = 8;
 		}
 		dprintk(1, "%s() Bandwidth %dMHz (%d)\n", __func__,
@@ -817,6 +817,8 @@ static int xc5000_set_params(struct dvb_frontend *fe)
 		return -EINVAL;
 	}
 
+	priv->freq_hz = freq - priv->freq_offset;
+
 	dprintk(1, "%s() frequency=%d (compensated to %d)\n",
 		__func__, freq, priv->freq_hz);
 
@@ -1067,7 +1069,7 @@ static int xc5000_get_frequency(struct dvb_frontend *fe, u32 *freq)
 {
 	struct xc5000_priv *priv = fe->tuner_priv;
 	dprintk(1, "%s()\n", __func__);
-	*freq = priv->freq_hz;
+	*freq = priv->freq_hz + priv->freq_offset;
 	return 0;
 }
 

From d2b8c8c9e5ea79818a1496f8838ea1c7020ec34f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <m.chehab@samsung.com>
Date: Mon, 21 Jul 2014 13:28:15 -0300
Subject: [PATCH 0312/1185] media: xc4000: Fix get_frequency()

commit 4c07e32884ab69574cfd9eb4de3334233c938071 upstream.

The programmed frequency on xc4000 is not the middle
frequency, but the initial frequency on the bandwidth range.
However, the DVB API works with the middle frequency.

This works fine on set_frontend, as the device calculates
the needed offset. However, at get_frequency(), the returned
value is the initial frequency. That's generally not a big
problem on most drivers, however, starting with changeset
6fe1099c7aec, the frequency drift is taken into account at
dib7000p driver.

This broke support for PCTV 340e, which uses dib7000p demod and
xc4000 tuner.

Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/tuners/xc4000.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/media/tuners/xc4000.c b/drivers/media/tuners/xc4000.c
index 2018befabb5a..e71decbfd0af 100644
--- a/drivers/media/tuners/xc4000.c
+++ b/drivers/media/tuners/xc4000.c
@@ -93,7 +93,7 @@ struct xc4000_priv {
 	struct firmware_description *firm;
 	int	firm_size;
 	u32	if_khz;
-	u32	freq_hz;
+	u32	freq_hz, freq_offset;
 	u32	bandwidth;
 	u8	video_standard;
 	u8	rf_mode;
@@ -1157,14 +1157,14 @@ static int xc4000_set_params(struct dvb_frontend *fe)
 	case SYS_ATSC:
 		dprintk(1, "%s() VSB modulation\n", __func__);
 		priv->rf_mode = XC_RF_MODE_AIR;
-		priv->freq_hz = c->frequency - 1750000;
+		priv->freq_offset = 1750000;
 		priv->video_standard = XC4000_DTV6;
 		type = DTV6;
 		break;
 	case SYS_DVBC_ANNEX_B:
 		dprintk(1, "%s() QAM modulation\n", __func__);
 		priv->rf_mode = XC_RF_MODE_CABLE;
-		priv->freq_hz = c->frequency - 1750000;
+		priv->freq_offset = 1750000;
 		priv->video_standard = XC4000_DTV6;
 		type = DTV6;
 		break;
@@ -1173,23 +1173,23 @@ static int xc4000_set_params(struct dvb_frontend *fe)
 		dprintk(1, "%s() OFDM\n", __func__);
 		if (bw == 0) {
 			if (c->frequency < 400000000) {
-				priv->freq_hz = c->frequency - 2250000;
+				priv->freq_offset = 2250000;
 			} else {
-				priv->freq_hz = c->frequency - 2750000;
+				priv->freq_offset = 2750000;
 			}
 			priv->video_standard = XC4000_DTV7_8;
 			type = DTV78;
 		} else if (bw <= 6000000) {
 			priv->video_standard = XC4000_DTV6;
-			priv->freq_hz = c->frequency - 1750000;
+			priv->freq_offset = 1750000;
 			type = DTV6;
 		} else if (bw <= 7000000) {
 			priv->video_standard = XC4000_DTV7;
-			priv->freq_hz = c->frequency - 2250000;
+			priv->freq_offset = 2250000;
 			type = DTV7;
 		} else {
 			priv->video_standard = XC4000_DTV8;
-			priv->freq_hz = c->frequency - 2750000;
+			priv->freq_offset = 2750000;
 			type = DTV8;
 		}
 		priv->rf_mode = XC_RF_MODE_AIR;
@@ -1200,6 +1200,8 @@ static int xc4000_set_params(struct dvb_frontend *fe)
 		goto fail;
 	}
 
+	priv->freq_hz = c->frequency - priv->freq_offset;
+
 	dprintk(1, "%s() frequency=%d (compensated)\n",
 		__func__, priv->freq_hz);
 
@@ -1520,7 +1522,7 @@ static int xc4000_get_frequency(struct dvb_frontend *fe, u32 *freq)
 {
 	struct xc4000_priv *priv = fe->tuner_priv;
 
-	*freq = priv->freq_hz;
+	*freq = priv->freq_hz + priv->freq_offset;
 
 	if (debug) {
 		mutex_lock(&priv->lock);

From f3e8f2718d3fe6bf8dd845ce09e581d5e9772158 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <m.chehab@samsung.com>
Date: Sun, 8 Jun 2014 13:54:57 -0300
Subject: [PATCH 0313/1185] media: au0828: Only alt setting logic when needed

commit 64ea37bbd8a5815522706f0099ad3f11c7537e15 upstream.

It seems that there's a bug at au0828 hardware/firmware
related to alternate setting: when the device is already at
alt 5, a further call causes the URBs to receive -ESHUTDOWN.

I found two different incarnations of this issue:

1) at qv4l2, it fails the second time we try to open the
video screen;
2) at xawtv, when audio underrun occurs, which is very
frequent, at least on my test machine.

The fix is simple: just check if alt=5 before calling
set_usb_interface().

Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/au0828/au0828-video.c | 34 ++++++++++++-------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/media/usb/au0828/au0828-video.c b/drivers/media/usb/au0828/au0828-video.c
index 75ac9947cdac..98e1b937b500 100644
--- a/drivers/media/usb/au0828/au0828-video.c
+++ b/drivers/media/usb/au0828/au0828-video.c
@@ -788,11 +788,27 @@ static int au0828_i2s_init(struct au0828_dev *dev)
 
 /*
  * Auvitek au0828 analog stream enable
- * Please set interface0 to AS5 before enable the stream
  */
 static int au0828_analog_stream_enable(struct au0828_dev *d)
 {
+	struct usb_interface *iface;
+	int ret;
+
 	dprintk(1, "au0828_analog_stream_enable called\n");
+
+	iface = usb_ifnum_to_if(d->usbdev, 0);
+	if (iface && iface->cur_altsetting->desc.bAlternateSetting != 5) {
+		dprintk(1, "Changing intf#0 to alt 5\n");
+		/* set au0828 interface0 to AS5 here again */
+		ret = usb_set_interface(d->usbdev, 0, 5);
+		if (ret < 0) {
+			printk(KERN_INFO "Au0828 can't set alt setting to 5!\n");
+			return -EBUSY;
+		}
+	}
+
+	/* FIXME: size should be calculated using d->width, d->height */
+
 	au0828_writereg(d, AU0828_SENSORCTRL_VBI_103, 0x00);
 	au0828_writereg(d, 0x106, 0x00);
 	/* set x position */
@@ -1003,15 +1019,6 @@ static int au0828_v4l2_open(struct file *filp)
 		return -ERESTARTSYS;
 	}
 	if (dev->users == 0) {
-		/* set au0828 interface0 to AS5 here again */
-		ret = usb_set_interface(dev->usbdev, 0, 5);
-		if (ret < 0) {
-			mutex_unlock(&dev->lock);
-			printk(KERN_INFO "Au0828 can't set alternate to 5!\n");
-			kfree(fh);
-			return -EBUSY;
-		}
-
 		au0828_analog_stream_enable(dev);
 		au0828_analog_stream_reset(dev);
 
@@ -1253,13 +1260,6 @@ static int au0828_set_format(struct au0828_dev *dev, unsigned int cmd,
 		}
 	}
 
-	/* set au0828 interface0 to AS5 here again */
-	ret = usb_set_interface(dev->usbdev, 0, 5);
-	if (ret < 0) {
-		printk(KERN_INFO "Au0828 can't set alt setting to 5!\n");
-		return -EBUSY;
-	}
-
 	au0828_analog_stream_enable(dev);
 
 	return 0;

From 01d29ff71d3b123c443f8426d5540462e9d04b19 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Salva=20Peir=C3=B3?= <speiro@ai2.upv.es>
Date: Sat, 7 Jun 2014 11:41:44 -0300
Subject: [PATCH 0314/1185] media: media-device: Remove duplicated memset() in
 media_enum_entities()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit f8ca6ac00d2ba24c5557f08f81439cd3432f0802 upstream.

After zeroing the whole struct media_entity_desc u_ent,
it is no longer necessary to memset(0) its u_ent.name field.

Signed-off-by: Salva Peiró <speiro@ai2.upv.es>
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/media-device.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/media/media-device.c b/drivers/media/media-device.c
index 79715f9feb0a..fdb5840f034b 100644
--- a/drivers/media/media-device.c
+++ b/drivers/media/media-device.c
@@ -106,8 +106,6 @@ static long media_device_enum_entities(struct media_device *mdev,
 	if (ent->name) {
 		strncpy(u_ent.name, ent->name, sizeof(u_ent.name));
 		u_ent.name[sizeof(u_ent.name) - 1] = '\0';
-	} else {
-		memset(u_ent.name, 0, sizeof(u_ent.name));
 	}
 	u_ent.type = ent->type;
 	u_ent.revision = ent->revision;

From b15dba9397aae1e091b047c9e3214bfbd1d17e04 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Tue, 5 Aug 2014 17:50:15 +0200
Subject: [PATCH 0315/1185] iommu/amd: Fix cleanup_domain for mass device
 removal

commit 9b29d3c6510407d91786c1cf9183ff4debb3473a upstream.

When multiple devices are detached in __detach_device, they
are also removed from the domains dev_list. This makes it
unsafe to use list_for_each_entry_safe, as the next pointer
might also not be in the list anymore after __detach_device
returns. So just repeatedly remove the first element of the
list until it is empty.

Tested-by: Marti Raudsepp <marti@juffo.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iommu/amd_iommu.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 6f849cbcac6f..dfb401cba733 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3187,14 +3187,16 @@ int __init amd_iommu_init_dma_ops(void)
 
 static void cleanup_domain(struct protection_domain *domain)
 {
-	struct iommu_dev_data *dev_data, *next;
+	struct iommu_dev_data *entry;
 	unsigned long flags;
 
 	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
 
-	list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) {
-		__detach_device(dev_data);
-		atomic_set(&dev_data->bind, 0);
+	while (!list_empty(&domain->dev_list)) {
+		entry = list_first_entry(&domain->dev_list,
+					 struct iommu_dev_data, list);
+		__detach_device(entry);
+		atomic_set(&entry->bind, 0);
 	}
 
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);

From bd09037436bbd28f75e21bc7e630725f4902c084 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Sun, 27 Jul 2014 23:53:19 +0200
Subject: [PATCH 0316/1185] spi: orion: fix incorrect handling of cell-index DT
 property

commit e06871cd2c92e5c65d7ca1d32866b4ca5dd4ac30 upstream.

In commit f814f9ac5a81 ("spi/orion: add device tree binding"), Device
Tree support was added to the spi-orion driver. However, this commit
reads the "cell-index" property, without taking into account the fact
that DT properties are big-endian encoded.

Since most of the platforms using spi-orion with DT have apparently
not used anything but cell-index = <0>, the problem was not
visible. But as soon as one starts using cell-index = <1>, the problem
becomes clearly visible, as the master->bus_num gets a wrong value
(actually it gets the value 0, which conflicts with the first bus that
has cell-index = <0>).

This commit fixes that by using of_property_read_u32() to read the
property value, which does the appropriate endianness conversion when
needed.

Fixes: f814f9ac5a81 ("spi/orion: add device tree binding")
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Acked-by: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/spi/spi-orion.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c
index 66a5f82cf138..183aa80c9017 100644
--- a/drivers/spi/spi-orion.c
+++ b/drivers/spi/spi-orion.c
@@ -403,8 +403,6 @@ static int orion_spi_probe(struct platform_device *pdev)
 	struct resource *r;
 	unsigned long tclk_hz;
 	int status = 0;
-	const u32 *iprop;
-	int size;
 
 	master = spi_alloc_master(&pdev->dev, sizeof *spi);
 	if (master == NULL) {
@@ -415,10 +413,10 @@ static int orion_spi_probe(struct platform_device *pdev)
 	if (pdev->id != -1)
 		master->bus_num = pdev->id;
 	if (pdev->dev.of_node) {
-		iprop = of_get_property(pdev->dev.of_node, "cell-index",
-					&size);
-		if (iprop && size == sizeof(*iprop))
-			master->bus_num = *iprop;
+		u32 cell_index;
+		if (!of_property_read_u32(pdev->dev.of_node, "cell-index",
+					  &cell_index))
+			master->bus_num = cell_index;
 	}
 
 	/* we support only mode 0, and no options */

From abcc94f8395a956d8aa9188aa5ffb66cba90738c Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@animalcreek.com>
Date: Tue, 1 Jul 2014 20:28:32 -0700
Subject: [PATCH 0317/1185] spi: omap2-mcspi: Configure hardware when slave
 driver changes mode

commit 97ca0d6cc118716840ea443e010cb3d5f2d25eaf upstream.

Commit id 2bd16e3e23d9df41592c6b257c59b6860a9cc3ea
(spi: omap2-mcspi: Do not configure the controller
on each transfer unless needed) does its job too
well so omap2_mcspi_setup_transfer() isn't called
even when an SPI slave driver changes 'spi->mode'.
The result is that the mode requested by the SPI
slave driver never takes effect.

Fix this by adding the 'mode' member to the
omap2_mcspi_cs structure which holds the mode
value that the hardware is configured for.
When the SPI slave driver changes 'spi->mode'
it will be different than the value of this new
member and the SPI master driver will know that
the hardware must be reconfigured (by calling
omap2_mcspi_setup_transfer()).

Fixes: 2bd16e3e23 (spi: omap2-mcspi: Do not configure the controller on each transfer unless needed)
Signed-off-by: Mark A. Greer <mgreer@animalcreek.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/spi/spi-omap2-mcspi.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 86d2158946bb..798729eb6689 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -136,6 +136,7 @@ struct omap2_mcspi_cs {
 	void __iomem		*base;
 	unsigned long		phys;
 	int			word_len;
+	u16			mode;
 	struct list_head	node;
 	/* Context save and restore shadow register */
 	u32			chconf0;
@@ -801,6 +802,8 @@ static int omap2_mcspi_setup_transfer(struct spi_device *spi,
 
 	mcspi_write_chconf0(spi, l);
 
+	cs->mode = spi->mode;
+
 	dev_dbg(&spi->dev, "setup: speed %d, sample %s edge, clk %s\n",
 			OMAP2_MCSPI_MAX_FREQ >> div,
 			(spi->mode & SPI_CPHA) ? "trailing" : "leading",
@@ -871,6 +874,7 @@ static int omap2_mcspi_setup(struct spi_device *spi)
 			return -ENOMEM;
 		cs->base = mcspi->base + spi->chip_select * 0x14;
 		cs->phys = mcspi->phys + spi->chip_select * 0x14;
+		cs->mode = 0;
 		cs->chconf0 = 0;
 		spi->controller_state = cs;
 		/* Link this to context save list */
@@ -1043,6 +1047,16 @@ static void omap2_mcspi_work(struct omap2_mcspi *mcspi, struct spi_message *m)
 			mcspi_read_cs_reg(spi, OMAP2_MCSPI_MODULCTRL);
 	}
 
+	/*
+	 * The slave driver could have changed spi->mode in which case
+	 * it will be different from cs->mode (the current hardware setup).
+	 * If so, set par_override (even though its not a parity issue) so
+	 * omap2_mcspi_setup_transfer will be called to configure the hardware
+	 * with the correct mode on the first iteration of the loop below.
+	 */
+	if (spi->mode != cs->mode)
+		par_override = 1;
+
 	omap2_mcspi_set_enable(spi, 0);
 
 	m->status = status;

From 79943632c58a32b9faf8513d40a8120946cdf35c Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 13 Aug 2014 11:21:34 -0700
Subject: [PATCH 0318/1185] firmware: Do not use WARN_ON(!spin_is_locked())

commit aee530cfecf4f3ec83b78406bac618cec35853f8 upstream.

spin_is_locked() always returns false for uniprocessor configurations
in several architectures, so do not use WARN_ON with it.
Use lockdep_assert_held() instead to also reduce overhead in
non-debug kernels.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/firmware/efi/vars.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 391c67b182d9..7dbc319e1cf5 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -481,7 +481,7 @@ EXPORT_SYMBOL_GPL(efivar_entry_remove);
  */
 static void efivar_entry_list_del_unlock(struct efivar_entry *entry)
 {
-	WARN_ON(!spin_is_locked(&__efivars->lock));
+	lockdep_assert_held(&__efivars->lock);
 
 	list_del(&entry->list);
 	spin_unlock_irq(&__efivars->lock);
@@ -507,7 +507,7 @@ int __efivar_entry_delete(struct efivar_entry *entry)
 	const struct efivar_operations *ops = __efivars->ops;
 	efi_status_t status;
 
-	WARN_ON(!spin_is_locked(&__efivars->lock));
+	lockdep_assert_held(&__efivars->lock);
 
 	status = ops->set_variable(entry->var.VariableName,
 				   &entry->var.VendorGuid,
@@ -667,7 +667,7 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
 	int strsize1, strsize2;
 	bool found = false;
 
-	WARN_ON(!spin_is_locked(&__efivars->lock));
+	lockdep_assert_held(&__efivars->lock);
 
 	list_for_each_entry_safe(entry, n, head, list) {
 		strsize1 = ucs2_strsize(name, 1024);
@@ -731,7 +731,7 @@ int __efivar_entry_get(struct efivar_entry *entry, u32 *attributes,
 	const struct efivar_operations *ops = __efivars->ops;
 	efi_status_t status;
 
-	WARN_ON(!spin_is_locked(&__efivars->lock));
+	lockdep_assert_held(&__efivars->lock);
 
 	status = ops->get_variable(entry->var.VariableName,
 				   &entry->var.VendorGuid,

From d4281c33c1086d80b2a5e3cd6081752e75795833 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Date: Fri, 9 May 2014 14:23:10 +0300
Subject: [PATCH 0319/1185] tpm: missing tpm_chip_put in tpm_get_random()

commit 3e14d83ef94a5806a865b85b513b4e891923c19b upstream.

Regression in 41ab999c. Call to tpm_chip_put is missing. This
will cause TPM device driver not to unload if tpm_get_random()
is called.

Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/tpm/tpm.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index 7c3b3dcbfbc8..01d6968a9e47 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -1423,13 +1423,13 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max)
 	int err, total = 0, retries = 5;
 	u8 *dest = out;
 
+	if (!out || !num_bytes || max > TPM_MAX_RNG_DATA)
+		return -EINVAL;
+
 	chip = tpm_chip_find_get(chip_num);
 	if (chip == NULL)
 		return -ENODEV;
 
-	if (!out || !num_bytes || max > TPM_MAX_RNG_DATA)
-		return -EINVAL;
-
 	do {
 		tpm_cmd.header.in = tpm_getrandom_header;
 		tpm_cmd.params.getrandom_in.num_bytes = cpu_to_be32(num_bytes);
@@ -1448,6 +1448,7 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max)
 		num_bytes -= recd;
 	} while (retries-- && total < max);
 
+	tpm_chip_put(chip);
 	return total ? total : -EIO;
 }
 EXPORT_SYMBOL_GPL(tpm_get_random);

From 76f01555c78e496203105bd29b878db3431a2260 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 23 Jul 2014 15:36:26 -0400
Subject: [PATCH 0320/1185] CAPABILITIES: remove undefined caps from all
 processes

commit 7d8b6c63751cfbbe5eef81a48c22978b3407a3ad upstream.

This is effectively a revert of 7b9a7ec565505699f503b4fcf61500dceb36e744
plus fixing it a different way...

We found, when trying to run an application from an application which
had dropped privs that the kernel does security checks on undefined
capability bits.  This was ESPECIALLY difficult to debug as those
undefined bits are hidden from /proc/$PID/status.

Consider a root application which drops all capabilities from ALL 4
capability sets.  We assume, since the application is going to set
eff/perm/inh from an array that it will clear not only the defined caps
less than CAP_LAST_CAP, but also the higher 28ish bits which are
undefined future capabilities.

The BSET gets cleared differently.  Instead it is cleared one bit at a
time.  The problem here is that in security/commoncap.c::cap_task_prctl()
we actually check the validity of a capability being read.  So any task
which attempts to 'read all things set in bset' followed by 'unset all
things set in bset' will not even attempt to unset the undefined bits
higher than CAP_LAST_CAP.

So the 'parent' will look something like:
CapInh:	0000000000000000
CapPrm:	0000000000000000
CapEff:	0000000000000000
CapBnd:	ffffffc000000000

All of this 'should' be fine.  Given that these are undefined bits that
aren't supposed to have anything to do with permissions.  But they do...

So let's now consider a task which cleared the eff/perm/inh completely
and cleared all of the valid caps in the bset (but not the invalid caps
it couldn't read out of the kernel).  We know that this is exactly what
the libcap-ng library does and what the go capabilities library does.
They both leave you in that above situation if you try to clear all of
your capabilities from all 4 sets.  If that root task calls execve()
the child task will pick up all caps not blocked by the bset.  The bset
however does not block bits higher than CAP_LAST_CAP.  So now the child
task has bits in eff which are not in the parent.  These are
'meaningless' undefined bits, but still bits which the parent doesn't
have.

The problem is now in cred_cap_issubset() (or any operation which does a
subset test) as the child, while a subset for valid cap bits, is not a
subset for invalid cap bits!  So now we set during commit creds that
the child is not dumpable.  Given it is 'more priv' than its parent.  It
also means the parent cannot ptrace the child and other stupidity.

The solution here:
1) stop hiding capability bits in status
	This makes debugging easier!

2) stop giving any task undefined capability bits.  it's simple, if you
don't put those invalid bits in CAP_FULL_SET you won't get them in init
and you won't get them in any other task either.
	This fixes the cap_issubset() tests and resulting fallout (which
	made the init task in a docker container untraceable among other
	things)

3) mask out undefined bits when sys_capset() is called as it might use
~0, ~0 to denote 'all capabilities' for backward/forward compatibility.
	This lets 'capsh --caps="all=eip" -- -c /bin/bash' run.

4) mask out undefined bits when we read a file capability off of disk as
again likely all bits are set in the xattr for forward/backward
compatibility.
	This lets 'setcap all+pe /bin/bash; /bin/bash' run

Signed-off-by: Eric Paris <eparis@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Vagin <avagin@openvz.org>
Cc: Andrew G. Morgan <morgan@kernel.org>
Cc: Serge E. Hallyn <serge.hallyn@canonical.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Steve Grubb <sgrubb@redhat.com>
Cc: Dan Walsh <dwalsh@redhat.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/proc/array.c            | 11 +----------
 include/linux/capability.h |  5 ++++-
 kernel/audit.c             |  2 +-
 kernel/capability.c        |  4 ++++
 security/commoncap.c       |  3 +++
 5 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index cbd0f1b324b9..09f0d9c374a3 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -304,15 +304,11 @@ static void render_cap_t(struct seq_file *m, const char *header,
 	seq_puts(m, header);
 	CAP_FOR_EACH_U32(__capi) {
 		seq_printf(m, "%08x",
-			   a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
+			   a->cap[CAP_LAST_U32 - __capi]);
 	}
 	seq_putc(m, '\n');
 }
 
-/* Remove non-existent capabilities */
-#define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \
-				CAP_TO_MASK(CAP_LAST_CAP + 1) - 1)
-
 static inline void task_cap(struct seq_file *m, struct task_struct *p)
 {
 	const struct cred *cred;
@@ -326,11 +322,6 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
 	cap_bset	= cred->cap_bset;
 	rcu_read_unlock();
 
-	NORM_CAPS(cap_inheritable);
-	NORM_CAPS(cap_permitted);
-	NORM_CAPS(cap_effective);
-	NORM_CAPS(cap_bset);
-
 	render_cap_t(m, "CapInh:\t", &cap_inheritable);
 	render_cap_t(m, "CapPrm:\t", &cap_permitted);
 	render_cap_t(m, "CapEff:\t", &cap_effective);
diff --git a/include/linux/capability.h b/include/linux/capability.h
index 15f90929fb51..9b4378af414c 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -78,8 +78,11 @@ extern const kernel_cap_t __cap_init_eff_set;
 # error Fix up hand-coded capability macro initializers
 #else /* HAND-CODED capability initializers */
 
+#define CAP_LAST_U32			((_KERNEL_CAPABILITY_U32S) - 1)
+#define CAP_LAST_U32_VALID_MASK		(CAP_TO_MASK(CAP_LAST_CAP + 1) -1)
+
 # define CAP_EMPTY_SET    ((kernel_cap_t){{ 0, 0 }})
-# define CAP_FULL_SET     ((kernel_cap_t){{ ~0, ~0 }})
+# define CAP_FULL_SET     ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }})
 # define CAP_FS_SET       ((kernel_cap_t){{ CAP_FS_MASK_B0 \
 				    | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \
 				    CAP_FS_MASK_B1 } })
diff --git a/kernel/audit.c b/kernel/audit.c
index a6c632757e57..4dd7529b0845 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1412,7 +1412,7 @@ void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap)
 	audit_log_format(ab, " %s=", prefix);
 	CAP_FOR_EACH_U32(i) {
 		audit_log_format(ab, "%08x",
-				 cap->cap[(_KERNEL_CAPABILITY_U32S-1) - i]);
+				 cap->cap[CAP_LAST_U32 - i]);
 	}
 }
 
diff --git a/kernel/capability.c b/kernel/capability.c
index d52eecc0942b..1339806a8731 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -268,6 +268,10 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
 		i++;
 	}
 
+	effective.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
+	permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
+	inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
+
 	new = prepare_creds();
 	if (!new)
 		return -ENOMEM;
diff --git a/security/commoncap.c b/security/commoncap.c
index c44b6fe6648e..c9219a66b7c6 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -421,6 +421,9 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data
 		cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable);
 	}
 
+	cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
+	cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
+
 	return 0;
 }
 

From a8fd51944ec90a41b37afe0aee25cf9ec17be8fe Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Wed, 6 Aug 2014 16:08:14 -0700
Subject: [PATCH 0321/1185] kernel/smp.c:on_each_cpu_cond(): fix warning in
 fallback path

commit 618fde872163e782183ce574c77f1123e2be8887 upstream.

The rarely-executed memory-allocation-failed callback path generates a
WARN_ON_ONCE() when smp_call_function_single() succeeds.  Presumably
it's supposed to warn on failures.

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Christoph Lameter <cl@gentwo.org>
Cc: Gilad Ben-Yossef <gilad@benyossef.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <htejun@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index 4dba0f7b72ad..88797cb0d23a 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -658,7 +658,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 			if (cond_func(cpu, info)) {
 				ret = smp_call_function_single(cpu, func,
 								info, wait);
-				WARN_ON_ONCE(!ret);
+				WARN_ON_ONCE(ret);
 			}
 		preempt_enable();
 	}

From 2c34d0d0fe11694fa617f7fca64d7e7fed59b3ea Mon Sep 17 00:00:00 2001
From: Michael Welling <mwelling@emacinc.com>
Date: Mon, 28 Jul 2014 18:01:04 -0500
Subject: [PATCH 0322/1185] mfd: omap-usb-host: Fix improper mask use.

commit 46de8ff8e80a6546aa3d2fdf58c6776666301a0c upstream.

single-ulpi-bypass is a flag used for older OMAP3 silicon.

The flag when set, can excite code that improperly uses the
OMAP_UHH_HOSTCONFIG_ULPI_BYPASS define to clear the corresponding bit.
Instead it clears all of the other bits disabling all of the ports in
the process.

Signed-off-by: Michael Welling <mwelling@emacinc.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mfd/omap-usb-host.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c
index 759fae3ca7fb..a36f3f282ae7 100644
--- a/drivers/mfd/omap-usb-host.c
+++ b/drivers/mfd/omap-usb-host.c
@@ -445,7 +445,7 @@ static unsigned omap_usbhs_rev1_hostconfig(struct usbhs_hcd_omap *omap,
 
 		for (i = 0; i < omap->nports; i++) {
 			if (is_ehci_phy_mode(pdata->port_mode[i])) {
-				reg &= OMAP_UHH_HOSTCONFIG_ULPI_BYPASS;
+				reg &= ~OMAP_UHH_HOSTCONFIG_ULPI_BYPASS;
 				break;
 			}
 		}

From b2102aa9271a4ac6c20b516304c2c88ee1d1cc7b Mon Sep 17 00:00:00 2001
From: Nikesh Oswal <nikesh@opensource.wolfsonmicro.com>
Date: Fri, 4 Jul 2014 09:55:16 +0100
Subject: [PATCH 0323/1185] regulator: arizona-ldo1: remove bypass
 functionality

commit 5b919f3ebb533cbe400664837e24f66a0836b907 upstream.

WM5110/8280 devices do not support bypass mode for LDO1 so remove
the bypass callbacks registered with regulator core.

Signed-off-by: Nikesh Oswal <nikesh@opensource.wolfsonmicro.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/arizona-ldo1.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/regulator/arizona-ldo1.c b/drivers/regulator/arizona-ldo1.c
index 81d8681c3195..b1b35f38d11d 100644
--- a/drivers/regulator/arizona-ldo1.c
+++ b/drivers/regulator/arizona-ldo1.c
@@ -141,8 +141,6 @@ static struct regulator_ops arizona_ldo1_ops = {
 	.map_voltage = regulator_map_voltage_linear,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
 	.set_voltage_sel = regulator_set_voltage_sel_regmap,
-	.get_bypass = regulator_get_bypass_regmap,
-	.set_bypass = regulator_set_bypass_regmap,
 };
 
 static const struct regulator_desc arizona_ldo1 = {

From 5bad07d5f8a43a8ebc04840f525c9e8fce5d6f2c Mon Sep 17 00:00:00 2001
From: Andrey Utkin <andrey.krieger.utkin@gmail.com>
Date: Mon, 4 Aug 2014 23:13:10 +0300
Subject: [PATCH 0324/1185] powerpc/mm/numa: Fix break placement

commit b00fc6ec1f24f9d7af9b8988b6a198186eb3408c upstream.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=81631
Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: Andrey Utkin <andrey.krieger.utkin@gmail.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/mm/numa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b7293bba0062..08c6f3185d45 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -586,8 +586,8 @@ static int __cpuinit cpu_numa_callback(struct notifier_block *nfb,
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
 		unmap_cpu_from_node(lcpu);
-		break;
 		ret = NOTIFY_OK;
+		break;
 #endif
 	}
 	return ret;

From 6136852d18b23a3a6d1db0b669233aba7f05e46c Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 13 Aug 2014 12:32:03 +0530
Subject: [PATCH 0325/1185] powerpc/mm: Use read barrier when creating real_pte

commit 85c1fafd7262e68ad821ee1808686b1392b1167d upstream.

On ppc64 we support 4K hash pte with 64K page size. That requires
us to track the hash pte slot information on a per 4k basis. We do that
by storing the slot details in the second half of the pte page. The pte bit
_PAGE_COMBO is used to indicate whether the second half needs to be
looked at while building real_pte. We need to use a read memory barrier while
doing that so that the load of hidx is not reordered w.r.t. the _PAGE_COMBO
check. On the store side we already do a lwsync in __hash_page_4K.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/include/asm/pte-hash64-64k.h | 32 +++++++++++++++++++----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
index d836d945068d..063fcadd1a00 100644
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ b/arch/powerpc/include/asm/pte-hash64-64k.h
@@ -40,17 +40,39 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/barrier.h>	/* for smp_rmb() */
+
 /*
  * With 64K pages on hash table, we have a special PTE format that
  * uses a second "half" of the page table to encode sub-page information
  * in order to deal with 64K made of 4K HW pages. Thus we override the
  * generic accessors and iterators here
  */
-#define __real_pte(e,p) 	((real_pte_t) { \
-			(e), (pte_val(e) & _PAGE_COMBO) ? \
-				(pte_val(*((p) + PTRS_PER_PTE))) : 0 })
-#define __rpte_to_hidx(r,index)	((pte_val((r).pte) & _PAGE_COMBO) ? \
-        (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf))
+#define __real_pte __real_pte
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
+{
+	real_pte_t rpte;
+
+	rpte.pte = pte;
+	rpte.hidx = 0;
+	if (pte_val(pte) & _PAGE_COMBO) {
+		/*
+		 * Make sure we order the hidx load against the _PAGE_COMBO
+		 * check. The store side ordering is done in __hash_page_4K
+		 */
+		smp_rmb();
+		rpte.hidx = pte_val(*((ptep) + PTRS_PER_PTE));
+	}
+	return rpte;
+}
+
+static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
+{
+	if ((pte_val(rpte.pte) & _PAGE_COMBO))
+		return (rpte.hidx >> (index<<2)) & 0xf;
+	return (pte_val(rpte.pte) >> 12) & 0xf;
+}
+
 #define __rpte_to_pte(r)	((r).pte)
 #define __rpte_sub_valid(rpte, index) \
 	(pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index)))

From d08ed7a370531ca47694498de8b5e32966545884 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gwshan@linux.vnet.ibm.com>
Date: Mon, 11 Aug 2014 19:16:19 +1000
Subject: [PATCH 0326/1185] powerpc/pseries: Failure on removing device node

commit f1b3929c232784580e5d8ee324b6bc634e709575 upstream.

While running the command "drmgr -c phb -r -s 'PHB 528'", the following
backtrace appeared because the target device node isn't marked
with OF_DETACHED by of_detach_node(). That is caused by an error
returned from the memory-hotplug-related reconfig notifier when
CONFIG_MEMORY_HOTREMOVE is disabled. The patch fixes it.

ERROR: Bad of_node_put() on /pci@800000020000210/ethernet@0
CPU: 14 PID: 2252 Comm: drmgr Tainted: G        W     3.16.0+ #427
Call Trace:
[c000000012a776a0] [c000000000013d9c] .show_stack+0x88/0x148 (unreliable)
[c000000012a77750] [c00000000083cd34] .dump_stack+0x7c/0x9c
[c000000012a777d0] [c0000000006807c4] .of_node_release+0x58/0xe0
[c000000012a77860] [c00000000038a7d0] .kobject_release+0x174/0x1b8
[c000000012a77900] [c00000000038a884] .kobject_put+0x70/0x78
[c000000012a77980] [c000000000681680] .of_node_put+0x28/0x34
[c000000012a77a00] [c000000000681ea8] .__of_get_next_child+0x64/0x70
[c000000012a77a90] [c000000000682138] .of_find_node_by_path+0x1b8/0x20c
[c000000012a77b40] [c000000000051840] .ofdt_write+0x308/0x688
[c000000012a77c20] [c000000000238430] .proc_reg_write+0xb8/0xd4
[c000000012a77cd0] [c0000000001cbeac] .vfs_write+0xec/0x1f8
[c000000012a77d70] [c0000000001cc3b0] .SyS_write+0x58/0xa0
[c000000012a77e30] [c00000000000a064] syscall_exit+0x0/0x98

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/pseries/hotplug-memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 9a432de363b8..bebe64ed5dc3 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -158,7 +158,7 @@ static int pseries_remove_memory(struct device_node *np)
 static inline int pseries_remove_memblock(unsigned long base,
 					  unsigned int memblock_size)
 {
-	return -EOPNOTSUPP;
+	return 0;
 }
 static inline int pseries_remove_memory(struct device_node *np)
 {

From 7afc3ac1263be1a6f2ef76d76ff41615a0c795d3 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sat, 12 Jul 2014 09:48:30 -0700
Subject: [PATCH 0327/1185] Drivers: scsi: storvsc: Implement a eh_timed_out
 handler

commit 56b26e69c8283121febedd12b3cc193384af46b9 upstream.

On Azure, we have seen instances of unbounded I/O latencies. To deal with
this issue, implement a handler that can reset the timeout. Note that the
host guarantees that it will respond to each command that has been issued.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
[hch: added a better comment explaining the issue]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/storvsc_drv.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 91b76cea3e3c..34a8f526908f 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -33,6 +33,7 @@
 #include <linux/device.h>
 #include <linux/hyperv.h>
 #include <linux/mempool.h>
+#include <linux/blkdev.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_host.h>
@@ -1285,6 +1286,16 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd)
 	return SUCCESS;
 }
 
+/*
+ * The host guarantees to respond to each command, although I/O latencies might
+ * be unbounded on Azure.  Reset the timer unconditionally to give the host a
+ * chance to perform EH.
+ */
+static enum blk_eh_timer_return storvsc_eh_timed_out(struct scsi_cmnd *scmnd)
+{
+	return BLK_EH_RESET_TIMER;
+}
+
 static bool storvsc_scsi_cmd_ok(struct scsi_cmnd *scmnd)
 {
 	bool allowed = true;
@@ -1444,6 +1455,7 @@ static struct scsi_host_template scsi_driver = {
 	.bios_param =		storvsc_get_chs,
 	.queuecommand =		storvsc_queuecommand,
 	.eh_host_reset_handler =	storvsc_host_reset_handler,
+	.eh_timed_out =		storvsc_eh_timed_out,
 	.slave_alloc =		storvsc_device_alloc,
 	.slave_destroy =	storvsc_device_destroy,
 	.slave_configure =	storvsc_device_configure,

From 8ce6d81a2d174a0cecc1efbaf2d218c90d2e2fbd Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sat, 12 Jul 2014 09:48:32 -0700
Subject: [PATCH 0328/1185] drivers: scsi: storvsc: Correctly handle
 TEST_UNIT_READY failure

commit 3533f8603d28b77c62d75ec899449a99bc6b77a1 upstream.

On some Windows hosts on FC SANs, TEST_UNIT_READY can return SRB_STATUS_ERROR.
Correctly handle this. Note that there is sufficient sense information to
support scsi error handling even in this case.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/storvsc_drv.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 34a8f526908f..87ca72d36d5b 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -804,6 +804,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
 		case ATA_12:
 			set_host_byte(scmnd, DID_PASSTHROUGH);
 			break;
+		/*
+		 * On Some Windows hosts TEST_UNIT_READY command can return
+		 * SRB_STATUS_ERROR, let the upper level code deal with it
+		 * based on the sense information.
+		 */
+		case TEST_UNIT_READY:
+			break;
 		default:
 			set_host_byte(scmnd, DID_TARGET_FAILURE);
 		}

From f52337e5d5de58e2c5be00389d1250ae537cb98b Mon Sep 17 00:00:00 2001
From: Jeffrey Deans <jeffrey.deans@imgtec.com>
Date: Thu, 17 Jul 2014 09:20:56 +0100
Subject: [PATCH 0329/1185] MIPS: GIC: Prevent array overrun

commit ffc8415afab20bd97754efae6aad1f67b531132b upstream.

A GIC interrupt which is declared as having a GIC_MAP_TO_NMI_MSK
mapping causes the cpu parameter to gic_setup_intr() to be increased
to 32, causing memory corruption when pcpu_masks[] is written to again
later in the function.

Signed-off-by: Jeffrey Deans <jeffrey.deans@imgtec.com>
Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7375/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kernel/irq-gic.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
index c01b307317a9..bffbbc557879 100644
--- a/arch/mips/kernel/irq-gic.c
+++ b/arch/mips/kernel/irq-gic.c
@@ -256,11 +256,13 @@ static void __init gic_setup_intr(unsigned int intr, unsigned int cpu,
 
 	/* Setup Intr to Pin mapping */
 	if (pin & GIC_MAP_TO_NMI_MSK) {
+		int i;
+
 		GICWRITE(GIC_REG_ADDR(SHARED, GIC_SH_MAP_TO_PIN(intr)), pin);
 		/* FIXME: hack to route NMI to all cpu's */
-		for (cpu = 0; cpu < NR_CPUS; cpu += 32) {
+		for (i = 0; i < NR_CPUS; i += 32) {
 			GICWRITE(GIC_REG_ADDR(SHARED,
-					  GIC_SH_MAP_TO_VPE_REG_OFF(intr, cpu)),
+					  GIC_SH_MAP_TO_VPE_REG_OFF(intr, i)),
 				 0xffffffff);
 		}
 	} else {

From fbd9df2eb92caee0ca82bd1b73e49e673cfc1ab5 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Tue, 22 Jul 2014 14:21:21 +0100
Subject: [PATCH 0330/1185] MIPS: Prevent user from setting FCSR cause bits

commit b1442d39fac2fcfbe6a4814979020e993ca59c9e upstream.

If one or more matching FCSR cause & enable bits are set in saved thread
context then when that context is restored the kernel will take an FP
exception. This is of course undesirable and considered an oops, leading
to the kernel writing a backtrace to the console and potentially
rebooting depending upon the configuration. Thus the kernel avoids this
situation by clearing the cause bits of the FCSR register when handling
FP exceptions and after emulating FP instructions.

However the kernel does not prevent userland from setting arbitrary FCSR
cause & enable bits via ptrace, using either the PTRACE_POKEUSR or
PTRACE_SETFPREGS requests. This means userland can trivially cause the
kernel to oops on any system with an FPU. Prevent this from happening
by clearing the cause bits when writing to the saved FCSR context via
ptrace.

This problem appears to exist at least back to the beginning of the git
era in the PTRACE_POKEUSR case.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: stable@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/7438/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kernel/ptrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 9c6299c733a3..1b95b2443221 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -161,6 +161,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
 		__get_user(fregs[i], i + (__u64 __user *) data);
 
 	__get_user(child->thread.fpu.fcr31, data + 64);
+	child->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
 
 	/* FIR may not be written.  */
 
@@ -451,7 +452,7 @@ long arch_ptrace(struct task_struct *child, long request,
 			break;
 #endif
 		case FPC_CSR:
-			child->thread.fpu.fcr31 = data;
+			child->thread.fpu.fcr31 = data & ~FPU_CSR_ALL_X;
 			break;
 		case DSP_BASE ... DSP_BASE + 5: {
 			dspreg_t *dregs;

From 33103cff2cb6a6f2753676ed9f82c4e3e7fe4e42 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Tue, 29 Jul 2014 14:54:40 +0800
Subject: [PATCH 0331/1185] MIPS: tlbex: Fix a missing statement for HUGETLB

commit 8393c524a25609a30129e4a8975cf3b91f6c16a5 upstream.

In commit 2c8c53e28f1 (MIPS: Optimize TLB handlers for Octeon CPUs)
build_r4000_tlb_refill_handler() was modified, but the result isn't compatible
with the original code in the HUGETLB case, because there is a copy & paste
error and one line of code is missing. The bug is very easy to reproduce
with LTP's hugemmap05 test.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Signed-off-by: Binbin Zhou <zhoubb@lemote.com>
Cc: John Crispin <john@phrozen.org>
Cc: Steven J. Hill <Steven.Hill@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Patchwork: https://patchwork.linux-mips.org/patch/7496/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/mm/tlbex.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index afeef93f81a7..0e17e1352718 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -1329,6 +1329,7 @@ static void __cpuinit build_r4000_tlb_refill_handler(void)
 	}
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 	uasm_l_tlb_huge_update(&l, p);
+	UASM_i_LW(&p, K0, 0, K1);
 	build_huge_update_entries(&p, htlb_info.huge_pte, K1);
 	build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random,
 				   htlb_info.restore_scratch);

From f020cedd010180e759cb6cce817b8a60ea6311a1 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Wed, 16 Jul 2014 09:19:16 +0800
Subject: [PATCH 0332/1185] MIPS: Remove BUG_ON(!is_fpu_owner()) in do_ade()

commit 2e5767a27337812f6850b3fa362419e2f085e5c3 upstream.

In do_ade(), is_fpu_owner() isn't preempt-safe. For example, when an
unaligned ldc1 is executed, do_cpu() is called and then FPU will be
enabled (and TIF_USEDFPU will be set for the current process). Then,
do_ade() is called because the access is unaligned.  If the current
process is preempted at this time, TIF_USEDFPU will be cleared.  So when
the process is scheduled again, BUG_ON(!is_fpu_owner()) is triggered.

This small program can trigger this BUG in a preemptible kernel:

int main (int argc, char *argv[])
{
        double u64[2];

        while (1) {
                asm volatile (
                        ".set push \n\t"
                        ".set noreorder \n\t"
                        "ldc1 $f3, 4(%0) \n\t"
                        ".set pop \n\t"
                        ::"r"(u64):
                );
        }

        return 0;
}

V2: Remove the BUG_ON() unconditionally due to Paul's suggestion.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Signed-off-by: Jie Chen <chenj@lemote.com>
Signed-off-by: Rui Wang <wangr@lemote.com>
Cc: John Crispin <john@phrozen.org>
Cc: Steven J. Hill <Steven.Hill@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kernel/unaligned.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index 203d8857070d..2c81265bcf46 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -604,7 +604,6 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 	case sdc1_op:
 		die_if_kernel("Unaligned FP access in kernel code", regs);
 		BUG_ON(!used_math());
-		BUG_ON(!is_fpu_owner());
 
 		lose_fpu(1);	/* Save FPU state for the emulator. */
 		res = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1,

From 887c148946c4cf2259355197672eeeb070cf8284 Mon Sep 17 00:00:00 2001
From: Alex Smith <alex@alex-smith.me.uk>
Date: Wed, 23 Jul 2014 14:40:08 +0100
Subject: [PATCH 0333/1185] MIPS: asm/reg.h: Make 32- and 64-bit definitions
 available at the same time

commit bcec7c8da6b092b1ff3327fd83c2193adb12f684 upstream.

Get rid of the WANT_COMPAT_REG_H test and instead define both the 32-
and 64-bit register offset definitions at the same time with
MIPS{32,64}_ prefixes, then define the existing EF_* names to the
correct definitions for the kernel's bitness.

This patch is a prerequisite of the following bug fix patch.

Signed-off-by: Alex Smith <alex@alex-smith.me.uk>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7451/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/include/asm/reg.h      | 250 ++++++++++++++++++++-----------
 arch/mips/kernel/binfmt_elfo32.c |  32 ++--
 2 files changed, 177 insertions(+), 105 deletions(-)

diff --git a/arch/mips/include/asm/reg.h b/arch/mips/include/asm/reg.h
index 910e71a12466..b8343ccbc989 100644
--- a/arch/mips/include/asm/reg.h
+++ b/arch/mips/include/asm/reg.h
@@ -12,116 +12,194 @@
 #ifndef __ASM_MIPS_REG_H
 #define __ASM_MIPS_REG_H
 
-
-#if defined(CONFIG_32BIT) || defined(WANT_COMPAT_REG_H)
-
-#define EF_R0			6
-#define EF_R1			7
-#define EF_R2			8
-#define EF_R3			9
-#define EF_R4			10
-#define EF_R5			11
-#define EF_R6			12
-#define EF_R7			13
-#define EF_R8			14
-#define EF_R9			15
-#define EF_R10			16
-#define EF_R11			17
-#define EF_R12			18
-#define EF_R13			19
-#define EF_R14			20
-#define EF_R15			21
-#define EF_R16			22
-#define EF_R17			23
-#define EF_R18			24
-#define EF_R19			25
-#define EF_R20			26
-#define EF_R21			27
-#define EF_R22			28
-#define EF_R23			29
-#define EF_R24			30
-#define EF_R25			31
+#define MIPS32_EF_R0		6
+#define MIPS32_EF_R1		7
+#define MIPS32_EF_R2		8
+#define MIPS32_EF_R3		9
+#define MIPS32_EF_R4		10
+#define MIPS32_EF_R5		11
+#define MIPS32_EF_R6		12
+#define MIPS32_EF_R7		13
+#define MIPS32_EF_R8		14
+#define MIPS32_EF_R9		15
+#define MIPS32_EF_R10		16
+#define MIPS32_EF_R11		17
+#define MIPS32_EF_R12		18
+#define MIPS32_EF_R13		19
+#define MIPS32_EF_R14		20
+#define MIPS32_EF_R15		21
+#define MIPS32_EF_R16		22
+#define MIPS32_EF_R17		23
+#define MIPS32_EF_R18		24
+#define MIPS32_EF_R19		25
+#define MIPS32_EF_R20		26
+#define MIPS32_EF_R21		27
+#define MIPS32_EF_R22		28
+#define MIPS32_EF_R23		29
+#define MIPS32_EF_R24		30
+#define MIPS32_EF_R25		31
 
 /*
  * k0/k1 unsaved
  */
-#define EF_R26			32
-#define EF_R27			33
+#define MIPS32_EF_R26		32
+#define MIPS32_EF_R27		33
 
-#define EF_R28			34
-#define EF_R29			35
-#define EF_R30			36
-#define EF_R31			37
+#define MIPS32_EF_R28		34
+#define MIPS32_EF_R29		35
+#define MIPS32_EF_R30		36
+#define MIPS32_EF_R31		37
 
 /*
  * Saved special registers
  */
-#define EF_LO			38
-#define EF_HI			39
+#define MIPS32_EF_LO		38
+#define MIPS32_EF_HI		39
 
-#define EF_CP0_EPC		40
-#define EF_CP0_BADVADDR		41
-#define EF_CP0_STATUS		42
-#define EF_CP0_CAUSE		43
-#define EF_UNUSED0		44
+#define MIPS32_EF_CP0_EPC	40
+#define MIPS32_EF_CP0_BADVADDR	41
+#define MIPS32_EF_CP0_STATUS	42
+#define MIPS32_EF_CP0_CAUSE	43
+#define MIPS32_EF_UNUSED0	44
 
-#define EF_SIZE			180
+#define MIPS32_EF_SIZE		180
 
-#endif
-
-#if defined(CONFIG_64BIT) && !defined(WANT_COMPAT_REG_H)
-
-#define EF_R0			 0
-#define EF_R1			 1
-#define EF_R2			 2
-#define EF_R3			 3
-#define EF_R4			 4
-#define EF_R5			 5
-#define EF_R6			 6
-#define EF_R7			 7
-#define EF_R8			 8
-#define EF_R9			 9
-#define EF_R10			10
-#define EF_R11			11
-#define EF_R12			12
-#define EF_R13			13
-#define EF_R14			14
-#define EF_R15			15
-#define EF_R16			16
-#define EF_R17			17
-#define EF_R18			18
-#define EF_R19			19
-#define EF_R20			20
-#define EF_R21			21
-#define EF_R22			22
-#define EF_R23			23
-#define EF_R24			24
-#define EF_R25			25
+#define MIPS64_EF_R0		0
+#define MIPS64_EF_R1		1
+#define MIPS64_EF_R2		2
+#define MIPS64_EF_R3		3
+#define MIPS64_EF_R4		4
+#define MIPS64_EF_R5		5
+#define MIPS64_EF_R6		6
+#define MIPS64_EF_R7		7
+#define MIPS64_EF_R8		8
+#define MIPS64_EF_R9		9
+#define MIPS64_EF_R10		10
+#define MIPS64_EF_R11		11
+#define MIPS64_EF_R12		12
+#define MIPS64_EF_R13		13
+#define MIPS64_EF_R14		14
+#define MIPS64_EF_R15		15
+#define MIPS64_EF_R16		16
+#define MIPS64_EF_R17		17
+#define MIPS64_EF_R18		18
+#define MIPS64_EF_R19		19
+#define MIPS64_EF_R20		20
+#define MIPS64_EF_R21		21
+#define MIPS64_EF_R22		22
+#define MIPS64_EF_R23		23
+#define MIPS64_EF_R24		24
+#define MIPS64_EF_R25		25
 
 /*
  * k0/k1 unsaved
  */
-#define EF_R26			26
-#define EF_R27			27
+#define MIPS64_EF_R26		26
+#define MIPS64_EF_R27		27
 
 
-#define EF_R28			28
-#define EF_R29			29
-#define EF_R30			30
-#define EF_R31			31
+#define MIPS64_EF_R28		28
+#define MIPS64_EF_R29		29
+#define MIPS64_EF_R30		30
+#define MIPS64_EF_R31		31
 
 /*
  * Saved special registers
  */
-#define EF_LO			32
-#define EF_HI			33
+#define MIPS64_EF_LO		32
+#define MIPS64_EF_HI		33
 
-#define EF_CP0_EPC		34
-#define EF_CP0_BADVADDR		35
-#define EF_CP0_STATUS		36
-#define EF_CP0_CAUSE		37
+#define MIPS64_EF_CP0_EPC	34
+#define MIPS64_EF_CP0_BADVADDR	35
+#define MIPS64_EF_CP0_STATUS	36
+#define MIPS64_EF_CP0_CAUSE	37
 
-#define EF_SIZE			304	/* size in bytes */
+#define MIPS64_EF_SIZE		304	/* size in bytes */
+
+#if defined(CONFIG_32BIT)
+
+#define EF_R0			MIPS32_EF_R0
+#define EF_R1			MIPS32_EF_R1
+#define EF_R2			MIPS32_EF_R2
+#define EF_R3			MIPS32_EF_R3
+#define EF_R4			MIPS32_EF_R4
+#define EF_R5			MIPS32_EF_R5
+#define EF_R6			MIPS32_EF_R6
+#define EF_R7			MIPS32_EF_R7
+#define EF_R8			MIPS32_EF_R8
+#define EF_R9			MIPS32_EF_R9
+#define EF_R10			MIPS32_EF_R10
+#define EF_R11			MIPS32_EF_R11
+#define EF_R12			MIPS32_EF_R12
+#define EF_R13			MIPS32_EF_R13
+#define EF_R14			MIPS32_EF_R14
+#define EF_R15			MIPS32_EF_R15
+#define EF_R16			MIPS32_EF_R16
+#define EF_R17			MIPS32_EF_R17
+#define EF_R18			MIPS32_EF_R18
+#define EF_R19			MIPS32_EF_R19
+#define EF_R20			MIPS32_EF_R20
+#define EF_R21			MIPS32_EF_R21
+#define EF_R22			MIPS32_EF_R22
+#define EF_R23			MIPS32_EF_R23
+#define EF_R24			MIPS32_EF_R24
+#define EF_R25			MIPS32_EF_R25
+#define EF_R26			MIPS32_EF_R26
+#define EF_R27			MIPS32_EF_R27
+#define EF_R28			MIPS32_EF_R28
+#define EF_R29			MIPS32_EF_R29
+#define EF_R30			MIPS32_EF_R30
+#define EF_R31			MIPS32_EF_R31
+#define EF_LO			MIPS32_EF_LO
+#define EF_HI			MIPS32_EF_HI
+#define EF_CP0_EPC		MIPS32_EF_CP0_EPC
+#define EF_CP0_BADVADDR		MIPS32_EF_CP0_BADVADDR
+#define EF_CP0_STATUS		MIPS32_EF_CP0_STATUS
+#define EF_CP0_CAUSE		MIPS32_EF_CP0_CAUSE
+#define EF_UNUSED0		MIPS32_EF_UNUSED0
+#define EF_SIZE			MIPS32_EF_SIZE
+
+#elif defined(CONFIG_64BIT)
+
+#define EF_R0			MIPS64_EF_R0
+#define EF_R1			MIPS64_EF_R1
+#define EF_R2			MIPS64_EF_R2
+#define EF_R3			MIPS64_EF_R3
+#define EF_R4			MIPS64_EF_R4
+#define EF_R5			MIPS64_EF_R5
+#define EF_R6			MIPS64_EF_R6
+#define EF_R7			MIPS64_EF_R7
+#define EF_R8			MIPS64_EF_R8
+#define EF_R9			MIPS64_EF_R9
+#define EF_R10			MIPS64_EF_R10
+#define EF_R11			MIPS64_EF_R11
+#define EF_R12			MIPS64_EF_R12
+#define EF_R13			MIPS64_EF_R13
+#define EF_R14			MIPS64_EF_R14
+#define EF_R15			MIPS64_EF_R15
+#define EF_R16			MIPS64_EF_R16
+#define EF_R17			MIPS64_EF_R17
+#define EF_R18			MIPS64_EF_R18
+#define EF_R19			MIPS64_EF_R19
+#define EF_R20			MIPS64_EF_R20
+#define EF_R21			MIPS64_EF_R21
+#define EF_R22			MIPS64_EF_R22
+#define EF_R23			MIPS64_EF_R23
+#define EF_R24			MIPS64_EF_R24
+#define EF_R25			MIPS64_EF_R25
+#define EF_R26			MIPS64_EF_R26
+#define EF_R27			MIPS64_EF_R27
+#define EF_R28			MIPS64_EF_R28
+#define EF_R29			MIPS64_EF_R29
+#define EF_R30			MIPS64_EF_R30
+#define EF_R31			MIPS64_EF_R31
+#define EF_LO			MIPS64_EF_LO
+#define EF_HI			MIPS64_EF_HI
+#define EF_CP0_EPC		MIPS64_EF_CP0_EPC
+#define EF_CP0_BADVADDR		MIPS64_EF_CP0_BADVADDR
+#define EF_CP0_STATUS		MIPS64_EF_CP0_STATUS
+#define EF_CP0_CAUSE		MIPS64_EF_CP0_CAUSE
+#define EF_SIZE			MIPS64_EF_SIZE
 
 #endif /* CONFIG_64BIT */
 
diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c
index 202e581e6096..7fdf1de0447f 100644
--- a/arch/mips/kernel/binfmt_elfo32.c
+++ b/arch/mips/kernel/binfmt_elfo32.c
@@ -58,12 +58,6 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
 
 #include <asm/processor.h>
 
-/*
- * When this file is selected, we are definitely running a 64bit kernel.
- * So using the right regs define in asm/reg.h
- */
-#define WANT_COMPAT_REG_H
-
 /* These MUST be defined before elf.h gets included */
 extern void elf32_core_copy_regs(elf_gregset_t grp, struct pt_regs *regs);
 #define ELF_CORE_COPY_REGS(_dest, _regs) elf32_core_copy_regs(_dest, _regs);
@@ -135,21 +129,21 @@ void elf32_core_copy_regs(elf_gregset_t grp, struct pt_regs *regs)
 {
 	int i;
 
-	for (i = 0; i < EF_R0; i++)
+	for (i = 0; i < MIPS32_EF_R0; i++)
 		grp[i] = 0;
-	grp[EF_R0] = 0;
+	grp[MIPS32_EF_R0] = 0;
 	for (i = 1; i <= 31; i++)
-		grp[EF_R0 + i] = (elf_greg_t) regs->regs[i];
-	grp[EF_R26] = 0;
-	grp[EF_R27] = 0;
-	grp[EF_LO] = (elf_greg_t) regs->lo;
-	grp[EF_HI] = (elf_greg_t) regs->hi;
-	grp[EF_CP0_EPC] = (elf_greg_t) regs->cp0_epc;
-	grp[EF_CP0_BADVADDR] = (elf_greg_t) regs->cp0_badvaddr;
-	grp[EF_CP0_STATUS] = (elf_greg_t) regs->cp0_status;
-	grp[EF_CP0_CAUSE] = (elf_greg_t) regs->cp0_cause;
-#ifdef EF_UNUSED0
-	grp[EF_UNUSED0] = 0;
+		grp[MIPS32_EF_R0 + i] = (elf_greg_t) regs->regs[i];
+	grp[MIPS32_EF_R26] = 0;
+	grp[MIPS32_EF_R27] = 0;
+	grp[MIPS32_EF_LO] = (elf_greg_t) regs->lo;
+	grp[MIPS32_EF_HI] = (elf_greg_t) regs->hi;
+	grp[MIPS32_EF_CP0_EPC] = (elf_greg_t) regs->cp0_epc;
+	grp[MIPS32_EF_CP0_BADVADDR] = (elf_greg_t) regs->cp0_badvaddr;
+	grp[MIPS32_EF_CP0_STATUS] = (elf_greg_t) regs->cp0_status;
+	grp[MIPS32_EF_CP0_CAUSE] = (elf_greg_t) regs->cp0_cause;
+#ifdef MIPS32_EF_UNUSED0
+	grp[MIPS32_EF_UNUSED0] = 0;
 #endif
 }
 

From 1b91a02f6af6554850568fb1d2716c1b27d1c503 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 29 May 2013 01:02:18 +0200
Subject: [PATCH 0334/1185] MIPS: Cleanup flags in syscall flags handlers.

commit e7f3b48af7be9f8007a224663a5b91340626fed5 upstream.

This will simplify further modifications.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/include/asm/thread_info.h | 2 ++
 arch/mips/kernel/scall32-o32.S      | 2 +-
 arch/mips/kernel/scall64-64.S       | 2 +-
 arch/mips/kernel/scall64-n32.S      | 2 +-
 arch/mips/kernel/scall64-o32.S      | 2 +-
 5 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index 895320e25662..cdea4f65b944 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -131,6 +131,8 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_FPUBOUND		(1<<TIF_FPUBOUND)
 #define _TIF_LOAD_WATCH		(1<<TIF_LOAD_WATCH)
 
+#define _TIF_WORK_SYSCALL_ENTRY	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT)
+
 /* work to do in syscall_trace_leave() */
 #define _TIF_WORK_SYSCALL_EXIT	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT)
 
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 9b36424b03c5..ed5bafb5d637 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -52,7 +52,7 @@ NESTED(handle_sys, PT_SIZE, sp)
 
 stack_done:
 	lw	t0, TI_FLAGS($28)	# syscall tracing enabled?
-	li	t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
+	li	t1, _TIF_WORK_SYSCALL_ENTRY
 	and	t0, t1
 	bnez	t0, syscall_trace_entry # -> yes
 
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 97a5909a61cf..be6627ead619 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -54,7 +54,7 @@ NESTED(handle_sys64, PT_SIZE, sp)
 
 	sd	a3, PT_R26(sp)		# save a3 for syscall restarting
 
-	li	t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
+	li	t1, _TIF_WORK_SYSCALL_ENTRY
 	LONG_L	t0, TI_FLAGS($28)	# syscall tracing enabled?
 	and	t0, t1, t0
 	bnez	t0, syscall_trace_entry
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index edcb6594e7b5..cab150789c8d 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -47,7 +47,7 @@ NESTED(handle_sysn32, PT_SIZE, sp)
 
 	sd	a3, PT_R26(sp)		# save a3 for syscall restarting
 
-	li	t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
+	li	t1, _TIF_WORK_SYSCALL_ENTRY
 	LONG_L	t0, TI_FLAGS($28)	# syscall tracing enabled?
 	and	t0, t1, t0
 	bnez	t0, n32_syscall_trace_entry
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 74f485d3c0ef..37605dc8eef7 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -81,7 +81,7 @@ NESTED(handle_sys, PT_SIZE, sp)
 	PTR	4b, bad_stack
 	.previous
 
-	li	t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
+	li	t1, _TIF_WORK_SYSCALL_ENTRY
 	LONG_L	t0, TI_FLAGS($28)	# syscall tracing enabled?
 	and	t0, t1, t0
 	bnez	t0, trace_a_syscall

From 4fc5ea5e141b9874db74af31befd6a953972b7c0 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Wed, 22 Jan 2014 14:40:00 +0000
Subject: [PATCH 0335/1185] MIPS: asm: thread_info: Add _TIF_SECCOMP flag

commit 137f7df8cead00688524c82360930845396b8a21 upstream.

Add _TIF_SECCOMP flag to _TIF_WORK_SYSCALL_ENTRY to indicate
that the system call needs to be checked against a seccomp filter.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Reviewed-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/6405/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
[bwh: Backported to 3.2: various other flags are not included in
 _TIF_WORK_SYSCALL_ENTRY]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/include/asm/thread_info.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index cdea4f65b944..e6e5d9162213 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -131,7 +131,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_FPUBOUND		(1<<TIF_FPUBOUND)
 #define _TIF_LOAD_WATCH		(1<<TIF_LOAD_WATCH)
 
-#define _TIF_WORK_SYSCALL_ENTRY	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT)
+#define _TIF_WORK_SYSCALL_ENTRY	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP)
 
 /* work to do in syscall_trace_leave() */
 #define _TIF_WORK_SYSCALL_EXIT	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT)

From d2a3ec399f0429d595ca3b14849b001ebb091f78 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@nsn.com>
Date: Tue, 22 Jul 2014 14:51:08 +0300
Subject: [PATCH 0336/1185] MIPS: OCTEON: make get_system_type() thread-safe

commit 608308682addfdc7b8e2aee88f0e028331d88e4d upstream.

get_system_type() is not thread-safe on OCTEON. It uses static data
and, more dangerously, calls cvmx_fuse_read_byte() every time without
any synchronization. Currently it's possible to get processes stuck
looping forever in the kernel simply by launching multiple readers of
/proc/cpuinfo:

	(while true; do cat /proc/cpuinfo > /dev/null; done) &
	(while true; do cat /proc/cpuinfo > /dev/null; done) &
	...

Fix by initializing the system type string only once during the early
boot.

Signed-off-by: Aaro Koskinen <aaro.koskinen@nsn.com>
Reviewed-by: Markos Chandras <markos.chandras@imgtec.com>
Patchwork: http://patchwork.linux-mips.org/patch/7437/
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/cavium-octeon/setup.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index 2a75ff249e71..6430e7acb1eb 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -463,6 +463,18 @@ static void octeon_halt(void)
 	octeon_kill_core(NULL);
 }
 
+static char __read_mostly octeon_system_type[80];
+
+static int __init init_octeon_system_type(void)
+{
+	snprintf(octeon_system_type, sizeof(octeon_system_type), "%s (%s)",
+		cvmx_board_type_to_string(octeon_bootinfo->board_type),
+		octeon_model_get_string(read_c0_prid()));
+
+	return 0;
+}
+early_initcall(init_octeon_system_type);
+
 /**
  * Handle all the error condition interrupts that might occur.
  *
@@ -482,11 +494,7 @@ static irqreturn_t octeon_rlm_interrupt(int cpl, void *dev_id)
  */
 const char *octeon_board_type_string(void)
 {
-	static char name[80];
-	sprintf(name, "%s (%s)",
-		cvmx_board_type_to_string(octeon_bootinfo->board_type),
-		octeon_model_get_string(read_c0_prid()));
-	return name;
+	return octeon_system_type;
 }
 
 const char *get_system_type(void)

From 4f91cb537d2f7fa700a2b6d86a2cc77d20ee2616 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 17 Sep 2013 12:44:31 +0200
Subject: [PATCH 0337/1185] MIPS: Fix accessing to per-cpu data when flushing
 the cache

commit ff522058bd717506b2fa066fa564657f2b86477e upstream.

This fixes the following issue

BUG: using smp_processor_id() in preemptible [00000000] code: kjournald/1761
caller is blast_dcache32+0x30/0x254
Call Trace:
[<8047f02c>] dump_stack+0x8/0x34
[<802e7e40>] debug_smp_processor_id+0xe0/0xf0
[<80114d94>] blast_dcache32+0x30/0x254
[<80118484>] r4k_dma_cache_wback_inv+0x200/0x288
[<80110ff0>] mips_dma_map_sg+0x108/0x180
[<80355098>] ide_dma_prepare+0xf0/0x1b8
[<8034eaa4>] do_rw_taskfile+0x1e8/0x33c
[<8035951c>] ide_do_rw_disk+0x298/0x3e4
[<8034a3c4>] do_ide_request+0x2e0/0x704
[<802bb0dc>] __blk_run_queue+0x44/0x64
[<802be000>] queue_unplugged.isra.36+0x1c/0x54
[<802beb94>] blk_flush_plug_list+0x18c/0x24c
[<802bec6c>] blk_finish_plug+0x18/0x48
[<8026554c>] journal_commit_transaction+0x3b8/0x151c
[<80269648>] kjournald+0xec/0x238
[<8014ac00>] kthread+0xb8/0xc0
[<8010268c>] ret_from_kernel_thread+0x14/0x1c

Caches in most systems are identical - but not always, so we can't avoid
the use of smp_call_function() by just looking at the boot CPU's data;
we have to fiddle with preemption instead.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/5835
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/mm/c-r4k.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 21813beec7a5..5495101d32c8 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -12,6 +12,7 @@
 #include <linux/highmem.h>
 #include <linux/kernel.h>
 #include <linux/linkage.h>
+#include <linux/preempt.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
@@ -601,6 +602,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 	/* Catch bad driver code */
 	BUG_ON(size == 0);
 
+	preempt_disable();
 	if (cpu_has_inclusive_pcaches) {
 		if (size >= scache_size)
 			r4k_blast_scache();
@@ -621,6 +623,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 		R4600_HIT_CACHEOP_WAR_IMPL;
 		blast_dcache_range(addr, addr + size);
 	}
+	preempt_enable();
 
 	bc_wback_inv(addr, size);
 	__sync();
@@ -631,6 +634,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
 	/* Catch bad driver code */
 	BUG_ON(size == 0);
 
+	preempt_disable();
 	if (cpu_has_inclusive_pcaches) {
 		if (size >= scache_size)
 			r4k_blast_scache();
@@ -655,6 +659,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
 		R4600_HIT_CACHEOP_WAR_IMPL;
 		blast_inv_dcache_range(addr, addr + size);
 	}
+	preempt_enable();
 
 	bc_inv(addr, size);
 	__sync();

From af7b15c9d60584ad34b2ac1641953229ac6d1ba8 Mon Sep 17 00:00:00 2001
From: Jonas Bonn <jonas@southpole.se>
Date: Sun, 19 Feb 2012 17:36:53 +0100
Subject: [PATCH 0338/1185] openrisc: Rework signal handling

commit 10f67dbf6add97751050f294d4c8e0cc1e5c2c23 upstream.

The mainline signal handling code for OpenRISC has been buggy since day
one with respect to syscall restart.  This patch significantly reworks
the signal handling code:

i)   Move the "work pending" loop to C code (borrowed from ARM arch)

ii)  Allow a tracer to muck about with the IP and skip syscall restart
     in that case (again, borrowed from ARM)

iii) Make signal handling WRT syscall restart actually work

iv)  Make the signal handling code look more like that of other
     architectures so that it's easier for others to follow

Reported-by: Anders Nystrom <anders@southpole.se>
Signed-off-by: Jonas Bonn <jonas@southpole.se>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/openrisc/kernel/entry.S  |  63 ++++++-----
 arch/openrisc/kernel/signal.c | 202 ++++++++++++++++++----------------
 2 files changed, 143 insertions(+), 122 deletions(-)

diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S
index d8a455ede5a7..fec8bf97d806 100644
--- a/arch/openrisc/kernel/entry.S
+++ b/arch/openrisc/kernel/entry.S
@@ -853,38 +853,45 @@ UNHANDLED_EXCEPTION(_vector_0x1f00,0x1f00)
 
 /* ========================================================[ return ] === */
 
-_work_pending:
-	/*
-	 * if (current_thread_info->flags & _TIF_NEED_RESCHED)
-	 *     schedule();
-	 */
-	l.lwz   r5,TI_FLAGS(r10)
-	l.andi	r3,r5,_TIF_NEED_RESCHED
-	l.sfnei r3,0
-	l.bnf   _work_notifysig
-	 l.nop
-	l.jal   schedule
-	 l.nop
-	l.j	_resume_userspace
-	 l.nop
-
-/* Handle pending signals and notify-resume requests.
- * do_notify_resume must be passed the latest pushed pt_regs, not
- * necessarily the "userspace" ones.  Also, pt_regs->syscallno
- * must be set so that the syscall restart functionality works.
- */
-_work_notifysig:
-	l.jal	do_notify_resume
-	 l.ori	r3,r1,0		  /* pt_regs */
-
 _resume_userspace:
 	DISABLE_INTERRUPTS(r3,r4)
-	l.lwz	r3,TI_FLAGS(r10)
-	l.andi	r3,r3,_TIF_WORK_MASK
-	l.sfnei	r3,0
-	l.bf	_work_pending
+	l.lwz	r4,TI_FLAGS(r10)
+	l.andi	r13,r4,_TIF_WORK_MASK
+	l.sfeqi	r13,0
+	l.bf	_restore_all
 	 l.nop
 
+_work_pending:
+	l.lwz	r5,PT_ORIG_GPR11(r1)
+	l.sfltsi r5,0
+	l.bnf	1f
+	 l.nop
+	l.andi	r5,r5,0
+1:
+	l.jal	do_work_pending
+	 l.ori	r3,r1,0			/* pt_regs */
+
+	l.sfeqi	r11,0
+	l.bf	_restore_all
+	 l.nop
+	l.sfltsi r11,0
+	l.bnf	1f
+	 l.nop
+	l.and	r11,r11,r0
+	l.ori	r11,r11,__NR_restart_syscall
+	l.j	_syscall_check_trace_enter
+	 l.nop
+1:
+	l.lwz	r11,PT_ORIG_GPR11(r1)
+	/* Restore arg registers */
+	l.lwz	r3,PT_GPR3(r1)
+	l.lwz	r4,PT_GPR4(r1)
+	l.lwz	r5,PT_GPR5(r1)
+	l.lwz	r6,PT_GPR6(r1)
+	l.lwz	r7,PT_GPR7(r1)
+	l.j	_syscall_check_trace_enter
+	 l.lwz	r8,PT_GPR8(r1)
+
 _restore_all:
 	RESTORE_ALL
 	/* This returns to userspace code */
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
index ae167f7e081a..c277ec82783d 100644
--- a/arch/openrisc/kernel/signal.c
+++ b/arch/openrisc/kernel/signal.c
@@ -28,24 +28,24 @@
 #include <linux/tracehook.h>
 
 #include <asm/processor.h>
+#include <asm/syscall.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 
 #define DEBUG_SIG 0
 
 struct rt_sigframe {
-	struct siginfo *pinfo;
-	void *puc;
 	struct siginfo info;
 	struct ucontext uc;
 	unsigned char retcode[16];	/* trampoline code */
 };
 
-static int restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc)
+static int restore_sigcontext(struct pt_regs *regs,
+			      struct sigcontext __user *sc)
 {
-	unsigned int err = 0;
+	int err = 0;
 
-	/* Alwys make any pending restarted system call return -EINTR */
+	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
 	/*
@@ -53,25 +53,21 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc)
 	 * (sc is already checked for VERIFY_READ since the sigframe was
 	 *  checked in sys_sigreturn previously)
 	 */
-	if (__copy_from_user(regs, sc->regs.gpr, 32 * sizeof(unsigned long)))
-		goto badframe;
-	if (__copy_from_user(&regs->pc, &sc->regs.pc, sizeof(unsigned long)))
-		goto badframe;
-	if (__copy_from_user(&regs->sr, &sc->regs.sr, sizeof(unsigned long)))
-		goto badframe;
+	err |= __copy_from_user(regs, sc->regs.gpr, 32 * sizeof(unsigned long));
+	err |= __copy_from_user(&regs->pc, &sc->regs.pc, sizeof(unsigned long));
+	err |= __copy_from_user(&regs->sr, &sc->regs.sr, sizeof(unsigned long));
 
 	/* make sure the SM-bit is cleared so user-mode cannot fool us */
 	regs->sr &= ~SPR_SR_SM;
 
+	regs->orig_gpr11 = -1;	/* Avoid syscall restart checks */
+
 	/* TODO: the other ports use regs->orig_XX to disable syscall checks
 	 * after this completes, but we don't use that mechanism. maybe we can
 	 * use it now ?
 	 */
 
 	return err;
-
-badframe:
-	return 1;
 }
 
 asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs)
@@ -111,21 +107,18 @@ asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs)
  * Set up a signal frame.
  */
 
-static int setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
-			    unsigned long mask)
+static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
 {
 	int err = 0;
 
 	/* copy the regs */
-
+	/* There should be no need to save callee-saved registers here...
+	 * ...but we save them anyway.  Revisit this
+	 */
 	err |= __copy_to_user(sc->regs.gpr, regs, 32 * sizeof(unsigned long));
 	err |= __copy_to_user(&sc->regs.pc, &regs->pc, sizeof(unsigned long));
 	err |= __copy_to_user(&sc->regs.sr, &regs->sr, sizeof(unsigned long));
 
-	/* then some other stuff */
-
-	err |= __put_user(mask, &sc->oldmask);
-
 	return err;
 }
 
@@ -181,24 +174,18 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	int err = 0;
 
 	frame = get_sigframe(ka, regs, sizeof(*frame));
-
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		goto give_sigsegv;
 
-	err |= __put_user(&frame->info, &frame->pinfo);
-	err |= __put_user(&frame->uc, &frame->puc);
-
+	/* Create siginfo.  */
 	if (ka->sa.sa_flags & SA_SIGINFO)
 		err |= copy_siginfo_to_user(&frame->info, info);
-	if (err)
-		goto give_sigsegv;
 
-	/* Clear all the bits of the ucontext we don't use.  */
-	err |= __clear_user(&frame->uc, offsetof(struct ucontext, uc_mcontext));
+	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(NULL, &frame->uc.uc_link);
 	err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
-	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0]);
+	err |= setup_sigcontext(regs, &frame->uc.uc_mcontext);
 
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 
@@ -207,9 +194,12 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 
 	/* trampoline - the desired return ip is the retcode itself */
 	return_ip = (unsigned long)&frame->retcode;
-	/* This is l.ori r11,r0,__NR_sigreturn, l.sys 1 */
-	err |= __put_user(0xa960, (short *)(frame->retcode + 0));
-	err |= __put_user(__NR_rt_sigreturn, (short *)(frame->retcode + 2));
+	/* This is:
+		l.ori r11,r0,__NR_sigreturn
+		l.sys 1
+	 */
+	err |= __put_user(0xa960,             (short *)(frame->retcode + 0));
+	err |= __put_user(__NR_rt_sigreturn,  (short *)(frame->retcode + 2));
 	err |= __put_user(0x20000001, (unsigned long *)(frame->retcode + 4));
 	err |= __put_user(0x15000000, (unsigned long *)(frame->retcode + 8));
 
@@ -262,82 +252,106 @@ handle_signal(unsigned long sig,
  * mode below.
  */
 
-void do_signal(struct pt_regs *regs)
+int do_signal(struct pt_regs *regs, int syscall)
 {
 	siginfo_t info;
 	int signr;
 	struct k_sigaction ka;
+	unsigned long continue_addr = 0;
+	unsigned long restart_addr = 0;
+	unsigned long retval = 0;
+	int restart = 0;
+
+	if (syscall) {
+		continue_addr = regs->pc;
+		restart_addr = continue_addr - 4;
+		retval = regs->gpr[11];
+
+		/*
+		 * Setup syscall restart here so that a debugger will
+		 * see the already changed PC.
+		 */
+		switch (retval) {
+		case -ERESTART_RESTARTBLOCK:
+			restart = -2;
+			/* Fall through */
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+			restart++;
+			regs->gpr[11] = regs->orig_gpr11;
+			regs->pc = restart_addr;
+			break;
+		}
+	}
 
 	/*
-	 * We want the common case to go fast, which
-	 * is why we may in certain cases get here from
-	 * kernel mode. Just return without doing anything
-	 * if so.
+	 * Get the signal to deliver.  When running under ptrace, at this
+	 * point the debugger may change all our registers ...
 	 */
-	if (!user_mode(regs))
-		return;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
-
-	/* If we are coming out of a syscall then we need
-	 * to check if the syscall was interrupted and wants to be
-	 * restarted after handling the signal.  If so, the original
-	 * syscall number is put back into r11 and the PC rewound to
-	 * point at the l.sys instruction that resulted in the
-	 * original syscall.  Syscall results other than the four
-	 * below mean that the syscall executed to completion and no
-	 * restart is necessary.
+	/*
+	 * Depending on the signal settings we may need to revert the
+	 * decision to restart the system call.  But skip this if a
+	 * debugger has chosen to restart at a different PC.
 	 */
-	if (regs->orig_gpr11) {
-		int restart = 0;
-
-		switch (regs->gpr[11]) {
-		case -ERESTART_RESTARTBLOCK:
-		case -ERESTARTNOHAND:
-			/* Restart if there is no signal handler */
-			restart = (signr <= 0);
-			break;
-		case -ERESTARTSYS:
-			/* Restart if there no signal handler or
-			 * SA_RESTART flag is set */
-			restart = (signr <= 0 || (ka.sa.sa_flags & SA_RESTART));
-			break;
-		case -ERESTARTNOINTR:
-			/* Always restart */
-			restart = 1;
-			break;
+	if (signr > 0) {
+		if (unlikely(restart) && regs->pc == restart_addr) {
+			if (retval == -ERESTARTNOHAND ||
+			    retval == -ERESTART_RESTARTBLOCK
+			    || (retval == -ERESTARTSYS
+			        && !(ka.sa.sa_flags & SA_RESTART))) {
+				/* No automatic restart */
+				regs->gpr[11] = -EINTR;
+				regs->pc = continue_addr;
+			}
 		}
 
-		if (restart) {
-			if (regs->gpr[11] == -ERESTART_RESTARTBLOCK)
-				regs->gpr[11] = __NR_restart_syscall;
-			else
-				regs->gpr[11] = regs->orig_gpr11;
-			regs->pc -= 4;
-		} else {
-			regs->gpr[11] = -EINTR;
-		}
-	}
-
-	if (signr <= 0) {
-		/* no signal to deliver so we just put the saved sigmask
-		 * back */
-		restore_saved_sigmask();
-	} else {		/* signr > 0 */
-		/* Whee!  Actually deliver the signal.  */
 		handle_signal(signr, &info, &ka, regs);
+	} else {
+		/* no handler */
+		restore_saved_sigmask();
+		/*
+		 * Restore pt_regs PC as syscall restart will be handled by
+		 * kernel without return to userspace
+		 */
+		if (unlikely(restart) && regs->pc == restart_addr) {
+			regs->pc = continue_addr;
+			return restart;
+		}
 	}
 
-	return;
+	return 0;
 }
 
-asmlinkage void do_notify_resume(struct pt_regs *regs)
+asmlinkage int
+do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
 {
-	if (current_thread_info()->flags & _TIF_SIGPENDING)
-		do_signal(regs);
-
-	if (current_thread_info()->flags & _TIF_NOTIFY_RESUME) {
-		clear_thread_flag(TIF_NOTIFY_RESUME);
-		tracehook_notify_resume(regs);
-	}
+	do {
+		if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+			schedule();
+		} else {
+			if (unlikely(!user_mode(regs)))
+				return 0;
+			local_irq_enable();
+			if (thread_flags & _TIF_SIGPENDING) {
+				int restart = do_signal(regs, syscall);
+				if (unlikely(restart)) {
+					/*
+					 * Restart without handlers.
+					 * Deal with it without leaving
+					 * the kernel space.
+					 */
+					return restart;
+				}
+				syscall = 0;
+			} else {
+				clear_thread_flag(TIF_NOTIFY_RESUME);
+				tracehook_notify_resume(regs);
+			}
+		}
+		local_irq_disable();
+		thread_flags = current_thread_info()->flags;
+	} while (thread_flags & _TIF_WORK_MASK);
+	return 0;
 }

From 24be9aa61fa39fb973d5f7079a382524e459972c Mon Sep 17 00:00:00 2001
From: Qiao Zhou <zhouqiao@marvell.com>
Date: Wed, 4 Jun 2014 19:42:06 +0800
Subject: [PATCH 0339/1185] ASoC: pcm: fix dpcm_path_put in dpcm runtime update

commit 7ed9de76ff342cbd717a9cf897044b99272cb8f8 upstream.

We need to release the DAPM widget list after dpcm_path_get() in
soc_dpcm_runtime_update(); otherwise there is a potential memory
leak. Add dpcm_path_put() to fix it.

Signed-off-by: Qiao Zhou <zhouqiao@marvell.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/soc-pcm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index ccb6be4d658d..02d26915b61d 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1886,6 +1886,7 @@ int soc_dpcm_runtime_update(struct snd_soc_dapm_widget *widget)
 			dpcm_be_disconnect(fe, SNDRV_PCM_STREAM_PLAYBACK);
 		}
 
+		dpcm_path_put(&list);
 capture:
 		/* skip if FE doesn't have capture capability */
 		if (!fe->cpu_dai->driver->capture.channels_min)

From f3327152958aeab0c3dc9ae115e171141eabe2ab Mon Sep 17 00:00:00 2001
From: Praveen Diwakar <praveen.diwakar@intel.com>
Date: Fri, 4 Jul 2014 11:17:41 +0530
Subject: [PATCH 0340/1185] ASoC: wm_adsp: Add missing MODULE_LICENSE

commit 0a37c6efec4a2fdc2563c5a8faa472b814deee80 upstream.

Since MODULE_LICENSE is missing, the module fails to load.
Add it to the module.

Signed-off-by: Praveen Diwakar <praveen.diwakar@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Reviewed-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/codecs/wm_adsp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 6dbb17d050c9..ca1e999026e5 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -1284,3 +1284,5 @@ int wm_adsp2_init(struct wm_adsp *adsp, bool dvfs)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(wm_adsp2_init);
+
+MODULE_LICENSE("GPL v2");

From ef49cea359559aa9065226ca331c11ddc6a5327f Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Fri, 4 Jul 2014 16:05:45 +0200
Subject: [PATCH 0341/1185] ASoC: samsung: Correct I2S DAI suspend/resume ops

commit d3d4e5247b013008a39e4d5f69ce4c60ed57f997 upstream.

We should save/restore relevant I2S registers regardless of
the dai->active flag, otherwise some settings are being lost
after system suspend/resume cycle. E.g. I2S slave mode set only
during dai initialization is not preserved and the device ends
up in master mode after system resume.

Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/samsung/i2s.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c
index 82ebb1a51479..5c9b5e4f94c3 100644
--- a/sound/soc/samsung/i2s.c
+++ b/sound/soc/samsung/i2s.c
@@ -853,11 +853,9 @@ static int i2s_suspend(struct snd_soc_dai *dai)
 {
 	struct i2s_dai *i2s = to_info(dai);
 
-	if (dai->active) {
-		i2s->suspend_i2smod = readl(i2s->addr + I2SMOD);
-		i2s->suspend_i2scon = readl(i2s->addr + I2SCON);
-		i2s->suspend_i2spsr = readl(i2s->addr + I2SPSR);
-	}
+	i2s->suspend_i2smod = readl(i2s->addr + I2SMOD);
+	i2s->suspend_i2scon = readl(i2s->addr + I2SCON);
+	i2s->suspend_i2spsr = readl(i2s->addr + I2SPSR);
 
 	return 0;
 }
@@ -866,11 +864,9 @@ static int i2s_resume(struct snd_soc_dai *dai)
 {
 	struct i2s_dai *i2s = to_info(dai);
 
-	if (dai->active) {
-		writel(i2s->suspend_i2scon, i2s->addr + I2SCON);
-		writel(i2s->suspend_i2smod, i2s->addr + I2SMOD);
-		writel(i2s->suspend_i2spsr, i2s->addr + I2SPSR);
-	}
+	writel(i2s->suspend_i2scon, i2s->addr + I2SCON);
+	writel(i2s->suspend_i2smod, i2s->addr + I2SMOD);
+	writel(i2s->suspend_i2spsr, i2s->addr + I2SPSR);
 
 	return 0;
 }

From f084c9428b1fa9a7df201a312fe708a286252240 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Thu, 19 Jun 2014 09:32:05 +0300
Subject: [PATCH 0342/1185] ASoC: max98090: Fix missing free_irq

commit 4adeb0ccf86a5af1825bbfe290dee9e60a5ab870 upstream.

max98090.c doesn't free the threaded interrupt it requests. This causes
an oops when doing "cat /proc/interrupts" after snd-soc-max98090.ko is
unloaded.

Fix this by requesting the interrupt by using devm_request_threaded_irq().

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/codecs/max98090.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c
index 9b7746c9546f..76bfeb3c3e30 100644
--- a/sound/soc/codecs/max98090.c
+++ b/sound/soc/codecs/max98090.c
@@ -2234,7 +2234,7 @@ static int max98090_probe(struct snd_soc_codec *codec)
 	/* Register for interrupts */
 	dev_dbg(codec->dev, "irq = %d\n", max98090->irq);
 
-	ret = request_threaded_irq(max98090->irq, NULL,
+	ret = devm_request_threaded_irq(codec->dev, max98090->irq, NULL,
 		max98090_interrupt, IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 		"max98090_interrupt", codec);
 	if (ret < 0) {

From f4d475ab09e057063a8a399b6ebb4c42bc22dd07 Mon Sep 17 00:00:00 2001
From: Daniel Mack <zonque@gmail.com>
Date: Wed, 13 Aug 2014 21:51:06 +0200
Subject: [PATCH 0343/1185] ASoC: pxa-ssp: drop SNDRV_PCM_FMTBIT_S24_LE

commit 9301503af016eb537ccce76adec0c1bb5c84871e upstream.

This mode is unsupported, as the DMA controller can't do zero-padding
of samples.

Signed-off-by: Daniel Mack <zonque@gmail.com>
Reported-by: Johannes Stezenbach <js@sig21.net>
Signed-off-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/pxa/pxa-ssp.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index 6f4dd7543e82..95a9b07bbe96 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -757,9 +757,7 @@ static int pxa_ssp_remove(struct snd_soc_dai *dai)
 			  SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_64000 |	\
 			  SNDRV_PCM_RATE_88200 | SNDRV_PCM_RATE_96000)
 
-#define PXA_SSP_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
-			    SNDRV_PCM_FMTBIT_S24_LE |	\
-			    SNDRV_PCM_FMTBIT_S32_LE)
+#define PXA_SSP_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE)
 
 static const struct snd_soc_dai_ops pxa_ssp_dai_ops = {
 	.startup	= pxa_ssp_startup,

From db065663add6a78c8054b11e41c30cd045316437 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sun, 8 Jun 2014 23:33:25 +0100
Subject: [PATCH 0344/1185] bfa: Fix undefined bit shift on big-endian
 architectures with 32-bit DMA address

commit 03a6c3ff3282ee9fa893089304d951e0be93a144 upstream.

bfa_swap_words() shifts its argument (assumed to be 64-bit) by 32 bits
each way.  In two places the argument type is dma_addr_t, which may be
32-bit, in which case the effect of the bit shift is undefined:

drivers/scsi/bfa/bfa_fcpim.c: In function 'bfa_ioim_send_ioreq':
drivers/scsi/bfa/bfa_fcpim.c:2497:4: warning: left shift count >= width of type [enabled by default]
    addr = bfa_sgaddr_le(sg_dma_address(sg));
    ^
drivers/scsi/bfa/bfa_fcpim.c:2497:4: warning: right shift count >= width of type [enabled by default]
drivers/scsi/bfa/bfa_fcpim.c:2509:4: warning: left shift count >= width of type [enabled by default]
    addr = bfa_sgaddr_le(sg_dma_address(sg));
    ^
drivers/scsi/bfa/bfa_fcpim.c:2509:4: warning: right shift count >= width of type [enabled by default]

Avoid this by adding casts to u64 in bfa_swap_words().

Compile-tested only.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Anil Gurumurthy <anil.gurumurthy@qlogic.com>
Fixes: f16a17507b09 ('[SCSI] bfa: remove all OS wrappers')
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/bfa/bfa_ioc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/bfa/bfa_ioc.h b/drivers/scsi/bfa/bfa_ioc.h
index 23a90e7b7107..a119421cb324 100644
--- a/drivers/scsi/bfa/bfa_ioc.h
+++ b/drivers/scsi/bfa/bfa_ioc.h
@@ -72,7 +72,7 @@ struct bfa_sge_s {
 } while (0)
 
 #define bfa_swap_words(_x)  (	\
-	((_x) << 32) | ((_x) >> 32))
+	((u64)(_x) << 32) | ((u64)(_x) >> 32))
 
 #ifdef __BIG_ENDIAN
 #define bfa_sge_to_be(_x)

From b3e98f0c4f996cd53b80bad71f0d7e4a2cf3a4e8 Mon Sep 17 00:00:00 2001
From: "David E. Box" <david.e.box@linux.intel.com>
Date: Tue, 8 Jul 2014 10:05:52 +0800
Subject: [PATCH 0345/1185] ACPICA: Utilities: Fix memory leak in
 acpi_ut_copy_iobject_to_iobject

commit 8aa5e56eeb61a099ea6519eb30ee399e1bc043ce upstream.

Adds return status check on copy routines to delete the allocated destination
object if either copy fails. Reported by Colin Ian King on bugs.acpica.org,
Bug 1087.
The last applicable commit:
 Commit: 3371c19c294a4cb3649aa4e84606be8a1d999e61
 Subject: ACPICA: Remove ACPI_GET_OBJECT_TYPE macro

Link: https://bugs.acpica.org/show_bug.cgi?id=1087
Reported-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David E. Box <david.e.box@linux.intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/acpica/utcopy.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/acpi/acpica/utcopy.c b/drivers/acpi/acpica/utcopy.c
index e4c9291fc0a3..a63a4cdd2ce8 100644
--- a/drivers/acpi/acpica/utcopy.c
+++ b/drivers/acpi/acpica/utcopy.c
@@ -998,5 +998,11 @@ acpi_ut_copy_iobject_to_iobject(union acpi_operand_object *source_desc,
 		status = acpi_ut_copy_simple_object(source_desc, *dest_desc);
 	}
 
+	/* Delete the allocated object if copy failed */
+
+	if (ACPI_FAILURE(status)) {
+		acpi_ut_remove_reference(*dest_desc);
+	}
+
 	return_ACPI_STATUS(status);
 }

From c55d35d2b8fc84218785cf12dd3e895bc83af66d Mon Sep 17 00:00:00 2001
From: Lan Tianyu <tianyu.lan@intel.com>
Date: Tue, 26 Aug 2014 01:29:24 +0200
Subject: [PATCH 0346/1185] ACPI: Run fixed event device notifications in
 process context

commit 236105db632c6279a020f78c83e22eaef746006b upstream.

Currently, notify callbacks for fixed button events are run from
interrupt context.  That is not necessary and after commit 0bf6368ee8f2
(ACPI / button: Add ACPI Button event via netlink routine) it causes
netlink routines to be called from interrupt context which is not
correct.

Also, that is different from non-fixed device events (including
non-fixed button events) whose notify callbacks are all executed from
process context.

For the above reasons, make fixed button device notify callbacks run
in process context which will avoid the deadlock when using netlink
to report button events to user space.

Fixes: 0bf6368ee8f2 (ACPI / button: Add ACPI Button event via netlink routine)
Link: https://lkml.org/lkml/2014/8/21/606
Reported-by: Benjamin Block <bebl@mageta.org>
Reported-by: Knut Petersen <Knut_Petersen@t-online.de>
Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
[rjw: Function names, subject and changelog.]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/scan.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index cca761e80d89..091682fb1617 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -769,12 +769,17 @@ static void acpi_device_notify(acpi_handle handle, u32 event, void *data)
 	device->driver->ops.notify(device, event);
 }
 
-static acpi_status acpi_device_notify_fixed(void *data)
+static void acpi_device_notify_fixed(void *data)
 {
 	struct acpi_device *device = data;
 
 	/* Fixed hardware devices have no handles */
 	acpi_device_notify(NULL, ACPI_FIXED_HARDWARE_EVENT, device);
+}
+
+static acpi_status acpi_device_fixed_event(void *data)
+{
+	acpi_os_execute(OSL_NOTIFY_HANDLER, acpi_device_notify_fixed, data);
 	return AE_OK;
 }
 
@@ -785,12 +790,12 @@ static int acpi_device_install_notify_handler(struct acpi_device *device)
 	if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON)
 		status =
 		    acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON,
-						     acpi_device_notify_fixed,
+						     acpi_device_fixed_event,
 						     device);
 	else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON)
 		status =
 		    acpi_install_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON,
-						     acpi_device_notify_fixed,
+						     acpi_device_fixed_event,
 						     device);
 	else
 		status = acpi_install_notify_handler(device->handle,
@@ -807,10 +812,10 @@ static void acpi_device_remove_notify_handler(struct acpi_device *device)
 {
 	if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON)
 		acpi_remove_fixed_event_handler(ACPI_EVENT_POWER_BUTTON,
-						acpi_device_notify_fixed);
+						acpi_device_fixed_event);
 	else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON)
 		acpi_remove_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON,
-						acpi_device_notify_fixed);
+						acpi_device_fixed_event);
 	else
 		acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY,
 					   acpi_device_notify);

From 4f6a1e6210f5aeed2832d69103fc6511c0ca7c2d Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Wed, 3 Sep 2014 15:04:28 +0200
Subject: [PATCH 0347/1185] ACPI / cpuidle: fix deadlock between cpuidle_lock
 and cpu_hotplug.lock

commit 6726655dfdd2dc60c035c690d9f10cb69d7ea075 upstream.

There is a following AB-BA dependency between cpu_hotplug.lock and
cpuidle_lock:

1) cpu_hotplug.lock -> cpuidle_lock
enable_nonboot_cpus()
 _cpu_up()
  cpu_hotplug_begin()
   LOCK(cpu_hotplug.lock)
 cpu_notify()
  ...
  acpi_processor_hotplug()
   cpuidle_pause_and_lock()
    LOCK(cpuidle_lock)

2) cpuidle_lock -> cpu_hotplug.lock
acpi_os_execute_deferred() workqueue
 ...
 acpi_processor_cst_has_changed()
  cpuidle_pause_and_lock()
   LOCK(cpuidle_lock)
  get_online_cpus()
   LOCK(cpu_hotplug.lock)

Fix this by reversing the order in which acpi_processor_cst_has_changed()
does things -- let it first execute the protection against CPU hotplug by
calling get_online_cpus() and obtain the cpuidle lock only after that (and
perform the symmetric change when allowing CPU hotplug again and
dropping the cpuidle lock).

Spotted by lockdep.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/processor_idle.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 4056d3175178..a88894190e41 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -1101,9 +1101,9 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr)
 
 	if (pr->id == 0 && cpuidle_get_driver() == &acpi_idle_driver) {
 
-		cpuidle_pause_and_lock();
 		/* Protect against cpu-hotplug */
 		get_online_cpus();
+		cpuidle_pause_and_lock();
 
 		/* Disable all cpuidle devices */
 		for_each_online_cpu(cpu) {
@@ -1130,8 +1130,8 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr)
 				cpuidle_enable_device(dev);
 			}
 		}
-		put_online_cpus();
 		cpuidle_resume_and_unlock();
+		put_online_cpus();
 	}
 
 	return 0;

From 814aa5addf612498365a99dc844265eed232a700 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 6 Aug 2014 14:11:33 -0400
Subject: [PATCH 0348/1185] ring-buffer: Always reset iterator to reader page

commit 651e22f2701b4113989237c3048d17337dd2185c upstream.

When performing a consuming read, the ring buffer swaps out a
page from the ring buffer with an empty page and this page that
was swapped out becomes the new reader page. The reader page
is owned by the reader and since it was swapped out of the ring
buffer, writers do not have access to it (there's an exception
to that rule, but it's out of scope for this commit).

When reading the "trace" file, it is a non consuming read, which
means that the data in the ring buffer will not be modified.
When the trace file is opened, a ring buffer iterator is allocated
and writes to the ring buffer are disabled, such that the iterator
will not have issues iterating over the data.

Although the ring buffer disabled writes, it does not disable other
reads, or even consuming reads. If a consuming read happens, then
the iterator is reset and starts reading from the beginning again.

My tests would sometimes trigger this bug on my i386 box:

WARNING: CPU: 0 PID: 5175 at kernel/trace/trace.c:1527 __trace_find_cmdline+0x66/0xaa()
Modules linked in:
CPU: 0 PID: 5175 Comm: grep Not tainted 3.16.0-rc3-test+ #8
Hardware name:                  /DG965MQ, BIOS MQ96510J.86A.0372.2006.0605.1717 06/05/2006
 00000000 00000000 f09c9e1c c18796b3 c1b5d74c f09c9e4c c103a0e3 c1b5154b
 f09c9e78 00001437 c1b5d74c 000005f7 c10bd85a c10bd85a c1cac57c f09c9eb0
 ed0e0000 f09c9e64 c103a185 00000009 f09c9e5c c1b5154b f09c9e78 f09c9e80
Call Trace:
 [<c18796b3>] dump_stack+0x4b/0x75
 [<c103a0e3>] warn_slowpath_common+0x7e/0x95
 [<c10bd85a>] ? __trace_find_cmdline+0x66/0xaa
 [<c10bd85a>] ? __trace_find_cmdline+0x66/0xaa
 [<c103a185>] warn_slowpath_fmt+0x33/0x35
 [<c10bd85a>] __trace_find_cmdline+0x66/0xaa
 [<c10bed04>] trace_find_cmdline+0x40/0x64
 [<c10c3c16>] trace_print_context+0x27/0xec
 [<c10c4360>] ? trace_seq_printf+0x37/0x5b
 [<c10c0b15>] print_trace_line+0x319/0x39b
 [<c10ba3fb>] ? ring_buffer_read+0x47/0x50
 [<c10c13b1>] s_show+0x192/0x1ab
 [<c10bfd9a>] ? s_next+0x5a/0x7c
 [<c112e76e>] seq_read+0x267/0x34c
 [<c1115a25>] vfs_read+0x8c/0xef
 [<c112e507>] ? seq_lseek+0x154/0x154
 [<c1115ba2>] SyS_read+0x54/0x7f
 [<c188488e>] syscall_call+0x7/0xb
---[ end trace 3f507febd6b4cc83 ]---
>>>> ##### CPU 1 buffer started ####

Which was the __trace_find_cmdline() function complaining about the pid
in the event record being negative.

After adding more test cases, this would trigger more often. Strangely
enough, it would never trigger on a single test, but instead would trigger
only when running all the tests. I believe that was the case because it
required one of the tests to be shutting down via delayed instances while
a new test started up.

After spending several days debugging this, I found that it was caused by
the iterator becoming corrupted. Debugging further, I found out why
the iterator became corrupted. It happened with the rb_iter_reset().

As consuming reads may not read the full reader page, and only part
of it, there's a "read" field to know where the last read took place.
The iterator must also start at the read position. In the rb_iter_reset()
code, if the reader page was disconnected from the ring buffer, the iterator
would start at the head page within the ring buffer (where writes still
happen). But the mistake there was that it still used the "read" field
to start the iterator on the head page, where it should always start
at zero because readers never read from within the ring buffer where
writes occur.

I originally wrote a patch to have it set the iter->head to 0 instead
of iter->head_page->read, but then I questioned why it wasn't always
setting the iter to point to the reader page, as the reader page is
still valid.  The list_empty(reader_page->list) just means that it was
successful in swapping out. But the reader_page may still have data.

There was a bug report a long time ago that was not reproducible that
had something about trace_pipe (consuming read) not matching trace
(iterator read). This may explain why that happened.

Anyway, the correct answer to this bug is to always use the reader page
and not reset the iterator to inside the writable ring buffer.

Fixes: d769041f8653 "ring_buffer: implement new locking"
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/ring_buffer.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 4063d5fe5e44..933d1838488b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3353,21 +3353,16 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 
 	/* Iterator usage is expected to have record disabled */
-	if (list_empty(&cpu_buffer->reader_page->list)) {
-		iter->head_page = rb_set_head_page(cpu_buffer);
-		if (unlikely(!iter->head_page))
-			return;
-		iter->head = iter->head_page->read;
-	} else {
-		iter->head_page = cpu_buffer->reader_page;
-		iter->head = cpu_buffer->reader_page->read;
-	}
+	iter->head_page = cpu_buffer->reader_page;
+	iter->head = cpu_buffer->reader_page->read;
+
+	iter->cache_reader_page = iter->head_page;
+	iter->cache_read = iter->head;
+
 	if (iter->head)
 		iter->read_stamp = cpu_buffer->read_stamp;
 	else
 		iter->read_stamp = iter->head_page->page->time_stamp;
-	iter->cache_reader_page = cpu_buffer->reader_page;
-	iter->cache_read = cpu_buffer->read;
 }
 
 /**

From 7f70b62ea0f72edae160ddf540723991b84c8279 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 6 Aug 2014 15:36:31 -0400
Subject: [PATCH 0349/1185] ring-buffer: Up rb_iter_peek() loop count to 3

commit 021de3d904b88b1771a3a2cfc5b75023c391e646 upstream.

After writing a test to try to trigger the bug that caused the
ring buffer iterator to become corrupted, I hit another bug:

 WARNING: CPU: 1 PID: 5281 at kernel/trace/ring_buffer.c:3766 rb_iter_peek+0x113/0x238()
 Modules linked in: ipt_MASQUERADE sunrpc [...]
 CPU: 1 PID: 5281 Comm: grep Tainted: G        W     3.16.0-rc3-test+ #143
 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
  0000000000000000 ffffffff81809a80 ffffffff81503fb0 0000000000000000
  ffffffff81040ca1 ffff8800796d6010 ffffffff810c138d ffff8800796d6010
  ffff880077438c80 ffff8800796d6010 ffff88007abbe600 0000000000000003
 Call Trace:
  [<ffffffff81503fb0>] ? dump_stack+0x4a/0x75
  [<ffffffff81040ca1>] ? warn_slowpath_common+0x7e/0x97
  [<ffffffff810c138d>] ? rb_iter_peek+0x113/0x238
  [<ffffffff810c138d>] ? rb_iter_peek+0x113/0x238
  [<ffffffff810c14df>] ? ring_buffer_iter_peek+0x2d/0x5c
  [<ffffffff810c6f73>] ? tracing_iter_reset+0x6e/0x96
  [<ffffffff810c74a3>] ? s_start+0xd7/0x17b
  [<ffffffff8112b13e>] ? kmem_cache_alloc_trace+0xda/0xea
  [<ffffffff8114cf94>] ? seq_read+0x148/0x361
  [<ffffffff81132d98>] ? vfs_read+0x93/0xf1
  [<ffffffff81132f1b>] ? SyS_read+0x60/0x8e
  [<ffffffff8150bf9f>] ? tracesys+0xdd/0xe2

Debugging this bug, which triggers when the rb_iter_peek() loops too
many times (more than 2 times), I discovered there's a case that can
cause that function to legitimately loop 3 times!

rb_iter_peek() is different than rb_buffer_peek() as the rb_buffer_peek()
only deals with the reader page (it's for consuming reads). The
rb_iter_peek() is for traversing the buffer without consuming it, and as
such, it can loop for one more reason. That is, if we hit the end of
the reader page or any page, it will go to the next page and try again.

That is, we have this:

 1. iter->head > iter->head_page->page->commit
    (rb_inc_iter() which moves the iter to the next page)
    try again

 2. event = rb_iter_head_event()
    event->type_len == RINGBUF_TYPE_TIME_EXTEND
    rb_advance_iter()
    try again

 3. read the event.

But we never get to 3, because the count is greater than 2 and we
cause the WARNING and return NULL.

Up the counter to 3.

Fixes: 69d1b839f7ee "ring-buffer: Bind time extend and data events together"
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/ring_buffer.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 933d1838488b..5efbc122e5ce 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1980,7 +1980,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
 
 /**
  * rb_update_event - update event type and data
- * @event: the even to update
+ * @event: the event to update
  * @type: the type of event
  * @length: the size of the event field in the ring buffer
  *
@@ -3755,12 +3755,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 		return NULL;
 
 	/*
-	 * We repeat when a time extend is encountered.
-	 * Since the time extend is always attached to a data event,
-	 * we should never loop more than once.
-	 * (We never hit the following condition more than twice).
+	 * We repeat when a time extend is encountered or we hit
+	 * the end of the page. Since the time extend is always attached
+	 * to a data event, we should never loop more than three times.
+	 * Once for going to next page, once on time extend, and
+	 * finally once to get the event.
+	 * (We never hit the following condition more than thrice).
 	 */
-	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3))
 		return NULL;
 
 	if (rb_per_cpu_empty(cpu_buffer))

From 8c30f22757c97041750fddce8ea11c6d7231574a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 28 Jul 2014 16:26:53 -0700
Subject: [PATCH 0350/1185] mnt: Only change user settable mount flags in
 remount

commit a6138db815df5ee542d848318e5dae681590fccd upstream.

Kenton Varda <kenton@sandstorm.io> discovered that by remounting a
read-only bind mount read-only in a user namespace the
MNT_LOCK_READONLY bit would be cleared, allowing an unprivileged user
to remount a read-only mount read-write.

Correct this by replacing the mask of mount flags to preserve
with a mask of mount flags that may be changed, and preserve
all others.   This ensures that any future bugs with this mask and
remount will fail in an easy to detect way where new mount flags
simply won't change.

Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/namespace.c        | 2 +-
 include/linux/mount.h | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index a45ba4f267fe..a438e4c81b0b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1805,7 +1805,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 		err = do_remount_sb(sb, flags, data, 0);
 	if (!err) {
 		br_write_lock(&vfsmount_lock);
-		mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
+		mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
 		mnt->mnt.mnt_flags = mnt_flags;
 		br_write_unlock(&vfsmount_lock);
 	}
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 73005f9957ea..16fc05d816d4 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -42,7 +42,9 @@ struct mnt_namespace;
  * flag, consider how it interacts with shared mounts.
  */
 #define MNT_SHARED_MASK	(MNT_UNBINDABLE)
-#define MNT_PROPAGATION_MASK	(MNT_SHARED | MNT_UNBINDABLE)
+#define MNT_USER_SETTABLE_MASK  (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \
+				 | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \
+				 | MNT_READONLY)
 
 
 #define MNT_INTERNAL	0x4000

From 81d4c13ebbdcb69b2d56b3bc5e626b1a881421cf Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 28 Jul 2014 17:10:56 -0700
Subject: [PATCH 0351/1185] mnt: Move the test for MNT_LOCK_READONLY from
 change_mount_flags into do_remount

commit 07b645589dcda8b7a5249e096fece2a67556f0f4 upstream.

There are no races as locked mount flags are guaranteed to never change.

Moving the test into do_remount makes it more visible, and ensures all
filesystem remounts pass the MNT_LOCK_READONLY permission check.  This
second case is not an issue today as filesystem remounts are guarded
by capable(CAP_SYS_ADMIN) and thus will always fail in less privileged
mount namespaces, but it could become an issue in the future.

Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/namespace.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index a438e4c81b0b..515cbff64c93 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1764,9 +1764,6 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
 	if (readonly_request == __mnt_is_readonly(mnt))
 		return 0;
 
-	if (mnt->mnt_flags & MNT_LOCK_READONLY)
-		return -EPERM;
-
 	if (readonly_request)
 		error = mnt_make_readonly(real_mount(mnt));
 	else
@@ -1792,6 +1789,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 	if (path->dentry != path->mnt->mnt_root)
 		return -EINVAL;
 
+	/* Don't allow changing of locked mnt flags.
+	 *
+	 * No locks need to be held here while testing the various
+	 * MNT_LOCK flags because those flags can never be cleared
+	 * once they are set.
+	 */
+	if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
+	    !(mnt_flags & MNT_READONLY)) {
+		return -EPERM;
+	}
 	err = security_sb_remount(sb, data);
 	if (err)
 		return err;

From 187985d9395c7c093e9a565c87c6547c16009ddf Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 28 Jul 2014 17:26:07 -0700
Subject: [PATCH 0352/1185] mnt: Correct permission checks in do_remount

commit 9566d6742852c527bf5af38af5cbb878dad75705 upstream.

While investigating the issue wherein "mount --bind -oremount,ro ..."
would result in later "mount --bind -oremount,rw" succeeding even if
the mount started off locked I realized that there are several
additional mount flags that should be locked and are not.

In particular MNT_NOSUID, MNT_NODEV, MNT_NOEXEC, and the atime
flags in addition to MNT_READONLY should all be locked.  These
flags are all per superblock, can all be changed with MS_BIND,
and should not be changeable if set by a more privileged user.

The following additions to the current logic are added in this patch.
- nosuid may not be clearable by a less privileged user.
- nodev  may not be clearable by a less privileged user.
- noexec may not be clearable by a less privileged user.
- atime flags may not be changeable by a less privileged user.

The logic with atime is that always setting atime on access is a
global policy and backup software and auditing software could break if
atime bits are not updated (when they are configured to be updated),
and serious performance degradation could result (DOS attack) if atime
updates happen when they have been explicitly disabled.  Therefore an
unprivileged user should not be able to mess with the atime bits set
by a more privileged user.

The additional restrictions are implemented with the addition of
MNT_LOCK_NOSUID, MNT_LOCK_NODEV, MNT_LOCK_NOEXEC, and MNT_LOCK_ATIME
mnt flags.

Taken together these changes and the fixes for MNT_LOCK_READONLY
should make it safe for an unprivileged user to create a user
namespace and to call "mount --bind -o remount,... ..." without
the danger of mount flags being changed maliciously.

Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/namespace.c        | 36 +++++++++++++++++++++++++++++++++---
 include/linux/mount.h |  5 +++++
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 515cbff64c93..99748ff4065f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -828,8 +828,21 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 
 	mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
 	/* Don't allow unprivileged users to change mount flags */
-	if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
-		mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
+	if (flag & CL_UNPRIVILEGED) {
+		mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
+
+		if (mnt->mnt.mnt_flags & MNT_READONLY)
+			mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
+
+		if (mnt->mnt.mnt_flags & MNT_NODEV)
+			mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;
+
+		if (mnt->mnt.mnt_flags & MNT_NOSUID)
+			mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;
+
+		if (mnt->mnt.mnt_flags & MNT_NOEXEC)
+			mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
+	}
 
 	atomic_inc(&sb->s_active);
 	mnt->mnt.mnt_sb = sb;
@@ -1799,6 +1812,23 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 	    !(mnt_flags & MNT_READONLY)) {
 		return -EPERM;
 	}
+	if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
+	    !(mnt_flags & MNT_NODEV)) {
+		return -EPERM;
+	}
+	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
+	    !(mnt_flags & MNT_NOSUID)) {
+		return -EPERM;
+	}
+	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
+	    !(mnt_flags & MNT_NOEXEC)) {
+		return -EPERM;
+	}
+	if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
+	    ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
+		return -EPERM;
+	}
+
 	err = security_sb_remount(sb, data);
 	if (err)
 		return err;
@@ -1998,7 +2028,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
 		 */
 		if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
 			flags |= MS_NODEV;
-			mnt_flags |= MNT_NODEV;
+			mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
 		}
 	}
 
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 16fc05d816d4..8eeb8f6ab110 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -46,9 +46,14 @@ struct mnt_namespace;
 				 | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \
 				 | MNT_READONLY)
 
+#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
 
 #define MNT_INTERNAL	0x4000
 
+#define MNT_LOCK_ATIME		0x040000
+#define MNT_LOCK_NOEXEC		0x080000
+#define MNT_LOCK_NOSUID		0x100000
+#define MNT_LOCK_NODEV		0x200000
 #define MNT_LOCK_READONLY	0x400000
 
 struct vfsmount {

From 99dd97b843562853e01a134e8d5c13a87d156795 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 28 Jul 2014 17:36:04 -0700
Subject: [PATCH 0353/1185] mnt: Change the default remount atime from relatime
 to the existing value

commit ffbc6f0ead47fa5a1dc9642b0331cb75c20a640e upstream.

Since March 2009 the kernel has defaulted to relatime whenever no
MS_...ATIME flags are passed.

Defaulting to relatime instead of the existing atime state during a
remount is silly, and causes problems in practice for people who don't
specify any MS_...ATIME flags and expect to get the default filesystem
atime setting.  Those users may encounter a permission error because the
default atime setting does not work.

A default that does not work and causes permission problems is
ridiculous, so preserve the existing value to have a default
atime setting that is always guaranteed to work.

Using the default atime setting in this way is particularly
interesting for applications built to run in restricted userspace
environments without /proc mounted, as the existing atime mount
options of a filesystem can not be read from /proc/mounts.

In practice this fixes user space that uses the default atime
setting on remount that are broken by the permission checks
keeping less privileged users from changing more privileged users
atime settings.

Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/namespace.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/namespace.c b/fs/namespace.c
index 99748ff4065f..00409add4d96 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2346,6 +2346,14 @@ long do_mount(const char *dev_name, const char *dir_name,
 	if (flags & MS_RDONLY)
 		mnt_flags |= MNT_READONLY;
 
+	/* The default atime for remount is preservation */
+	if ((flags & MS_REMOUNT) &&
+	    ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
+		       MS_STRICTATIME)) == 0)) {
+		mnt_flags &= ~MNT_ATIME_MASK;
+		mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
+	}
+
 	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
 		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
 		   MS_STRICTATIME);

From bbeed681a5d5f845fad2c097920ca8493f2419f6 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 29 Jul 2014 15:50:44 -0700
Subject: [PATCH 0354/1185] mnt: Add tests for unprivileged remount cases that
 have found to be faulty

commit db181ce011e3c033328608299cd6fac06ea50130 upstream.

Kenton Varda <kenton@sandstorm.io> discovered that by remounting a
read-only bind mount read-only in a user namespace the
MNT_LOCK_READONLY bit would be cleared, allowing an unprivileged user
to remount a read-only mount read-write.

Upon review of the code in remount it was discovered that the code allowed
nosuid, noexec, and nodev to be cleared.  It was also discovered that
the code was allowing the per mount atime flags to be changed.

The first naive patch to fix these issues contained the flaw that using
default atime settings when remounting a filesystem could be disallowed.

To avoid these problems in the future add tests to ensure unprivileged
remounts are succeeding and failing at the appropriate times.

Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/Makefile              |   1 +
 tools/testing/selftests/mount/Makefile        |  17 ++
 .../mount/unprivileged-remount-test.c         | 242 ++++++++++++++++++
 3 files changed, 260 insertions(+)
 create mode 100644 tools/testing/selftests/mount/Makefile
 create mode 100644 tools/testing/selftests/mount/unprivileged-remount-test.c

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 0a63658065f0..2cee2b79b4de 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -4,6 +4,7 @@ TARGETS += efivarfs
 TARGETS += kcmp
 TARGETS += memory-hotplug
 TARGETS += mqueue
+TARGETS += mount
 TARGETS += net
 TARGETS += ptrace
 TARGETS += vm
diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile
new file mode 100644
index 000000000000..337d853c2b72
--- /dev/null
+++ b/tools/testing/selftests/mount/Makefile
@@ -0,0 +1,17 @@
+# Makefile for mount selftests.
+
+all: unprivileged-remount-test
+
+unprivileged-remount-test: unprivileged-remount-test.c
+	gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test
+
+# Allow specific tests to be selected.
+test_unprivileged_remount: unprivileged-remount-test
+	@if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi
+
+run_tests: all test_unprivileged_remount
+
+clean:
+	rm -f unprivileged-remount-test
+
+.PHONY: all test_unprivileged_remount
diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c
new file mode 100644
index 000000000000..1b3ff2fda4d0
--- /dev/null
+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
@@ -0,0 +1,242 @@
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <stdbool.h>
+#include <stdarg.h>
+
+#ifndef CLONE_NEWNS
+# define CLONE_NEWNS 0x00020000
+#endif
+#ifndef CLONE_NEWUTS
+# define CLONE_NEWUTS 0x04000000
+#endif
+#ifndef CLONE_NEWIPC
+# define CLONE_NEWIPC 0x08000000
+#endif
+#ifndef CLONE_NEWNET
+# define CLONE_NEWNET 0x40000000
+#endif
+#ifndef CLONE_NEWUSER
+# define CLONE_NEWUSER 0x10000000
+#endif
+#ifndef CLONE_NEWPID
+# define CLONE_NEWPID 0x20000000
+#endif
+
+#ifndef MS_RELATIME
+#define MS_RELATIME (1 << 21)
+#endif
+#ifndef MS_STRICTATIME
+#define MS_STRICTATIME (1 << 24)
+#endif
+
+static void die(char *fmt, ...)
+{
+	va_list ap;
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	exit(EXIT_FAILURE);
+}
+
+static void write_file(char *filename, char *fmt, ...)
+{
+	char buf[4096];
+	int fd;
+	ssize_t written;
+	int buf_len;
+	va_list ap;
+
+	va_start(ap, fmt);
+	buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
+	va_end(ap);
+	if (buf_len < 0) {
+		die("vsnprintf failed: %s\n",
+		    strerror(errno));
+	}
+	if (buf_len >= sizeof(buf)) {
+		die("vsnprintf output truncated\n");
+	}
+
+	fd = open(filename, O_WRONLY);
+	if (fd < 0) {
+		die("open of %s failed: %s\n",
+		    filename, strerror(errno));
+	}
+	written = write(fd, buf, buf_len);
+	if (written != buf_len) {
+		if (written >= 0) {
+			die("short write to %s\n", filename);
+		} else {
+			die("write to %s failed: %s\n",
+				filename, strerror(errno));
+		}
+	}
+	if (close(fd) != 0) {
+		die("close of %s failed: %s\n",
+			filename, strerror(errno));
+	}
+}
+
+static void create_and_enter_userns(void)
+{
+	uid_t uid;
+	gid_t gid;
+
+	uid = getuid();
+	gid = getgid();
+
+	if (unshare(CLONE_NEWUSER) !=0) {
+		die("unshare(CLONE_NEWUSER) failed: %s\n",
+			strerror(errno));
+	}
+
+	write_file("/proc/self/uid_map", "0 %d 1", uid);
+	write_file("/proc/self/gid_map", "0 %d 1", gid);
+
+	if (setgroups(0, NULL) != 0) {
+		die("setgroups failed: %s\n",
+			strerror(errno));
+	}
+	if (setgid(0) != 0) {
+		die ("setgid(0) failed %s\n",
+			strerror(errno));
+	}
+	if (setuid(0) != 0) {
+		die("setuid(0) failed %s\n",
+			strerror(errno));
+	}
+}
+
+static
+bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
+{
+	pid_t child;
+
+	child = fork();
+	if (child == -1) {
+		die("fork failed: %s\n",
+			strerror(errno));
+	}
+	if (child != 0) { /* parent */
+		pid_t pid;
+		int status;
+		pid = waitpid(child, &status, 0);
+		if (pid == -1) {
+			die("waitpid failed: %s\n",
+				strerror(errno));
+		}
+		if (pid != child) {
+			die("waited for %d got %d\n",
+				child, pid);
+		}
+		if (!WIFEXITED(status)) {
+			die("child did not terminate cleanly\n");
+		}
+		return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+	}
+
+	create_and_enter_userns();
+	if (unshare(CLONE_NEWNS) != 0) {
+		die("unshare(CLONE_NEWNS) failed: %s\n",
+			strerror(errno));
+	}
+
+	if (mount("testing", "/tmp", "ramfs", mount_flags, NULL) != 0) {
+		die("mount of /tmp failed: %s\n",
+			strerror(errno));
+	}
+
+	create_and_enter_userns();
+
+	if (unshare(CLONE_NEWNS) != 0) {
+		die("unshare(CLONE_NEWNS) failed: %s\n",
+			strerror(errno));
+	}
+
+	if (mount("/tmp", "/tmp", "none",
+		  MS_REMOUNT | MS_BIND | remount_flags, NULL) != 0) {
+		/* system("cat /proc/self/mounts"); */
+		die("remount of /tmp failed: %s\n",
+		    strerror(errno));
+	}
+
+	if (mount("/tmp", "/tmp", "none",
+		  MS_REMOUNT | MS_BIND | invalid_flags, NULL) == 0) {
+		/* system("cat /proc/self/mounts"); */
+		die("remount of /tmp with invalid flags "
+		    "succeeded unexpectedly\n");
+	}
+	exit(EXIT_SUCCESS);
+}
+
+static bool test_unpriv_remount_simple(int mount_flags)
+{
+	return test_unpriv_remount(mount_flags, mount_flags, 0);
+}
+
+static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags)
+{
+	return test_unpriv_remount(mount_flags, mount_flags, invalid_flags);
+}
+
+int main(int argc, char **argv)
+{
+	if (!test_unpriv_remount_simple(MS_RDONLY|MS_NODEV)) {
+		die("MS_RDONLY malfunctions\n");
+	}
+	if (!test_unpriv_remount_simple(MS_NODEV)) {
+		die("MS_NODEV malfunctions\n");
+	}
+	if (!test_unpriv_remount_simple(MS_NOSUID|MS_NODEV)) {
+		die("MS_NOSUID malfunctions\n");
+	}
+	if (!test_unpriv_remount_simple(MS_NOEXEC|MS_NODEV)) {
+		die("MS_NOEXEC malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODEV,
+				       MS_NOATIME|MS_NODEV))
+	{
+		die("MS_RELATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODEV,
+				       MS_NOATIME|MS_NODEV))
+	{
+		die("MS_STRICTATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODEV,
+				       MS_STRICTATIME|MS_NODEV))
+	{
+		die("MS_RELATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME|MS_NODEV,
+				       MS_NOATIME|MS_NODEV))
+	{
+		die("MS_RELATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME|MS_NODEV,
+				       MS_NOATIME|MS_NODEV))
+	{
+		die("MS_RELATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME|MS_NODEV,
+				       MS_STRICTATIME|MS_NODEV))
+	{
+		die("MS_RELATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount(MS_STRICTATIME|MS_NODEV, MS_NODEV,
+				 MS_NOATIME|MS_NODEV))
+	{
+		die("Default atime malfunctions\n");
+	}
+	return EXIT_SUCCESS;
+}

From 819f3e7ae9c1ad79b7f9583ea687cdcebe7a0fe1 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Tue, 15 Jul 2014 12:25:28 +0400
Subject: [PATCH 0355/1185] Bluetooth: never linger on process exit

commit 093facf3634da1b0c2cc7ed106f1983da901bbab upstream.

If the current process is exiting, lingering on socket close will make
it unkillable, so we should avoid it.

Reproducer:

  #include <sys/types.h>
  #include <sys/socket.h>

  #define BTPROTO_L2CAP   0
  #define BTPROTO_SCO     2
  #define BTPROTO_RFCOMM  3

  int main()
  {
          int fd;
          struct linger ling;

          fd = socket(PF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM);
          //or: fd = socket(PF_BLUETOOTH, SOCK_DGRAM, BTPROTO_L2CAP);
          //or: fd = socket(PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_SCO);

          ling.l_onoff = 1;
          ling.l_linger = 1000000000;
          setsockopt(fd, SOL_SOCKET, SO_LINGER, &ling, sizeof(ling));

          return 0;
  }

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bluetooth/l2cap_sock.c  | 3 ++-
 net/bluetooth/rfcomm/sock.c | 3 ++-
 net/bluetooth/sco.c         | 6 ++++--
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 302d29b3744d..5f36f70ce44d 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -887,7 +887,8 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
 		l2cap_chan_close(chan, 0);
 		lock_sock(sk);
 
-		if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
+		if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+		    !(current->flags & PF_EXITING))
 			err = bt_sock_wait_state(sk, BT_CLOSED,
 						 sk->sk_lingertime);
 	}
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index c1c6028e389a..7ca014daa5ab 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -887,7 +887,8 @@ static int rfcomm_sock_shutdown(struct socket *sock, int how)
 		sk->sk_shutdown = SHUTDOWN_MASK;
 		__rfcomm_sock_close(sk);
 
-		if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
+		if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+		    !(current->flags & PF_EXITING))
 			err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
 	}
 	release_sock(sk);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 2bb1d3a5e76b..c9ae6b703c13 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -858,7 +858,8 @@ static int sco_sock_shutdown(struct socket *sock, int how)
 		sco_sock_clear_timer(sk);
 		__sco_sock_close(sk);
 
-		if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
+		if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+		    !(current->flags & PF_EXITING))
 			err = bt_sock_wait_state(sk, BT_CLOSED,
 						 sk->sk_lingertime);
 	}
@@ -878,7 +879,8 @@ static int sco_sock_release(struct socket *sock)
 
 	sco_sock_close(sk);
 
-	if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) {
+	if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+	    !(current->flags & PF_EXITING)) {
 		lock_sock(sk);
 		err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
 		release_sock(sk);

From 07b41b34490bb0286a1537afe0190bb197f973eb Mon Sep 17 00:00:00 2001
From: Vignesh Raman <Vignesh_Raman@mentor.com>
Date: Tue, 22 Jul 2014 19:24:25 +0530
Subject: [PATCH 0356/1185] Bluetooth: Avoid use of session socket after the
 session gets freed

commit 32333edb82fb2009980eefc5518100068147ab82 upstream.

The commits 08c30aca9e698faddebd34f81e1196295f9dc063 "Bluetooth: Remove
RFCOMM session refcnt" and 8ff52f7d04d9cc31f1e81dcf9a2ba6335ed34905
"Bluetooth: Return RFCOMM session ptrs to avoid freed session"
allow rfcomm_recv_ua and rfcomm_session_close to delete the session
(and free the corresponding socket) and propagate NULL session pointer
to the upper callers.

Additional fix is required to terminate the loop in rfcomm_process_rx
function to avoid use of freed 'sk' memory.

The issue is only reproducible with kernel option CONFIG_PAGE_POISONING
enabled making freed memory being changed and filled up with fixed char
value used to unmask use-after-free issues.

Signed-off-by: Vignesh Raman <Vignesh_Raman@mentor.com>
Signed-off-by: Vitaly Kuzmichev <Vitaly_Kuzmichev@mentor.com>
Acked-by: Dean Jenkins <Dean_Jenkins@mentor.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bluetooth/rfcomm/core.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index ca957d34b0c8..19ba192e9dbf 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1857,10 +1857,13 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s)
 	/* Get data directly from socket receive queue without copying it. */
 	while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
 		skb_orphan(skb);
-		if (!skb_linearize(skb))
+		if (!skb_linearize(skb)) {
 			s = rfcomm_recv_frame(s, skb);
-		else
+			if (!s)
+				break;
+		} else {
 			kfree_skb(skb);
+		}
 	}
 
 	if (s && (sk->sk_state == BT_CLOSED))

From 318a3d59cea8f6790a364f4f8ab24a7aa9514ada Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 13 Aug 2014 09:57:07 +1000
Subject: [PATCH 0357/1185] md/raid6: avoid data corruption during recovery of
 double-degraded RAID6

commit 9c4bdf697c39805078392d5ddbbba5ae5680e0dd upstream.

During recovery of a double-degraded RAID6 it is possible for
some blocks not to be recovered properly, leading to corruption.

If a write happens to one block in a stripe that would be written to a
missing device, and at the same time that stripe is recovering data
to the other missing device, then that recovered data may not be written.

This patch skips, in the double-degraded case, an optimisation that is
only safe for single-degraded arrays.

Bug was introduced in 2.6.32 and fix is suitable for any kernel since
then.  In an older kernel with separate handle_stripe5() and
handle_stripe6() functions the patch must change handle_stripe6().

Fixes: 6c0069c0ae9659e3a91b68eaed06a5c6c37f45c8
Cc: Yuri Tikhonov <yur@emcraft.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Reported-by: "Manibalan P" <pmanibalan@amiindia.co.in>
Tested-by: "Manibalan P" <pmanibalan@amiindia.co.in>
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1090423
Signed-off-by: NeilBrown <neilb@suse.de>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/raid5.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 5e3c25d4562c..774f81423d78 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3561,6 +3561,8 @@ static void handle_stripe(struct stripe_head *sh)
 				set_bit(R5_Wantwrite, &dev->flags);
 				if (prexor)
 					continue;
+				if (s.failed > 1)
+					continue;
 				if (!test_bit(R5_Insync, &dev->flags) ||
 				    ((i == sh->pd_idx || i == sh->qd_idx)  &&
 				     s.failed == 0))

From 1075d2bdf6334b8373c00978403a69a4d9fdb883 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 18 Aug 2014 13:56:38 +1000
Subject: [PATCH 0358/1185] md/raid10: fix memory leak when reshaping a RAID10.

commit ce0b0a46955d1bb389684a2605dbcaa990ba0154 upstream.

raid10 reshape clears unwanted bits from a bio->bi_flags using
a method which, while clumsy, worked until 3.10 when BIO_OWNS_VEC
was added.
Since then it clears that bit but shouldn't.  This results in a
memory leak.

So change to use the approved method of clearing unwanted bits.

As this causes a memory leak which can consume all of memory
the fix is suitable for -stable.

Fixes: a38352e0ac02dbbd4fa464dc22d1352b5fbd06fd
Reported-by: mdraid.pkoch@dfgh.net (Peter Koch)
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/raid10.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d2f8cd332b4a..973e26fc18c1 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4398,7 +4398,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 	read_bio->bi_private = r10_bio;
 	read_bio->bi_end_io = end_sync_read;
 	read_bio->bi_rw = READ;
-	read_bio->bi_flags &= ~(BIO_POOL_MASK - 1);
+	read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
 	read_bio->bi_flags |= 1 << BIO_UPTODATE;
 	read_bio->bi_vcnt = 0;
 	read_bio->bi_size = 0;

From 26584e18c463bcf54fbd97e7c76f6f00ba13b1c9 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 18 Aug 2014 13:59:50 +1000
Subject: [PATCH 0359/1185] md/raid10: Fix memory leak when raid10 reshape
 completes.

commit b39685526f46976bcd13aa08c82480092befa46c upstream.

When a raid10 commences a resync/recovery/reshape it allocates
some buffer space.
When a resync/recovery completes the buffer space is freed.  But not
when the reshape completes.
This can result in a small memory leak.

There is a subtle side-effect of this bug.  When a RAID10 is reshaped
to a larger array (more devices), the reshape is immediately followed
by a "resync" of the new space.  This "resync" will use the buffer
space which was allocated for "reshape".  This can cause problems
including a "BUG" in the SCSI layer.  So this is suitable for -stable.

Fixes: 3ea7daa5d7fde47cd41f4d56c2deb949114da9d6
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/raid10.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 973e26fc18c1..353c2b392652 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2948,6 +2948,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 		 */
 		if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
 			end_reshape(conf);
+			close_sync(conf);
 			return 0;
 		}
 

From 433d80d62562bfbc30cbf426206f29c9153bbc15 Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Fri, 25 Jul 2014 09:11:33 -0500
Subject: [PATCH 0360/1185] RDMA/iwcm: Use a default listen backlog if needed

commit 2f0304d21867476394cd51a54e97f7273d112261 upstream.

If the user creates a listening cm_id with backlog of 0 the IWCM ends
up not allowing any connection requests at all.  The correct behavior
is for the IWCM to pick a default value if the user backlog parameter
is zero.

Lustre from version 1.8.8 onward uses a backlog of 0, which breaks
iwarp support without this fix.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/core/iwcm.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index c47c2034ca71..4293e89bbbdd 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -46,6 +46,7 @@
 #include <linux/completion.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/sysctl.h>
 
 #include <rdma/iw_cm.h>
 #include <rdma/ib_addr.h>
@@ -65,6 +66,20 @@ struct iwcm_work {
 	struct list_head free_list;
 };
 
+static unsigned int default_backlog = 256;
+
+static struct ctl_table_header *iwcm_ctl_table_hdr;
+static struct ctl_table iwcm_ctl_table[] = {
+	{
+		.procname	= "default_backlog",
+		.data		= &default_backlog,
+		.maxlen		= sizeof(default_backlog),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{ }
+};
+
 /*
  * The following services provide a mechanism for pre-allocating iwcm_work
  * elements.  The design pre-allocates them  based on the cm_id type:
@@ -419,6 +434,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
 
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 
+	if (!backlog)
+		backlog = default_backlog;
+
 	ret = alloc_work_entries(cm_id_priv, backlog);
 	if (ret)
 		return ret;
@@ -1024,11 +1042,20 @@ static int __init iw_cm_init(void)
 	if (!iwcm_wq)
 		return -ENOMEM;
 
+	iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm",
+						 iwcm_ctl_table);
+	if (!iwcm_ctl_table_hdr) {
+		pr_err("iw_cm: couldn't register sysctl paths\n");
+		destroy_workqueue(iwcm_wq);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
 static void __exit iw_cm_cleanup(void)
 {
+	unregister_net_sysctl_table(iwcm_ctl_table_hdr);
 	destroy_workqueue(iwcm_wq);
 }
 

From 9a9237c9e897fd73ee74689fc319a6412f4377f9 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Mon, 4 Aug 2014 12:43:26 +1000
Subject: [PATCH 0361/1185] xfs: quotacheck leaves dquot buffers without
 verifiers

commit 5fd364fee81a7888af806e42ed8a91c845894f2d upstream.

When running xfs/305, I noticed that quotacheck was flushing dquot
buffers that did not have the xfs_dquot_buf_ops verifiers attached:

XFS (vdb): _xfs_buf_ioapply: no ops on block 0x1dc8/0x1dc8
ffff880052489000: 44 51 01 04 00 00 65 b8 00 00 00 00 00 00 00 00  DQ....e.........
ffff880052489010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
ffff880052489020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
ffff880052489030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
CPU: 1 PID: 2376 Comm: mount Not tainted 3.16.0-rc2-dgc+ #306
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
 ffff88006fe38000 ffff88004a0ffae8 ffffffff81cf1cca 0000000000000001
 ffff88004a0ffb88 ffffffff814d50ca 000010004a0ffc70 0000000000000000
 ffff88006be56dc4 0000000000000021 0000000000001dc8 ffff88007c773d80
Call Trace:
 [<ffffffff81cf1cca>] dump_stack+0x45/0x56
 [<ffffffff814d50ca>] _xfs_buf_ioapply+0x3ca/0x3d0
 [<ffffffff810db520>] ? wake_up_state+0x20/0x20
 [<ffffffff814d51f5>] ? xfs_bdstrat_cb+0x55/0xb0
 [<ffffffff814d513b>] xfs_buf_iorequest+0x6b/0xd0
 [<ffffffff814d51f5>] xfs_bdstrat_cb+0x55/0xb0
 [<ffffffff814d53ab>] __xfs_buf_delwri_submit+0x15b/0x220
 [<ffffffff814d6040>] ? xfs_buf_delwri_submit+0x30/0x90
 [<ffffffff814d6040>] xfs_buf_delwri_submit+0x30/0x90
 [<ffffffff8150f89d>] xfs_qm_quotacheck+0x17d/0x3c0
 [<ffffffff81510591>] xfs_qm_mount_quotas+0x151/0x1e0
 [<ffffffff814ed01c>] xfs_mountfs+0x56c/0x7d0
 [<ffffffff814f0f12>] xfs_fs_fill_super+0x2c2/0x340
 [<ffffffff811c9fe4>] mount_bdev+0x194/0x1d0
 [<ffffffff814f0c50>] ? xfs_finish_flags+0x170/0x170
 [<ffffffff814ef0f5>] xfs_fs_mount+0x15/0x20
 [<ffffffff811ca8c9>] mount_fs+0x39/0x1b0
 [<ffffffff811e4d67>] vfs_kern_mount+0x67/0x120
 [<ffffffff811e757e>] do_mount+0x23e/0xad0
 [<ffffffff8117abde>] ? __get_free_pages+0xe/0x50
 [<ffffffff811e71e6>] ? copy_mount_options+0x36/0x150
 [<ffffffff811e8103>] SyS_mount+0x83/0xc0
 [<ffffffff81cfd40b>] tracesys+0xdd/0xe2

This was caused by dquot buffer readahead not attaching a verifier
structure to the buffer when readahead was issued, resulting in the
followup read of the buffer finding a valid buffer and so not
attaching new verifiers to the buffer as part of the read.

Also, when a verifier failure occurs, we then read the buffer
without verifiers. Attach the verifiers manually after this read so
that if the buffer is then written it will be verified that the
corruption has been repaired.

Further, when flushing a dquot we don't ask for a verifier when
reading in the dquot buffer the dquot belongs to. Most of the time
this isn't an issue because the buffer is still cached, but when it
is not cached it will result in writing the dquot buffer without
having the verifier attached.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/xfs/xfs_dquot.c | 3 ++-
 fs/xfs/xfs_qm.c    | 8 +++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 044e97a33c8d..bac3e1635b7d 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -1104,7 +1104,8 @@ xfs_qm_dqflush(
 	 * Get the buffer containing the on-disk dquot
 	 */
 	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
-				   mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL);
+				   mp->m_quotainfo->qi_dqchunklen, 0, &bp,
+				   &xfs_dquot_buf_ops);
 	if (error)
 		goto out_unlock;
 
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index b75c9bb6e71e..29d1ca567ed3 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -935,6 +935,12 @@ xfs_qm_dqiter_bufs(
 		if (error)
 			break;
 
+		/*
+		 * A corrupt buffer might not have a verifier attached, so
+		 * make sure we have the correct one attached before writeback
+		 * occurs.
+		 */
+		bp->b_ops = &xfs_dquot_buf_ops;
 		xfs_qm_reset_dqcounts(mp, bp, firstid, type);
 		xfs_buf_delwri_queue(bp, buffer_list);
 		xfs_buf_relse(bp);
@@ -1018,7 +1024,7 @@ xfs_qm_dqiterate(
 					xfs_buf_readahead(mp->m_ddev_targp,
 					       XFS_FSB_TO_DADDR(mp, rablkno),
 					       mp->m_quotainfo->qi_dqchunklen,
-					       NULL);
+					       &xfs_dquot_buf_ops);
 					rablkno++;
 				}
 			}

From 3430681f33b23ae206fca816ab8abf82c7a0f0fc Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 2 Sep 2014 12:12:51 +1000
Subject: [PATCH 0362/1185] xfs: don't dirty buffers beyond EOF

commit 22e757a49cf010703fcb9c9b4ef793248c39b0c2 upstream.

generic/263 is failing fsx at this point with a page spanning
EOF that cannot be invalidated. The operations are:

1190 mapwrite   0x52c00 thru    0x5e569 (0xb96a bytes)
1191 mapread    0x5c000 thru    0x5d636 (0x1637 bytes)
1192 write      0x5b600 thru    0x771ff (0x1bc00 bytes)

where 1190 extends EOF from 0x54000 to 0x5e569. When the direct IO
write attempts to invalidate the cached page over this range, it
fails with -EBUSY and so any attempt to do page invalidation fails.

The real question is this: Why can't that page be invalidated after
it has been written to disk and cleaned?

Well, there's data on the first two buffers in the page (1k block
size, 4k page), but the third buffer on the page (i.e. beyond EOF)
is failing drop_buffers because it's bh->b_state == 0x3, which is
BH_Uptodate | BH_Dirty.  IOWs, there's dirty buffers beyond EOF. Say
what?

OK, set_buffer_dirty() is called on all buffers from
__set_page_buffers_dirty(), regardless of whether the buffer is
beyond EOF or not, which means that when we get to ->writepage,
we have buffers marked dirty beyond EOF that we need to clean.
So, we need to implement our own .set_page_dirty method that
doesn't dirty buffers beyond EOF.

This is messy because the buffer code is not meant to be shared
and it has interesting locking issues on the buffer dirty bits.
So just copy and paste it and then modify it to suit what we need.

Note: the solutions the other filesystems and generic block code use
of marking the buffers clean in ->writepage does not work for XFS.
It still leaves dirty buffers beyond EOF and invalidations still
fail. Hence rather than play whack-a-mole, this patch simply
prevents those buffers from being dirtied in the first place.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/xfs/xfs_aops.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 41a695048be7..cfbb4c1b2f17 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1661,11 +1661,72 @@ xfs_vm_readpages(
 	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
 }
 
+/*
+ * This is basically a copy of __set_page_dirty_buffers() with one
+ * small tweak: buffers beyond EOF do not get marked dirty. If we mark them
+ * dirty, we'll never be able to clean them because we don't write buffers
+ * beyond EOF, and that means we can't invalidate pages that span EOF
+ * that have been marked dirty. Further, the dirty state can leak into
+ * the file interior if the file is extended, resulting in all sorts of
+ * bad things happening as the state does not match the underlying data.
+ *
+ * XXX: this really indicates that bufferheads in XFS need to die. Warts like
+ * this only exist because of bufferheads and how the generic code manages them.
+ */
+STATIC int
+xfs_vm_set_page_dirty(
+	struct page		*page)
+{
+	struct address_space	*mapping = page->mapping;
+	struct inode		*inode = mapping->host;
+	loff_t			end_offset;
+	loff_t			offset;
+	int			newly_dirty;
+
+	if (unlikely(!mapping))
+		return !TestSetPageDirty(page);
+
+	end_offset = i_size_read(inode);
+	offset = page_offset(page);
+
+	spin_lock(&mapping->private_lock);
+	if (page_has_buffers(page)) {
+		struct buffer_head *head = page_buffers(page);
+		struct buffer_head *bh = head;
+
+		do {
+			if (offset < end_offset)
+				set_buffer_dirty(bh);
+			bh = bh->b_this_page;
+			offset += 1 << inode->i_blkbits;
+		} while (bh != head);
+	}
+	newly_dirty = !TestSetPageDirty(page);
+	spin_unlock(&mapping->private_lock);
+
+	if (newly_dirty) {
+		/* sigh - __set_page_dirty() is static, so copy it here, too */
+		unsigned long flags;
+
+		spin_lock_irqsave(&mapping->tree_lock, flags);
+		if (page->mapping) {	/* Race with truncate? */
+			WARN_ON_ONCE(!PageUptodate(page));
+			account_page_dirtied(page, mapping);
+			radix_tree_tag_set(&mapping->page_tree,
+					page_index(page), PAGECACHE_TAG_DIRTY);
+		}
+		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+	}
+	return newly_dirty;
+}
+
 const struct address_space_operations xfs_address_space_operations = {
 	.readpage		= xfs_vm_readpage,
 	.readpages		= xfs_vm_readpages,
 	.writepage		= xfs_vm_writepage,
 	.writepages		= xfs_vm_writepages,
+	.set_page_dirty		= xfs_vm_set_page_dirty,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
 	.write_begin		= xfs_vm_write_begin,

From 1025b461b3bb78860e071f7c0bad301c104ac94a Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 2 Sep 2014 12:12:52 +1000
Subject: [PATCH 0363/1185] xfs: don't zero partial page cache pages during
 O_DIRECT writes

commit 834ffca6f7e345a79f6f2e2d131b0dfba8a4b67a upstream.

Similar to direct IO reads, direct IO writes are using
truncate_pagecache_range to invalidate the page cache. This is
incorrect due to the sub-block zeroing in the page cache that
truncate_pagecache_range() triggers.

This patch fixes things by using invalidate_inode_pages2_range
instead.  It preserves the page cache invalidation, but won't zero
any pages.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/xfs/xfs_file.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a5f2042aec8b..1028717d95f8 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -677,7 +677,15 @@ xfs_file_dio_aio_write(
 						    pos, -1);
 		if (ret)
 			goto out;
-		truncate_pagecache_range(VFS_I(ip), pos, -1);
+		/*
+		 * Invalidate whole pages. This can return an error if
+		 * we fail to invalidate a page, but this should never
+		 * happen on XFS. Warn if it does fail.
+		 */
+		ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+						pos >> PAGE_CACHE_SHIFT, -1);
+		WARN_ON_ONCE(ret);
+		ret = 0;
 	}
 
 	/*

From d96dbb069199aa4e5bf7406e67fa5d38cebdf2fb Mon Sep 17 00:00:00 2001
From: Chris Mason <clm@fb.com>
Date: Tue, 2 Sep 2014 12:12:52 +1000
Subject: [PATCH 0364/1185] xfs: don't zero partial page cache pages during
 O_DIRECT writes

commit 85e584da3212140ee80fd047f9058bbee0bc00d5 upstream.

xfs is using truncate_pagecache_range to invalidate the page cache
during DIO reads.  This is different from the other filesystems who
only invalidate pages during DIO writes.

truncate_pagecache_range is meant to be used when we are freeing the
underlying data structs from disk, so it will zero any partial
ranges in the page.  This means a DIO read can zero out part of the
page cache page, and it is possible the page will stay in cache.

buffered reads will find an up to date page with zeros instead of
the data actually on disk.

This patch fixes things by using invalidate_inode_pages2_range
instead.  It preserves the page cache invalidation, but won't zero
any pages.

[dchinner: catch error and warn if it fails. Comment.]

Signed-off-by: Chris Mason <clm@fb.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/xfs/xfs_file.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 1028717d95f8..9f457fedbcfc 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -298,7 +298,16 @@ xfs_file_aio_read(
 				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
 				return ret;
 			}
-			truncate_pagecache_range(VFS_I(ip), pos, -1);
+
+			/*
+			 * Invalidate whole pages. This can return an error if
+			 * we fail to invalidate a page, but this should never
+			 * happen on XFS. Warn if it does fail.
+			 */
+			ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+						pos >> PAGE_CACHE_SHIFT, -1);
+			WARN_ON_ONCE(ret);
+			ret = 0;
 		}
 		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
 	}

From b08633de6ba926a3412292b3239bf41a3f00912b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 31 Jul 2014 10:16:29 +1000
Subject: [PATCH 0365/1185] md/raid1,raid10: always abort recover on write
 error.

commit 2446dba03f9dabe0b477a126cbeb377854785b47 upstream.

Currently we don't abort recovery on a write error if the write error
to the recovering device was triggered by normal IO (as opposed to
recovery IO).

This means that for one bitmap region, the recovery might write to the
recovering device for a few sectors, then not bother for subsequent
sectors (as it never writes to failed devices).  In this case
the bitmap bit will be cleared, but it really shouldn't.

The result is that if the recovering device fails and is then re-added
(after fixing whatever hardware problem triggered the failure),
the second recovery won't redo the region it was in the middle of,
so some of the device will not be recovered properly.

If we abort the recovery, the region being processed will be cancelled
(bit not cleared) and the whole region will be retried.

As the bug can result in data corruption the patch is suitable for
-stable.  For kernels prior to 3.11 there is a conflict in raid10.c
which will require care.

Original-from: jiao hui <jiaohui@bwstor.com.cn>
Reported-and-tested-by: jiao hui <jiaohui@bwstor.com.cn>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/raid1.c  | 8 ++++----
 drivers/md/raid10.c | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 75771b2077c0..a176791509f6 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1406,12 +1406,12 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
 		mddev->degraded++;
 		set_bit(Faulty, &rdev->flags);
 		spin_unlock_irqrestore(&conf->device_lock, flags);
-		/*
-		 * if recovery is running, make sure it aborts.
-		 */
-		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	} else
 		set_bit(Faulty, &rdev->flags);
+	/*
+	 * if recovery is running, make sure it aborts.
+	 */
+	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
 	printk(KERN_ALERT
 	       "md/raid1:%s: Disk failure on %s, disabling device.\n"
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 353c2b392652..a1ea2a753912 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1681,11 +1681,11 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
 		spin_lock_irqsave(&conf->device_lock, flags);
 		mddev->degraded++;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
-		/*
-		 * if recovery is running, make sure it aborts.
-		 */
-		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	}
+	/*
+	 * If recovery is running, make sure it aborts.
+	 */
+	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	set_bit(Blocked, &rdev->flags);
 	set_bit(Faulty, &rdev->flags);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);

From a64897279757e1e98b6b1d98e0bba561b0f88331 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <ilya.dryomov@inktank.com>
Date: Fri, 8 Aug 2014 12:43:39 +0400
Subject: [PATCH 0366/1185] libceph: set last_piece in
 ceph_msg_data_pages_cursor_init() correctly

commit 5f740d7e1531099b888410e6bab13f68da9b1a4d upstream.

Determining ->last_piece based on the value of ->page_offset + length
is incorrect because length here is the length of the entire message.
->last_piece set to false even if page array data item length is <=
PAGE_SIZE, which results in invalid length passed to
ceph_tcp_{send,recv}page() and causes various asserts to fire.

    # cat pages-cursor-init.sh
    #!/bin/bash
    rbd create --size 10 --image-format 2 foo
    FOO_DEV=$(rbd map foo)
    dd if=/dev/urandom of=$FOO_DEV bs=1M &>/dev/null
    rbd snap create foo@snap
    rbd snap protect foo@snap
    rbd clone foo@snap bar
    # rbd_resize calls librbd rbd_resize(), size is in bytes
    ./rbd_resize bar $(((4 << 20) + 512))
    rbd resize --size 10 bar
    BAR_DEV=$(rbd map bar)
    # trigger a 512-byte copyup -- 512-byte page array data item
    dd if=/dev/urandom of=$BAR_DEV bs=1M count=1 seek=5

The problem exists only in ceph_msg_data_pages_cursor_init(),
ceph_msg_data_pages_advance() does the right thing.  The size_t cast is
unnecessary.

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Sage Weil <sage@redhat.com>
Reviewed-by: Alex Elder <elder@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ceph/messenger.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index b9d7df175700..6ff7d9dc240f 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -904,7 +904,7 @@ static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor,
 	BUG_ON(page_count > (int)USHRT_MAX);
 	cursor->page_count = (unsigned short)page_count;
 	BUG_ON(length > SIZE_MAX - cursor->page_offset);
-	cursor->last_piece = (size_t)cursor->page_offset + length <= PAGE_SIZE;
+	cursor->last_piece = cursor->page_offset + cursor->resid <= PAGE_SIZE;
 }
 
 static struct page *

From 2e1dbf27a941085ba21c23355006f10d297faec9 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <ilya.dryomov@inktank.com>
Date: Mon, 8 Sep 2014 17:25:34 +0400
Subject: [PATCH 0367/1185] libceph: add process_one_ticket() helper

commit 597cda357716a3cf8d994cb11927af917c8d71fa upstream.

Add a helper for processing individual cephx auth tickets.  Needed for
the next commit, which deals with allocating ticket buffers.  (Most of
the diff here is whitespace - view with git diff -b).

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Sage Weil <sage@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ceph/auth_x.c | 228 +++++++++++++++++++++++++---------------------
 1 file changed, 124 insertions(+), 104 deletions(-)

diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 96238ba95f2b..0eb146dce1aa 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -129,17 +129,131 @@ static void remove_ticket_handler(struct ceph_auth_client *ac,
 	kfree(th);
 }
 
+static int process_one_ticket(struct ceph_auth_client *ac,
+			      struct ceph_crypto_key *secret,
+			      void **p, void *end,
+			      void *dbuf, void *ticket_buf)
+{
+	struct ceph_x_info *xi = ac->private;
+	int type;
+	u8 tkt_struct_v, blob_struct_v;
+	struct ceph_x_ticket_handler *th;
+	void *dp, *dend;
+	int dlen;
+	char is_enc;
+	struct timespec validity;
+	struct ceph_crypto_key old_key;
+	void *tp, *tpend;
+	struct ceph_timespec new_validity;
+	struct ceph_crypto_key new_session_key;
+	struct ceph_buffer *new_ticket_blob;
+	unsigned long new_expires, new_renew_after;
+	u64 new_secret_id;
+	int ret;
+
+	ceph_decode_need(p, end, sizeof(u32) + 1, bad);
+
+	type = ceph_decode_32(p);
+	dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
+
+	tkt_struct_v = ceph_decode_8(p);
+	if (tkt_struct_v != 1)
+		goto bad;
+
+	th = get_ticket_handler(ac, type);
+	if (IS_ERR(th)) {
+		ret = PTR_ERR(th);
+		goto out;
+	}
+
+	/* blob for me */
+	dlen = ceph_x_decrypt(secret, p, end, dbuf,
+			      TEMP_TICKET_BUF_LEN);
+	if (dlen <= 0) {
+		ret = dlen;
+		goto out;
+	}
+	dout(" decrypted %d bytes\n", dlen);
+	dp = dbuf;
+	dend = dp + dlen;
+
+	tkt_struct_v = ceph_decode_8(&dp);
+	if (tkt_struct_v != 1)
+		goto bad;
+
+	memcpy(&old_key, &th->session_key, sizeof(old_key));
+	ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
+	if (ret)
+		goto out;
+
+	ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
+	ceph_decode_timespec(&validity, &new_validity);
+	new_expires = get_seconds() + validity.tv_sec;
+	new_renew_after = new_expires - (validity.tv_sec / 4);
+	dout(" expires=%lu renew_after=%lu\n", new_expires,
+	     new_renew_after);
+
+	/* ticket blob for service */
+	ceph_decode_8_safe(p, end, is_enc, bad);
+	tp = ticket_buf;
+	if (is_enc) {
+		/* encrypted */
+		dout(" encrypted ticket\n");
+		dlen = ceph_x_decrypt(&old_key, p, end, ticket_buf,
+				      TEMP_TICKET_BUF_LEN);
+		if (dlen < 0) {
+			ret = dlen;
+			goto out;
+		}
+		dlen = ceph_decode_32(&tp);
+	} else {
+		/* unencrypted */
+		ceph_decode_32_safe(p, end, dlen, bad);
+		ceph_decode_need(p, end, dlen, bad);
+		ceph_decode_copy(p, ticket_buf, dlen);
+	}
+	tpend = tp + dlen;
+	dout(" ticket blob is %d bytes\n", dlen);
+	ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
+	blob_struct_v = ceph_decode_8(&tp);
+	new_secret_id = ceph_decode_64(&tp);
+	ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
+	if (ret)
+		goto out;
+
+	/* all is well, update our ticket */
+	ceph_crypto_key_destroy(&th->session_key);
+	if (th->ticket_blob)
+		ceph_buffer_put(th->ticket_blob);
+	th->session_key = new_session_key;
+	th->ticket_blob = new_ticket_blob;
+	th->validity = new_validity;
+	th->secret_id = new_secret_id;
+	th->expires = new_expires;
+	th->renew_after = new_renew_after;
+	dout(" got ticket service %d (%s) secret_id %lld len %d\n",
+	     type, ceph_entity_type_name(type), th->secret_id,
+	     (int)th->ticket_blob->vec.iov_len);
+	xi->have_keys |= th->service;
+
+out:
+	return ret;
+
+bad:
+	ret = -EINVAL;
+	goto out;
+}
+
 static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 				    struct ceph_crypto_key *secret,
 				    void *buf, void *end)
 {
-	struct ceph_x_info *xi = ac->private;
-	int num;
 	void *p = buf;
-	int ret;
 	char *dbuf;
 	char *ticket_buf;
 	u8 reply_struct_v;
+	u32 num;
+	int ret;
 
 	dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
 	if (!dbuf)
@@ -150,112 +264,18 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 	if (!ticket_buf)
 		goto out_dbuf;
 
-	ceph_decode_need(&p, end, 1 + sizeof(u32), bad);
-	reply_struct_v = ceph_decode_8(&p);
+	ceph_decode_8_safe(&p, end, reply_struct_v, bad);
 	if (reply_struct_v != 1)
-		goto bad;
-	num = ceph_decode_32(&p);
+		return -EINVAL;
+
+	ceph_decode_32_safe(&p, end, num, bad);
 	dout("%d tickets\n", num);
+
 	while (num--) {
-		int type;
-		u8 tkt_struct_v, blob_struct_v;
-		struct ceph_x_ticket_handler *th;
-		void *dp, *dend;
-		int dlen;
-		char is_enc;
-		struct timespec validity;
-		struct ceph_crypto_key old_key;
-		void *tp, *tpend;
-		struct ceph_timespec new_validity;
-		struct ceph_crypto_key new_session_key;
-		struct ceph_buffer *new_ticket_blob;
-		unsigned long new_expires, new_renew_after;
-		u64 new_secret_id;
-
-		ceph_decode_need(&p, end, sizeof(u32) + 1, bad);
-
-		type = ceph_decode_32(&p);
-		dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
-
-		tkt_struct_v = ceph_decode_8(&p);
-		if (tkt_struct_v != 1)
-			goto bad;
-
-		th = get_ticket_handler(ac, type);
-		if (IS_ERR(th)) {
-			ret = PTR_ERR(th);
-			goto out;
-		}
-
-		/* blob for me */
-		dlen = ceph_x_decrypt(secret, &p, end, dbuf,
-				      TEMP_TICKET_BUF_LEN);
-		if (dlen <= 0) {
-			ret = dlen;
-			goto out;
-		}
-		dout(" decrypted %d bytes\n", dlen);
-		dend = dbuf + dlen;
-		dp = dbuf;
-
-		tkt_struct_v = ceph_decode_8(&dp);
-		if (tkt_struct_v != 1)
-			goto bad;
-
-		memcpy(&old_key, &th->session_key, sizeof(old_key));
-		ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
+		ret = process_one_ticket(ac, secret, &p, end,
+					 dbuf, ticket_buf);
 		if (ret)
 			goto out;
-
-		ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
-		ceph_decode_timespec(&validity, &new_validity);
-		new_expires = get_seconds() + validity.tv_sec;
-		new_renew_after = new_expires - (validity.tv_sec / 4);
-		dout(" expires=%lu renew_after=%lu\n", new_expires,
-		     new_renew_after);
-
-		/* ticket blob for service */
-		ceph_decode_8_safe(&p, end, is_enc, bad);
-		tp = ticket_buf;
-		if (is_enc) {
-			/* encrypted */
-			dout(" encrypted ticket\n");
-			dlen = ceph_x_decrypt(&old_key, &p, end, ticket_buf,
-					      TEMP_TICKET_BUF_LEN);
-			if (dlen < 0) {
-				ret = dlen;
-				goto out;
-			}
-			dlen = ceph_decode_32(&tp);
-		} else {
-			/* unencrypted */
-			ceph_decode_32_safe(&p, end, dlen, bad);
-			ceph_decode_need(&p, end, dlen, bad);
-			ceph_decode_copy(&p, ticket_buf, dlen);
-		}
-		tpend = tp + dlen;
-		dout(" ticket blob is %d bytes\n", dlen);
-		ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
-		blob_struct_v = ceph_decode_8(&tp);
-		new_secret_id = ceph_decode_64(&tp);
-		ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
-		if (ret)
-			goto out;
-
-		/* all is well, update our ticket */
-		ceph_crypto_key_destroy(&th->session_key);
-		if (th->ticket_blob)
-			ceph_buffer_put(th->ticket_blob);
-		th->session_key = new_session_key;
-		th->ticket_blob = new_ticket_blob;
-		th->validity = new_validity;
-		th->secret_id = new_secret_id;
-		th->expires = new_expires;
-		th->renew_after = new_renew_after;
-		dout(" got ticket service %d (%s) secret_id %lld len %d\n",
-		     type, ceph_entity_type_name(type), th->secret_id,
-		     (int)th->ticket_blob->vec.iov_len);
-		xi->have_keys |= th->service;
 	}
 
 	ret = 0;

From 9c38ff707bbe0635121f8fb6f108ee376cff90fe Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <ilya.dryomov@inktank.com>
Date: Tue, 9 Sep 2014 19:39:15 +0400
Subject: [PATCH 0368/1185] libceph: do not hard code max auth ticket len

commit c27a3e4d667fdcad3db7b104f75659478e0c68d8 upstream.

We hard code cephx auth ticket buffer size to 256 bytes.  This isn't
enough for any moderate setups and, in case tickets themselves are not
encrypted, leads to buffer overflows (ceph_x_decrypt() errors out, but
ceph_decode_copy() doesn't - it's just a memcpy() wrapper).  Since the
buffer is allocated dynamically anyway, allocate it a bit later, at
the point where we know how much is going to be needed.

Fixes: http://tracker.ceph.com/issues/8979

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Sage Weil <sage@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ceph/auth_x.c | 64 +++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 35 deletions(-)

diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 0eb146dce1aa..de6662b14e1f 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -13,8 +13,6 @@
 #include "auth_x.h"
 #include "auth_x_protocol.h"
 
-#define TEMP_TICKET_BUF_LEN	256
-
 static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
 
 static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
@@ -64,7 +62,7 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret,
 }
 
 static int ceph_x_decrypt(struct ceph_crypto_key *secret,
-			  void **p, void *end, void *obuf, size_t olen)
+			  void **p, void *end, void **obuf, size_t olen)
 {
 	struct ceph_x_encrypt_header head;
 	size_t head_len = sizeof(head);
@@ -75,8 +73,14 @@ static int ceph_x_decrypt(struct ceph_crypto_key *secret,
 		return -EINVAL;
 
 	dout("ceph_x_decrypt len %d\n", len);
-	ret = ceph_decrypt2(secret, &head, &head_len, obuf, &olen,
-			    *p, len);
+	if (*obuf == NULL) {
+		*obuf = kmalloc(len, GFP_NOFS);
+		if (!*obuf)
+			return -ENOMEM;
+		olen = len;
+	}
+
+	ret = ceph_decrypt2(secret, &head, &head_len, *obuf, &olen, *p, len);
 	if (ret)
 		return ret;
 	if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC)
@@ -131,18 +135,19 @@ static void remove_ticket_handler(struct ceph_auth_client *ac,
 
 static int process_one_ticket(struct ceph_auth_client *ac,
 			      struct ceph_crypto_key *secret,
-			      void **p, void *end,
-			      void *dbuf, void *ticket_buf)
+			      void **p, void *end)
 {
 	struct ceph_x_info *xi = ac->private;
 	int type;
 	u8 tkt_struct_v, blob_struct_v;
 	struct ceph_x_ticket_handler *th;
+	void *dbuf = NULL;
 	void *dp, *dend;
 	int dlen;
 	char is_enc;
 	struct timespec validity;
 	struct ceph_crypto_key old_key;
+	void *ticket_buf = NULL;
 	void *tp, *tpend;
 	struct ceph_timespec new_validity;
 	struct ceph_crypto_key new_session_key;
@@ -167,8 +172,7 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	}
 
 	/* blob for me */
-	dlen = ceph_x_decrypt(secret, p, end, dbuf,
-			      TEMP_TICKET_BUF_LEN);
+	dlen = ceph_x_decrypt(secret, p, end, &dbuf, 0);
 	if (dlen <= 0) {
 		ret = dlen;
 		goto out;
@@ -195,20 +199,25 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 
 	/* ticket blob for service */
 	ceph_decode_8_safe(p, end, is_enc, bad);
-	tp = ticket_buf;
 	if (is_enc) {
 		/* encrypted */
 		dout(" encrypted ticket\n");
-		dlen = ceph_x_decrypt(&old_key, p, end, ticket_buf,
-				      TEMP_TICKET_BUF_LEN);
+		dlen = ceph_x_decrypt(&old_key, p, end, &ticket_buf, 0);
 		if (dlen < 0) {
 			ret = dlen;
 			goto out;
 		}
+		tp = ticket_buf;
 		dlen = ceph_decode_32(&tp);
 	} else {
 		/* unencrypted */
 		ceph_decode_32_safe(p, end, dlen, bad);
+		ticket_buf = kmalloc(dlen, GFP_NOFS);
+		if (!ticket_buf) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		tp = ticket_buf;
 		ceph_decode_need(p, end, dlen, bad);
 		ceph_decode_copy(p, ticket_buf, dlen);
 	}
@@ -237,6 +246,8 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	xi->have_keys |= th->service;
 
 out:
+	kfree(ticket_buf);
+	kfree(dbuf);
 	return ret;
 
 bad:
@@ -249,21 +260,10 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 				    void *buf, void *end)
 {
 	void *p = buf;
-	char *dbuf;
-	char *ticket_buf;
 	u8 reply_struct_v;
 	u32 num;
 	int ret;
 
-	dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
-	if (!dbuf)
-		return -ENOMEM;
-
-	ret = -ENOMEM;
-	ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
-	if (!ticket_buf)
-		goto out_dbuf;
-
 	ceph_decode_8_safe(&p, end, reply_struct_v, bad);
 	if (reply_struct_v != 1)
 		return -EINVAL;
@@ -272,22 +272,15 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 	dout("%d tickets\n", num);
 
 	while (num--) {
-		ret = process_one_ticket(ac, secret, &p, end,
-					 dbuf, ticket_buf);
+		ret = process_one_ticket(ac, secret, &p, end);
 		if (ret)
-			goto out;
+			return ret;
 	}
 
-	ret = 0;
-out:
-	kfree(ticket_buf);
-out_dbuf:
-	kfree(dbuf);
-	return ret;
+	return 0;
 
 bad:
-	ret = -EINVAL;
-	goto out;
+	return -EINVAL;
 }
 
 static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
@@ -603,13 +596,14 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
 	struct ceph_x_ticket_handler *th;
 	int ret = 0;
 	struct ceph_x_authorize_reply reply;
+	void *preply = &reply;
 	void *p = au->reply_buf;
 	void *end = p + sizeof(au->reply_buf);
 
 	th = get_ticket_handler(ac, au->service);
 	if (IS_ERR(th))
 		return PTR_ERR(th);
-	ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply));
+	ret = ceph_x_decrypt(&th->session_key, &p, end, &preply, sizeof(reply));
 	if (ret < 0)
 		return ret;
 	if (ret != sizeof(reply))

From 9b1eceeba2e3aeefa4725f51ca617fe683ddfa65 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Fri, 18 Jul 2014 18:25:52 +0400
Subject: [PATCH 0369/1185] CIFS: Fix STATUS_CANNOT_DELETE error mapping for
 SMB2

commit 21496687a79424572f46a84c690d331055f4866f upstream.

The existing mapping causes unlink() call to return error after delete
operation. Changing the mapping to -EACCES makes the client process
the call like CIFS protocol does - reset dos attributes with ATTR_READONLY
flag masked off and retry the operation.

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/smb2maperror.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 7c2f45c06fc2..824696fb24db 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -605,7 +605,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
 	{STATUS_MAPPED_FILE_SIZE_ZERO, -EIO, "STATUS_MAPPED_FILE_SIZE_ZERO"},
 	{STATUS_TOO_MANY_OPENED_FILES, -EMFILE, "STATUS_TOO_MANY_OPENED_FILES"},
 	{STATUS_CANCELLED, -EIO, "STATUS_CANCELLED"},
-	{STATUS_CANNOT_DELETE, -EIO, "STATUS_CANNOT_DELETE"},
+	{STATUS_CANNOT_DELETE, -EACCES, "STATUS_CANNOT_DELETE"},
 	{STATUS_INVALID_COMPUTER_NAME, -EIO, "STATUS_INVALID_COMPUTER_NAME"},
 	{STATUS_FILE_DELETED, -EIO, "STATUS_FILE_DELETED"},
 	{STATUS_SPECIAL_ACCOUNT, -EIO, "STATUS_SPECIAL_ACCOUNT"},

From 8f516091b6c747c2db53b33c0eb955c5b3975792 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Fri, 27 Jun 2014 10:33:11 +0400
Subject: [PATCH 0370/1185] CIFS: Fix async reading on reconnects

commit 038bc961c31b070269ecd07349a7ee2e839d4fec upstream.

If we get into read_into_pages() from cifs_readv_receive() and then
lose the network connection, we issue cifs_reconnect, which moves all
mids to a private list and issues their callbacks. The callback of the
async read request sets a mid to retry, frees it and wakes up a process
that waits on the rdata completion.

After the connection is established we return from read_into_pages()
with a short read, use the mid that was freed before and try to read
the remaining data from a newly created socket. Both actions are
not what we want to do. In reconnect cases (-EAGAIN) we should not
mask off the error with a short read but should return the error
code instead.

Acked-by: Jeff Layton <jlayton@samba.org>
Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8b0c656f2ab2..97b03895ac8c 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2809,7 +2809,7 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
 		total_read += result;
 	}
 
-	return total_read > 0 ? total_read : result;
+	return total_read > 0 && result != -EAGAIN ? total_read : result;
 }
 
 static ssize_t
@@ -3232,7 +3232,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
 		total_read += result;
 	}
 
-	return total_read > 0 ? total_read : result;
+	return total_read > 0 && result != -EAGAIN ? total_read : result;
 }
 
 static int cifs_readpages(struct file *file, struct address_space *mapping,

From c6bef3b64c1e605f4059189153de3251855846b9 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Sun, 17 Aug 2014 00:22:24 -0500
Subject: [PATCH 0371/1185] CIFS: Possible null ptr deref in SMB2_tcon

commit 18f39e7be0121317550d03e267e3ebd4dbfbb3ce upstream.

As Raphael Geissert pointed out, tcon_error_exit can dereference tcon
and there is one path in which tcon can be null.

Signed-off-by: Steve French <smfrench@gmail.com>
Reported-by: Raphael Geissert <geissert@debian.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/smb2pdu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index c7a6fd87bb6e..184c55820d1f 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -809,7 +809,8 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
 tcon_error_exit:
 	if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) {
 		cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
-		tcon->bad_network_name = true;
+		if (tcon)
+			tcon->bad_network_name = true;
 	}
 	goto tcon_exit;
 }

From 4cf2ef68d23fcdea34b72325af7b9ac5f35f52ef Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Mon, 18 Aug 2014 20:49:58 +0400
Subject: [PATCH 0372/1185] CIFS: Fix wrong directory attributes after rename

commit b46799a8f28c43c5264ac8d8ffa28b311b557e03 upstream.

When we request a rename we also need to update the attributes
of both source and target parent directories. Not doing it
causes generic/309 xfstest to fail on SMB2 mounts. Fix this
by marking these directories for force revalidating.

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/inode.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 9d463501348f..c9bce9b43855 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1647,6 +1647,12 @@ cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
 				    target_dentry, to_name);
 	}
 
+	/* force revalidate to go get info when needed */
+	CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0;
+
+	source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime =
+		target_dir->i_mtime = current_fs_time(source_dir->i_sb);
+
 cifs_rename_exit:
 	kfree(info_buf_source);
 	kfree(from_name);

From 0c17ceb6f88f685e8a23019052891a198434aba8 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Fri, 22 Aug 2014 13:32:11 +0400
Subject: [PATCH 0373/1185] CIFS: Fix wrong filename length for SMB2

commit 1bbe4997b13de903c421c1cc78440e544b5f9064 upstream.

The existing code uses the old MAX_NAME constant. This causes
XFS test generic/013 to fail. Fix it by replacing MAX_NAME with
PATH_MAX that SMB1 uses. Also remove an unused MAX_NAME constant
definition.

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/cifsglob.h  | 5 -----
 fs/cifs/smb2file.c  | 2 +-
 fs/cifs/smb2inode.c | 2 +-
 fs/cifs/smb2ops.c   | 2 +-
 fs/cifs/smb2pdu.c   | 2 +-
 5 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index e2c2d96491fa..52480240168e 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -74,11 +74,6 @@
 #define SERVER_NAME_LENGTH 40
 #define SERVER_NAME_LEN_WITH_NULL     (SERVER_NAME_LENGTH + 1)
 
-/* used to define string lengths for reversing unicode strings */
-/*         (256+1)*2 = 514                                     */
-/*           (max path length + 1 for null) * 2 for unicode    */
-#define MAX_NAME 514
-
 /* SMB echo "timeout" -- FIXME: tunable? */
 #define SMB_ECHO_INTERVAL (60 * HZ)
 
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 5da1b55a2258..d801f63cddd0 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -73,7 +73,7 @@ smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path,
 		goto out;
 	}
 
-	smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+	smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
 			    GFP_KERNEL);
 	if (smb2_data == NULL) {
 		rc = -ENOMEM;
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index fff6dfba6204..6d535797ec76 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -123,7 +123,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
 
 	*adjust_tz = false;
 
-	smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+	smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
 			    GFP_KERNEL);
 	if (smb2_data == NULL)
 		return -ENOMEM;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index e2756bb40b4d..fe7ac989c6c4 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -243,7 +243,7 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
 	int rc;
 	struct smb2_file_all_info *smb2_data;
 
-	smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+	smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
 			    GFP_KERNEL);
 	if (smb2_data == NULL)
 		return -ENOMEM;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 184c55820d1f..e37790841446 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1204,7 +1204,7 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
 {
 	return query_info(xid, tcon, persistent_fid, volatile_fid,
 			  FILE_ALL_INFORMATION,
-			  sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+			  sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
 			  sizeof(struct smb2_file_all_info), data);
 }
 

From 659c639916aabca0165fbd9f8a966055a519e44b Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 26 Aug 2014 19:04:44 +0400
Subject: [PATCH 0374/1185] CIFS: Fix wrong restart readdir for SMB1

commit f736906a7669a77cf8cabdcbcf1dc8cb694e12ef upstream.

The existing code calls server->ops->close(), which is not
right. This causes XFS test generic/310 to fail. Fix this
by using the server->ops->close_dir() function.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/readdir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 036279c064ff..87d125f682cd 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -585,8 +585,8 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon,
 		if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
 			cfile->invalidHandle = true;
 			spin_unlock(&cifs_file_list_lock);
-			if (server->ops->close)
-				server->ops->close(xid, tcon, &cfile->fid);
+			if (server->ops->close_dir)
+				server->ops->close_dir(xid, tcon, &cfile->fid);
 		} else
 			spin_unlock(&cifs_file_list_lock);
 		if (cfile->srch_inf.ntwrk_buf_start) {

From a9d28db622269c29825f13c28bae0f587f77aada Mon Sep 17 00:00:00 2001
From: Kevin Hao <haokexin@gmail.com>
Date: Thu, 3 Jul 2014 10:35:26 +0800
Subject: [PATCH 0375/1185] mtd/ftl: fix the double free of the buffers
 allocated in build_maps()

commit a152056c912db82860a8b4c23d0bd3a5aa89e363 upstream.

I got the following panic on my fsl p5020ds board.

  Unable to handle kernel paging request for data at address 0x7375627379737465
  Faulting instruction address: 0xc000000000100778
  Oops: Kernel access of bad area, sig: 11 [#1]
  SMP NR_CPUS=24 CoreNet Generic
  Modules linked in:
  CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.15.0-next-20140613 #145
  task: c0000000fe080000 ti: c0000000fe088000 task.ti: c0000000fe088000
  NIP: c000000000100778 LR: c00000000010073c CTR: 0000000000000000
  REGS: c0000000fe08aa00 TRAP: 0300   Not tainted  (3.15.0-next-20140613)
  MSR: 0000000080029000 <CE,EE,ME>  CR: 24ad2e24  XER: 00000000
  DEAR: 7375627379737465 ESR: 0000000000000000 SOFTE: 1
  GPR00: c0000000000c99b0 c0000000fe08ac80 c0000000009598e0 c0000000fe001d80
  GPR04: 00000000000000d0 0000000000000913 c000000007902b20 0000000000000000
  GPR08: c0000000feaae888 0000000000000000 0000000007091000 0000000000200200
  GPR12: 0000000028ad2e28 c00000000fff4000 c0000000007abe08 0000000000000000
  GPR16: c0000000007ab160 c0000000007aaf98 c00000000060ba68 c0000000007abda8
  GPR20: c0000000007abde8 c0000000feaea6f8 c0000000feaea708 c0000000007abd10
  GPR24: c000000000989370 c0000000008c6228 00000000000041ed c0000000fe00a400
  GPR28: c00000000017c1cc 00000000000000d0 7375627379737465 c0000000fe001d80
  NIP [c000000000100778] .__kmalloc_track_caller+0x70/0x168
  LR [c00000000010073c] .__kmalloc_track_caller+0x34/0x168
  Call Trace:
  [c0000000fe08ac80] [c00000000087e6b8] uevent_sock_list+0x0/0x10 (unreliable)
  [c0000000fe08ad20] [c0000000000c99b0] .kstrdup+0x44/0x90
  [c0000000fe08adc0] [c00000000017c1cc] .__kernfs_new_node+0x4c/0x130
  [c0000000fe08ae70] [c00000000017d7e4] .kernfs_new_node+0x2c/0x64
  [c0000000fe08aef0] [c00000000017db00] .kernfs_create_dir_ns+0x34/0xc8
  [c0000000fe08af80] [c00000000018067c] .sysfs_create_dir_ns+0x58/0xcc
  [c0000000fe08b010] [c0000000002c711c] .kobject_add_internal+0xc8/0x384
  [c0000000fe08b0b0] [c0000000002c7644] .kobject_add+0x64/0xc8
  [c0000000fe08b140] [c000000000355ebc] .device_add+0x11c/0x654
  [c0000000fe08b200] [c0000000002b5988] .add_disk+0x20c/0x4b4
  [c0000000fe08b2c0] [c0000000003a21d4] .add_mtd_blktrans_dev+0x340/0x514
  [c0000000fe08b350] [c0000000003a3410] .mtdblock_add_mtd+0x74/0xb4
  [c0000000fe08b3e0] [c0000000003a32cc] .blktrans_notify_add+0x64/0x94
  [c0000000fe08b470] [c00000000039b5b4] .add_mtd_device+0x1d4/0x368
  [c0000000fe08b520] [c00000000039b830] .mtd_device_parse_register+0xe8/0x104
  [c0000000fe08b5c0] [c0000000003b8408] .of_flash_probe+0x72c/0x734
  [c0000000fe08b750] [c00000000035ba40] .platform_drv_probe+0x38/0x84
  [c0000000fe08b7d0] [c0000000003599a4] .really_probe+0xa4/0x29c
  [c0000000fe08b870] [c000000000359d3c] .__driver_attach+0x100/0x104
  [c0000000fe08b900] [c00000000035746c] .bus_for_each_dev+0x84/0xe4
  [c0000000fe08b9a0] [c0000000003593c0] .driver_attach+0x24/0x38
  [c0000000fe08ba10] [c000000000358f24] .bus_add_driver+0x1c8/0x2ac
  [c0000000fe08bab0] [c00000000035a3a4] .driver_register+0x8c/0x158
  [c0000000fe08bb30] [c00000000035b9f4] .__platform_driver_register+0x6c/0x80
  [c0000000fe08bba0] [c00000000084e080] .of_flash_driver_init+0x1c/0x30
  [c0000000fe08bc10] [c000000000001864] .do_one_initcall+0xbc/0x238
  [c0000000fe08bd00] [c00000000082cdc0] .kernel_init_freeable+0x188/0x268
  [c0000000fe08bdb0] [c0000000000020a0] .kernel_init+0x1c/0xf7c
  [c0000000fe08be30] [c000000000000884] .ret_from_kernel_thread+0x58/0xd4
  Instruction dump:
  41bd0010 480000c8 4bf04eb5 60000000 e94d0028 e93f0000 7cc95214 e8a60008
  7fc9502a 2fbe0000 419e00c8 e93f0022 <7f7e482a> 39200000 88ed06b2 992d06b2
  ---[ end trace b4c9a94804a42d40 ]---

It seems that the corrupted partition header on my mtd device triggers
a bug in the ftl. In function build_maps() it will allocate the buffers
needed by the mtd partition, but if something goes wrong such as kmalloc
failure, mtd read error or invalid partition header parameter, it will
free all allocated buffers and then return non-zero. In my case, it
seems that partition header parameter 'NumTransferUnits' is invalid.

ftl_freepart() is the function that frees all the partition
buffers allocated by build_maps(). Given that build_maps() cleans up
after itself, there is no need to invoke ftl_freepart() when
build_maps() returns with an error. Otherwise the buffers are
freed twice and weird things happen.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/ftl.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index 19d637266fcd..71e4f6ccae2f 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -1075,7 +1075,6 @@ static void ftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 			return;
 	}
 
-	ftl_freepart(partition);
 	kfree(partition);
 }
 

From 6562c0cc805b391489e2f511983300e802864aea Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Mon, 25 Aug 2014 16:15:33 -0700
Subject: [PATCH 0376/1185] mtd: nand: omap: Fix 1-bit Hamming code scheme,
 omap_calculate_ecc()

commit 40ddbf5069bd4e11447c0088fc75318e0aac53f0 upstream.

commit 65b97cf6b8de introduced in v3.7 caused a regression
by using a reversed CS_MASK thus causing omap_calculate_ecc to
always fail. As the NAND base driver never checks for .calculate()'s
return value, the zeroed ECC values are used as is without showing
any error to the user. However, this won't work and the NAND device
won't be guarded by any error code.

Fix the issue by using the correct mask.

Code was tested on omap3beagle using the following procedure
- flash the primary bootloader (MLO) from the kernel to the first
NAND partition using nandwrite.
- boot the board from NAND. This utilizes OMAP ROM loader that
relies on 1-bit Hamming code ECC.

Fixes: 65b97cf6b8de (mtd: nand: omap2: handle nand on gpmc)

Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/nand/omap2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 8c4eb287bbdb..e9b1797cdb5f 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -948,7 +948,7 @@ static int omap_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
 	u32 val;
 
 	val = readl(info->reg.gpmc_ecc_config);
-	if (((val >> ECC_CONFIG_CS_SHIFT)  & ~CS_MASK) != info->gpmc_cs)
+	if (((val >> ECC_CONFIG_CS_SHIFT) & CS_MASK) != info->gpmc_cs)
 		return -EINVAL;
 
 	/* read ecc result */

From f5b48b7a3d0d1ab761ba939b6cbb4a07d37a750b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sat, 5 Jul 2014 18:43:21 -0400
Subject: [PATCH 0377/1185] blkcg: don't call into policy draining if root_blkg
 is already gone

commit 2a1b4cf2331d92bc009bf94fa02a24604cdaf24c upstream.

While a queue is being destroyed, all the blkgs are destroyed and its
->root_blkg pointer is set to NULL.  If someone else starts to drain
while the queue is in this state, the following oops happens.

  NULL pointer dereference at 0000000000000028
  IP: [<ffffffff8144e944>] blk_throtl_drain+0x84/0x230
  PGD e4a1067 PUD b773067 PMD 0
  Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
  Modules linked in: cfq_iosched(-) [last unloaded: cfq_iosched]
  CPU: 1 PID: 537 Comm: bash Not tainted 3.16.0-rc3-work+ #2
  Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
  task: ffff88000e222250 ti: ffff88000efd4000 task.ti: ffff88000efd4000
  RIP: 0010:[<ffffffff8144e944>]  [<ffffffff8144e944>] blk_throtl_drain+0x84/0x230
  RSP: 0018:ffff88000efd7bf0  EFLAGS: 00010046
  RAX: 0000000000000000 RBX: ffff880015091450 RCX: 0000000000000001
  RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
  RBP: ffff88000efd7c10 R08: 0000000000000000 R09: 0000000000000001
  R10: ffff88000e222250 R11: 0000000000000000 R12: ffff880015091450
  R13: ffff880015092e00 R14: ffff880015091d70 R15: ffff88001508fc28
  FS:  00007f1332650740(0000) GS:ffff88001fa80000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
  CR2: 0000000000000028 CR3: 0000000009446000 CR4: 00000000000006e0
  Stack:
   ffffffff8144e8f6 ffff880015091450 0000000000000000 ffff880015091d80
   ffff88000efd7c28 ffffffff8144ae2f ffff880015091450 ffff88000efd7c58
   ffffffff81427641 ffff880015091450 ffffffff82401f00 ffff880015091450
  Call Trace:
   [<ffffffff8144ae2f>] blkcg_drain_queue+0x1f/0x60
   [<ffffffff81427641>] __blk_drain_queue+0x71/0x180
   [<ffffffff81429b3e>] blk_queue_bypass_start+0x6e/0xb0
   [<ffffffff814498b8>] blkcg_deactivate_policy+0x38/0x120
   [<ffffffff8144ec44>] blk_throtl_exit+0x34/0x50
   [<ffffffff8144aea5>] blkcg_exit_queue+0x35/0x40
   [<ffffffff8142d476>] blk_release_queue+0x26/0xd0
   [<ffffffff81454968>] kobject_cleanup+0x38/0x70
   [<ffffffff81454848>] kobject_put+0x28/0x60
   [<ffffffff81427505>] blk_put_queue+0x15/0x20
   [<ffffffff817d07bb>] scsi_device_dev_release_usercontext+0x16b/0x1c0
   [<ffffffff810bc339>] execute_in_process_context+0x89/0xa0
   [<ffffffff817d064c>] scsi_device_dev_release+0x1c/0x20
   [<ffffffff817930e2>] device_release+0x32/0xa0
   [<ffffffff81454968>] kobject_cleanup+0x38/0x70
   [<ffffffff81454848>] kobject_put+0x28/0x60
   [<ffffffff817934d7>] put_device+0x17/0x20
   [<ffffffff817d11b9>] __scsi_remove_device+0xa9/0xe0
   [<ffffffff817d121b>] scsi_remove_device+0x2b/0x40
   [<ffffffff817d1257>] sdev_store_delete+0x27/0x30
   [<ffffffff81792ca8>] dev_attr_store+0x18/0x30
   [<ffffffff8126f75e>] sysfs_kf_write+0x3e/0x50
   [<ffffffff8126ea87>] kernfs_fop_write+0xe7/0x170
   [<ffffffff811f5e9f>] vfs_write+0xaf/0x1d0
   [<ffffffff811f69bd>] SyS_write+0x4d/0xc0
   [<ffffffff81d24692>] system_call_fastpath+0x16/0x1b

776687bce42b ("block, blk-mq: draining can't be skipped even if
bypass_depth was non-zero") made it easier to trigger this bug by
making blk_queue_bypass_start() drain even when it loses the first
bypass test to blk_cleanup_queue(); however, the bug has always been
there even before the commit as blk_queue_bypass_start() could race
against queue destruction, win the initial bypass test but perform the
actual draining after blk_cleanup_queue() already destroyed all blkgs.

Fix it by skipping calling into policy draining if all the blkgs are
already gone.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Shirish Pargaonkar <spargaonkar@suse.com>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Reported-by: Jet Chen <jet.chen@intel.com>
Tested-by: Shirish Pargaonkar <spargaonkar@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 block/blk-cgroup.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b95219d2168d..1ff8e97f853a 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -876,6 +876,13 @@ void blkcg_drain_queue(struct request_queue *q)
 {
 	lockdep_assert_held(q->queue_lock);
 
+	/*
+	 * @q could be exiting and already have destroyed all blkgs as
+	 * indicated by NULL root_blkg.  If so, don't confuse policies.
+	 */
+	if (!q->root_blkg)
+		return;
+
 	/*
 	 * @q could be exiting and already have destroyed all blkgs as
 	 * indicated by NULL root_blkg.  If so, don't confuse policies.

From 70efec16cf060603b54ea71c9cb4499f052efd69 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 9 Jul 2014 15:57:26 +0200
Subject: [PATCH 0378/1185] IB/srp: Fix deadlock between host removal and
 multipathd

commit bcc05910359183b431da92713e98eed478edf83a upstream.

If scsi_remove_host() is invoked after a SCSI device has been blocked,
if the fast_io_fail_tmo or dev_loss_tmo work gets scheduled on the
workqueue executing srp_remove_work() and if an I/O request is
scheduled after the SCSI device had been blocked by e.g. multipathd
then the following deadlock can occur:

    kworker/6:1     D ffff880831f3c460     0   195      2 0x00000000
    Call Trace:
     [<ffffffff814aafd9>] schedule+0x29/0x70
     [<ffffffff814aa0ef>] schedule_timeout+0x10f/0x2a0
     [<ffffffff8105af6f>] msleep+0x2f/0x40
     [<ffffffff8123b0ae>] __blk_drain_queue+0x4e/0x180
     [<ffffffff8123d2d5>] blk_cleanup_queue+0x225/0x230
     [<ffffffffa0010732>] __scsi_remove_device+0x62/0xe0 [scsi_mod]
     [<ffffffffa000ed2f>] scsi_forget_host+0x6f/0x80 [scsi_mod]
     [<ffffffffa0002eba>] scsi_remove_host+0x7a/0x130 [scsi_mod]
     [<ffffffffa07cf5c5>] srp_remove_work+0x95/0x180 [ib_srp]
     [<ffffffff8106d7aa>] process_one_work+0x1ea/0x6c0
     [<ffffffff8106dd9b>] worker_thread+0x11b/0x3a0
     [<ffffffff810758bd>] kthread+0xed/0x110
     [<ffffffff814b972c>] ret_from_fork+0x7c/0xb0
    multipathd      D ffff880096acc460     0  5340      1 0x00000000
    Call Trace:
     [<ffffffff814aafd9>] schedule+0x29/0x70
     [<ffffffff814aa0ef>] schedule_timeout+0x10f/0x2a0
     [<ffffffff814ab79b>] io_schedule_timeout+0x9b/0xf0
     [<ffffffff814abe1c>] wait_for_completion_io_timeout+0xdc/0x110
     [<ffffffff81244b9b>] blk_execute_rq+0x9b/0x100
     [<ffffffff8124f665>] sg_io+0x1a5/0x450
     [<ffffffff8124fd21>] scsi_cmd_ioctl+0x2a1/0x430
     [<ffffffff8124fef2>] scsi_cmd_blk_ioctl+0x42/0x50
     [<ffffffffa00ec97e>] sd_ioctl+0xbe/0x140 [sd_mod]
     [<ffffffff8124bd04>] blkdev_ioctl+0x234/0x840
     [<ffffffff811cb491>] block_ioctl+0x41/0x50
     [<ffffffff811a0df0>] do_vfs_ioctl+0x300/0x520
     [<ffffffff811a1051>] SyS_ioctl+0x41/0x80
     [<ffffffff814b9962>] tracesys+0xd0/0xd5

Fix this by scheduling removal work on another workqueue than the
transport layer timers.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: David Dillow <dave@thedillows.org>
Cc: Sebastian Parschauer <sebastian.riemer@profitbricks.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/ulp/srp/ib_srp.c | 38 +++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 1954daac0b59..35dd5ff662f1 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -93,6 +93,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr);
 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
 
 static struct scsi_transport_template *ib_srp_transport_template;
+static struct workqueue_struct *srp_remove_wq;
 
 static struct ib_client srp_client = {
 	.name   = "srp",
@@ -456,7 +457,7 @@ static bool srp_queue_remove_work(struct srp_target_port *target)
 	spin_unlock_irq(&target->lock);
 
 	if (changed)
-		queue_work(system_long_wq, &target->remove_work);
+		queue_work(srp_remove_wq, &target->remove_work);
 
 	return changed;
 }
@@ -2530,9 +2531,10 @@ static void srp_remove_one(struct ib_device *device)
 		spin_unlock(&host->target_lock);
 
 		/*
-		 * Wait for target port removal tasks.
+		 * Wait for tl_err and target port removal tasks.
 		 */
 		flush_workqueue(system_long_wq);
+		flush_workqueue(srp_remove_wq);
 
 		kfree(host);
 	}
@@ -2577,16 +2579,22 @@ static int __init srp_init_module(void)
 		indirect_sg_entries = cmd_sg_entries;
 	}
 
+	srp_remove_wq = create_workqueue("srp_remove");
+	if (IS_ERR(srp_remove_wq)) {
+		ret = PTR_ERR(srp_remove_wq);
+		goto out;
+	}
+
+	ret = -ENOMEM;
 	ib_srp_transport_template =
 		srp_attach_transport(&ib_srp_transport_functions);
 	if (!ib_srp_transport_template)
-		return -ENOMEM;
+		goto destroy_wq;
 
 	ret = class_register(&srp_class);
 	if (ret) {
 		pr_err("couldn't register class infiniband_srp\n");
-		srp_release_transport(ib_srp_transport_template);
-		return ret;
+		goto release_tr;
 	}
 
 	ib_sa_register_client(&srp_sa_client);
@@ -2594,13 +2602,22 @@ static int __init srp_init_module(void)
 	ret = ib_register_client(&srp_client);
 	if (ret) {
 		pr_err("couldn't register IB client\n");
-		srp_release_transport(ib_srp_transport_template);
-		ib_sa_unregister_client(&srp_sa_client);
-		class_unregister(&srp_class);
-		return ret;
+		goto unreg_sa;
 	}
 
-	return 0;
+out:
+	return ret;
+
+unreg_sa:
+	ib_sa_unregister_client(&srp_sa_client);
+	class_unregister(&srp_class);
+
+release_tr:
+	srp_release_transport(ib_srp_transport_template);
+
+destroy_wq:
+	destroy_workqueue(srp_remove_wq);
+	goto out;
 }
 
 static void __exit srp_cleanup_module(void)
@@ -2609,6 +2626,7 @@ static void __exit srp_cleanup_module(void)
 	ib_sa_unregister_client(&srp_sa_client);
 	class_unregister(&srp_class);
 	srp_release_transport(ib_srp_transport_template);
+	destroy_workqueue(srp_remove_wq);
 }
 
 module_init(srp_init_module);

From a6c56468b3f3274896ee8da73608dc48ad4103e0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 25 Oct 2013 16:41:01 -0400
Subject: [PATCH 0379/1185] dcache.c: get rid of pointless macros

commit 482db9066199813d6b999b65a3171afdbec040b6 upstream.

D_HASH{MASK,BITS} are used once each, both in the same function (d_hash()).
At this point they are actively misguiding - they imply that values are
compiler constants, which is no longer true.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/dcache.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 9a59653d3449..f867c53a7989 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -96,8 +96,6 @@ static struct kmem_cache *dentry_cache __read_mostly;
  * This hash-function tries to avoid losing too many bits of hash
  * information, yet avoid using a prime hash-size or similar.
  */
-#define D_HASHBITS     d_hash_shift
-#define D_HASHMASK     d_hash_mask
 
 static unsigned int d_hash_mask __read_mostly;
 static unsigned int d_hash_shift __read_mostly;
@@ -108,8 +106,8 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
 					unsigned int hash)
 {
 	hash += (unsigned long) parent / L1_CACHE_BYTES;
-	hash = hash + (hash >> D_HASHBITS);
-	return dentry_hashtable + (hash & D_HASHMASK);
+	hash = hash + (hash >> d_hash_shift);
+	return dentry_hashtable + (hash & d_hash_mask);
 }
 
 /* Statistics gathering. */

From d4c96061fddd129778ce8b70fb093aa532f422d0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 13 Sep 2014 11:30:10 -0700
Subject: [PATCH 0380/1185] vfs: fix bad hashing of dentries

commit 99d263d4c5b2f541dfacb5391e22e8c91ea982a6 upstream.

Josef Bacik found a performance regression between 3.2 and 3.10 and
narrowed it down to commit bfcfaa77bdf0 ("vfs: use 'unsigned long'
accesses for dcache name comparison and hashing"). He reports:

 "The test case is essentially

      for (i = 0; i < 1000000; i++)
              mkdir("a$i");

  On xfs on a fio card this goes at about 20k dir/sec with 3.2, and 12k
  dir/sec with 3.10.  This is because we spend waaaaay more time in
  __d_lookup on 3.10 than in 3.2.

  The new hashing function for strings is suboptimal for <
  sizeof(unsigned long) string names (and hell even > sizeof(unsigned
  long) string names that I've tested).  I broke out the old hashing
  function and the new one into a userspace helper to get real numbers
  and this is what I'm getting:

      Old hash table had 1000000 entries, 0 dupes, 0 max dupes
      New hash table had 12628 entries, 987372 dupes, 900 max dupes
      We had 11400 buckets with a p50 of 30 dupes, p90 of 240 dupes, p99 of 567 dupes for the new hash

  My test does the hash, and then does the d_hash into a integer pointer
  array the same size as the dentry hash table on my system, and then
  just increments the value at the address we got to see how many
  entries we overlap with.

  As you can see the old hash function ended up with all 1 million
  entries in their own bucket, whereas the new one they are only
  distributed among ~12.5k buckets, which is why we're using so much
  more CPU in __d_lookup".

The reason for this hash regression is two-fold:

 - On 64-bit architectures the down-mixing of the original 64-bit
   word-at-a-time hash into the final 32-bit hash value is very
   simplistic and suboptimal, and just adds the two 32-bit parts
   together.

   In particular, because there is no bit shuffling and the mixing
   boundary is also a byte boundary, similar character patterns in the
   low and high word easily end up just canceling each other out.

 - the old byte-at-a-time hash mixed each byte into the final hash as it
   hashed the path component name, resulting in the low bits of the hash
   generally being a good source of hash data.  That is not true for the
   word-at-a-time case, and the hash data is distributed among all the
   bits.

The fix is the same in both cases: do a better job of mixing the bits up
and using as much of the hash data as possible.  We already have the
"hash_32|64()" functions to do that.

Reported-by: Josef Bacik <jbacik@fb.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Chris Mason <clm@fb.com>
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/dcache.c | 3 +--
 fs/namei.c  | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index f867c53a7989..25c0a1b5f6c0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -106,8 +106,7 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
 					unsigned int hash)
 {
 	hash += (unsigned long) parent / L1_CACHE_BYTES;
-	hash = hash + (hash >> d_hash_shift);
-	return dentry_hashtable + (hash & d_hash_mask);
+	return dentry_hashtable + hash_32(hash, d_hash_shift);
 }
 
 /* Statistics gathering. */
diff --git a/fs/namei.c b/fs/namei.c
index 6ac16a37ded2..f7c4393f8535 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -34,6 +34,7 @@
 #include <linux/device_cgroup.h>
 #include <linux/fs_struct.h>
 #include <linux/posix_acl.h>
+#include <linux/hash.h>
 #include <asm/uaccess.h>
 
 #include "internal.h"
@@ -1647,8 +1648,7 @@ static inline int can_lookup(struct inode *inode)
 
 static inline unsigned int fold_hash(unsigned long hash)
 {
-	hash += hash >> (8*sizeof(int));
-	return hash;
+	return hash_64(hash, 32);
 }
 
 #else	/* 32-bit case */

From d64269e30131fbd8a2228323266c9e84ee6ab80d Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Wed, 21 May 2014 18:26:44 -0600
Subject: [PATCH 0381/1185] tpm: Provide a generic means to override the chip
 returned timeouts

commit 8e54caf407b98efa05409e1fee0e5381abd2b088 upstream.

Some Atmel TPMs provide completely wrong timeouts from their
TPM_CAP_PROP_TIS_TIMEOUT query. This patch detects that and returns
new correct values via a DID/VID table in the TIS driver.

Tested on ARM using an AT97SC3204T FW version 37.16

[PHuewe: without this fix these 'broken' Atmel TPMs won't function on
older kernels]
Signed-off-by: "Berg, Christopher" <Christopher.Berg@atmel.com>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
[bwh: Backported to 3.10:
 - Adjust filename, context
 - s/chip->ops->/chip->vendor./]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/tpm/tpm.c     | 62 +++++++++++++++++++++++++-------------
 drivers/char/tpm/tpm.h     |  3 ++
 drivers/char/tpm/tpm_tis.c | 31 +++++++++++++++++++
 3 files changed, 75 insertions(+), 21 deletions(-)

diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index 01d6968a9e47..f659a571ad23 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -533,11 +533,10 @@ static int tpm_startup(struct tpm_chip *chip, __be16 startup_type)
 int tpm_get_timeouts(struct tpm_chip *chip)
 {
 	struct tpm_cmd_t tpm_cmd;
-	struct timeout_t *timeout_cap;
+	unsigned long new_timeout[4];
+	unsigned long old_timeout[4];
 	struct duration_t *duration_cap;
 	ssize_t rc;
-	u32 timeout;
-	unsigned int scale = 1;
 
 	tpm_cmd.header.in = tpm_getcap_header;
 	tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
@@ -571,25 +570,46 @@ int tpm_get_timeouts(struct tpm_chip *chip)
 	    != sizeof(tpm_cmd.header.out) + sizeof(u32) + 4 * sizeof(u32))
 		return -EINVAL;
 
-	timeout_cap = &tpm_cmd.params.getcap_out.cap.timeout;
-	/* Don't overwrite default if value is 0 */
-	timeout = be32_to_cpu(timeout_cap->a);
-	if (timeout && timeout < 1000) {
-		/* timeouts in msec rather usec */
-		scale = 1000;
-		chip->vendor.timeout_adjusted = true;
+	old_timeout[0] = be32_to_cpu(tpm_cmd.params.getcap_out.cap.timeout.a);
+	old_timeout[1] = be32_to_cpu(tpm_cmd.params.getcap_out.cap.timeout.b);
+	old_timeout[2] = be32_to_cpu(tpm_cmd.params.getcap_out.cap.timeout.c);
+	old_timeout[3] = be32_to_cpu(tpm_cmd.params.getcap_out.cap.timeout.d);
+	memcpy(new_timeout, old_timeout, sizeof(new_timeout));
+
+	/*
+	 * Provide ability for vendor overrides of timeout values in case
+	 * of misreporting.
+	 */
+	if (chip->vendor.update_timeouts != NULL)
+		chip->vendor.timeout_adjusted =
+			chip->vendor.update_timeouts(chip, new_timeout);
+
+	if (!chip->vendor.timeout_adjusted) {
+		/* Don't overwrite default if value is 0 */
+		if (new_timeout[0] != 0 && new_timeout[0] < 1000) {
+			int i;
+
+			/* timeouts in msec rather usec */
+			for (i = 0; i != ARRAY_SIZE(new_timeout); i++)
+				new_timeout[i] *= 1000;
+			chip->vendor.timeout_adjusted = true;
+		}
 	}
-	if (timeout)
-		chip->vendor.timeout_a = usecs_to_jiffies(timeout * scale);
-	timeout = be32_to_cpu(timeout_cap->b);
-	if (timeout)
-		chip->vendor.timeout_b = usecs_to_jiffies(timeout * scale);
-	timeout = be32_to_cpu(timeout_cap->c);
-	if (timeout)
-		chip->vendor.timeout_c = usecs_to_jiffies(timeout * scale);
-	timeout = be32_to_cpu(timeout_cap->d);
-	if (timeout)
-		chip->vendor.timeout_d = usecs_to_jiffies(timeout * scale);
+
+	/* Report adjusted timeouts */
+	if (chip->vendor.timeout_adjusted) {
+		dev_info(chip->dev,
+			 HW_ERR "Adjusting reported timeouts: A %lu->%luus B %lu->%luus C %lu->%luus D %lu->%luus\n",
+			 old_timeout[0], new_timeout[0],
+			 old_timeout[1], new_timeout[1],
+			 old_timeout[2], new_timeout[2],
+			 old_timeout[3], new_timeout[3]);
+	}
+
+	chip->vendor.timeout_a = usecs_to_jiffies(new_timeout[0]);
+	chip->vendor.timeout_b = usecs_to_jiffies(new_timeout[1]);
+	chip->vendor.timeout_c = usecs_to_jiffies(new_timeout[2]);
+	chip->vendor.timeout_d = usecs_to_jiffies(new_timeout[3]);
 
 duration:
 	tpm_cmd.header.in = tpm_getcap_header;
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 0770d1d79366..deffda7678a0 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -95,6 +95,9 @@ struct tpm_vendor_specific {
 	int (*send) (struct tpm_chip *, u8 *, size_t);
 	void (*cancel) (struct tpm_chip *);
 	u8 (*status) (struct tpm_chip *);
+	bool (*update_timeouts)(struct tpm_chip *chip,
+				unsigned long *timeout_cap);
+
 	void (*release) (struct device *);
 	struct miscdevice miscdev;
 	struct attribute_group *attr_group;
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 8a41b6be23a0..72f21377fa02 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -373,6 +373,36 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
 	return rc;
 }
 
+struct tis_vendor_timeout_override {
+	u32 did_vid;
+	unsigned long timeout_us[4];
+};
+
+static const struct tis_vendor_timeout_override vendor_timeout_overrides[] = {
+	/* Atmel 3204 */
+	{ 0x32041114, { (TIS_SHORT_TIMEOUT*1000), (TIS_LONG_TIMEOUT*1000),
+			(TIS_SHORT_TIMEOUT*1000), (TIS_SHORT_TIMEOUT*1000) } },
+};
+
+static bool tpm_tis_update_timeouts(struct tpm_chip *chip,
+				    unsigned long *timeout_cap)
+{
+	int i;
+	u32 did_vid;
+
+	did_vid = ioread32(chip->vendor.iobase + TPM_DID_VID(0));
+
+	for (i = 0; i != ARRAY_SIZE(vendor_timeout_overrides); i++) {
+		if (vendor_timeout_overrides[i].did_vid != did_vid)
+			continue;
+		memcpy(timeout_cap, vendor_timeout_overrides[i].timeout_us,
+		       sizeof(vendor_timeout_overrides[i].timeout_us));
+		return true;
+	}
+
+	return false;
+}
+
 /*
  * Early probing for iTPM with STS_DATA_EXPECT flaw.
  * Try sending command without itpm flag set and if that
@@ -475,6 +505,7 @@ static struct tpm_vendor_specific tpm_tis = {
 	.recv = tpm_tis_recv,
 	.send = tpm_tis_send,
 	.cancel = tpm_tis_ready,
+	.update_timeouts = tpm_tis_update_timeouts,
 	.req_complete_mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
 	.req_complete_val = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
 	.req_canceled = tpm_tis_req_canceled,

From 842a5780d61743550cf319f1bb4aee6778088b1c Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <ilya.dryomov@inktank.com>
Date: Thu, 9 Jan 2014 20:08:21 +0200
Subject: [PATCH 0382/1185] libceph: rename ceph_msg::front_max to
 front_alloc_len

commit 3cea4c3071d4e55e9d7356efe9d0ebf92f0c2204 upstream.

Rename front_max field of struct ceph_msg to front_alloc_len to make
its purpose more clear.

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/ceph/messenger.h | 2 +-
 net/ceph/messenger.c           | 6 +++---
 net/ceph/mon_client.c          | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 7c1420bb1dce..6ade97de7a85 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -157,7 +157,7 @@ struct ceph_msg {
 	bool front_is_vmalloc;
 	bool more_to_follow;
 	bool needs_out_seq;
-	int front_max;
+	int front_alloc_len;
 	unsigned long ack_stamp;        /* tx: when we were acked */
 
 	struct ceph_msgpool *pool;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 6ff7d9dc240f..66e77f380fce 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -3144,7 +3144,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
 	INIT_LIST_HEAD(&m->data);
 
 	/* front */
-	m->front_max = front_len;
+	m->front_alloc_len = front_len;
 	if (front_len) {
 		if (front_len > PAGE_CACHE_SIZE) {
 			m->front.iov_base = __vmalloc(front_len, flags,
@@ -3319,8 +3319,8 @@ EXPORT_SYMBOL(ceph_msg_last_put);
 
 void ceph_msg_dump(struct ceph_msg *msg)
 {
-	pr_debug("msg_dump %p (front_max %d length %zd)\n", msg,
-		 msg->front_max, msg->data_length);
+	pr_debug("msg_dump %p (front_alloc_len %d length %zd)\n", msg,
+		 msg->front_alloc_len, msg->data_length);
 	print_hex_dump(KERN_DEBUG, "header: ",
 		       DUMP_PREFIX_OFFSET, 16, 1,
 		       &msg->hdr, sizeof(msg->hdr), true);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 1fe25cd29d0e..2ac9ef35110b 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -152,7 +152,7 @@ static int __open_session(struct ceph_mon_client *monc)
 		/* initiatiate authentication handshake */
 		ret = ceph_auth_build_hello(monc->auth,
 					    monc->m_auth->front.iov_base,
-					    monc->m_auth->front_max);
+					    monc->m_auth->front_alloc_len);
 		__send_prepared_auth_request(monc, ret);
 	} else {
 		dout("open_session mon%d already open\n", monc->cur_mon);
@@ -196,7 +196,7 @@ static void __send_subscribe(struct ceph_mon_client *monc)
 		int num;
 
 		p = msg->front.iov_base;
-		end = p + msg->front_max;
+		end = p + msg->front_alloc_len;
 
 		num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap;
 		ceph_encode_32(&p, num);
@@ -897,7 +897,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
 	ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
 				     msg->front.iov_len,
 				     monc->m_auth->front.iov_base,
-				     monc->m_auth->front_max);
+				     monc->m_auth->front_alloc_len);
 	if (ret < 0) {
 		monc->client->auth_err = ret;
 		wake_up_all(&monc->client->auth_wq);
@@ -939,7 +939,7 @@ static int __validate_auth(struct ceph_mon_client *monc)
 		return 0;
 
 	ret = ceph_build_auth(monc->auth, monc->m_auth->front.iov_base,
-			      monc->m_auth->front_max);
+			      monc->m_auth->front_alloc_len);
 	if (ret <= 0)
 		return ret; /* either an error, or no need to authenticate */
 	__send_prepared_auth_request(monc, ret);

From 12477ec830cb1bd188f23b80f6a0d976dd19090e Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@redhat.com>
Date: Mon, 4 Aug 2014 07:01:54 -0700
Subject: [PATCH 0383/1185] libceph: gracefully handle large reply messages
 from the mon

commit 73c3d4812b4c755efeca0140f606f83772a39ce4 upstream.

We preallocate a few of the message types we get back from the mon.  If we
get a larger message than we are expecting, fall back to trying to allocate
a new one instead of blindly using the one we have.

Signed-off-by: Sage Weil <sage@redhat.com>
Reviewed-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ceph/mon_client.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 2ac9ef35110b..dbcbf5a4707f 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1041,7 +1041,15 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
 	if (!m) {
 		pr_info("alloc_msg unknown type %d\n", type);
 		*skip = 1;
+	} else if (front_len > m->front_alloc_len) {
+		pr_warning("mon_alloc_msg front %d > prealloc %d (%u#%llu)\n",
+			   front_len, m->front_alloc_len,
+			   (unsigned int)con->peer_name.type,
+			   le64_to_cpu(con->peer_name.num));
+		ceph_msg_put(m);
+		m = ceph_msg_new(type, front_len, GFP_NOFS, false);
 	}
+
 	return m;
 }
 

From 339f8f37f0203884332585e38c06536c8477d475 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 17 Sep 2014 09:04:18 -0700
Subject: [PATCH 0384/1185] Linux 3.10.55

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 9429aa5e89de..6141df04fcb5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 10
-SUBLEVEL = 54
+SUBLEVEL = 55
 EXTRAVERSION =
 NAME = TOSSUG Baby Fish
 

From eaa22ea13d0f07969465cb7c05d74271368d4c12 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 16 Dec 2013 21:04:35 +0000
Subject: [PATCH 0385/1185] arm64: drop redundant macros from read_cpuid()

asm/cputype.h contains a bunch of #defines for CPU id registers
that essentially map to themselves. Remove the #defines and pass
the tokens directly to the inline asm() that reads the registers.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cputype.h | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index cf2749488cd4..261db2729217 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -16,23 +16,13 @@
 #ifndef __ASM_CPUTYPE_H
 #define __ASM_CPUTYPE_H
 
-#define ID_MIDR_EL1		"midr_el1"
-#define ID_MPIDR_EL1		"mpidr_el1"
-#define ID_CTR_EL0		"ctr_el0"
-
-#define ID_AA64PFR0_EL1		"id_aa64pfr0_el1"
-#define ID_AA64DFR0_EL1		"id_aa64dfr0_el1"
-#define ID_AA64AFR0_EL1		"id_aa64afr0_el1"
-#define ID_AA64ISAR0_EL1	"id_aa64isar0_el1"
-#define ID_AA64MMFR0_EL1	"id_aa64mmfr0_el1"
-
 #define INVALID_HWID		ULONG_MAX
 
 #define MPIDR_HWID_BITMASK	0xff00ffffff
 
 #define read_cpuid(reg) ({						\
 	u64 __val;							\
-	asm("mrs	%0, " reg : "=r" (__val));			\
+	asm("mrs	%0, " #reg : "=r" (__val));			\
 	__val;								\
 })
 
@@ -51,12 +41,12 @@
  */
 static inline u32 __attribute_const__ read_cpuid_id(void)
 {
-	return read_cpuid(ID_MIDR_EL1);
+	return read_cpuid(MIDR_EL1);
 }
 
 static inline u64 __attribute_const__ read_cpuid_mpidr(void)
 {
-	return read_cpuid(ID_MPIDR_EL1);
+	return read_cpuid(MPIDR_EL1);
 }
 
 static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
@@ -71,7 +61,7 @@ static inline unsigned int __attribute_const__ read_cpuid_part_number(void)
 
 static inline u32 __attribute_const__ read_cpuid_cachetype(void)
 {
-	return read_cpuid(ID_CTR_EL0);
+	return read_cpuid(CTR_EL0);
 }
 
 #endif /* __ASSEMBLY__ */

From 8fdfc481c880ccf3e7d499c5d7313d96c500531b Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 2 Sep 2013 16:33:54 +0100
Subject: [PATCH 0386/1185] arm64: Remove unused cpu_name ascii in
 arch/arm64/mm/proc.S

This string has been moved to arch/arm64/kernel/cputable.c.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/proc.S | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 3e5dcd9897db..25929f65b5f2 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -95,10 +95,6 @@ ENTRY(cpu_do_switch_mm)
 	ret
 ENDPROC(cpu_do_switch_mm)
 
-cpu_name:
-	.ascii	"AArch64 Processor"
-	.align
-
 	.section ".text.init", #alloc, #execinstr
 
 /*

From 4149e0de6d206ef915e299d4736fc5044486adff Mon Sep 17 00:00:00 2001
From: JP Abgrall <jpa@google.com>
Date: Wed, 17 Sep 2014 15:01:45 -0700
Subject: [PATCH 0387/1185] seccomp: revert previous patches in prep for
 updated ones

This reverts the seccomp related patches committed around 2014-08-27.
This allows for a cleaner cherry-pick of newly landed upstream patches.

 f56b1aa arm: fixup NR_syscalls to accommodate the new seccomp syscall
 81ff7fa seccomp: implement SECCOMP_FILTER_FLAG_TSYNC
 d924727 seccomp: allow mode setting across threads
 743266a seccomp: introduce writer locking
 3497a88 seccomp: split filter prep from check and apply
 2c6d7de MIPS: add seccomp syscall
 83f1ccba ARM: add seccomp syscall
 a75a29b seccomp: add "seccomp" syscall
 1a63bce seccomp: split mode setting routines
 c208e4e seccomp: extract check/assign mode helpers
 6862b01 seccomp: create internal mode-setting function
 1ba2ccb MAINTAINERS: create seccomp entry
 c2da3eb seccomp: fix memory leak on filter attach
 945a225 ARM: 7888/1: seccomp: not compatible with ARM OABI

Change-Id: I3f129263d68a7b3c206d79f84f7f9908d13064f6
Signed-off-by: JP Abgrall <jpa@google.com>
---
 MAINTAINERS                         |  10 -
 arch/Kconfig                        |   1 -
 arch/arm/Kconfig                    |   7 +-
 arch/arm/include/asm/unistd.h       |   2 +-
 arch/arm/include/uapi/asm/unistd.h  |   6 -
 arch/arm/kernel/calls.S             |   4 -
 arch/mips/include/uapi/asm/unistd.h |  30 +-
 arch/mips/kernel/scall32-o32.S      |   6 -
 arch/mips/kernel/scall64-64.S       |   4 -
 arch/mips/kernel/scall64-n32.S      |   4 -
 arch/mips/kernel/scall64-o32.S      |   6 +-
 arch/x86/syscalls/syscall_32.tbl    |   5 -
 arch/x86/syscalls/syscall_64.tbl    |   5 -
 fs/exec.c                           |   2 +-
 include/linux/seccomp.h             |   8 +-
 include/linux/syscalls.h            |   2 -
 include/uapi/asm-generic/unistd.h   |  12 +-
 include/uapi/linux/seccomp.h        |   7 -
 kernel/fork.c                       |  49 +--
 kernel/seccomp.c                    | 454 +++++-----------------------
 kernel/sys_ni.c                     |   3 -
 21 files changed, 83 insertions(+), 544 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 614483608ba4..ad7e322ad17b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7131,16 +7131,6 @@ S:	Maintained
 F:	drivers/mmc/host/sdhci.*
 F:	drivers/mmc/host/sdhci-pltfm.[ch]
 
-SECURE COMPUTING
-M:	Kees Cook <keescook@chromium.org>
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git seccomp
-S:	Supported
-F:	kernel/seccomp.c
-F:	include/uapi/linux/seccomp.h
-F:	include/linux/seccomp.h
-K:	\bsecure_computing
-K:	\bTIF_SECCOMP\b
-
 SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF)
 M:	Anton Vorontsov <avorontsov@ru.mvista.com>
 L:	linuxppc-dev@lists.ozlabs.org
diff --git a/arch/Kconfig b/arch/Kconfig
index 84c94a89e75b..a4429bcd609e 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -331,7 +331,6 @@ config HAVE_ARCH_SECCOMP_FILTER
 	  - secure_computing is called from a ptrace_event()-safe context
 	  - secure_computing return value is checked and a return value of -1
 	    results in the system call being skipped immediately.
-	  - seccomp syscall wired up
 
 config SECCOMP_FILTER
 	def_bool y
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0dde9b9e91c2..99887aaa04bb 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -22,7 +22,7 @@ config ARM
 	select HAVE_AOUT
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
 	select HAVE_ARCH_KGDB
-	select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
+	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_BPF_JIT
 	select HAVE_C_RECORDMCOUNT
@@ -1681,11 +1681,6 @@ config OABI_COMPAT
 	  in memory differs between the legacy ABI and the new ARM EABI
 	  (only for non "thumb" binaries). This option adds a tiny
 	  overhead to all syscalls and produces a slightly larger kernel.
-
-	  The seccomp filter system will not be available when this is
-	  selected, since there is no way yet to sensibly distinguish
-	  between calling conventions during filtering.
-
 	  If you know you'll be using only pure EABI user space then you
 	  can say N here. If this option is not selected and you attempt
 	  to execute a legacy ABI binary then the result will be
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index acabef1a75df..141baa3f9a72 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -15,7 +15,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define __NR_syscalls  (384)
+#define __NR_syscalls  (380)
 #define __ARM_NR_cmpxchg		(__ARM_NR_BASE+0x00fff0)
 
 #define __ARCH_WANT_STAT64
diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index bbe80a7cba0c..af33b44990ed 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -406,12 +406,6 @@
 #define __NR_process_vm_writev		(__NR_SYSCALL_BASE+377)
 #define __NR_kcmp			(__NR_SYSCALL_BASE+378)
 #define __NR_finit_module		(__NR_SYSCALL_BASE+379)
-/* Backporting seccomp, skip a few ...
- * #define __NR_sched_setattr		(__NR_SYSCALL_BASE+380)
- * #define __NR_sched_getattr		(__NR_SYSCALL_BASE+381)
- * #define __NR_renameat2			(__NR_SYSCALL_BASE+382)
- */
-#define __NR_seccomp			(__NR_SYSCALL_BASE+383)
 
 /*
  * This may need to be greater than __NR_last_syscall+1 in order to
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 1a2e529a1340..c6ca7e376773 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -389,10 +389,6 @@
 		CALL(sys_process_vm_writev)
 		CALL(sys_kcmp)
 		CALL(sys_finit_module)
-/* 380 */	CALL(sys_ni_syscall) /* CALL(sys_sched_setattr) */
-		CALL(sys_ni_syscall) /* CALL(sys_sched_getattr) */
-		CALL(sys_ni_syscall) /* CALL(sys_renameat2) */
-		CALL(sys_seccomp)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h
index af4d5c0a2f02..1dee279f9665 100644
--- a/arch/mips/include/uapi/asm/unistd.h
+++ b/arch/mips/include/uapi/asm/unistd.h
@@ -369,22 +369,16 @@
 #define __NR_process_vm_writev		(__NR_Linux + 346)
 #define __NR_kcmp			(__NR_Linux + 347)
 #define __NR_finit_module		(__NR_Linux + 348)
-/* Backporting seccomp, skip a few ...
- * #define __NR_sched_setattr		(__NR_Linux + 349)
- * #define __NR_sched_getattr		(__NR_Linux + 350)
- * #define __NR_renameat2			(__NR_Linux + 351)
- */
-#define __NR_seccomp			(__NR_Linux + 352)
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls		352
+#define __NR_Linux_syscalls		348
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux			4000
-#define __NR_O32_Linux_syscalls		352
+#define __NR_O32_Linux_syscalls		348
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
@@ -701,22 +695,16 @@
 #define __NR_kcmp			(__NR_Linux + 306)
 #define __NR_finit_module		(__NR_Linux + 307)
 #define __NR_getdents64			(__NR_Linux + 308)
-/* Backporting seccomp, skip a few ...
- * #define __NR_sched_setattr		(__NR_Linux + 309)
- * #define __NR_sched_getattr		(__NR_Linux + 310)
- * #define __NR_renameat2			(__NR_Linux + 311)
- */
-#define __NR_seccomp			(__NR_Linux + 312)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls		312
+#define __NR_Linux_syscalls		308
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux			5000
-#define __NR_64_Linux_syscalls		312
+#define __NR_64_Linux_syscalls		308
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
@@ -1037,21 +1025,15 @@
 #define __NR_process_vm_writev		(__NR_Linux + 310)
 #define __NR_kcmp			(__NR_Linux + 311)
 #define __NR_finit_module		(__NR_Linux + 312)
-/* Backporting seccomp, skip a few ...
- * #define __NR_sched_setattr		(__NR_Linux + 313)
- * #define __NR_sched_getattr		(__NR_Linux + 314)
- * #define __NR_renameat2			(__NR_Linux + 315)
- */
-#define __NR_seccomp			(__NR_Linux + 316)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls		316
+#define __NR_Linux_syscalls		312
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux			6000
-#define __NR_N32_Linux_syscalls		316
+#define __NR_N32_Linux_syscalls		312
 
 #endif /* _UAPI_ASM_UNISTD_H */
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index bcb2184e8a47..9b36424b03c5 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -593,12 +593,6 @@ einval: li	v0, -ENOSYS
 	sys	sys_process_vm_writev	6
 	sys	sys_kcmp		5
 	sys	sys_finit_module	3
-	/* Backporting seccomp, skip a few ... */
-	sys sys_ni_syscall		0	/* sys_sched_setattr */
-	sys sys_ni_syscall		0	/* sys_sched_getattr */		/* 4350 */
-	sys sys_ni_syscall		0	/* sys_renameat2 */
-	sys	sys_seccomp 3
-
 	.endm
 
 	/* We pre-compute the number of _instruction_ bytes needed to
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 285872f9d6d1..97a5909a61cf 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -424,8 +424,4 @@ sys_call_table:
 	PTR	sys_kcmp
 	PTR	sys_finit_module
 	PTR	sys_getdents64
-	sys sys_ni_syscall	/* sys_sched_setattr */
-	sys sys_ni_syscall	/* sys_sched_getattr */		/* 5310 */
-	sys sys_ni_syscall	/* sys_renameat2 */
-	sys	sys_seccomp
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index bdee1a1ed1c2..edcb6594e7b5 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -417,8 +417,4 @@ EXPORT(sysn32_call_table)
 	PTR	compat_sys_process_vm_writev	/* 6310 */
 	PTR	sys_kcmp
 	PTR	sys_finit_module
-	sys sys_ni_syscall	/* sys_sched_setattr */
-	sys sys_ni_syscall	/* sys_sched_getattr */
-	sys sys_ni_syscall	/* sys_renameat2 */			/* 6315 */
-	sys	sys_seccomp
 	.size	sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index a1f826a24578..74f485d3c0ef 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -541,8 +541,4 @@ sys_call_table:
 	PTR	compat_sys_process_vm_writev
 	PTR	sys_kcmp
 	PTR	sys_finit_module
-	sys sys_ni_syscall	/* sys_sched_setattr */
-	sys sys_ni_syscall	/* sys_sched_getattr */		/* 4350 */
-	sys sys_ni_syscall	/* sys_renameat2 */
-	sys	sys_seccomp
-	.size	sys32_call_table,.-sys32_call_table
+	.size	sys_call_table,.-sys_call_table
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index de6d048d0305..aabfb8380a1c 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -357,8 +357,3 @@
 348	i386	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
 349	i386	kcmp			sys_kcmp
 350	i386	finit_module		sys_finit_module
-# Backporting seccomp, skip a few ...
-# 351	i386	sched_setattr		sys_sched_setattr
-# 352	i386	sched_getattr		sys_sched_getattr
-# 353	i386	renameat2		sys_renameat2
-354	i386	seccomp			sys_seccomp
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index a40bd6eda554..38ae65dfd14f 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -320,11 +320,6 @@
 311	64	process_vm_writev	sys_process_vm_writev
 312	common	kcmp			sys_kcmp
 313	common	finit_module		sys_finit_module
-# Backporting seccomp, skip a few ...
-# 314	common	sched_setattr		sys_sched_setattr
-# 315	common	sched_getattr		sys_sched_getattr
-# 316	common	renameat2		sys_renameat2
-317	common	seccomp			sys_seccomp
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/fs/exec.c b/fs/exec.c
index b331086ace95..ffd7a813ad3d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1220,7 +1220,7 @@ EXPORT_SYMBOL(install_exec_creds);
 /*
  * determine how safe it is to execute the proposed program
  * - the caller must hold ->cred_guard_mutex to protect against
- *   PTRACE_ATTACH or seccomp thread-sync
+ *   PTRACE_ATTACH
  */
 static int check_unsafe_exec(struct linux_binprm *bprm)
 {
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 9687691799ff..6f19cfd1840e 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -3,8 +3,6 @@
 
 #include <uapi/linux/seccomp.h>
 
-#define SECCOMP_FILTER_FLAG_MASK	(SECCOMP_FILTER_FLAG_TSYNC)
-
 #ifdef CONFIG_SECCOMP
 
 #include <linux/thread_info.h>
@@ -16,11 +14,11 @@ struct seccomp_filter;
  *
  * @mode:  indicates one of the valid values above for controlled
  *         system calls available to a process.
- * @filter: must always point to a valid seccomp-filter or NULL as it is
- *          accessed without locking during system call entry.
+ * @filter: The metadata and ruleset for determining what system calls
+ *          are allowed for a task.
  *
  *          @filter must only be accessed from the context of current as there
- *          is no read locking.
+ *          is no locking.
  */
 struct seccomp {
 	int mode;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2a955dcc863c..4147d700a293 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -841,6 +841,4 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
 			 unsigned long idx1, unsigned long idx2);
 asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
-asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
-			    const char __user *uargs);
 #endif
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index b422ad5d238b..0cc74c4403e4 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -692,19 +692,9 @@ __SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
 __SYSCALL(__NR_kcmp, sys_kcmp)
 #define __NR_finit_module 273
 __SYSCALL(__NR_finit_module, sys_finit_module)
-/* Backporting seccomp, skip a few ...
- * #define __NR_sched_setattr 274
-__SYSCALL(__NR_sched_setattr, sys_sched_setattr)
- * #define __NR_sched_getattr 275
-__SYSCALL(__NR_sched_getattr, sys_sched_getattr)
- * #define __NR_renameat2 276
-__SYSCALL(__NR_renameat2, sys_renameat2)
- */
-#define __NR_seccomp 277
-__SYSCALL(__NR_seccomp, sys_seccomp)
 
 #undef __NR_syscalls
-#define __NR_syscalls 278
+#define __NR_syscalls 274
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 0f238a43ff1e..ac2dc9f72973 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -10,13 +10,6 @@
 #define SECCOMP_MODE_STRICT	1 /* uses hard-coded filter. */
 #define SECCOMP_MODE_FILTER	2 /* uses user-supplied filter. */
 
-/* Valid operations for seccomp syscall. */
-#define SECCOMP_SET_MODE_STRICT	0
-#define SECCOMP_SET_MODE_FILTER	1
-
-/* Valid flags for SECCOMP_SET_MODE_FILTER */
-#define SECCOMP_FILTER_FLAG_TSYNC	1
-
 /*
  * All BPF programs must return a 32-bit value.
  * The bottom 16-bits are for optional return data.
diff --git a/kernel/fork.c b/kernel/fork.c
index 8a3e9a91130c..41671a5d637d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -327,15 +327,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		goto free_ti;
 
 	tsk->stack = ti;
-#ifdef CONFIG_SECCOMP
-	/*
-	 * We must handle setting up seccomp filters once we're under
-	 * the sighand lock in case orig has changed between now and
-	 * then. Until then, filter must be NULL to avoid messing up
-	 * the usage counts on the error path calling free_task.
-	 */
-	tsk->seccomp.filter = NULL;
-#endif
 
 	setup_thread_stack(tsk, orig);
 	clear_user_return_notifier(tsk);
@@ -1111,39 +1102,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 	p->flags = new_flags;
 }
 
-static void copy_seccomp(struct task_struct *p)
-{
-#ifdef CONFIG_SECCOMP
-	/*
-	 * Must be called with sighand->lock held, which is common to
-	 * all threads in the group. Holding cred_guard_mutex is not
-	 * needed because this new task is not yet running and cannot
-	 * be racing exec.
-	 */
-	BUG_ON(!spin_is_locked(&current->sighand->siglock));
-
-	/* Ref-count the new filter user, and assign it. */
-	get_seccomp_filter(current);
-	p->seccomp = current->seccomp;
-
-	/*
-	 * Explicitly enable no_new_privs here in case it got set
-	 * between the task_struct being duplicated and holding the
-	 * sighand lock. The seccomp state and nnp must be in sync.
-	 */
-	if (task_no_new_privs(current))
-		task_set_no_new_privs(p);
-
-	/*
-	 * If the parent gained a seccomp mode after copying thread
-	 * flags and between before we held the sighand lock, we have
-	 * to manually enable the seccomp thread flag here.
-	 */
-	if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
-		set_tsk_thread_flag(p, TIF_SECCOMP);
-#endif
-}
-
 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 {
 	current->clear_child_tid = tidptr;
@@ -1247,6 +1205,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto fork_out;
 
 	ftrace_graph_init_task(p);
+	get_seccomp_filter(p);
 
 	rt_mutex_init_task(p);
 
@@ -1488,12 +1447,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	spin_lock(&current->sighand->siglock);
 
-	/*
-	 * Copy seccomp details explicitly here, in case they were changed
-	 * before holding sighand lock.
-	 */
-	copy_seccomp(p);
-
 	/*
 	 * Process group and session signals need to be delivered to just the
 	 * parent before the fork or both the parent and the child after the
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 2d13b264d850..b7a10048a32c 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -18,17 +18,15 @@
 #include <linux/compat.h>
 #include <linux/sched.h>
 #include <linux/seccomp.h>
-#include <linux/slab.h>
-#include <linux/syscalls.h>
 
 /* #define SECCOMP_DEBUG 1 */
 
 #ifdef CONFIG_SECCOMP_FILTER
 #include <asm/syscall.h>
 #include <linux/filter.h>
-#include <linux/pid.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
+#include <linux/slab.h>
 #include <linux/tracehook.h>
 #include <linux/uaccess.h>
 
@@ -203,184 +201,45 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
  */
 static u32 seccomp_run_filters(int syscall)
 {
-	struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
-	struct seccomp_data sd;
+	struct seccomp_filter *f;
 	u32 ret = SECCOMP_RET_ALLOW;
 
 	/* Ensure unexpected behavior doesn't result in failing open. */
-	if (unlikely(WARN_ON(f == NULL)))
+	if (WARN_ON(current->seccomp.filter == NULL))
 		return SECCOMP_RET_KILL;
 
-	/* Make sure cross-thread synced filter points somewhere sane. */
-	smp_read_barrier_depends();
-
-	populate_seccomp_data(&sd);
-
 	/*
 	 * All filters in the list are evaluated and the lowest BPF return
 	 * value always takes priority (ignoring the DATA).
 	 */
-	for (; f; f = f->prev) {
-		u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd);
-
+	for (f = current->seccomp.filter; f; f = f->prev) {
+		u32 cur_ret = sk_run_filter(NULL, f->insns);
 		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
 			ret = cur_ret;
 	}
 	return ret;
 }
-#endif /* CONFIG_SECCOMP_FILTER */
-
-static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
-{
-	BUG_ON(!spin_is_locked(&current->sighand->siglock));
-
-	if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
-		return false;
-
-	return true;
-}
-
-static inline void seccomp_assign_mode(struct task_struct *task,
-				       unsigned long seccomp_mode)
-{
-	BUG_ON(!spin_is_locked(&task->sighand->siglock));
-
-	task->seccomp.mode = seccomp_mode;
-	/*
-	 * Make sure TIF_SECCOMP cannot be set before the mode (and
-	 * filter) is set.
-	 */
-	smp_mb__before_atomic();
-	set_tsk_thread_flag(task, TIF_SECCOMP);
-}
-
-#ifdef CONFIG_SECCOMP_FILTER
-/* Returns 1 if the parent is an ancestor of the child. */
-static int is_ancestor(struct seccomp_filter *parent,
-		       struct seccomp_filter *child)
-{
-	/* NULL is the root ancestor. */
-	if (parent == NULL)
-		return 1;
-	for (; child; child = child->prev)
-		if (child == parent)
-			return 1;
-	return 0;
-}
 
 /**
- * seccomp_can_sync_threads: checks if all threads can be synchronized
- *
- * Expects sighand and cred_guard_mutex locks to be held.
- *
- * Returns 0 on success, -ve on error, or the pid of a thread which was
- * either not in the correct seccomp mode or it did not have an ancestral
- * seccomp filter.
- */
-static inline pid_t seccomp_can_sync_threads(void)
-{
-	struct task_struct *thread, *caller;
-
-	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
-	BUG_ON(!spin_is_locked(&current->sighand->siglock));
-
-	/* Validate all threads being eligible for synchronization. */
-	caller = current;
-	for_each_thread(caller, thread) {
-		pid_t failed;
-
-		/* Skip current, since it is initiating the sync. */
-		if (thread == caller)
-			continue;
-
-		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
-		    (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
-		     is_ancestor(thread->seccomp.filter,
-				 caller->seccomp.filter)))
-			continue;
-
-		/* Return the first thread that cannot be synchronized. */
-		failed = task_pid_vnr(thread);
-		/* If the pid cannot be resolved, then return -ESRCH */
-		if (unlikely(WARN_ON(failed == 0)))
-			failed = -ESRCH;
-		return failed;
-	}
-
-	return 0;
-}
-
-/**
- * seccomp_sync_threads: sets all threads to use current's filter
- *
- * Expects sighand and cred_guard_mutex locks to be held, and for
- * seccomp_can_sync_threads() to have returned success already
- * without dropping the locks.
- *
- */
-static inline void seccomp_sync_threads(void)
-{
-	struct task_struct *thread, *caller;
-
-	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
-	BUG_ON(!spin_is_locked(&current->sighand->siglock));
-
-	/* Synchronize all threads. */
-	caller = current;
-	for_each_thread(caller, thread) {
-		/* Skip current, since it needs no changes. */
-		if (thread == caller)
-			continue;
-
-		/* Get a task reference for the new leaf node. */
-		get_seccomp_filter(caller);
-		/*
-		 * Drop the task reference to the shared ancestor since
-		 * current's path will hold a reference.  (This also
-		 * allows a put before the assignment.)
-		 */
-		put_seccomp_filter(thread);
-		smp_store_release(&thread->seccomp.filter,
-				  caller->seccomp.filter);
-		/*
-		 * Opt the other thread into seccomp if needed.
-		 * As threads are considered to be trust-realm
-		 * equivalent (see ptrace_may_access), it is safe to
-		 * allow one thread to transition the other.
-		 */
-		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
-			/*
-			 * Don't let an unprivileged task work around
-			 * the no_new_privs restriction by creating
-			 * a thread that sets it up, enters seccomp,
-			 * then dies.
-			 */
-			if (task_no_new_privs(caller))
-				task_set_no_new_privs(thread);
-
-			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
-		}
-	}
-}
-
-/**
- * seccomp_prepare_filter: Prepares a seccomp filter for use.
+ * seccomp_attach_filter: Attaches a seccomp filter to current.
  * @fprog: BPF program to install
  *
- * Returns filter on success or an ERR_PTR on failure.
+ * Returns 0 on success or an errno on failure.
  */
-static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
+static long seccomp_attach_filter(struct sock_fprog *fprog)
 {
 	struct seccomp_filter *filter;
-	unsigned long fp_size;
-	struct sock_filter *fp;
-	int new_len;
+	unsigned long fp_size = fprog->len * sizeof(struct sock_filter);
+	unsigned long total_insns = fprog->len;
 	long ret;
 
 	if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
-		return ERR_PTR(-EINVAL);
-	BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
-	fp_size = fprog->len * sizeof(struct sock_filter);
+		return -EINVAL;
+
+	for (filter = current->seccomp.filter; filter; filter = filter->prev)
+		total_insns += filter->len + 4;  /* include a 4 instr penalty */
+	if (total_insns > MAX_INSNS_PER_PATH)
+		return -ENOMEM;
 
 	/*
 	 * Installing a seccomp filter requires that the task have
@@ -391,11 +250,15 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
 	if (!current->no_new_privs &&
 	    security_capable_noaudit(current_cred(), current_user_ns(),
 				     CAP_SYS_ADMIN) != 0)
-		return ERR_PTR(-EACCES);
+		return -EACCES;
 
-	fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN);
-	if (!fp)
-		return ERR_PTR(-ENOMEM);
+	/* Allocate a new seccomp_filter */
+	filter = kzalloc(sizeof(struct seccomp_filter) + fp_size,
+			 GFP_KERNEL|__GFP_NOWARN);
+	if (!filter)
+		return -ENOMEM;
+	atomic_set(&filter->usage, 1);
+	filter->len = fprog->len;
 
 	/* Copy the instructions from fprog. */
 	ret = -EFAULT;
@@ -410,46 +273,30 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
 	/* Check and rewrite the fprog for seccomp use */
 	ret = seccomp_check_filter(filter->insns, filter->len);
 	if (ret)
-		goto free_prog;
+		goto fail;
 
-	/* Allocate a new seccomp_filter */
-	ret = -ENOMEM;
-	filter = kzalloc(sizeof(struct seccomp_filter) +
-			 sizeof(struct sock_filter_int) * new_len,
-			 GFP_KERNEL|__GFP_NOWARN);
-	if (!filter)
-		goto free_prog;
-
-	ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len);
-	if (ret)
-		goto free_filter;
-	kfree(fp);
-
-	atomic_set(&filter->usage, 1);
-	filter->len = new_len;
-
-	return filter;
-
-free_filter_prog:
-	kfree(filter->prog);
-free_filter:
+	/*
+	 * If there is an existing filter, make it the prev and don't drop its
+	 * task reference.
+	 */
+	filter->prev = current->seccomp.filter;
+	current->seccomp.filter = filter;
+	return 0;
+fail:
 	kfree(filter);
-free_prog:
-	kfree(fp);
-	return ERR_PTR(ret);
+	return ret;
 }
 
 /**
- * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
+ * seccomp_attach_user_filter - attaches a user-supplied sock_fprog
  * @user_filter: pointer to the user data containing a sock_fprog.
  *
  * Returns 0 on success and non-zero otherwise.
  */
-static struct seccomp_filter *
-seccomp_prepare_user_filter(const char __user *user_filter)
+long seccomp_attach_user_filter(char __user *user_filter)
 {
 	struct sock_fprog fprog;
-	struct seccomp_filter *filter = ERR_PTR(-EFAULT);
+	long ret = -EFAULT;
 
 #ifdef CONFIG_COMPAT
 	if (is_compat_task()) {
@@ -462,56 +309,9 @@ seccomp_prepare_user_filter(const char __user *user_filter)
 #endif
 	if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
 		goto out;
-	filter = seccomp_prepare_filter(&fprog);
+	ret = seccomp_attach_filter(&fprog);
 out:
-	return filter;
-}
-
-/**
- * seccomp_attach_filter: validate and attach filter
- * @flags:  flags to change filter behavior
- * @filter: seccomp filter to add to the current process
- *
- * Caller must be holding current->sighand->siglock lock.
- *
- * Returns 0 on success, -ve on error.
- */
-static long seccomp_attach_filter(unsigned int flags,
-				  struct seccomp_filter *filter)
-{
-	unsigned long total_insns;
-	struct seccomp_filter *walker;
-
-	BUG_ON(!spin_is_locked(&current->sighand->siglock));
-
-	/* Validate resulting filter length. */
-	total_insns = filter->prog->len;
-	for (walker = current->seccomp.filter; walker; walker = walker->prev)
-		total_insns += walker->prog->len + 4;  /* 4 instr penalty */
-	if (total_insns > MAX_INSNS_PER_PATH)
-		return -ENOMEM;
-
-	/* If thread sync has been requested, check that it is possible. */
-	if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
-		int ret;
-
-		ret = seccomp_can_sync_threads();
-		if (ret)
-			return ret;
-	}
-
-	/*
-	 * If there is an existing filter, make it the prev and don't drop its
-	 * task reference.
-	 */
-	filter->prev = current->seccomp.filter;
-	current->seccomp.filter = filter;
-
-	/* Now that the new filter is in place, synchronize to all threads. */
-	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
-		seccomp_sync_threads();
-
-	return 0;
+	return ret;
 }
 
 /* get_seccomp_filter - increments the reference count of the filter on @tsk */
@@ -524,14 +324,6 @@ void get_seccomp_filter(struct task_struct *tsk)
 	atomic_inc(&orig->usage);
 }
 
-static inline void seccomp_filter_free(struct seccomp_filter *filter)
-{
-	if (filter) {
-		sk_filter_free(filter->prog);
-		kfree(filter);
-	}
-}
-
 /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
 void put_seccomp_filter(struct task_struct *tsk)
 {
@@ -540,7 +332,7 @@ void put_seccomp_filter(struct task_struct *tsk)
 	while (orig && atomic_dec_and_test(&orig->usage)) {
 		struct seccomp_filter *freeme = orig;
 		orig = orig->prev;
-		seccomp_filter_free(freeme);
+		kfree(freeme);
 	}
 }
 
@@ -584,17 +376,12 @@ static int mode1_syscalls_32[] = {
 
 int __secure_computing(int this_syscall)
 {
+	int mode = current->seccomp.mode;
 	int exit_sig = 0;
 	int *syscall;
 	u32 ret;
 
-	/*
-	 * Make sure that any changes to mode from another thread have
-	 * been seen after TIF_SECCOMP was seen.
-	 */
-	rmb();
-
-	switch (current->seccomp.mode) {
+	switch (mode) {
 	case SECCOMP_MODE_STRICT:
 		syscall = mode1_syscalls;
 #ifdef CONFIG_COMPAT
@@ -679,153 +466,48 @@ long prctl_get_seccomp(void)
 	return current->seccomp.mode;
 }
 
-/**
- * seccomp_set_mode_strict: internal function for setting strict seccomp
- *
- * Once current->seccomp.mode is non-zero, it may not be changed.
- *
- * Returns 0 on success or -EINVAL on failure.
- */
-static long seccomp_set_mode_strict(void)
-{
-	const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
-	long ret = -EINVAL;
-
-	spin_lock_irq(&current->sighand->siglock);
-
-	if (!seccomp_may_assign_mode(seccomp_mode))
-		goto out;
-
-#ifdef TIF_NOTSC
-	disable_TSC();
-#endif
-	seccomp_assign_mode(current, seccomp_mode);
-	ret = 0;
-
-out:
-	spin_unlock_irq(&current->sighand->siglock);
-
-	return ret;
-}
-
-#ifdef CONFIG_SECCOMP_FILTER
-/**
- * seccomp_set_mode_filter: internal function for setting seccomp filter
- * @flags:  flags to change filter behavior
- * @filter: struct sock_fprog containing filter
- *
- * This function may be called repeatedly to install additional filters.
- * Every filter successfully installed will be evaluated (in reverse order)
- * for each system call the task makes.
- *
- * Once current->seccomp.mode is non-zero, it may not be changed.
- *
- * Returns 0 on success or -EINVAL on failure.
- */
-static long seccomp_set_mode_filter(unsigned int flags,
-				    const char __user *filter)
-{
-	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
-	struct seccomp_filter *prepared = NULL;
-	long ret = -EINVAL;
-
-	/* Validate flags. */
-	if (flags & ~SECCOMP_FILTER_FLAG_MASK)
-		return -EINVAL;
-
-	/* Prepare the new filter before holding any locks. */
-	prepared = seccomp_prepare_user_filter(filter);
-	if (IS_ERR(prepared))
-		return PTR_ERR(prepared);
-
-	/*
-	 * Make sure we cannot change seccomp or nnp state via TSYNC
-	 * while another thread is in the middle of calling exec.
-	 */
-	if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
-	    mutex_lock_killable(&current->signal->cred_guard_mutex))
-		goto out_free;
-
-	spin_lock_irq(&current->sighand->siglock);
-
-	if (!seccomp_may_assign_mode(seccomp_mode))
-		goto out;
-
-	ret = seccomp_attach_filter(flags, prepared);
-	if (ret)
-		goto out;
-	/* Do not free the successfully attached filter. */
-	prepared = NULL;
-
-	seccomp_assign_mode(current, seccomp_mode);
-out:
-	spin_unlock_irq(&current->sighand->siglock);
-	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
-		mutex_unlock(&current->signal->cred_guard_mutex);
-out_free:
-	seccomp_filter_free(prepared);
-	return ret;
-}
-#else
-static inline long seccomp_set_mode_filter(unsigned int flags,
-					   const char __user *filter)
-{
-	return -EINVAL;
-}
-#endif
-
-/* Common entry point for both prctl and syscall. */
-static long do_seccomp(unsigned int op, unsigned int flags,
-		       const char __user *uargs)
-{
-	switch (op) {
-	case SECCOMP_SET_MODE_STRICT:
-		if (flags != 0 || uargs != NULL)
-			return -EINVAL;
-		return seccomp_set_mode_strict();
-	case SECCOMP_SET_MODE_FILTER:
-		return seccomp_set_mode_filter(flags, uargs);
-	default:
-		return -EINVAL;
-	}
-}
-
-SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
-			 const char __user *, uargs)
-{
-	return do_seccomp(op, flags, uargs);
-}
-
 /**
  * prctl_set_seccomp: configures current->seccomp.mode
  * @seccomp_mode: requested mode to use
  * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
  *
+ * This function may be called repeatedly with a @seccomp_mode of
+ * SECCOMP_MODE_FILTER to install additional filters.  Every filter
+ * successfully installed will be evaluated (in reverse order) for each system
+ * call the task makes.
+ *
+ * Once current->seccomp.mode is non-zero, it may not be changed.
+ *
  * Returns 0 on success or -EINVAL on failure.
  */
 long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
 {
-	unsigned int op;
-	char __user *uargs;
+	long ret = -EINVAL;
+
+	if (current->seccomp.mode &&
+	    current->seccomp.mode != seccomp_mode)
+		goto out;
 
 	switch (seccomp_mode) {
 	case SECCOMP_MODE_STRICT:
-		op = SECCOMP_SET_MODE_STRICT;
-		/*
-		 * Setting strict mode through prctl always ignored filter,
-		 * so make sure it is always NULL here to pass the internal
-		 * check in do_seccomp().
-		 */
-		uargs = NULL;
+		ret = 0;
+#ifdef TIF_NOTSC
+		disable_TSC();
+#endif
 		break;
+#ifdef CONFIG_SECCOMP_FILTER
 	case SECCOMP_MODE_FILTER:
-		op = SECCOMP_SET_MODE_FILTER;
-		uargs = filter;
+		ret = seccomp_attach_user_filter(filter);
+		if (ret)
+			goto out;
 		break;
+#endif
 	default:
-		return -EINVAL;
+		goto out;
 	}
 
-	/* prctl interface doesn't have flags, so they are always zero. */
-	return do_seccomp(op, 0, uargs);
+	current->seccomp.mode = seccomp_mode;
+	set_thread_flag(TIF_SECCOMP);
+out:
+	return ret;
 }
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 7e7fc0a082c4..7078052284fd 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -209,6 +209,3 @@ cond_syscall(compat_sys_open_by_handle_at);
 
 /* compare kernel pointers */
 cond_syscall(sys_kcmp);
-
-/* operate on Secure Computing state */
-cond_syscall(sys_seccomp);

From f91c274ab850b627eaa7862ef1241ce5d720e03e Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Wed, 30 Apr 2014 10:51:29 +0100
Subject: [PATCH 0388/1185] arm64: make a single hook to syscall_trace() for
 all syscall features

Currently syscall_trace() is called only for ptrace.
With additional TIF_xx flags defined, it is now called in all the cases
of audit, ftrace and seccomp in addition to ptrace.

Acked-by: Richard Guy Briggs <rgb@redhat.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

Conflicts:
	arch/arm64/include/asm/thread_info.h
	arch/arm64/kernel/entry.S

Change-Id: Iee71c44c45b363194a1cc7182906c0afa6b5348b
---
 arch/arm64/include/asm/thread_info.h | 13 +++++++++++++
 arch/arm64/kernel/entry.S            |  5 +++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 5e95a6ce074a..c77b13b4e36e 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -97,6 +97,9 @@ static inline struct thread_info *current_thread_info(void)
 /*
  * thread information flags:
  *  TIF_SYSCALL_TRACE	- syscall trace active
+ *  TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace
+ *  TIF_SYSCALL_AUDIT	- syscall auditing
+ *  TIF_SECCOMP		- syscall secure computing
  *  TIF_SIGPENDING	- signal pending
  *  TIF_NEED_RESCHED	- rescheduling necessary
  *  TIF_NOTIFY_RESUME	- callback before returning to user
@@ -108,6 +111,9 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
 #define TIF_FOREIGN_FPSTATE	3	/* CPU's FP state is not current's */
 #define TIF_SYSCALL_TRACE	8
+#define TIF_SYSCALL_AUDIT	9
+#define TIF_SYSCALL_TRACEPOINT	10
+#define TIF_SECCOMP		11
 #define TIF_POLLING_NRFLAG	16
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
 #define TIF_FREEZE		19
@@ -120,10 +126,17 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_FOREIGN_FPSTATE	(1 << TIF_FOREIGN_FPSTATE)
+#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
+#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_32BIT		(1 << TIF_32BIT)
 
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
 
+#define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
+				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 0b65510230bb..0c609291e963 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -642,8 +642,9 @@ el0_svc_naked:					// compat entry point
 	enable_irq
 
 	get_thread_info tsk
-	ldr	x16, [tsk, #TI_FLAGS]		// check for syscall tracing
-	tbnz	x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls?
+	ldr	x16, [tsk, #TI_FLAGS]		// check for syscall hooks
+	tst	x16, #_TIF_SYSCALL_WORK
+	b.ne	__sys_trace
 	adr	lr, ret_fast_syscall		// return address
 	cmp     scno, sc_nr                     // check upper syscall limit
 	b.hs	ni_sys

From 26108f2dc78941405736a7ab424d15f04b3d1ec3 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Wed, 30 Apr 2014 10:51:30 +0100
Subject: [PATCH 0389/1185] arm64: split syscall_trace() into separate
 functions for enter/exit

As is done on arm, this change makes it easy to confirm that we invoke
syscall-related hooks, including the syscall tracepoint, audit and seccomp
(which will be implemented later), in the correct order. That is, on exit
the operations are undone in the opposite order to that in which they were
done on entry.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/entry.S  | 10 +++-----
 arch/arm64/kernel/ptrace.c | 50 ++++++++++++++++++++++----------------
 2 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 0c609291e963..5b368d51369e 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -660,9 +660,8 @@ ENDPROC(el0_svc)
 	 * switches, and waiting for our parent to respond.
 	 */
 __sys_trace:
-	mov	x1, sp
-	mov	w0, #0				// trace entry
-	bl	syscall_trace
+	mov	x0, sp
+	bl	syscall_trace_enter
 	adr	lr, __sys_trace_return		// return address
 	uxtw	scno, w0			// syscall number (possibly new)
 	mov	x1, sp				// pointer to regs
@@ -677,9 +676,8 @@ __sys_trace:
 
 __sys_trace_return:
 	str	x0, [sp]			// save returned x0
-	mov	x1, sp
-	mov	w0, #1				// trace exit
-	bl	syscall_trace
+	mov	x0, sp
+	bl	syscall_trace_exit
 	b	ret_to_user
 
 /*
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 7190a6544cab..f3b14fd27ab4 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1064,35 +1064,43 @@ long arch_ptrace(struct task_struct *child, long request,
 	return ptrace_request(child, request, addr, data);
 }
 
-asmlinkage int syscall_trace(int dir, struct pt_regs *regs)
+enum ptrace_syscall_dir {
+	PTRACE_SYSCALL_ENTER = 0,
+	PTRACE_SYSCALL_EXIT,
+};
+
+static void tracehook_report_syscall(struct pt_regs *regs,
+				     enum ptrace_syscall_dir dir)
 {
+	int regno;
 	unsigned long saved_reg;
 
-	if (!test_thread_flag(TIF_SYSCALL_TRACE))
-		return regs->syscallno;
+	/*
+	 * A scratch register (ip(r12) on AArch32, x7 on AArch64) is
+	 * used to denote syscall entry/exit:
+	 */
+	regno = (is_compat_task() ? 12 : 7);
+	saved_reg = regs->regs[regno];
+	regs->regs[regno] = dir;
 
-	if (is_compat_task()) {
-		/* AArch32 uses ip (r12) for scratch */
-		saved_reg = regs->regs[12];
-		regs->regs[12] = dir;
-	} else {
-		/*
-		 * Save X7. X7 is used to denote syscall entry/exit:
-		 *   X7 = 0 -> entry, = 1 -> exit
-		 */
-		saved_reg = regs->regs[7];
-		regs->regs[7] = dir;
-	}
-
-	if (dir)
+	if (dir == PTRACE_SYSCALL_EXIT)
 		tracehook_report_syscall_exit(regs, 0);
 	else if (tracehook_report_syscall_entry(regs))
 		regs->syscallno = ~0UL;
 
-	if (is_compat_task())
-		regs->regs[12] = saved_reg;
-	else
-		regs->regs[7] = saved_reg;
+	regs->regs[regno] = saved_reg;
+}
+
+asmlinkage int syscall_trace_enter(struct pt_regs *regs)
+{
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
 
 	return regs->syscallno;
 }
+
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
+{
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
+}

From cfc7e99e9e3900056028a7d90072e9ea0d886f8d Mon Sep 17 00:00:00 2001
From: JP Abgrall <jpa@google.com>
Date: Wed, 17 Sep 2014 17:59:28 -0700
Subject: [PATCH 0390/1185] arm64: Add __NR_* definitions for compat syscalls

This patch adds __NR_* definitions to asm/unistd32.h, moves the
__NR_compat_* definitions to asm/unistd.h and removes all the explicit
unistd32.h includes apart from the one building the compat syscall
table. The aim is to have the compat __NR_* definitions available but
without colliding with the native syscall definitions (required by
lib/compat_audit.c to avoid duplicating the audit header files between
native and compat).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

Conflicts:
	arch/arm64/include/asm/unistd32.h
	arch/arm64/kernel/kuser32.S

Change-Id: I8776881b5beb39769aadc4c4f14a51ea54325112
---
 arch/arm64/include/asm/unistd.h   |   17 +
 arch/arm64/include/asm/unistd32.h | 1156 +++++++++++++++++++----------
 arch/arm64/kernel/entry.S         |    1 -
 arch/arm64/kernel/kuser32.S       |    3 +
 arch/arm64/kernel/signal32.c      |    2 +-
 arch/arm64/kernel/sys_compat.c    |    2 +-
 6 files changed, 782 insertions(+), 399 deletions(-)

diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 82ce217e94cf..38f8799a3d31 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -25,6 +25,23 @@
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
+
+/*
+ * Compat syscall numbers used by the AArch64 kernel.
+ */
+#define __NR_compat_restart_syscall	0
+#define __NR_compat_sigreturn		119
+#define __NR_compat_rt_sigreturn	173
+
+/*
+ * The following SVCs are ARM private.
+ */
+#define __ARM_NR_COMPAT_BASE		0x0f0000
+#define __ARM_NR_compat_cacheflush	(__ARM_NR_COMPAT_BASE+2)
+#define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE+5)
+
+#define __NR_compat_syscalls		383
 #endif
+
 #define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 58125bf008d3..dd336c150f3f 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -21,399 +21,763 @@
 #define __SYSCALL(x, y)
 #endif
 
-__SYSCALL(0,   sys_restart_syscall)
-__SYSCALL(1,   sys_exit)
-__SYSCALL(2,   sys_fork)
-__SYSCALL(3,   sys_read)
-__SYSCALL(4,   sys_write)
-__SYSCALL(5,   compat_sys_open)
-__SYSCALL(6,   sys_close)
-__SYSCALL(7,   sys_ni_syscall)			/* 7 was sys_waitpid */
-__SYSCALL(8,   sys_creat)
-__SYSCALL(9,   sys_link)
-__SYSCALL(10,  sys_unlink)
-__SYSCALL(11,  compat_sys_execve)
-__SYSCALL(12,  sys_chdir)
-__SYSCALL(13,  sys_ni_syscall)			/* 13 was sys_time */
-__SYSCALL(14,  sys_mknod)
-__SYSCALL(15,  sys_chmod)
-__SYSCALL(16,  sys_lchown16)
-__SYSCALL(17,  sys_ni_syscall)			/* 17 was sys_break */
-__SYSCALL(18,  sys_ni_syscall)			/* 18 was sys_stat */
-__SYSCALL(19,  compat_sys_lseek)
-__SYSCALL(20,  sys_getpid)
-__SYSCALL(21,  compat_sys_mount)
-__SYSCALL(22,  sys_ni_syscall)			/* 22 was sys_umount */
-__SYSCALL(23,  sys_setuid16)
-__SYSCALL(24,  sys_getuid16)
-__SYSCALL(25,  sys_ni_syscall)			/* 25 was sys_stime */
-__SYSCALL(26,  compat_sys_ptrace)
-__SYSCALL(27,  sys_ni_syscall)			/* 27 was sys_alarm */
-__SYSCALL(28,  sys_ni_syscall)			/* 28 was sys_fstat */
-__SYSCALL(29,  sys_pause)
-__SYSCALL(30,  sys_ni_syscall)			/* 30 was sys_utime */
-__SYSCALL(31,  sys_ni_syscall)			/* 31 was sys_stty */
-__SYSCALL(32,  sys_ni_syscall)			/* 32 was sys_gtty */
-__SYSCALL(33,  sys_access)
-__SYSCALL(34,  sys_nice)
-__SYSCALL(35,  sys_ni_syscall)			/* 35 was sys_ftime */
-__SYSCALL(36,  sys_sync)
-__SYSCALL(37,  sys_kill)
-__SYSCALL(38,  sys_rename)
-__SYSCALL(39,  sys_mkdir)
-__SYSCALL(40,  sys_rmdir)
-__SYSCALL(41,  sys_dup)
-__SYSCALL(42,  sys_pipe)
-__SYSCALL(43,  compat_sys_times)
-__SYSCALL(44,  sys_ni_syscall)			/* 44 was sys_prof */
-__SYSCALL(45,  sys_brk)
-__SYSCALL(46,  sys_setgid16)
-__SYSCALL(47,  sys_getgid16)
-__SYSCALL(48,  sys_ni_syscall)			/* 48 was sys_signal */
-__SYSCALL(49,  sys_geteuid16)
-__SYSCALL(50,  sys_getegid16)
-__SYSCALL(51,  sys_acct)
-__SYSCALL(52,  sys_umount)
-__SYSCALL(53,  sys_ni_syscall)			/* 53 was sys_lock */
-__SYSCALL(54,  compat_sys_ioctl)
-__SYSCALL(55,  compat_sys_fcntl)
-__SYSCALL(56,  sys_ni_syscall)			/* 56 was sys_mpx */
-__SYSCALL(57,  sys_setpgid)
-__SYSCALL(58,  sys_ni_syscall)			/* 58 was sys_ulimit */
-__SYSCALL(59,  sys_ni_syscall)			/* 59 was sys_olduname */
-__SYSCALL(60,  sys_umask)
-__SYSCALL(61,  sys_chroot)
-__SYSCALL(62,  compat_sys_ustat)
-__SYSCALL(63,  sys_dup2)
-__SYSCALL(64,  sys_getppid)
-__SYSCALL(65,  sys_getpgrp)
-__SYSCALL(66,  sys_setsid)
-__SYSCALL(67,  compat_sys_sigaction)
-__SYSCALL(68,  sys_ni_syscall)			/* 68 was sys_sgetmask */
-__SYSCALL(69,  sys_ni_syscall)			/* 69 was sys_ssetmask */
-__SYSCALL(70,  sys_setreuid16)
-__SYSCALL(71,  sys_setregid16)
-__SYSCALL(72,  sys_sigsuspend)
-__SYSCALL(73,  compat_sys_sigpending)
-__SYSCALL(74,  sys_sethostname)
-__SYSCALL(75,  compat_sys_setrlimit)
-__SYSCALL(76,  sys_ni_syscall)			/* 76 was compat_sys_getrlimit */
-__SYSCALL(77,  compat_sys_getrusage)
-__SYSCALL(78,  compat_sys_gettimeofday)
-__SYSCALL(79,  compat_sys_settimeofday)
-__SYSCALL(80,  sys_getgroups16)
-__SYSCALL(81,  sys_setgroups16)
-__SYSCALL(82,  sys_ni_syscall)			/* 82 was compat_sys_select */
-__SYSCALL(83,  sys_symlink)
-__SYSCALL(84,  sys_ni_syscall)			/* 84 was sys_lstat */
-__SYSCALL(85,  sys_readlink)
-__SYSCALL(86,  sys_uselib)
-__SYSCALL(87,  sys_swapon)
-__SYSCALL(88,  sys_reboot)
-__SYSCALL(89,  sys_ni_syscall)			/* 89 was sys_readdir */
-__SYSCALL(90,  sys_ni_syscall)			/* 90 was sys_mmap */
-__SYSCALL(91,  sys_munmap)
-__SYSCALL(92,  compat_sys_truncate)
-__SYSCALL(93,  compat_sys_ftruncate)
-__SYSCALL(94,  sys_fchmod)
-__SYSCALL(95,  sys_fchown16)
-__SYSCALL(96,  sys_getpriority)
-__SYSCALL(97,  sys_setpriority)
-__SYSCALL(98,  sys_ni_syscall)			/* 98 was sys_profil */
-__SYSCALL(99,  compat_sys_statfs)
-__SYSCALL(100, compat_sys_fstatfs)
-__SYSCALL(101, sys_ni_syscall)			/* 101 was sys_ioperm */
-__SYSCALL(102, sys_ni_syscall)			/* 102 was sys_socketcall */
-__SYSCALL(103, sys_syslog)
-__SYSCALL(104, compat_sys_setitimer)
-__SYSCALL(105, compat_sys_getitimer)
-__SYSCALL(106, compat_sys_newstat)
-__SYSCALL(107, compat_sys_newlstat)
-__SYSCALL(108, compat_sys_newfstat)
-__SYSCALL(109, sys_ni_syscall)			/* 109 was sys_uname */
-__SYSCALL(110, sys_ni_syscall)			/* 110 was sys_iopl */
-__SYSCALL(111, sys_vhangup)
-__SYSCALL(112, sys_ni_syscall)			/* 112 was sys_idle */
-__SYSCALL(113, sys_ni_syscall)			/* 113 was sys_syscall */
-__SYSCALL(114, compat_sys_wait4)
-__SYSCALL(115, sys_swapoff)
-__SYSCALL(116, compat_sys_sysinfo)
-__SYSCALL(117, sys_ni_syscall)			/* 117 was sys_ipc */
-__SYSCALL(118, sys_fsync)
-__SYSCALL(119, compat_sys_sigreturn_wrapper)
-__SYSCALL(120, sys_clone)
-__SYSCALL(121, sys_setdomainname)
-__SYSCALL(122, sys_newuname)
-__SYSCALL(123, sys_ni_syscall)			/* 123 was sys_modify_ldt */
-__SYSCALL(124, compat_sys_adjtimex)
-__SYSCALL(125, sys_mprotect)
-__SYSCALL(126, compat_sys_sigprocmask)
-__SYSCALL(127, sys_ni_syscall)			/* 127 was sys_create_module */
-__SYSCALL(128, sys_init_module)
-__SYSCALL(129, sys_delete_module)
-__SYSCALL(130, sys_ni_syscall)			/* 130 was sys_get_kernel_syms */
-__SYSCALL(131, sys_quotactl)
-__SYSCALL(132, sys_getpgid)
-__SYSCALL(133, sys_fchdir)
-__SYSCALL(134, sys_bdflush)
-__SYSCALL(135, sys_sysfs)
-__SYSCALL(136, sys_personality)
-__SYSCALL(137, sys_ni_syscall)			/* 137 was sys_afs_syscall */
-__SYSCALL(138, sys_setfsuid16)
-__SYSCALL(139, sys_setfsgid16)
-__SYSCALL(140, sys_llseek)
-__SYSCALL(141, compat_sys_getdents)
-__SYSCALL(142, compat_sys_select)
-__SYSCALL(143, sys_flock)
-__SYSCALL(144, sys_msync)
-__SYSCALL(145, compat_sys_readv)
-__SYSCALL(146, compat_sys_writev)
-__SYSCALL(147, sys_getsid)
-__SYSCALL(148, sys_fdatasync)
-__SYSCALL(149, compat_sys_sysctl)
-__SYSCALL(150, sys_mlock)
-__SYSCALL(151, sys_munlock)
-__SYSCALL(152, sys_mlockall)
-__SYSCALL(153, sys_munlockall)
-__SYSCALL(154, sys_sched_setparam)
-__SYSCALL(155, sys_sched_getparam)
-__SYSCALL(156, sys_sched_setscheduler)
-__SYSCALL(157, sys_sched_getscheduler)
-__SYSCALL(158, sys_sched_yield)
-__SYSCALL(159, sys_sched_get_priority_max)
-__SYSCALL(160, sys_sched_get_priority_min)
-__SYSCALL(161, compat_sys_sched_rr_get_interval)
-__SYSCALL(162, compat_sys_nanosleep)
-__SYSCALL(163, sys_mremap)
-__SYSCALL(164, sys_setresuid16)
-__SYSCALL(165, sys_getresuid16)
-__SYSCALL(166, sys_ni_syscall)			/* 166 was sys_vm86 */
-__SYSCALL(167, sys_ni_syscall)			/* 167 was sys_query_module */
-__SYSCALL(168, sys_poll)
-__SYSCALL(169, sys_ni_syscall)
-__SYSCALL(170, sys_setresgid16)
-__SYSCALL(171, sys_getresgid16)
-__SYSCALL(172, sys_prctl)
-__SYSCALL(173, compat_sys_rt_sigreturn_wrapper)
-__SYSCALL(174, compat_sys_rt_sigaction)
-__SYSCALL(175, compat_sys_rt_sigprocmask)
-__SYSCALL(176, compat_sys_rt_sigpending)
-__SYSCALL(177, compat_sys_rt_sigtimedwait)
-__SYSCALL(178, compat_sys_rt_sigqueueinfo)
-__SYSCALL(179, compat_sys_rt_sigsuspend)
-__SYSCALL(180, compat_sys_pread64_wrapper)
-__SYSCALL(181, compat_sys_pwrite64_wrapper)
-__SYSCALL(182, sys_chown16)
-__SYSCALL(183, sys_getcwd)
-__SYSCALL(184, sys_capget)
-__SYSCALL(185, sys_capset)
-__SYSCALL(186, compat_sys_sigaltstack)
-__SYSCALL(187, compat_sys_sendfile)
-__SYSCALL(188, sys_ni_syscall)			/* 188 reserved */
-__SYSCALL(189, sys_ni_syscall)			/* 189 reserved */
-__SYSCALL(190, sys_vfork)
-__SYSCALL(191, compat_sys_getrlimit)		/* SuS compliant getrlimit */
-__SYSCALL(192, sys_mmap_pgoff)
-__SYSCALL(193, compat_sys_truncate64_wrapper)
-__SYSCALL(194, compat_sys_ftruncate64_wrapper)
-__SYSCALL(195, sys_stat64)
-__SYSCALL(196, sys_lstat64)
-__SYSCALL(197, sys_fstat64)
-__SYSCALL(198, sys_lchown)
-__SYSCALL(199, sys_getuid)
-__SYSCALL(200, sys_getgid)
-__SYSCALL(201, sys_geteuid)
-__SYSCALL(202, sys_getegid)
-__SYSCALL(203, sys_setreuid)
-__SYSCALL(204, sys_setregid)
-__SYSCALL(205, sys_getgroups)
-__SYSCALL(206, sys_setgroups)
-__SYSCALL(207, sys_fchown)
-__SYSCALL(208, sys_setresuid)
-__SYSCALL(209, sys_getresuid)
-__SYSCALL(210, sys_setresgid)
-__SYSCALL(211, sys_getresgid)
-__SYSCALL(212, sys_chown)
-__SYSCALL(213, sys_setuid)
-__SYSCALL(214, sys_setgid)
-__SYSCALL(215, sys_setfsuid)
-__SYSCALL(216, sys_setfsgid)
-__SYSCALL(217, compat_sys_getdents64)
-__SYSCALL(218, sys_pivot_root)
-__SYSCALL(219, sys_mincore)
-__SYSCALL(220, sys_madvise)
-__SYSCALL(221, compat_sys_fcntl64)
-__SYSCALL(222, sys_ni_syscall)			/* 222 for tux */
-__SYSCALL(223, sys_ni_syscall)			/* 223 is unused */
-__SYSCALL(224, sys_gettid)
-__SYSCALL(225, compat_sys_readahead_wrapper)
-__SYSCALL(226, sys_setxattr)
-__SYSCALL(227, sys_lsetxattr)
-__SYSCALL(228, sys_fsetxattr)
-__SYSCALL(229, sys_getxattr)
-__SYSCALL(230, sys_lgetxattr)
-__SYSCALL(231, sys_fgetxattr)
-__SYSCALL(232, sys_listxattr)
-__SYSCALL(233, sys_llistxattr)
-__SYSCALL(234, sys_flistxattr)
-__SYSCALL(235, sys_removexattr)
-__SYSCALL(236, sys_lremovexattr)
-__SYSCALL(237, sys_fremovexattr)
-__SYSCALL(238, sys_tkill)
-__SYSCALL(239, sys_sendfile64)
-__SYSCALL(240, compat_sys_futex)
-__SYSCALL(241, compat_sys_sched_setaffinity)
-__SYSCALL(242, compat_sys_sched_getaffinity)
-__SYSCALL(243, compat_sys_io_setup)
-__SYSCALL(244, sys_io_destroy)
-__SYSCALL(245, compat_sys_io_getevents)
-__SYSCALL(246, compat_sys_io_submit)
-__SYSCALL(247, sys_io_cancel)
-__SYSCALL(248, sys_exit_group)
-__SYSCALL(249, compat_sys_lookup_dcookie)
-__SYSCALL(250, sys_epoll_create)
-__SYSCALL(251, sys_epoll_ctl)
-__SYSCALL(252, sys_epoll_wait)
-__SYSCALL(253, sys_remap_file_pages)
-__SYSCALL(254, sys_ni_syscall)			/* 254 for set_thread_area */
-__SYSCALL(255, sys_ni_syscall)			/* 255 for get_thread_area */
-__SYSCALL(256, sys_set_tid_address)
-__SYSCALL(257, compat_sys_timer_create)
-__SYSCALL(258, compat_sys_timer_settime)
-__SYSCALL(259, compat_sys_timer_gettime)
-__SYSCALL(260, sys_timer_getoverrun)
-__SYSCALL(261, sys_timer_delete)
-__SYSCALL(262, compat_sys_clock_settime)
-__SYSCALL(263, compat_sys_clock_gettime)
-__SYSCALL(264, compat_sys_clock_getres)
-__SYSCALL(265, compat_sys_clock_nanosleep)
-__SYSCALL(266, compat_sys_statfs64_wrapper)
-__SYSCALL(267, compat_sys_fstatfs64_wrapper)
-__SYSCALL(268, sys_tgkill)
-__SYSCALL(269, compat_sys_utimes)
-__SYSCALL(270, compat_sys_fadvise64_64_wrapper)
-__SYSCALL(271, sys_pciconfig_iobase)
-__SYSCALL(272, sys_pciconfig_read)
-__SYSCALL(273, sys_pciconfig_write)
-__SYSCALL(274, compat_sys_mq_open)
-__SYSCALL(275, sys_mq_unlink)
-__SYSCALL(276, compat_sys_mq_timedsend)
-__SYSCALL(277, compat_sys_mq_timedreceive)
-__SYSCALL(278, compat_sys_mq_notify)
-__SYSCALL(279, compat_sys_mq_getsetattr)
-__SYSCALL(280, compat_sys_waitid)
-__SYSCALL(281, sys_socket)
-__SYSCALL(282, sys_bind)
-__SYSCALL(283, sys_connect)
-__SYSCALL(284, sys_listen)
-__SYSCALL(285, sys_accept)
-__SYSCALL(286, sys_getsockname)
-__SYSCALL(287, sys_getpeername)
-__SYSCALL(288, sys_socketpair)
-__SYSCALL(289, sys_send)
-__SYSCALL(290, sys_sendto)
-__SYSCALL(291, compat_sys_recv)
-__SYSCALL(292, compat_sys_recvfrom)
-__SYSCALL(293, sys_shutdown)
-__SYSCALL(294, compat_sys_setsockopt)
-__SYSCALL(295, compat_sys_getsockopt)
-__SYSCALL(296, compat_sys_sendmsg)
-__SYSCALL(297, compat_sys_recvmsg)
-__SYSCALL(298, sys_semop)
-__SYSCALL(299, sys_semget)
-__SYSCALL(300, compat_sys_semctl)
-__SYSCALL(301, compat_sys_msgsnd)
-__SYSCALL(302, compat_sys_msgrcv)
-__SYSCALL(303, sys_msgget)
-__SYSCALL(304, compat_sys_msgctl)
-__SYSCALL(305, compat_sys_shmat)
-__SYSCALL(306, sys_shmdt)
-__SYSCALL(307, sys_shmget)
-__SYSCALL(308, compat_sys_shmctl)
-__SYSCALL(309, sys_add_key)
-__SYSCALL(310, sys_request_key)
-__SYSCALL(311, compat_sys_keyctl)
-__SYSCALL(312, compat_sys_semtimedop)
-__SYSCALL(313, sys_ni_syscall)
-__SYSCALL(314, sys_ioprio_set)
-__SYSCALL(315, sys_ioprio_get)
-__SYSCALL(316, sys_inotify_init)
-__SYSCALL(317, sys_inotify_add_watch)
-__SYSCALL(318, sys_inotify_rm_watch)
-__SYSCALL(319, compat_sys_mbind)
-__SYSCALL(320, compat_sys_get_mempolicy)
-__SYSCALL(321, compat_sys_set_mempolicy)
-__SYSCALL(322, compat_sys_openat)
-__SYSCALL(323, sys_mkdirat)
-__SYSCALL(324, sys_mknodat)
-__SYSCALL(325, sys_fchownat)
-__SYSCALL(326, compat_sys_futimesat)
-__SYSCALL(327, sys_fstatat64)
-__SYSCALL(328, sys_unlinkat)
-__SYSCALL(329, sys_renameat)
-__SYSCALL(330, sys_linkat)
-__SYSCALL(331, sys_symlinkat)
-__SYSCALL(332, sys_readlinkat)
-__SYSCALL(333, sys_fchmodat)
-__SYSCALL(334, sys_faccessat)
-__SYSCALL(335, compat_sys_pselect6)
-__SYSCALL(336, compat_sys_ppoll)
-__SYSCALL(337, sys_unshare)
-__SYSCALL(338, compat_sys_set_robust_list)
-__SYSCALL(339, compat_sys_get_robust_list)
-__SYSCALL(340, sys_splice)
-__SYSCALL(341, compat_sys_sync_file_range2_wrapper)
-__SYSCALL(342, sys_tee)
-__SYSCALL(343, compat_sys_vmsplice)
-__SYSCALL(344, compat_sys_move_pages)
-__SYSCALL(345, sys_getcpu)
-__SYSCALL(346, compat_sys_epoll_pwait)
-__SYSCALL(347, compat_sys_kexec_load)
-__SYSCALL(348, compat_sys_utimensat)
-__SYSCALL(349, compat_sys_signalfd)
-__SYSCALL(350, sys_timerfd_create)
-__SYSCALL(351, sys_eventfd)
-__SYSCALL(352, compat_sys_fallocate_wrapper)
-__SYSCALL(353, compat_sys_timerfd_settime)
-__SYSCALL(354, compat_sys_timerfd_gettime)
-__SYSCALL(355, compat_sys_signalfd4)
-__SYSCALL(356, sys_eventfd2)
-__SYSCALL(357, sys_epoll_create1)
-__SYSCALL(358, sys_dup3)
-__SYSCALL(359, sys_pipe2)
-__SYSCALL(360, sys_inotify_init1)
-__SYSCALL(361, compat_sys_preadv)
-__SYSCALL(362, compat_sys_pwritev)
-__SYSCALL(363, compat_sys_rt_tgsigqueueinfo)
-__SYSCALL(364, sys_perf_event_open)
-__SYSCALL(365, compat_sys_recvmmsg)
-__SYSCALL(366, sys_accept4)
-__SYSCALL(367, sys_fanotify_init)
-__SYSCALL(368, compat_sys_fanotify_mark)
-__SYSCALL(369, sys_prlimit64)
-__SYSCALL(370, sys_name_to_handle_at)
-__SYSCALL(371, compat_sys_open_by_handle_at)
-__SYSCALL(372, compat_sys_clock_adjtime)
-__SYSCALL(373, sys_syncfs)
-__SYSCALL(374, compat_sys_sendmmsg)
-__SYSCALL(375, sys_setns)
-__SYSCALL(376, compat_sys_process_vm_readv)
-__SYSCALL(377, compat_sys_process_vm_writev)
-__SYSCALL(378, sys_ni_syscall)			/* 378 for kcmp */
-
-#define __NR_compat_syscalls		379
-
-/*
- * Compat syscall numbers used by the AArch64 kernel.
- */
-#define __NR_compat_restart_syscall	0
-#define __NR_compat_sigreturn		119
-#define __NR_compat_rt_sigreturn	173
-
-
-/*
- * The following SVCs are ARM private.
- */
-#define __ARM_NR_COMPAT_BASE		0x0f0000
-#define __ARM_NR_compat_cacheflush	(__ARM_NR_COMPAT_BASE+2)
-#define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE+5)
+#define __NR_restart_syscall 0
+__SYSCALL(__NR_restart_syscall, sys_restart_syscall)
+#define __NR_exit 1
+__SYSCALL(__NR_exit, sys_exit)
+#define __NR_fork 2
+__SYSCALL(__NR_fork, sys_fork)
+#define __NR_read 3
+__SYSCALL(__NR_read, sys_read)
+#define __NR_write 4
+__SYSCALL(__NR_write, sys_write)
+#define __NR_open 5
+__SYSCALL(__NR_open, compat_sys_open)
+#define __NR_close 6
+__SYSCALL(__NR_close, sys_close)
+			/* 7 was sys_waitpid */
+__SYSCALL(7, sys_ni_syscall)
+#define __NR_creat 8
+__SYSCALL(__NR_creat, sys_creat)
+#define __NR_link 9
+__SYSCALL(__NR_link, sys_link)
+#define __NR_unlink 10
+__SYSCALL(__NR_unlink, sys_unlink)
+#define __NR_execve 11
+__SYSCALL(__NR_execve, compat_sys_execve)
+#define __NR_chdir 12
+__SYSCALL(__NR_chdir, sys_chdir)
+			/* 13 was sys_time */
+__SYSCALL(13, sys_ni_syscall)
+#define __NR_mknod 14
+__SYSCALL(__NR_mknod, sys_mknod)
+#define __NR_chmod 15
+__SYSCALL(__NR_chmod, sys_chmod)
+#define __NR_lchown 16
+__SYSCALL(__NR_lchown, sys_lchown16)
+			/* 17 was sys_break */
+__SYSCALL(17, sys_ni_syscall)
+			/* 18 was sys_stat */
+__SYSCALL(18, sys_ni_syscall)
+#define __NR_lseek 19
+__SYSCALL(__NR_lseek, compat_sys_lseek)
+#define __NR_getpid 20
+__SYSCALL(__NR_getpid, sys_getpid)
+#define __NR_mount 21
+__SYSCALL(__NR_mount, compat_sys_mount)
+			/* 22 was sys_umount */
+__SYSCALL(22, sys_ni_syscall)
+#define __NR_setuid 23
+__SYSCALL(__NR_setuid, sys_setuid16)
+#define __NR_getuid 24
+__SYSCALL(__NR_getuid, sys_getuid16)
+			/* 25 was sys_stime */
+__SYSCALL(25, sys_ni_syscall)
+#define __NR_ptrace 26
+__SYSCALL(__NR_ptrace, compat_sys_ptrace)
+			/* 27 was sys_alarm */
+__SYSCALL(27, sys_ni_syscall)
+			/* 28 was sys_fstat */
+__SYSCALL(28, sys_ni_syscall)
+#define __NR_pause 29
+__SYSCALL(__NR_pause, sys_pause)
+			/* 30 was sys_utime */
+__SYSCALL(30, sys_ni_syscall)
+			/* 31 was sys_stty */
+__SYSCALL(31, sys_ni_syscall)
+			/* 32 was sys_gtty */
+__SYSCALL(32, sys_ni_syscall)
+#define __NR_access 33
+__SYSCALL(__NR_access, sys_access)
+#define __NR_nice 34
+__SYSCALL(__NR_nice, sys_nice)
+			/* 35 was sys_ftime */
+__SYSCALL(35, sys_ni_syscall)
+#define __NR_sync 36
+__SYSCALL(__NR_sync, sys_sync)
+#define __NR_kill 37
+__SYSCALL(__NR_kill, sys_kill)
+#define __NR_rename 38
+__SYSCALL(__NR_rename, sys_rename)
+#define __NR_mkdir 39
+__SYSCALL(__NR_mkdir, sys_mkdir)
+#define __NR_rmdir 40
+__SYSCALL(__NR_rmdir, sys_rmdir)
+#define __NR_dup 41
+__SYSCALL(__NR_dup, sys_dup)
+#define __NR_pipe 42
+__SYSCALL(__NR_pipe, sys_pipe)
+#define __NR_times 43
+__SYSCALL(__NR_times, compat_sys_times)
+			/* 44 was sys_prof */
+__SYSCALL(44, sys_ni_syscall)
+#define __NR_brk 45
+__SYSCALL(__NR_brk, sys_brk)
+#define __NR_setgid 46
+__SYSCALL(__NR_setgid, sys_setgid16)
+#define __NR_getgid 47
+__SYSCALL(__NR_getgid, sys_getgid16)
+			/* 48 was sys_signal */
+__SYSCALL(48, sys_ni_syscall)
+#define __NR_geteuid 49
+__SYSCALL(__NR_geteuid, sys_geteuid16)
+#define __NR_getegid 50
+__SYSCALL(__NR_getegid, sys_getegid16)
+#define __NR_acct 51
+__SYSCALL(__NR_acct, sys_acct)
+#define __NR_umount2 52
+__SYSCALL(__NR_umount2, sys_umount)
+			/* 53 was sys_lock */
+__SYSCALL(53, sys_ni_syscall)
+#define __NR_ioctl 54
+__SYSCALL(__NR_ioctl, compat_sys_ioctl)
+#define __NR_fcntl 55
+__SYSCALL(__NR_fcntl, compat_sys_fcntl)
+			/* 56 was sys_mpx */
+__SYSCALL(56, sys_ni_syscall)
+#define __NR_setpgid 57
+__SYSCALL(__NR_setpgid, sys_setpgid)
+			/* 58 was sys_ulimit */
+__SYSCALL(58, sys_ni_syscall)
+			/* 59 was sys_olduname */
+__SYSCALL(59, sys_ni_syscall)
+#define __NR_umask 60
+__SYSCALL(__NR_umask, sys_umask)
+#define __NR_chroot 61
+__SYSCALL(__NR_chroot, sys_chroot)
+#define __NR_ustat 62
+__SYSCALL(__NR_ustat, compat_sys_ustat)
+#define __NR_dup2 63
+__SYSCALL(__NR_dup2, sys_dup2)
+#define __NR_getppid 64
+__SYSCALL(__NR_getppid, sys_getppid)
+#define __NR_getpgrp 65
+__SYSCALL(__NR_getpgrp, sys_getpgrp)
+#define __NR_setsid 66
+__SYSCALL(__NR_setsid, sys_setsid)
+#define __NR_sigaction 67
+__SYSCALL(__NR_sigaction, compat_sys_sigaction)
+			/* 68 was sys_sgetmask */
+__SYSCALL(68, sys_ni_syscall)
+			/* 69 was sys_ssetmask */
+__SYSCALL(69, sys_ni_syscall)
+#define __NR_setreuid 70
+__SYSCALL(__NR_setreuid, sys_setreuid16)
+#define __NR_setregid 71
+__SYSCALL(__NR_setregid, sys_setregid16)
+#define __NR_sigsuspend 72
+__SYSCALL(__NR_sigsuspend, sys_sigsuspend)
+#define __NR_sigpending 73
+__SYSCALL(__NR_sigpending, compat_sys_sigpending)
+#define __NR_sethostname 74
+__SYSCALL(__NR_sethostname, sys_sethostname)
+#define __NR_setrlimit 75
+__SYSCALL(__NR_setrlimit, compat_sys_setrlimit)
+			/* 76 was compat_sys_getrlimit */
+__SYSCALL(76, sys_ni_syscall)
+#define __NR_getrusage 77
+__SYSCALL(__NR_getrusage, compat_sys_getrusage)
+#define __NR_gettimeofday 78
+__SYSCALL(__NR_gettimeofday, compat_sys_gettimeofday)
+#define __NR_settimeofday 79
+__SYSCALL(__NR_settimeofday, compat_sys_settimeofday)
+#define __NR_getgroups 80
+__SYSCALL(__NR_getgroups, sys_getgroups16)
+#define __NR_setgroups 81
+__SYSCALL(__NR_setgroups, sys_setgroups16)
+			/* 82 was compat_sys_select */
+__SYSCALL(82, sys_ni_syscall)
+#define __NR_symlink 83
+__SYSCALL(__NR_symlink, sys_symlink)
+			/* 84 was sys_lstat */
+__SYSCALL(84, sys_ni_syscall)
+#define __NR_readlink 85
+__SYSCALL(__NR_readlink, sys_readlink)
+#define __NR_uselib 86
+__SYSCALL(__NR_uselib, sys_uselib)
+#define __NR_swapon 87
+__SYSCALL(__NR_swapon, sys_swapon)
+#define __NR_reboot 88
+__SYSCALL(__NR_reboot, sys_reboot)
+			/* 89 was sys_readdir */
+__SYSCALL(89, sys_ni_syscall)
+			/* 90 was sys_mmap */
+__SYSCALL(90, sys_ni_syscall)
+#define __NR_munmap 91
+__SYSCALL(__NR_munmap, sys_munmap)
+#define __NR_truncate 92
+__SYSCALL(__NR_truncate, compat_sys_truncate)
+#define __NR_ftruncate 93
+__SYSCALL(__NR_ftruncate, compat_sys_ftruncate)
+#define __NR_fchmod 94
+__SYSCALL(__NR_fchmod, sys_fchmod)
+#define __NR_fchown 95
+__SYSCALL(__NR_fchown, sys_fchown16)
+#define __NR_getpriority 96
+__SYSCALL(__NR_getpriority, sys_getpriority)
+#define __NR_setpriority 97
+__SYSCALL(__NR_setpriority, sys_setpriority)
+			/* 98 was sys_profil */
+__SYSCALL(98, sys_ni_syscall)
+#define __NR_statfs 99
+__SYSCALL(__NR_statfs, compat_sys_statfs)
+#define __NR_fstatfs 100
+__SYSCALL(__NR_fstatfs, compat_sys_fstatfs)
+			/* 101 was sys_ioperm */
+__SYSCALL(101, sys_ni_syscall)
+			/* 102 was sys_socketcall */
+__SYSCALL(102, sys_ni_syscall)
+#define __NR_syslog 103
+__SYSCALL(__NR_syslog, sys_syslog)
+#define __NR_setitimer 104
+__SYSCALL(__NR_setitimer, compat_sys_setitimer)
+#define __NR_getitimer 105
+__SYSCALL(__NR_getitimer, compat_sys_getitimer)
+#define __NR_stat 106
+__SYSCALL(__NR_stat, compat_sys_newstat)
+#define __NR_lstat 107
+__SYSCALL(__NR_lstat, compat_sys_newlstat)
+#define __NR_fstat 108
+__SYSCALL(__NR_fstat, compat_sys_newfstat)
+			/* 109 was sys_uname */
+__SYSCALL(109, sys_ni_syscall)
+			/* 110 was sys_iopl */
+__SYSCALL(110, sys_ni_syscall)
+#define __NR_vhangup 111
+__SYSCALL(__NR_vhangup, sys_vhangup)
+			/* 112 was sys_idle */
+__SYSCALL(112, sys_ni_syscall)
+			/* 113 was sys_syscall */
+__SYSCALL(113, sys_ni_syscall)
+#define __NR_wait4 114
+__SYSCALL(__NR_wait4, compat_sys_wait4)
+#define __NR_swapoff 115
+__SYSCALL(__NR_swapoff, sys_swapoff)
+#define __NR_sysinfo 116
+__SYSCALL(__NR_sysinfo, compat_sys_sysinfo)
+			/* 117 was sys_ipc */
+__SYSCALL(117, sys_ni_syscall)
+#define __NR_fsync 118
+__SYSCALL(__NR_fsync, sys_fsync)
+#define __NR_sigreturn 119
+__SYSCALL(__NR_sigreturn, compat_sys_sigreturn_wrapper)
+#define __NR_clone 120
+__SYSCALL(__NR_clone, sys_clone)
+#define __NR_setdomainname 121
+__SYSCALL(__NR_setdomainname, sys_setdomainname)
+#define __NR_uname 122
+__SYSCALL(__NR_uname, sys_newuname)
+			/* 123 was sys_modify_ldt */
+__SYSCALL(123, sys_ni_syscall)
+#define __NR_adjtimex 124
+__SYSCALL(__NR_adjtimex, compat_sys_adjtimex)
+#define __NR_mprotect 125
+__SYSCALL(__NR_mprotect, sys_mprotect)
+#define __NR_sigprocmask 126
+__SYSCALL(__NR_sigprocmask, compat_sys_sigprocmask)
+			/* 127 was sys_create_module */
+__SYSCALL(127, sys_ni_syscall)
+#define __NR_init_module 128
+__SYSCALL(__NR_init_module, sys_init_module)
+#define __NR_delete_module 129
+__SYSCALL(__NR_delete_module, sys_delete_module)
+			/* 130 was sys_get_kernel_syms */
+__SYSCALL(130, sys_ni_syscall)
+#define __NR_quotactl 131
+__SYSCALL(__NR_quotactl, sys_quotactl)
+#define __NR_getpgid 132
+__SYSCALL(__NR_getpgid, sys_getpgid)
+#define __NR_fchdir 133
+__SYSCALL(__NR_fchdir, sys_fchdir)
+#define __NR_bdflush 134
+__SYSCALL(__NR_bdflush, sys_bdflush)
+#define __NR_sysfs 135
+__SYSCALL(__NR_sysfs, sys_sysfs)
+#define __NR_personality 136
+__SYSCALL(__NR_personality, sys_personality)
+			/* 137 was sys_afs_syscall */
+__SYSCALL(137, sys_ni_syscall)
+#define __NR_setfsuid 138
+__SYSCALL(__NR_setfsuid, sys_setfsuid16)
+#define __NR_setfsgid 139
+__SYSCALL(__NR_setfsgid, sys_setfsgid16)
+#define __NR__llseek 140
+__SYSCALL(__NR__llseek, sys_llseek)
+#define __NR_getdents 141
+__SYSCALL(__NR_getdents, compat_sys_getdents)
+#define __NR__newselect 142
+__SYSCALL(__NR__newselect, compat_sys_select)
+#define __NR_flock 143
+__SYSCALL(__NR_flock, sys_flock)
+#define __NR_msync 144
+__SYSCALL(__NR_msync, sys_msync)
+#define __NR_readv 145
+__SYSCALL(__NR_readv, compat_sys_readv)
+#define __NR_writev 146
+__SYSCALL(__NR_writev, compat_sys_writev)
+#define __NR_getsid 147
+__SYSCALL(__NR_getsid, sys_getsid)
+#define __NR_fdatasync 148
+__SYSCALL(__NR_fdatasync, sys_fdatasync)
+#define __NR__sysctl 149
+__SYSCALL(__NR__sysctl, compat_sys_sysctl)
+#define __NR_mlock 150
+__SYSCALL(__NR_mlock, sys_mlock)
+#define __NR_munlock 151
+__SYSCALL(__NR_munlock, sys_munlock)
+#define __NR_mlockall 152
+__SYSCALL(__NR_mlockall, sys_mlockall)
+#define __NR_munlockall 153
+__SYSCALL(__NR_munlockall, sys_munlockall)
+#define __NR_sched_setparam 154
+__SYSCALL(__NR_sched_setparam, sys_sched_setparam)
+#define __NR_sched_getparam 155
+__SYSCALL(__NR_sched_getparam, sys_sched_getparam)
+#define __NR_sched_setscheduler 156
+__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler)
+#define __NR_sched_getscheduler 157
+__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler)
+#define __NR_sched_yield 158
+__SYSCALL(__NR_sched_yield, sys_sched_yield)
+#define __NR_sched_get_priority_max 159
+__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
+#define __NR_sched_get_priority_min 160
+__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
+#define __NR_sched_rr_get_interval 161
+__SYSCALL(__NR_sched_rr_get_interval, compat_sys_sched_rr_get_interval)
+#define __NR_nanosleep 162
+__SYSCALL(__NR_nanosleep, compat_sys_nanosleep)
+#define __NR_mremap 163
+__SYSCALL(__NR_mremap, sys_mremap)
+#define __NR_setresuid 164
+__SYSCALL(__NR_setresuid, sys_setresuid16)
+#define __NR_getresuid 165
+__SYSCALL(__NR_getresuid, sys_getresuid16)
+			/* 166 was sys_vm86 */
+__SYSCALL(166, sys_ni_syscall)
+			/* 167 was sys_query_module */
+__SYSCALL(167, sys_ni_syscall)
+#define __NR_poll 168
+__SYSCALL(__NR_poll, sys_poll)
+#define __NR_nfsservctl 169
+__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
+#define __NR_setresgid 170
+__SYSCALL(__NR_setresgid, sys_setresgid16)
+#define __NR_getresgid 171
+__SYSCALL(__NR_getresgid, sys_getresgid16)
+#define __NR_prctl 172
+__SYSCALL(__NR_prctl, sys_prctl)
+#define __NR_rt_sigreturn 173
+__SYSCALL(__NR_rt_sigreturn, compat_sys_rt_sigreturn_wrapper)
+#define __NR_rt_sigaction 174
+__SYSCALL(__NR_rt_sigaction, compat_sys_rt_sigaction)
+#define __NR_rt_sigprocmask 175
+__SYSCALL(__NR_rt_sigprocmask, compat_sys_rt_sigprocmask)
+#define __NR_rt_sigpending 176
+__SYSCALL(__NR_rt_sigpending, compat_sys_rt_sigpending)
+#define __NR_rt_sigtimedwait 177
+__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait)
+#define __NR_rt_sigqueueinfo 178
+__SYSCALL(__NR_rt_sigqueueinfo, compat_sys_rt_sigqueueinfo)
+#define __NR_rt_sigsuspend 179
+__SYSCALL(__NR_rt_sigsuspend, compat_sys_rt_sigsuspend)
+#define __NR_pread64 180
+__SYSCALL(__NR_pread64, compat_sys_pread64_wrapper)
+#define __NR_pwrite64 181
+__SYSCALL(__NR_pwrite64, compat_sys_pwrite64_wrapper)
+#define __NR_chown 182
+__SYSCALL(__NR_chown, sys_chown16)
+#define __NR_getcwd 183
+__SYSCALL(__NR_getcwd, sys_getcwd)
+#define __NR_capget 184
+__SYSCALL(__NR_capget, sys_capget)
+#define __NR_capset 185
+__SYSCALL(__NR_capset, sys_capset)
+#define __NR_sigaltstack 186
+__SYSCALL(__NR_sigaltstack, compat_sys_sigaltstack)
+#define __NR_sendfile 187
+__SYSCALL(__NR_sendfile, compat_sys_sendfile)
+			/* 188 reserved */
+__SYSCALL(188, sys_ni_syscall)
+			/* 189 reserved */
+__SYSCALL(189, sys_ni_syscall)
+#define __NR_vfork 190
+__SYSCALL(__NR_vfork, sys_vfork)
+#define __NR_ugetrlimit 191	/* SuS compliant getrlimit */
+__SYSCALL(__NR_ugetrlimit, compat_sys_getrlimit)		/* SuS compliant getrlimit */
+#define __NR_mmap2 192
+__SYSCALL(__NR_mmap2, sys_mmap_pgoff)
+#define __NR_truncate64 193
+__SYSCALL(__NR_truncate64, compat_sys_truncate64_wrapper)
+#define __NR_ftruncate64 194
+__SYSCALL(__NR_ftruncate64, compat_sys_ftruncate64_wrapper)
+#define __NR_stat64 195
+__SYSCALL(__NR_stat64, sys_stat64)
+#define __NR_lstat64 196
+__SYSCALL(__NR_lstat64, sys_lstat64)
+#define __NR_fstat64 197
+__SYSCALL(__NR_fstat64, sys_fstat64)
+#define __NR_lchown32 198
+__SYSCALL(__NR_lchown32, sys_lchown)
+#define __NR_getuid32 199
+__SYSCALL(__NR_getuid32, sys_getuid)
+#define __NR_getgid32 200
+__SYSCALL(__NR_getgid32, sys_getgid)
+#define __NR_geteuid32 201
+__SYSCALL(__NR_geteuid32, sys_geteuid)
+#define __NR_getegid32 202
+__SYSCALL(__NR_getegid32, sys_getegid)
+#define __NR_setreuid32 203
+__SYSCALL(__NR_setreuid32, sys_setreuid)
+#define __NR_setregid32 204
+__SYSCALL(__NR_setregid32, sys_setregid)
+#define __NR_getgroups32 205
+__SYSCALL(__NR_getgroups32, sys_getgroups)
+#define __NR_setgroups32 206
+__SYSCALL(__NR_setgroups32, sys_setgroups)
+#define __NR_fchown32 207
+__SYSCALL(__NR_fchown32, sys_fchown)
+#define __NR_setresuid32 208
+__SYSCALL(__NR_setresuid32, sys_setresuid)
+#define __NR_getresuid32 209
+__SYSCALL(__NR_getresuid32, sys_getresuid)
+#define __NR_setresgid32 210
+__SYSCALL(__NR_setresgid32, sys_setresgid)
+#define __NR_getresgid32 211
+__SYSCALL(__NR_getresgid32, sys_getresgid)
+#define __NR_chown32 212
+__SYSCALL(__NR_chown32, sys_chown)
+#define __NR_setuid32 213
+__SYSCALL(__NR_setuid32, sys_setuid)
+#define __NR_setgid32 214
+__SYSCALL(__NR_setgid32, sys_setgid)
+#define __NR_setfsuid32 215
+__SYSCALL(__NR_setfsuid32, sys_setfsuid)
+#define __NR_setfsgid32 216
+__SYSCALL(__NR_setfsgid32, sys_setfsgid)
+#define __NR_getdents64 217
+__SYSCALL(__NR_getdents64, compat_sys_getdents64)
+#define __NR_pivot_root 218
+__SYSCALL(__NR_pivot_root, sys_pivot_root)
+#define __NR_mincore 219
+__SYSCALL(__NR_mincore, sys_mincore)
+#define __NR_madvise 220
+__SYSCALL(__NR_madvise, sys_madvise)
+#define __NR_fcntl64 221
+__SYSCALL(__NR_fcntl64, compat_sys_fcntl64)
+			/* 222 for tux */
+__SYSCALL(222, sys_ni_syscall)
+			/* 223 is unused */
+__SYSCALL(223, sys_ni_syscall)
+#define __NR_gettid 224
+__SYSCALL(__NR_gettid, sys_gettid)
+#define __NR_readahead 225
+__SYSCALL(__NR_readahead, compat_sys_readahead_wrapper)
+#define __NR_setxattr 226
+__SYSCALL(__NR_setxattr, sys_setxattr)
+#define __NR_lsetxattr 227
+__SYSCALL(__NR_lsetxattr, sys_lsetxattr)
+#define __NR_fsetxattr 228
+__SYSCALL(__NR_fsetxattr, sys_fsetxattr)
+#define __NR_getxattr 229
+__SYSCALL(__NR_getxattr, sys_getxattr)
+#define __NR_lgetxattr 230
+__SYSCALL(__NR_lgetxattr, sys_lgetxattr)
+#define __NR_fgetxattr 231
+__SYSCALL(__NR_fgetxattr, sys_fgetxattr)
+#define __NR_listxattr 232
+__SYSCALL(__NR_listxattr, sys_listxattr)
+#define __NR_llistxattr 233
+__SYSCALL(__NR_llistxattr, sys_llistxattr)
+#define __NR_flistxattr 234
+__SYSCALL(__NR_flistxattr, sys_flistxattr)
+#define __NR_removexattr 235
+__SYSCALL(__NR_removexattr, sys_removexattr)
+#define __NR_lremovexattr 236
+__SYSCALL(__NR_lremovexattr, sys_lremovexattr)
+#define __NR_fremovexattr 237
+__SYSCALL(__NR_fremovexattr, sys_fremovexattr)
+#define __NR_tkill 238
+__SYSCALL(__NR_tkill, sys_tkill)
+#define __NR_sendfile64 239
+__SYSCALL(__NR_sendfile64, sys_sendfile64)
+#define __NR_futex 240
+__SYSCALL(__NR_futex, compat_sys_futex)
+#define __NR_sched_setaffinity 241
+__SYSCALL(__NR_sched_setaffinity, compat_sys_sched_setaffinity)
+#define __NR_sched_getaffinity 242
+__SYSCALL(__NR_sched_getaffinity, compat_sys_sched_getaffinity)
+#define __NR_io_setup 243
+__SYSCALL(__NR_io_setup, compat_sys_io_setup)
+#define __NR_io_destroy 244
+__SYSCALL(__NR_io_destroy, sys_io_destroy)
+#define __NR_io_getevents 245
+__SYSCALL(__NR_io_getevents, compat_sys_io_getevents)
+#define __NR_io_submit 246
+__SYSCALL(__NR_io_submit, compat_sys_io_submit)
+#define __NR_io_cancel 247
+__SYSCALL(__NR_io_cancel, sys_io_cancel)
+#define __NR_exit_group 248
+__SYSCALL(__NR_exit_group, sys_exit_group)
+#define __NR_lookup_dcookie 249
+__SYSCALL(__NR_lookup_dcookie, compat_sys_lookup_dcookie)
+#define __NR_epoll_create 250
+__SYSCALL(__NR_epoll_create, sys_epoll_create)
+#define __NR_epoll_ctl 251
+__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl)
+#define __NR_epoll_wait 252
+__SYSCALL(__NR_epoll_wait, sys_epoll_wait)
+#define __NR_remap_file_pages 253
+__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
+			/* 254 for set_thread_area */
+__SYSCALL(254, sys_ni_syscall)
+			/* 255 for get_thread_area */
+__SYSCALL(255, sys_ni_syscall)
+#define __NR_set_tid_address 256
+__SYSCALL(__NR_set_tid_address, sys_set_tid_address)
+#define __NR_timer_create 257
+__SYSCALL(__NR_timer_create, compat_sys_timer_create)
+#define __NR_timer_settime 258
+__SYSCALL(__NR_timer_settime, compat_sys_timer_settime)
+#define __NR_timer_gettime 259
+__SYSCALL(__NR_timer_gettime, compat_sys_timer_gettime)
+#define __NR_timer_getoverrun 260
+__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
+#define __NR_timer_delete 261
+__SYSCALL(__NR_timer_delete, sys_timer_delete)
+#define __NR_clock_settime 262
+__SYSCALL(__NR_clock_settime, compat_sys_clock_settime)
+#define __NR_clock_gettime 263
+__SYSCALL(__NR_clock_gettime, compat_sys_clock_gettime)
+#define __NR_clock_getres 264
+__SYSCALL(__NR_clock_getres, compat_sys_clock_getres)
+#define __NR_clock_nanosleep 265
+__SYSCALL(__NR_clock_nanosleep, compat_sys_clock_nanosleep)
+#define __NR_statfs64 266
+__SYSCALL(__NR_statfs64, compat_sys_statfs64_wrapper)
+#define __NR_fstatfs64 267
+__SYSCALL(__NR_fstatfs64, compat_sys_fstatfs64_wrapper)
+#define __NR_tgkill 268
+__SYSCALL(__NR_tgkill, sys_tgkill)
+#define __NR_utimes 269
+__SYSCALL(__NR_utimes, compat_sys_utimes)
+#define __NR_arm_fadvise64_64 270
+__SYSCALL(__NR_arm_fadvise64_64, compat_sys_fadvise64_64_wrapper)
+#define __NR_pciconfig_iobase 271
+__SYSCALL(__NR_pciconfig_iobase, sys_pciconfig_iobase)
+#define __NR_pciconfig_read 272
+__SYSCALL(__NR_pciconfig_read, sys_pciconfig_read)
+#define __NR_pciconfig_write 273
+__SYSCALL(__NR_pciconfig_write, sys_pciconfig_write)
+#define __NR_mq_open 274
+__SYSCALL(__NR_mq_open, compat_sys_mq_open)
+#define __NR_mq_unlink 275
+__SYSCALL(__NR_mq_unlink, sys_mq_unlink)
+#define __NR_mq_timedsend 276
+__SYSCALL(__NR_mq_timedsend, compat_sys_mq_timedsend)
+#define __NR_mq_timedreceive 277
+__SYSCALL(__NR_mq_timedreceive, compat_sys_mq_timedreceive)
+#define __NR_mq_notify 278
+__SYSCALL(__NR_mq_notify, compat_sys_mq_notify)
+#define __NR_mq_getsetattr 279
+__SYSCALL(__NR_mq_getsetattr, compat_sys_mq_getsetattr)
+#define __NR_waitid 280
+__SYSCALL(__NR_waitid, compat_sys_waitid)
+#define __NR_socket 281
+__SYSCALL(__NR_socket, sys_socket)
+#define __NR_bind 282
+__SYSCALL(__NR_bind, sys_bind)
+#define __NR_connect 283
+__SYSCALL(__NR_connect, sys_connect)
+#define __NR_listen 284
+__SYSCALL(__NR_listen, sys_listen)
+#define __NR_accept 285
+__SYSCALL(__NR_accept, sys_accept)
+#define __NR_getsockname 286
+__SYSCALL(__NR_getsockname, sys_getsockname)
+#define __NR_getpeername 287
+__SYSCALL(__NR_getpeername, sys_getpeername)
+#define __NR_socketpair 288
+__SYSCALL(__NR_socketpair, sys_socketpair)
+#define __NR_send 289
+__SYSCALL(__NR_send, sys_send)
+#define __NR_sendto 290
+__SYSCALL(__NR_sendto, sys_sendto)
+#define __NR_recv 291
+__SYSCALL(__NR_recv, compat_sys_recv)
+#define __NR_recvfrom 292
+__SYSCALL(__NR_recvfrom, compat_sys_recvfrom)
+#define __NR_shutdown 293
+__SYSCALL(__NR_shutdown, sys_shutdown)
+#define __NR_setsockopt 294
+__SYSCALL(__NR_setsockopt, compat_sys_setsockopt)
+#define __NR_getsockopt 295
+__SYSCALL(__NR_getsockopt, compat_sys_getsockopt)
+#define __NR_sendmsg 296
+__SYSCALL(__NR_sendmsg, compat_sys_sendmsg)
+#define __NR_recvmsg 297
+__SYSCALL(__NR_recvmsg, compat_sys_recvmsg)
+#define __NR_semop 298
+__SYSCALL(__NR_semop, sys_semop)
+#define __NR_semget 299
+__SYSCALL(__NR_semget, sys_semget)
+#define __NR_semctl 300
+__SYSCALL(__NR_semctl, compat_sys_semctl)
+#define __NR_msgsnd 301
+__SYSCALL(__NR_msgsnd, compat_sys_msgsnd)
+#define __NR_msgrcv 302
+__SYSCALL(__NR_msgrcv, compat_sys_msgrcv)
+#define __NR_msgget 303
+__SYSCALL(__NR_msgget, sys_msgget)
+#define __NR_msgctl 304
+__SYSCALL(__NR_msgctl, compat_sys_msgctl)
+#define __NR_shmat 305
+__SYSCALL(__NR_shmat, compat_sys_shmat)
+#define __NR_shmdt 306
+__SYSCALL(__NR_shmdt, sys_shmdt)
+#define __NR_shmget 307
+__SYSCALL(__NR_shmget, sys_shmget)
+#define __NR_shmctl 308
+__SYSCALL(__NR_shmctl, compat_sys_shmctl)
+#define __NR_add_key 309
+__SYSCALL(__NR_add_key, sys_add_key)
+#define __NR_request_key 310
+__SYSCALL(__NR_request_key, sys_request_key)
+#define __NR_keyctl 311
+__SYSCALL(__NR_keyctl, compat_sys_keyctl)
+#define __NR_semtimedop 312
+__SYSCALL(__NR_semtimedop, compat_sys_semtimedop)
+#define __NR_vserver 313
+__SYSCALL(__NR_vserver, sys_ni_syscall)
+#define __NR_ioprio_set 314
+__SYSCALL(__NR_ioprio_set, sys_ioprio_set)
+#define __NR_ioprio_get 315
+__SYSCALL(__NR_ioprio_get, sys_ioprio_get)
+#define __NR_inotify_init 316
+__SYSCALL(__NR_inotify_init, sys_inotify_init)
+#define __NR_inotify_add_watch 317
+__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
+#define __NR_inotify_rm_watch 318
+__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
+#define __NR_mbind 319
+__SYSCALL(__NR_mbind, compat_sys_mbind)
+#define __NR_get_mempolicy 320
+__SYSCALL(__NR_get_mempolicy, compat_sys_get_mempolicy)
+#define __NR_set_mempolicy 321
+__SYSCALL(__NR_set_mempolicy, compat_sys_set_mempolicy)
+#define __NR_openat 322
+__SYSCALL(__NR_openat, compat_sys_openat)
+#define __NR_mkdirat 323
+__SYSCALL(__NR_mkdirat, sys_mkdirat)
+#define __NR_mknodat 324
+__SYSCALL(__NR_mknodat, sys_mknodat)
+#define __NR_fchownat 325
+__SYSCALL(__NR_fchownat, sys_fchownat)
+#define __NR_futimesat 326
+__SYSCALL(__NR_futimesat, compat_sys_futimesat)
+#define __NR_fstatat64 327
+__SYSCALL(__NR_fstatat64, sys_fstatat64)
+#define __NR_unlinkat 328
+__SYSCALL(__NR_unlinkat, sys_unlinkat)
+#define __NR_renameat 329
+__SYSCALL(__NR_renameat, sys_renameat)
+#define __NR_linkat 330
+__SYSCALL(__NR_linkat, sys_linkat)
+#define __NR_symlinkat 331
+__SYSCALL(__NR_symlinkat, sys_symlinkat)
+#define __NR_readlinkat 332
+__SYSCALL(__NR_readlinkat, sys_readlinkat)
+#define __NR_fchmodat 333
+__SYSCALL(__NR_fchmodat, sys_fchmodat)
+#define __NR_faccessat 334
+__SYSCALL(__NR_faccessat, sys_faccessat)
+#define __NR_pselect6 335
+__SYSCALL(__NR_pselect6, compat_sys_pselect6)
+#define __NR_ppoll 336
+__SYSCALL(__NR_ppoll, compat_sys_ppoll)
+#define __NR_unshare 337
+__SYSCALL(__NR_unshare, sys_unshare)
+#define __NR_set_robust_list 338
+__SYSCALL(__NR_set_robust_list, compat_sys_set_robust_list)
+#define __NR_get_robust_list 339
+__SYSCALL(__NR_get_robust_list, compat_sys_get_robust_list)
+#define __NR_splice 340
+__SYSCALL(__NR_splice, sys_splice)
+#define __NR_sync_file_range2 341
+__SYSCALL(__NR_sync_file_range2, compat_sys_sync_file_range2_wrapper)
+#define __NR_tee 342
+__SYSCALL(__NR_tee, sys_tee)
+#define __NR_vmsplice 343
+__SYSCALL(__NR_vmsplice, compat_sys_vmsplice)
+#define __NR_move_pages 344
+__SYSCALL(__NR_move_pages, compat_sys_move_pages)
+#define __NR_getcpu 345
+__SYSCALL(__NR_getcpu, sys_getcpu)
+#define __NR_epoll_pwait 346
+__SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait)
+#define __NR_kexec_load 347
+__SYSCALL(__NR_kexec_load, compat_sys_kexec_load)
+#define __NR_utimensat 348
+__SYSCALL(__NR_utimensat, compat_sys_utimensat)
+#define __NR_signalfd 349
+__SYSCALL(__NR_signalfd, compat_sys_signalfd)
+#define __NR_timerfd_create 350
+__SYSCALL(__NR_timerfd_create, sys_timerfd_create)
+#define __NR_eventfd 351
+__SYSCALL(__NR_eventfd, sys_eventfd)
+#define __NR_fallocate 352
+__SYSCALL(__NR_fallocate, compat_sys_fallocate_wrapper)
+#define __NR_timerfd_settime 353
+__SYSCALL(__NR_timerfd_settime, compat_sys_timerfd_settime)
+#define __NR_timerfd_gettime 354
+__SYSCALL(__NR_timerfd_gettime, compat_sys_timerfd_gettime)
+#define __NR_signalfd4 355
+__SYSCALL(__NR_signalfd4, compat_sys_signalfd4)
+#define __NR_eventfd2 356
+__SYSCALL(__NR_eventfd2, sys_eventfd2)
+#define __NR_epoll_create1 357
+__SYSCALL(__NR_epoll_create1, sys_epoll_create1)
+#define __NR_dup3 358
+__SYSCALL(__NR_dup3, sys_dup3)
+#define __NR_pipe2 359
+__SYSCALL(__NR_pipe2, sys_pipe2)
+#define __NR_inotify_init1 360
+__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
+#define __NR_preadv 361
+__SYSCALL(__NR_preadv, compat_sys_preadv)
+#define __NR_pwritev 362
+__SYSCALL(__NR_pwritev, compat_sys_pwritev)
+#define __NR_rt_tgsigqueueinfo 363
+__SYSCALL(__NR_rt_tgsigqueueinfo, compat_sys_rt_tgsigqueueinfo)
+#define __NR_perf_event_open 364
+__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
+#define __NR_recvmmsg 365
+__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg)
+#define __NR_accept4 366
+__SYSCALL(__NR_accept4, sys_accept4)
+#define __NR_fanotify_init 367
+__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
+#define __NR_fanotify_mark 368
+__SYSCALL(__NR_fanotify_mark, compat_sys_fanotify_mark)
+#define __NR_prlimit64 369
+__SYSCALL(__NR_prlimit64, sys_prlimit64)
+#define __NR_name_to_handle_at 370
+__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
+#define __NR_open_by_handle_at 371
+__SYSCALL(__NR_open_by_handle_at, compat_sys_open_by_handle_at)
+#define __NR_clock_adjtime 372
+__SYSCALL(__NR_clock_adjtime, compat_sys_clock_adjtime)
+#define __NR_syncfs 373
+__SYSCALL(__NR_syncfs, sys_syncfs)
+#define __NR_sendmmsg 374
+__SYSCALL(__NR_sendmmsg, compat_sys_sendmmsg)
+#define __NR_setns 375
+__SYSCALL(__NR_setns, sys_setns)
+#define __NR_process_vm_readv 376
+__SYSCALL(__NR_process_vm_readv, compat_sys_process_vm_readv)
+#define __NR_process_vm_writev 377
+__SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev)
+#define __NR_kcmp 378
+__SYSCALL(__NR_kcmp, sys_kcmp)
+#define __NR_finit_module 379
+__SYSCALL(__NR_finit_module, sys_finit_module)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 5b368d51369e..3c93e1a9ea8d 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -27,7 +27,6 @@
 #include <asm/esr.h>
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
-#include <asm/unistd32.h>
 
 /*
  * Bad Abort numbers
diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S
index 8b69ecb1d8bc..2f5b3ff7e30d 100644
--- a/arch/arm64/kernel/kuser32.S
+++ b/arch/arm64/kernel/kuser32.S
@@ -27,6 +27,9 @@
  *
  * See Documentation/arm/kernel_user_helpers.txt for formal definitions.
  */
+
+#include <asm/unistd.h>
+
 	.align	5
 	.globl	__kuser_helper_start
 __kuser_helper_start:
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 31fbeaf4dd62..b4692837e7ab 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -26,7 +26,7 @@
 #include <asm/fpsimd.h>
 #include <asm/signal32.h>
 #include <asm/uaccess.h>
-#include <asm/unistd32.h>
+#include <asm/unistd.h>
 
 struct compat_sigcontext {
 	/* We always set these two fields to 0 */
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 26e9c4eeaba8..de2b0226e06d 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -26,7 +26,7 @@
 #include <linux/uaccess.h>
 
 #include <asm/cacheflush.h>
-#include <asm/unistd32.h>
+#include <asm/unistd.h>
 
 static inline void
 do_compat_cache_op(unsigned long start, unsigned long end, int flags)

From 4f2f36a138e56efea5028970ee940ff1e2215817 Mon Sep 17 00:00:00 2001
From: Dan Aloni <alonid@stratoscale.com>
Date: Wed, 28 Aug 2013 14:24:53 +0100
Subject: [PATCH 0391/1185] Move the EM_ARM and EM_AARCH64 definitions to
 uapi/linux/elf-em.h

Signed-off-by: Dan Aloni <alonid@stratoscale.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/include/asm/elf.h   | 2 --
 arch/arm64/include/asm/elf.h | 3 ---
 include/uapi/linux/elf-em.h  | 2 ++
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index 38050b1c4800..e110a672e160 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -19,8 +19,6 @@ typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
 typedef struct user_fp elf_fpregset_t;
 
-#define EM_ARM	40
-
 #define EF_ARM_EABI_MASK	0xff000000
 #define EF_ARM_EABI_UNKNOWN	0x00000000
 #define EF_ARM_EABI_VER1	0x01000000
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index fe32c0e4ac01..e7fa87f9201b 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -33,8 +33,6 @@ typedef unsigned long elf_greg_t;
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 typedef struct user_fpsimd_state elf_fpregset_t;
 
-#define EM_AARCH64		183
-
 /*
  * AArch64 static relocation types.
  */
@@ -151,7 +149,6 @@ extern unsigned long arch_randomize_brk(struct mm_struct *mm);
 #define arch_randomize_brk arch_randomize_brk
 
 #ifdef CONFIG_COMPAT
-#define EM_ARM				40
 #define COMPAT_ELF_PLATFORM		("v8l")
 
 #define COMPAT_ELF_ET_DYN_BASE		(randomize_et_dyn(2 * TASK_SIZE_32 / 3))
diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h
index 8e2b7bac4378..59c17a2d38ad 100644
--- a/include/uapi/linux/elf-em.h
+++ b/include/uapi/linux/elf-em.h
@@ -22,6 +22,7 @@
 #define EM_PPC		20	/* PowerPC */
 #define EM_PPC64	21	 /* PowerPC64 */
 #define EM_SPU		23	/* Cell BE SPU */
+#define EM_ARM		40	/* ARM 32 bit */
 #define EM_SH		42	/* SuperH */
 #define EM_SPARCV9	43	/* SPARC v9 64-bit */
 #define EM_IA_64	50	/* HP/Intel IA-64 */
@@ -34,6 +35,7 @@
 #define EM_MN10300	89	/* Panasonic/MEI MN10300, AM33 */
 #define EM_BLACKFIN     106     /* ADI Blackfin Processor */
 #define EM_TI_C6000	140	/* TI C6X DSPs */
+#define EM_AARCH64	183	/* ARM 64 bit */
 #define EM_FRV		0x5441	/* Fujitsu FR-V */
 #define EM_AVR32	0x18ad	/* Atmel AVR32 */
 

From bf11863d45eb3dac0d0cf1f818ded11ade6e28d3 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Fri, 4 Jul 2014 08:28:30 +0100
Subject: [PATCH 0392/1185] arm64: Add audit support

On AArch64, audit is supported through generic lib/audit.c and
compat_audit.c, and so this patch adds arch specific definitions required.

Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

Conflicts:
	arch/arm64/Kconfig
	include/uapi/linux/audit.h

Change-Id: Ia6d7b25786843d43191e67d514928e3ecba11e2f
---
 arch/arm64/Kconfig               |  2 ++
 arch/arm64/include/asm/syscall.h | 14 ++++++++++++++
 include/uapi/linux/audit.h       |  2 ++
 3 files changed, 18 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 71c2a070ace4..2221396c2a6c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -7,6 +7,7 @@ config ARM64
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
 	select ARM_GIC
+	select AUDIT_ARCH_COMPAT_GENERIC
 	select CLONE_BACKWARDS
 	select COMMON_CLK
 	select GENERIC_CLOCKEVENTS
@@ -18,6 +19,7 @@ config ARM64
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
 	select HARDIRQS_SW_RESEND
+	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DEBUG_BUGVERBOSE
 	select HAVE_DEBUG_KMEMLEAK
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index 89c047f9a971..d7de9333f714 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -16,6 +16,8 @@
 #ifndef __ASM_SYSCALL_H
 #define __ASM_SYSCALL_H
 
+#include <uapi/linux/audit.h>
+#include <linux/compat.h>
 #include <linux/err.h>
 
 
@@ -98,4 +100,16 @@ static inline void syscall_set_arguments(struct task_struct *task,
 	memcpy(&regs->regs[i], args, n * sizeof(args[0]));
 }
 
+/*
+ * We don't care about endianness (__AUDIT_ARCH_LE bit) here because
+ * AArch64 has the same system calls both on little- and big- endian.
+ */
+static inline int syscall_get_arch(void)
+{
+	if (is_compat_task())
+		return AUDIT_ARCH_ARM;
+
+	return AUDIT_ARCH_AARCH64;
+}
+
 #endif	/* __ASM_SYSCALL_H */
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 75cef3fd97ad..ce8750f8788a 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -324,6 +324,8 @@ enum {
 /* distinguish syscall tables */
 #define __AUDIT_ARCH_64BIT 0x80000000
 #define __AUDIT_ARCH_LE	   0x40000000
+
+#define AUDIT_ARCH_AARCH64	(EM_AARCH64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_ALPHA	(EM_ALPHA|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_ARM		(EM_ARM|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_ARMEB	(EM_ARM)

From 3e21c0bb663a23436e0eb3f61860d4fedc233bab Mon Sep 17 00:00:00 2001
From: JP Abgrall <jpa@google.com>
Date: Wed, 17 Sep 2014 18:18:11 -0700
Subject: [PATCH 0393/1185] arm64: audit: Add audit hook in
 syscall_trace_enter/exit()

This patch adds auditing functions on entry to or exit from
every system call invocation.

Acked-by: Richard Guy Briggs <rgb@redhat.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

Conflicts:
	arch/arm64/kernel/ptrace.c

Change-Id: I7ebff5df4acbdab56c74e584dbc5fef5d8bfc9a8
---
 arch/arm64/kernel/ptrace.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index f3b14fd27ab4..ee856d9f6f64 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -19,6 +19,8 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/audit.h>
+#include <linux/compat.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
@@ -38,6 +40,7 @@
 #include <asm/compat.h>
 #include <asm/debug-monitors.h>
 #include <asm/pgtable.h>
+#include <asm/syscall.h>
 #include <asm/traps.h>
 #include <asm/system_misc.h>
 
@@ -1096,11 +1099,16 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs)
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
 
+	audit_syscall_entry(syscall_get_arch(), regs->syscallno,
+		regs->orig_x0, regs->regs[1], regs->regs[2], regs->regs[3]);
+
 	return regs->syscallno;
 }
 
 asmlinkage void syscall_trace_exit(struct pt_regs *regs)
 {
+	audit_syscall_exit(regs);
+
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
 }

From 4b3735631fcc1056af00e382833727342f12c259 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Wed, 30 Apr 2014 10:51:31 +0100
Subject: [PATCH 0394/1185] arm64: Add regs_return_value() in syscall.h

This helper, regs_return_value(), is used mainly by audit to record a system
call's result, but may also be used in test_kprobes.c.

Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/ptrace.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index fff28950e660..dbe3b00d1eb7 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -133,6 +133,11 @@ struct pt_regs {
 #define user_stack_pointer(regs) \
 	(!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp)
 
+static inline unsigned long regs_return_value(struct pt_regs *regs)
+{
+	return regs->regs[0];
+}
+
 /*
  * Are the current registers suitable for user mode? (used to maintain
  * security in signal handlers)

From ba27127b4730d9352bfda24553d1994cddd9edb3 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Wed, 30 Apr 2014 10:51:32 +0100
Subject: [PATCH 0395/1185] arm64: is_compat_task is defined both in
 asm/compat.h and linux/compat.h

Some kernel files may include both linux/compat.h and asm/compat.h directly
or indirectly. Since both header files contain is_compat_task() under
!CONFIG_COMPAT, compiling them with !CONFIG_COMPAT will eventually fail.
Such files include kernel/auditsc.c, kernel/seccomp.c and init/do_mountfs.c
(do_mountfs.c may read asm/compat.h via asm/ftrace.h once ftrace is
implemented).

So this patch proactively
1) removes is_compat_task() under !CONFIG_COMPAT from asm/compat.h
2) replaces asm/compat.h with linux/compat.h in kernel/*.c,
   but asm/compat.h is still necessary in ptrace.c and process.c because
   they use is_compat_thread().

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

Conflicts:
	arch/arm64/kernel/hw_breakpoint.c
	arch/arm64/kernel/ptrace.c

Change-Id: I5b8330e43ab8bdd383cd410d8223d6c1a39fa0fc
---
 arch/arm64/include/asm/compat.h   | 5 -----
 arch/arm64/kernel/hw_breakpoint.c | 2 +-
 arch/arm64/kernel/process.c       | 1 +
 arch/arm64/kernel/signal.c        | 2 +-
 4 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 7058eec269ab..ae0004fe6c23 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -291,11 +291,6 @@ static inline int is_compat_thread(struct thread_info *thread)
 
 #else /* !CONFIG_COMPAT */
 
-static inline int is_compat_task(void)
-{
-	return 0;
-}
-
 static inline int is_compat_thread(struct thread_info *thread)
 {
 	return 0;
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 5ab825c59db9..68e371ea1bda 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -20,13 +20,13 @@
 
 #define pr_fmt(fmt) "hw-breakpoint: " fmt
 
+#include <linux/compat.h>
 #include <linux/errno.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/perf_event.h>
 #include <linux/ptrace.h>
 #include <linux/smp.h>
 
-#include <asm/compat.h>
 #include <asm/current.h>
 #include <asm/debug-monitors.h>
 #include <asm/hw_breakpoint.h>
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 7c11c74f7f54..977ca8aa7dd1 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -20,6 +20,7 @@
 
 #include <stdarg.h>
 
+#include <linux/compat.h>
 #include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 882f01774365..182b6fc01190 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -17,6 +17,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/compat.h>
 #include <linux/errno.h>
 #include <linux/signal.h>
 #include <linux/personality.h>
@@ -25,7 +26,6 @@
 #include <linux/tracehook.h>
 #include <linux/ratelimit.h>
 
-#include <asm/compat.h>
 #include <asm/debug-monitors.h>
 #include <asm/elf.h>
 #include <asm/cacheflush.h>

From f884d7fae12bd203a2243883e320a0c59168deb3 Mon Sep 17 00:00:00 2001
From: JP Abgrall <jpa@google.com>
Date: Wed, 17 Sep 2014 19:26:43 -0700
Subject: [PATCH 0396/1185] arm64: Fixup __NR_* compat syscalls count.

Should have gone in the cherry-pick
  cfc7e99e9e3900056028a7d90072e9ea0d886f8d
   arm64: Add __NR_* definitions for compat syscalls

Change-Id: I69a69e4b1f206aad4ece1a8b06f9e23e99adcbfb
---
 arch/arm64/include/asm/unistd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 38f8799a3d31..a8f8f6992987 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -40,7 +40,7 @@
 #define __ARM_NR_compat_cacheflush	(__ARM_NR_COMPAT_BASE+2)
 #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE+5)
 
-#define __NR_compat_syscalls		383
+#define __NR_compat_syscalls		378
 #endif
 
 #define __ARCH_WANT_SYS_CLONE

From c5b2a660db70c8e591e8e2e3f7024073177b7124 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Wed, 30 Apr 2014 10:51:31 +0100
Subject: [PATCH 0397/1185] arm64: Add regs_return_value() in syscall.h

This macro, regs_return_value, is used mainly for audit to record system
call's results, but may also be used in test_kprobes.c.

Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit d34a3ebd8d25cf691a94fae66a957a480cf46430)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/ptrace.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 41a71ee4c3df..f38a8aa58d4c 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -133,6 +133,11 @@ struct pt_regs {
 #define user_stack_pointer(regs) \
 	((regs)->sp)
 
+static inline unsigned long regs_return_value(struct pt_regs *regs)
+{
+	return regs->regs[0];
+}
+
 /*
  * Are the current registers suitable for user mode? (used to maintain
  * security in signal handlers)

From 03c6fb711a50ed882fdafce6acf5dd92272fd0b7 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andrey.konovalov@linaro.org>
Date: Wed, 24 Sep 2014 01:08:14 +0530
Subject: [PATCH 0398/1185] export tcp_nuke_addr to build ipv6 as module

Otherwise we run into following build error:
-----8<-----
  ...
  CC [M]  net/ipv6/netfilter/ip6t_REJECT.o
  CC [M]  net/xfrm/xfrm_ipcomp.o
  Building modules, stage 2.
  MODPOST 36 modules
ERROR: "tcp_nuke_addr" [net/ipv6/ipv6.ko] undefined!
make[1]: *** [__modpost] Error 1
make: *** [modules] Error 2
-----8<-----

Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
---
 net/ipv4/tcp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7921d1abc449..089a948fe834 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3565,3 +3565,4 @@ int tcp_nuke_addr(struct net *net, struct sockaddr *addr)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(tcp_nuke_addr);

From 555f3ed9473f5e71966842b557772dd5e0e5a364 Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan <Badhri@google.com>
Date: Wed, 24 Sep 2014 18:58:23 -0700
Subject: [PATCH 0399/1185] usb: u_ether: Add workqueue as bottom half handler
 for rx data path

The u_ether driver passes rx data to the network layer and resubmits
the request back to the usb hardware in interrupt context. The network
layer processes rx data by scheduling a tasklet. In high-throughput
scenarios on the rx data path, the driver spends a lot of time in
interrupt context due to rx data processing by the tasklet and the
continuous completion and re-submission of the usb requests, which
results in a watchdog bark. Hence move the rx data processing and usb
request submission to a workqueue bottom half handler.

Change-Id: I316de8e267997137ac189a8b7b2846fa325f4a5a
Signed-off-by: Badhri Jagan Sridharan <Badhri@google.com>
---
 drivers/usb/gadget/u_ether.c | 118 ++++++++++++++++++++++++-----------
 1 file changed, 81 insertions(+), 37 deletions(-)

diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index 801e326e54c0..734d64b8e2c1 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -48,6 +48,8 @@
 
 #define UETH__VERSION	"29-May-2008"
 
+static struct workqueue_struct	*uether_wq;
+
 struct eth_dev {
 	/* lock is held while accessing port_usb
 	 */
@@ -71,6 +73,7 @@ struct eth_dev {
 						struct sk_buff_head *list);
 
 	struct work_struct	work;
+	struct work_struct	rx_work;
 
 	unsigned long		todo;
 #define	WORK_RX_MEMORY		0
@@ -259,18 +262,16 @@ rx_submit(struct eth_dev *dev, struct usb_request *req, gfp_t gfp_flags)
 		DBG(dev, "rx submit --> %d\n", retval);
 		if (skb)
 			dev_kfree_skb_any(skb);
-		spin_lock_irqsave(&dev->req_lock, flags);
-		list_add(&req->list, &dev->rx_reqs);
-		spin_unlock_irqrestore(&dev->req_lock, flags);
 	}
 	return retval;
 }
 
 static void rx_complete(struct usb_ep *ep, struct usb_request *req)
 {
-	struct sk_buff	*skb = req->context, *skb2;
+	struct sk_buff	*skb = req->context;
 	struct eth_dev	*dev = ep->driver_data;
 	int		status = req->status;
+	bool		queue = 0;
 
 	switch (status) {
 
@@ -294,30 +295,9 @@ static void rx_complete(struct usb_ep *ep, struct usb_request *req)
 		} else {
 			skb_queue_tail(&dev->rx_frames, skb);
 		}
-		skb = NULL;
 
-		skb2 = skb_dequeue(&dev->rx_frames);
-		while (skb2) {
-			if (status < 0
-					|| ETH_HLEN > skb2->len
-					|| skb2->len > VLAN_ETH_FRAME_LEN) {
-				dev->net->stats.rx_errors++;
-				dev->net->stats.rx_length_errors++;
-				DBG(dev, "rx length %d\n", skb2->len);
-				dev_kfree_skb_any(skb2);
-				goto next_frame;
-			}
-			skb2->protocol = eth_type_trans(skb2, dev->net);
-			dev->net->stats.rx_packets++;
-			dev->net->stats.rx_bytes += skb2->len;
-
-			/* no buffer copies needed, unless hardware can't
-			 * use skb buffers.
-			 */
-			status = netif_rx(skb2);
-next_frame:
-			skb2 = skb_dequeue(&dev->rx_frames);
-		}
+		if (!status)
+			queue = 1;
 		break;
 
 	/* software-driven interface shutdown */
@@ -340,22 +320,20 @@ static void rx_complete(struct usb_ep *ep, struct usb_request *req)
 		/* FALLTHROUGH */
 
 	default:
+		queue = 1;
+		dev_kfree_skb_any(skb);
 		dev->net->stats.rx_errors++;
 		DBG(dev, "rx status %d\n", status);
 		break;
 	}
 
-	if (skb)
-		dev_kfree_skb_any(skb);
-	if (!netif_running(dev->net)) {
 clean:
-		spin_lock(&dev->req_lock);
-		list_add(&req->list, &dev->rx_reqs);
-		spin_unlock(&dev->req_lock);
-		req = NULL;
-	}
-	if (req)
-		rx_submit(dev, req, GFP_ATOMIC);
+	spin_lock(&dev->req_lock);
+	list_add(&req->list, &dev->rx_reqs);
+	spin_unlock(&dev->req_lock);
+
+	if (queue)
+		queue_work(uether_wq, &dev->rx_work);
 }
 
 static int prealloc(struct list_head *list, struct usb_ep *ep, unsigned n)
@@ -420,16 +398,24 @@ static void rx_fill(struct eth_dev *dev, gfp_t gfp_flags)
 {
 	struct usb_request	*req;
 	unsigned long		flags;
+	int			req_cnt = 0;
 
 	/* fill unused rxq slots with some skb */
 	spin_lock_irqsave(&dev->req_lock, flags);
 	while (!list_empty(&dev->rx_reqs)) {
+		/* break the nexus of continuous completion and re-submission*/
+		if (++req_cnt > qlen(dev->gadget))
+			break;
+
 		req = container_of(dev->rx_reqs.next,
 				struct usb_request, list);
 		list_del_init(&req->list);
 		spin_unlock_irqrestore(&dev->req_lock, flags);
 
 		if (rx_submit(dev, req, gfp_flags) < 0) {
+			spin_lock_irqsave(&dev->req_lock, flags);
+			list_add(&req->list, &dev->rx_reqs);
+			spin_unlock_irqrestore(&dev->req_lock, flags);
 			defer_kevent(dev, WORK_RX_MEMORY);
 			return;
 		}
@@ -439,6 +425,36 @@ static void rx_fill(struct eth_dev *dev, gfp_t gfp_flags)
 	spin_unlock_irqrestore(&dev->req_lock, flags);
 }
 
+static void process_rx_w(struct work_struct *work)
+{
+	struct eth_dev	*dev = container_of(work, struct eth_dev, rx_work);
+	struct sk_buff	*skb;
+	int		status = 0;
+
+	if (!dev->port_usb)
+		return;
+
+	while ((skb = skb_dequeue(&dev->rx_frames))) {
+		if (status < 0
+				|| ETH_HLEN > skb->len
+				|| skb->len > ETH_FRAME_LEN) {
+			dev->net->stats.rx_errors++;
+			dev->net->stats.rx_length_errors++;
+			DBG(dev, "rx length %d\n", skb->len);
+			dev_kfree_skb_any(skb);
+			continue;
+		}
+		skb->protocol = eth_type_trans(skb, dev->net);
+		dev->net->stats.rx_packets++;
+		dev->net->stats.rx_bytes += skb->len;
+
+		status = netif_rx_ni(skb);
+	}
+
+	if (netif_running(dev->net))
+		rx_fill(dev, GFP_KERNEL);
+}
+
 static void eth_work(struct work_struct *work)
 {
 	struct eth_dev	*dev = container_of(work, struct eth_dev, work);
@@ -775,6 +791,7 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, u8 ethaddr[ETH_ALEN],
 	spin_lock_init(&dev->lock);
 	spin_lock_init(&dev->req_lock);
 	INIT_WORK(&dev->work, eth_work);
+	INIT_WORK(&dev->rx_work, process_rx_w);
 	INIT_LIST_HEAD(&dev->tx_reqs);
 	INIT_LIST_HEAD(&dev->rx_reqs);
 
@@ -933,6 +950,7 @@ void gether_disconnect(struct gether *link)
 {
 	struct eth_dev		*dev = link->ioport;
 	struct usb_request	*req;
+	struct sk_buff		*skb;
 
 	WARN_ON(!dev);
 	if (!dev)
@@ -974,6 +992,12 @@ void gether_disconnect(struct gether *link)
 		spin_lock(&dev->req_lock);
 	}
 	spin_unlock(&dev->req_lock);
+
+	spin_lock(&dev->rx_frames.lock);
+	while ((skb = __skb_dequeue(&dev->rx_frames)))
+		dev_kfree_skb_any(skb);
+	spin_unlock(&dev->rx_frames.lock);
+
 	link->out_ep->driver_data = NULL;
 	link->out_ep->desc = NULL;
 
@@ -986,3 +1010,23 @@ void gether_disconnect(struct gether *link)
 	dev->port_usb = NULL;
 	spin_unlock(&dev->lock);
 }
+
+static int __init gether_init(void)
+{
+	uether_wq  = create_singlethread_workqueue("uether");
+	if (!uether_wq) {
+		pr_err("%s: Unable to create workqueue: uether\n", __func__);
+		return -ENOMEM;
+	}
+	return 0;
+}
+module_init(gether_init);
+
+static void __exit gether_exit(void)
+{
+	destroy_workqueue(uether_wq);
+
+}
+module_exit(gether_exit);
+MODULE_DESCRIPTION("ethernet over USB driver");
+MODULE_LICENSE("GPL v2");

From 9c861db045e11a41ab092f82ec4bf69212dd4694 Mon Sep 17 00:00:00 2001
From: "taeju.park" <taeju.park@lge.com>
Date: Fri, 14 Sep 2012 14:09:03 +0900
Subject: [PATCH 0400/1185] usb: gadget: prevent change of Host MAC address of
 'usb0' interface

On the Windows 7 platform, the previously allocated IP address is
maintained. However, the Host MAC address of the 'usb0' interface
changes when the tethering driver is re-enumerated. Thus, the tethering
network driver can't be allocated an IP address from DHCP. This causes
a connection delay between host and phone for USB tethering.

This patch prevents the Host MAC address of the 'usb0' interface from
changing. In other words, the Host MAC address allocated when the
tethering driver is first set up is kept even when the driver is
re-enumerated. However, after a reboot, the Host MAC address can change.

Change-Id: I43add9925e9d6d90c56cffbd3ed999104448f818
Signed-off-by: Badhri Jagan Sridharan <Badhri@google.com>
---
 drivers/usb/gadget/u_ether.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index 734d64b8e2c1..6a5065966494 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -718,6 +718,8 @@ static int eth_stop(struct net_device *net)
 
 /*-------------------------------------------------------------------------*/
 
+static u8 host_ethaddr[ETH_ALEN];
+
 /* initial value, changed by "ifconfig usb0 hw ether xx:xx:xx:xx:xx:xx" */
 static char *dev_addr;
 module_param(dev_addr, charp, S_IRUGO);
@@ -749,6 +751,17 @@ static int get_ether_addr(const char *str, u8 *dev_addr)
 	return 1;
 }
 
+static int get_host_ether_addr(u8 *str, u8 *dev_addr)
+{
+	memcpy(dev_addr, str, ETH_ALEN);
+	if (is_valid_ether_addr(dev_addr))
+		return 0;
+
+	random_ether_addr(dev_addr);
+	memcpy(str, dev_addr, ETH_ALEN);
+	return 1;
+}
+
 static const struct net_device_ops eth_netdev_ops = {
 	.ndo_open		= eth_open,
 	.ndo_stop		= eth_stop,
@@ -804,9 +817,11 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, u8 ethaddr[ETH_ALEN],
 	if (get_ether_addr(dev_addr, net->dev_addr))
 		dev_warn(&g->dev,
 			"using random %s ethernet address\n", "self");
-	if (get_ether_addr(host_addr, dev->host_mac))
-		dev_warn(&g->dev,
-			"using random %s ethernet address\n", "host");
+
+	if (get_host_ether_addr(host_ethaddr, dev->host_mac))
+		dev_warn(&g->dev, "using random %s ethernet address\n", "host");
+	else
+		dev_warn(&g->dev, "using previous %s ethernet address\n", "host");
 
 	if (ethaddr)
 		memcpy(ethaddr, dev->host_mac, ETH_ALEN);

From 91da92a881344b1b04077f18ed7e9cc358cd0430 Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan <Badhri@google.com>
Date: Thu, 18 Sep 2014 10:42:41 -0700
Subject: [PATCH 0401/1185] USB: gadget: u_ether: Fix data stall issue in RNDIS
 tethering mode

For a dual-speed gadget, with the current number of requests (10),
there is a possible corner case where all 10 requests are queued
to HW without the IOC bit set, which could lead to a data stall in
RNDIS tethering and RNDIS local networking.

With this patch, the counter is incremented before queueing a request
to HW and the IOC bit is set for every nth request, which avoids the
corner case of all requests being queued to HW without the IOC bit set.

Change-Id: I26515bfd9bbc8f7af38be7835692143f7093118a
Signed-off-by: Vijayavardhan Vennapusa <vvreddy@codeaurora.org>
---
 drivers/usb/gadget/u_ether.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index 6a5065966494..63b590de24d6 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -61,7 +61,7 @@ struct eth_dev {
 
 	spinlock_t		req_lock;	/* guard {rx,tx}_reqs */
 	struct list_head	tx_reqs, rx_reqs;
-	atomic_t		tx_qlen;
+	unsigned		tx_qlen;
 
 	struct sk_buff_head	rx_frames;
 
@@ -491,7 +491,6 @@ static void tx_complete(struct usb_ep *ep, struct usb_request *req)
 	spin_unlock(&dev->req_lock);
 	dev_kfree_skb_any(skb);
 
-	atomic_dec(&dev->tx_qlen);
 	if (netif_carrier_ok(dev->net))
 		netif_wake_queue(dev->net);
 }
@@ -605,12 +604,19 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
 
 	req->length = length;
 
-	/* throttle high/super speed IRQ rate back slightly */
-	if (gadget_is_dualspeed(dev->gadget))
-		req->no_interrupt = (dev->gadget->speed == USB_SPEED_HIGH ||
-				     dev->gadget->speed == USB_SPEED_SUPER)
-			? ((atomic_read(&dev->tx_qlen) % qmult) != 0)
-			: 0;
+	/* throttle highspeed IRQ rate back slightly */
+	if (gadget_is_dualspeed(dev->gadget) &&
+			 (dev->gadget->speed == USB_SPEED_HIGH)) {
+		dev->tx_qlen++;
+		if (dev->tx_qlen == qmult) {
+			req->no_interrupt = 0;
+			dev->tx_qlen = 0;
+		} else {
+			req->no_interrupt = 1;
+		}
+	} else {
+		req->no_interrupt = 0;
+	}
 
 	retval = usb_ep_queue(in, req, GFP_ATOMIC);
 	switch (retval) {
@@ -619,7 +625,6 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
 		break;
 	case 0:
 		net->trans_start = jiffies;
-		atomic_inc(&dev->tx_qlen);
 	}
 
 	if (retval) {
@@ -645,7 +650,7 @@ static void eth_start(struct eth_dev *dev, gfp_t gfp_flags)
 	rx_fill(dev, gfp_flags);
 
 	/* and open the tx floodgates */
-	atomic_set(&dev->tx_qlen, 0);
+	dev->tx_qlen = 0;
 	netif_wake_queue(dev->net);
 }
 

From f9039de766187be99eb2e6af45023b8a32dc26b1 Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan <Badhri@google.com>
Date: Thu, 18 Sep 2014 10:46:08 -0700
Subject: [PATCH 0402/1185] RNDIS: Add Data aggregation (multi packet) support

Add data aggregation support using RNDIS Multi Packet feature
to achieve better UDP Downlink throughput. Max 3 RNDIS Packets
aggregated into one RNDIS Packet with this implementation.

With this change, seeing UDP Downlink throughput increase
from 90 Mbps to above 100 Mbps when using Iperf and sending
data more than 100 Mbps.

Change-Id: I21c39482718944bb1b1068bdd02f626531e58f08
Signed-off-by: Mayank Rana <mrana@codeaurora.org>
Signed-off-by: Rajkumar Raghupathy <raghup@codeaurora.org>
---
 drivers/usb/gadget/f_rndis.c |  14 ++++
 drivers/usb/gadget/u_ether.c | 154 ++++++++++++++++++++++++++++++++---
 2 files changed, 156 insertions(+), 12 deletions(-)

diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c
index 7646a564bfda..130efdfa7881 100644
--- a/drivers/usb/gadget/f_rndis.c
+++ b/drivers/usb/gadget/f_rndis.c
@@ -457,6 +457,7 @@ static void rndis_command_complete(struct usb_ep *ep, struct usb_request *req)
 {
 	struct f_rndis			*rndis = req->context;
 	int				status;
+	rndis_init_msg_type		*buf;
 
 	/* received RNDIS command from USB_CDC_SEND_ENCAPSULATED_COMMAND */
 //	spin_lock(&dev->lock);
@@ -464,6 +465,19 @@ static void rndis_command_complete(struct usb_ep *ep, struct usb_request *req)
 	if (status < 0)
 		pr_err("RNDIS command error %d, %d/%d\n",
 			status, req->actual, req->length);
+
+	buf = (rndis_init_msg_type *)req->buf;
+
+	if (buf->MessageType == RNDIS_MSG_INIT) {
+		if (buf->MaxTransferSize > 2048)
+			rndis->port.multi_pkt_xfer = 1;
+		else
+			rndis->port.multi_pkt_xfer = 0;
+		DBG(cdev, "%s: MaxTransferSize: %d : Multi_pkt_txr: %s\n",
+				__func__, buf->MaxTransferSize,
+				rndis->port.multi_pkt_xfer ? "enabled" :
+							    "disabled");
+	}
 //	spin_unlock(&dev->lock);
 }
 
diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index 63b590de24d6..9705c2bb15fb 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -62,6 +62,11 @@ struct eth_dev {
 	spinlock_t		req_lock;	/* guard {rx,tx}_reqs */
 	struct list_head	tx_reqs, rx_reqs;
 	unsigned		tx_qlen;
+/* Minimum number of TX USB request queued to UDC */
+#define TX_REQ_THRESHOLD	5
+	int			no_tx_req_used;
+	int			tx_skb_hold_count;
+	u32			tx_req_bufsize;
 
 	struct sk_buff_head	rx_frames;
 
@@ -88,7 +93,7 @@ struct eth_dev {
 
 #define DEFAULT_QLEN	2	/* double buffering by default */
 
-static unsigned qmult = 5;
+static unsigned qmult = 10;
 module_param(qmult, uint, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(qmult, "queue length multiplier at high/super speed");
 
@@ -472,6 +477,11 @@ static void tx_complete(struct usb_ep *ep, struct usb_request *req)
 {
 	struct sk_buff	*skb = req->context;
 	struct eth_dev	*dev = ep->driver_data;
+	struct net_device *net = dev->net;
+	struct usb_request *new_req;
+	struct usb_ep *in;
+	int length;
+	int retval;
 
 	switch (req->status) {
 	default:
@@ -482,14 +492,73 @@ static void tx_complete(struct usb_ep *ep, struct usb_request *req)
 	case -ESHUTDOWN:		/* disconnect etc */
 		break;
 	case 0:
-		dev->net->stats.tx_bytes += skb->len;
+		if (!req->zero)
+			dev->net->stats.tx_bytes += req->length-1;
+		else
+			dev->net->stats.tx_bytes += req->length;
 	}
 	dev->net->stats.tx_packets++;
 
 	spin_lock(&dev->req_lock);
-	list_add(&req->list, &dev->tx_reqs);
-	spin_unlock(&dev->req_lock);
-	dev_kfree_skb_any(skb);
+	list_add_tail(&req->list, &dev->tx_reqs);
+
+	if (dev->port_usb->multi_pkt_xfer) {
+		dev->no_tx_req_used--;
+		req->length = 0;
+		in = dev->port_usb->in_ep;
+
+		if (!list_empty(&dev->tx_reqs)) {
+			new_req = container_of(dev->tx_reqs.next,
+					struct usb_request, list);
+			list_del(&new_req->list);
+			spin_unlock(&dev->req_lock);
+			if (new_req->length > 0) {
+				length = new_req->length;
+
+				/* NCM requires no zlp if transfer is
+				 * dwNtbInMaxSize */
+				if (dev->port_usb->is_fixed &&
+					length == dev->port_usb->fixed_in_len &&
+					(length % in->maxpacket) == 0)
+					new_req->zero = 0;
+				else
+					new_req->zero = 1;
+
+				/* use zlp framing on tx for strict CDC-Ether
+				 * conformance, though any robust network rx
+				 * path ignores extra padding. and some hardware
+				 * doesn't like to write zlps.
+				 */
+				if (new_req->zero && !dev->zlp &&
+						(length % in->maxpacket) == 0) {
+					new_req->zero = 0;
+					length++;
+				}
+
+				new_req->length = length;
+				retval = usb_ep_queue(in, new_req, GFP_ATOMIC);
+				switch (retval) {
+				default:
+					DBG(dev, "tx queue err %d\n", retval);
+					break;
+				case 0:
+					spin_lock(&dev->req_lock);
+					dev->no_tx_req_used++;
+					spin_unlock(&dev->req_lock);
+					net->trans_start = jiffies;
+				}
+			} else {
+				spin_lock(&dev->req_lock);
+				list_add(&new_req->list, &dev->tx_reqs);
+				spin_unlock(&dev->req_lock);
+			}
+		} else {
+			spin_unlock(&dev->req_lock);
+		}
+	} else {
+		spin_unlock(&dev->req_lock);
+		dev_kfree_skb_any(skb);
+	}
 
 	if (netif_carrier_ok(dev->net))
 		netif_wake_queue(dev->net);
@@ -500,6 +569,26 @@ static inline int is_promisc(u16 cdc_filter)
 	return cdc_filter & USB_CDC_PACKET_TYPE_PROMISCUOUS;
 }
 
+static void alloc_tx_buffer(struct eth_dev *dev)
+{
+	struct list_head	*act;
+	struct usb_request	*req;
+
+	dev->tx_req_bufsize = (TX_SKB_HOLD_THRESHOLD *
+				(dev->net->mtu
+				+ sizeof(struct ethhdr)
+				/* size of rndis_packet_msg_type */
+				+ 44
+				+ 22));
+
+	list_for_each(act, &dev->tx_reqs) {
+		req = container_of(act, struct usb_request, list);
+		if (!req->buf)
+			req->buf = kmalloc(dev->tx_req_bufsize,
+						GFP_ATOMIC);
+	}
+}
+
 static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
 					struct net_device *net)
 {
@@ -526,6 +615,10 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
 		return NETDEV_TX_OK;
 	}
 
+	/* Allocate memory for tx_reqs to support multi packet transfer */
+	if (dev->port_usb->multi_pkt_xfer && !dev->tx_req_bufsize)
+		alloc_tx_buffer(dev);
+
 	/* apply outgoing CDC or RNDIS filters */
 	if (!is_promisc(cdc_filter)) {
 		u8		*dest = skb->data;
@@ -580,11 +673,39 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
 		spin_unlock_irqrestore(&dev->lock, flags);
 		if (!skb)
 			goto drop;
-
-		length = skb->len;
 	}
-	req->buf = skb->data;
-	req->context = skb;
+
+	spin_lock_irqsave(&dev->req_lock, flags);
+	dev->tx_skb_hold_count++;
+	spin_unlock_irqrestore(&dev->req_lock, flags);
+
+	if (dev->port_usb->multi_pkt_xfer) {
+		memcpy(req->buf + req->length, skb->data, skb->len);
+		req->length = req->length + skb->len;
+		length = req->length;
+		dev_kfree_skb_any(skb);
+
+		spin_lock_irqsave(&dev->req_lock, flags);
+		if (dev->tx_skb_hold_count < TX_SKB_HOLD_THRESHOLD) {
+			if (dev->no_tx_req_used > TX_REQ_THRESHOLD) {
+				list_add(&req->list, &dev->tx_reqs);
+				spin_unlock_irqrestore(&dev->req_lock, flags);
+				goto success;
+			}
+		}
+
+		dev->no_tx_req_used++;
+		spin_unlock_irqrestore(&dev->req_lock, flags);
+
+		spin_lock_irqsave(&dev->lock, flags);
+		dev->tx_skb_hold_count = 0;
+		spin_unlock_irqrestore(&dev->lock, flags);
+	} else {
+		length = skb->len;
+		req->buf = skb->data;
+		req->context = skb;
+	}
+
 	req->complete = tx_complete;
 
 	/* NCM requires no zlp if transfer is dwNtbInMaxSize */
@@ -599,8 +720,10 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
 	 * though any robust network rx path ignores extra padding.
 	 * and some hardware doesn't like to write zlps.
 	 */
-	if (req->zero && !dev->zlp && (length % in->maxpacket) == 0)
+	if (req->zero && !dev->zlp && (length % in->maxpacket) == 0) {
+		req->zero = 0;
 		length++;
+	}
 
 	req->length = length;
 
@@ -608,7 +731,7 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
 	if (gadget_is_dualspeed(dev->gadget) &&
 			 (dev->gadget->speed == USB_SPEED_HIGH)) {
 		dev->tx_qlen++;
-		if (dev->tx_qlen == qmult) {
+		if (dev->tx_qlen == (qmult/2)) {
 			req->no_interrupt = 0;
 			dev->tx_qlen = 0;
 		} else {
@@ -628,7 +751,8 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
 	}
 
 	if (retval) {
-		dev_kfree_skb_any(skb);
+		if (!dev->port_usb->multi_pkt_xfer)
+			dev_kfree_skb_any(skb);
 drop:
 		dev->net->stats.tx_dropped++;
 		spin_lock_irqsave(&dev->req_lock, flags);
@@ -637,6 +761,7 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
 		list_add(&req->list, &dev->tx_reqs);
 		spin_unlock_irqrestore(&dev->req_lock, flags);
 	}
+success:
 	return NETDEV_TX_OK;
 }
 
@@ -927,6 +1052,9 @@ struct net_device *gether_connect(struct gether *link)
 		dev->ul_max_pkts_per_xfer = link->ul_max_pkts_per_xfer;
 
 		spin_lock(&dev->lock);
+		dev->tx_skb_hold_count = 0;
+		dev->no_tx_req_used = 0;
+		dev->tx_req_bufsize = 0;
 		dev->port_usb = link;
 		if (netif_running(dev->net)) {
 			if (link->open)
@@ -993,6 +1121,8 @@ void gether_disconnect(struct gether *link)
 		list_del(&req->list);
 
 		spin_unlock(&dev->req_lock);
+		if (link->multi_pkt_xfer)
+			kfree(req->buf);
 		usb_ep_free_request(link->in_ep, req);
 		spin_lock(&dev->req_lock);
 	}

From 5cf94796ec67e08d77e0e6d91e19ee2a083228f3 Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan <Badhri@google.com>
Date: Thu, 18 Sep 2014 10:48:48 -0700
Subject: [PATCH 0403/1185] ndis: Add debug support to disable RNDIS
 Multipacket Feature

This change adds a module parameter that allows disabling the RNDIS
Multi-packet feature (aggregation support in the downlink path),
since this feature is enabled by default.

To disable use this param before moving to RNDIS Composition:
echo 1 > /sys/module/g_android/parameters/rndis_multipacket_dl_disable

Also count received RNDIS packets that do not follow the RNDIS message
format as Rx errors, since those packets are discarded.

Change-Id: I764430da78f2204af92e14bb279c11b24c7e4c67
Signed-off-by: Mayank Rana <mrana@codeaurora.org>
---
 drivers/usb/gadget/f_rndis.c | 2 ++
 drivers/usb/gadget/u_ether.c | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c
index 130efdfa7881..a0240195b46a 100644
--- a/drivers/usb/gadget/f_rndis.c
+++ b/drivers/usb/gadget/f_rndis.c
@@ -477,6 +477,8 @@ static void rndis_command_complete(struct usb_ep *ep, struct usb_request *req)
 				__func__, buf->MaxTransferSize,
 				rndis->port.multi_pkt_xfer ? "enabled" :
 							    "disabled");
+		if (rndis_multipacket_dl_disable)
+			rndis->port.multi_pkt_xfer = 0;
 	}
 //	spin_unlock(&dev->lock);
 }
diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index 9705c2bb15fb..ff26f5c2d38e 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -292,6 +292,10 @@ static void rx_complete(struct usb_ep *ep, struct usb_request *req)
 				status = dev->unwrap(dev->port_usb,
 							skb,
 							&dev->rx_frames);
+				if (status == -EINVAL)
+					dev->net->stats.rx_errors++;
+				else if (status == -EOVERFLOW)
+					dev->net->stats.rx_over_errors++;
 			} else {
 				dev_kfree_skb_any(skb);
 				status = -ENOTCONN;

From 12407bb816f1b79ec5fc3c24786047c2ea41f190 Mon Sep 17 00:00:00 2001
From: xerox_lin <xerox_lin@htc.com>
Date: Thu, 4 Sep 2014 16:01:59 +0800
Subject: [PATCH 0404/1185] USB: gadget: rndis: Add module parameter for DL max
 packets per xfer

Currently DL aggregation is supported in the RNDIS driver and is set to
3 packets by default, but there is no way to change the downlink
maximum packets per transfer at runtime. Hence add a module parameter
for the DL maximum packets per transfer so that it can be changed at
runtime.

echo 6 > /sys/module/g_android/parameters/rndis_dl_max_pkt_per_xfer

To disable DL aggregation during runtime,

echo 1 > /sys/module/g_android/parameters/rndis_dl_max_pkt_per_xfer

Change-Id: I3a1d0bc97358e2b6f233df7ae8725fb507de50db
Signed-off-by: Xerox Lin <xerox_lin@htc.com>
Signed-off-by: Vijayavardhan Vennapusa <vvreddy@codeaurora.org>
---
 drivers/usb/gadget/f_rndis.c | 11 ++++++-----
 drivers/usb/gadget/u_ether.c |  6 ++++--
 drivers/usb/gadget/u_ether.h |  3 +--
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c
index a0240195b46a..187014885687 100644
--- a/drivers/usb/gadget/f_rndis.c
+++ b/drivers/usb/gadget/f_rndis.c
@@ -66,10 +66,10 @@
  *   - MS-Windows drivers sometimes emit undocumented requests.
  */
 
-static bool rndis_multipacket_dl_disable;
-module_param(rndis_multipacket_dl_disable, bool, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(rndis_multipacket_dl_disable,
-	"Disable RNDIS Multi-packet support in DownLink");
+static unsigned int rndis_dl_max_pkt_per_xfer = 3;
+module_param(rndis_dl_max_pkt_per_xfer, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(rndis_dl_max_pkt_per_xfer,
+	"Maximum packets per transfer for DL aggregation");
 
 static unsigned int rndis_ul_max_pkt_per_xfer = 3;
 module_param(rndis_ul_max_pkt_per_xfer, uint, S_IRUGO | S_IWUSR);
@@ -477,7 +477,7 @@ static void rndis_command_complete(struct usb_ep *ep, struct usb_request *req)
 				__func__, buf->MaxTransferSize,
 				rndis->port.multi_pkt_xfer ? "enabled" :
 							    "disabled");
-		if (rndis_multipacket_dl_disable)
+		if (rndis_dl_max_pkt_per_xfer <= 1)
 			rndis->port.multi_pkt_xfer = 0;
 	}
 //	spin_unlock(&dev->lock);
@@ -881,6 +881,7 @@ rndis_bind_config_vendor(struct usb_configuration *c, u8 ethaddr[ETH_ALEN],
 	rndis->port.wrap = rndis_add_header;
 	rndis->port.unwrap = rndis_rm_hdr;
 	rndis->port.ul_max_pkts_per_xfer = rndis_ul_max_pkt_per_xfer;
+	rndis->port.dl_max_pkts_per_xfer = rndis_dl_max_pkt_per_xfer;
 
 	rndis->port.func.name = "rndis";
 	rndis->port.func.strings = rndis_strings;
diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index ff26f5c2d38e..14f587efc0f6 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -72,6 +72,7 @@ struct eth_dev {
 
 	unsigned		header_len;
 	unsigned		ul_max_pkts_per_xfer;
+	unsigned		dl_max_pkts_per_xfer;
 	struct sk_buff		*(*wrap)(struct gether *, struct sk_buff *skb);
 	int			(*unwrap)(struct gether *,
 						struct sk_buff *skb,
@@ -578,7 +579,7 @@ static void alloc_tx_buffer(struct eth_dev *dev)
 	struct list_head	*act;
 	struct usb_request	*req;
 
-	dev->tx_req_bufsize = (TX_SKB_HOLD_THRESHOLD *
+	dev->tx_req_bufsize = (dev->dl_max_pkts_per_xfer *
 				(dev->net->mtu
 				+ sizeof(struct ethhdr)
 				/* size of rndis_packet_msg_type */
@@ -690,7 +691,7 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
 		dev_kfree_skb_any(skb);
 
 		spin_lock_irqsave(&dev->req_lock, flags);
-		if (dev->tx_skb_hold_count < TX_SKB_HOLD_THRESHOLD) {
+		if (dev->tx_skb_hold_count < dev->dl_max_pkts_per_xfer) {
 			if (dev->no_tx_req_used > TX_REQ_THRESHOLD) {
 				list_add(&req->list, &dev->tx_reqs);
 				spin_unlock_irqrestore(&dev->req_lock, flags);
@@ -1054,6 +1055,7 @@ struct net_device *gether_connect(struct gether *link)
 		dev->unwrap = link->unwrap;
 		dev->wrap = link->wrap;
 		dev->ul_max_pkts_per_xfer = link->ul_max_pkts_per_xfer;
+		dev->dl_max_pkts_per_xfer = link->dl_max_pkts_per_xfer;
 
 		spin_lock(&dev->lock);
 		dev->tx_skb_hold_count = 0;
diff --git a/drivers/usb/gadget/u_ether.h b/drivers/usb/gadget/u_ether.h
index ce803d415887..67eda50ae995 100644
--- a/drivers/usb/gadget/u_ether.h
+++ b/drivers/usb/gadget/u_ether.h
@@ -55,8 +55,7 @@ struct gether {
 	u32				fixed_out_len;
 	u32				fixed_in_len;
 	unsigned		ul_max_pkts_per_xfer;
-/* Max number of SKB packets to be used to create Multi Packet RNDIS */
-#define TX_SKB_HOLD_THRESHOLD		3
+	unsigned		dl_max_pkts_per_xfer;
 	bool				multi_pkt_xfer;
 	struct sk_buff			*(*wrap)(struct gether *port,
 						struct sk_buff *skb);

From 88835a3a4ea3b5107c3ea79ee1169aac5fadf808 Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan <Badhri@google.com>
Date: Wed, 24 Sep 2014 19:36:33 -0700
Subject: [PATCH 0405/1185] USB: f_rndis: fix compile error

Change-Id: Ied5dd8ef905bdf84d176a5e560b09e292b68fbc5
Signed-off-by: Badhri Jagan Sridharan <Badhri@google.com>
---
 drivers/usb/gadget/f_rndis.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c
index 187014885687..5c274f1d7b7a 100644
--- a/drivers/usb/gadget/f_rndis.c
+++ b/drivers/usb/gadget/f_rndis.c
@@ -473,7 +473,7 @@ static void rndis_command_complete(struct usb_ep *ep, struct usb_request *req)
 			rndis->port.multi_pkt_xfer = 1;
 		else
 			rndis->port.multi_pkt_xfer = 0;
-		DBG(cdev, "%s: MaxTransferSize: %d : Multi_pkt_txr: %s\n",
+		pr_info("%s: MaxTransferSize: %d : Multi_pkt_txr: %s\n",
 				__func__, buf->MaxTransferSize,
 				rndis->port.multi_pkt_xfer ? "enabled" :
 							    "disabled");

From 3585453daba3b8be18deb1e472e779961be17f77 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 28 May 2013 06:22:32 +0000
Subject: [PATCH 0406/1185] thermal: cpu_cooling: fix 'descend' check in
 get_property()

The variable 'descend' is initialized as -1 in function get_property(),
and will never get any chance to be updated by the following code.

	if (freq != CPUFREQ_ENTRY_INVALID && descend != -1)
		descend = !!(freq > table[i].frequency);

This makes function get_property() return the wrong frequency for a given
cooling level if the frequency table is sorted in ascending order.  Fix it
by correcting the 'descend' check in the if-condition to 'descend == -1'.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
(cherry picked from commit 24c7a381720843f17efb42de81f7e85aefd6f616)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/thermal/cpu_cooling.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 78de7947afac..6093862ae7ad 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -167,7 +167,7 @@ static int get_property(unsigned int cpu, unsigned long input,
 			continue;
 
 		/* get the frequency order */
-		if (freq != CPUFREQ_ENTRY_INVALID && descend != -1)
+		if (freq != CPUFREQ_ENTRY_INVALID && descend == -1)
 			descend = !!(freq > table[i].frequency);
 
 		freq = table[i].frequency;

From d05bc7105a1b8f6df6569b7c1a89a2fe466f8688 Mon Sep 17 00:00:00 2001
From: Ni Wade <wni@nvidia.com>
Date: Wed, 6 Nov 2013 14:30:13 +0800
Subject: [PATCH 0407/1185] thermal: Fix binding problem when there is thermal
 zone params

The thermal zone params can be used to select a specific
thermal governor for a thermal zone device. But if the
thermal zone params contain only a governor name and no
thermal bind params, the thermal zone device is never
bound to a cooling device, because the tz->ops->bind
operator is not invoked in bind_tz() and bind_cdev() when
thermal zone params are present.

Signed-off-by: Wei Ni <wni@nvidia.com>
Signed-off-by: Jinyoung Park <jinyoungp@nvidia.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
(cherry picked from commit a9f2d19ba7be38590c84487359891d45a66b62f4)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/thermal/thermal_core.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 83cc99bb5aeb..d2b7be523198 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -238,10 +238,11 @@ static void bind_cdev(struct thermal_cooling_device *cdev)
 		if (!pos->tzp && !pos->ops->bind)
 			continue;
 
-		if (!pos->tzp && pos->ops->bind) {
+		if (pos->ops->bind) {
 			ret = pos->ops->bind(pos, cdev);
 			if (ret)
 				print_bind_err_msg(pos, cdev, ret);
+			continue;
 		}
 
 		tzp = pos->tzp;
@@ -272,8 +273,8 @@ static void bind_tz(struct thermal_zone_device *tz)
 
 	mutex_lock(&thermal_list_lock);
 
-	/* If there is no platform data, try to use ops->bind */
-	if (!tzp && tz->ops->bind) {
+	/* If there is ops->bind, try to use ops->bind */
+	if (tz->ops->bind) {
 		list_for_each_entry(pos, &thermal_cdev_list, node) {
 			ret = tz->ops->bind(tz, pos);
 			if (ret)

From e86c742e299cbe321870aa00bcbd4985d1f628fe Mon Sep 17 00:00:00 2001
From: Eduardo Valentin <eduardo.valentin@ti.com>
Date: Wed, 13 Nov 2013 14:11:09 -0400
Subject: [PATCH 0408/1185] thermal: fix cpu_cooling max_level behavior

As per Documentation/thermal/sysfs-api.txt, max_level
is an index, not a counter. Thus, in case a CPU has
3 valid frequencies, max_level is expected to be 2, for instance.

The current code makes max_level == number of valid frequencies,
which is bogus. This patch fixes the cpu_cooling device by
making max_level the index of the last valid frequency.

Reported-by: Carlos Hernandez <ceh@ti.com>
Signed-off-by: Eduardo Valentin <eduardo.valentin@ti.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
(cherry picked from commit 1c9573a40c1d34494419f32560f28c763c504d79)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/thermal/cpu_cooling.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 6093862ae7ad..3331b1a12f85 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -173,6 +173,8 @@ static int get_property(unsigned int cpu, unsigned long input,
 		freq = table[i].frequency;
 		max_level++;
 	}
+	/* max_level is an index, not a counter */
+	max_level--;
 
 	/* get max level */
 	if (property == GET_MAXL) {
@@ -181,7 +183,7 @@ static int get_property(unsigned int cpu, unsigned long input,
 	}
 
 	if (property == GET_FREQ)
-		level = descend ? input : (max_level - input - 1);
+		level = descend ? input : (max_level - input);
 
 	for (i = 0, j = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
 		/* ignore invalid entry */
@@ -197,7 +199,7 @@ static int get_property(unsigned int cpu, unsigned long input,
 
 		if (property == GET_LEVEL && (unsigned int)input == freq) {
 			/* get level by frequency */
-			*output = descend ? j : (max_level - j - 1);
+			*output = descend ? j : (max_level - j);
 			return 0;
 		}
 		if (property == GET_FREQ && level == j) {

From 25a434675eaa351f90b51866a982accac5ee6c61 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Thu, 2 Jan 2014 11:57:48 +0800
Subject: [PATCH 0409/1185] Thermal cpu cooling: return error if no valid cpu
 frequency entry

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
(cherry picked from commit a116776f7b6052599df0c67db29c30ea9d69d7ee)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/thermal/cpu_cooling.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 3331b1a12f85..e1f0830835e5 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -173,6 +173,11 @@ static int get_property(unsigned int cpu, unsigned long input,
 		freq = table[i].frequency;
 		max_level++;
 	}
+
+	/* No valid cpu frequency entry */
+	if (max_level == 0)
+		return -EINVAL;
+
 	/* max_level is an index, not a counter */
 	max_level--;
 

From fe174a059c03451611cbefa828f6efc76fc5ac78 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Fri, 24 Jan 2014 10:23:19 +0800
Subject: [PATCH 0410/1185] Thermal: thermal zone governor fix

This patch cleans up the thermal zone governor setting
and establishes the following rules.
1. Thermal zone devices that are registered w/o tz->tzp
   can use the default thermal governor only.
2. For thermal zone devices w/ a governor name specified in
   tz->tzp->governor_name, we will use the default governor
   if the governor specified is not available at the moment,
   and update tz->governor when the matching governor is registered.

This also fixes a problem that OF registered thermal zones
are running with no governor.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Acked-by: Javi Merino <javi.merino@arm.com>
(cherry picked from commit f2234bcd03ad031225d7dc37dd18852a2f2ff2bf)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/thermal/thermal_core.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index d2b7be523198..561d61d72137 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -56,10 +56,15 @@ static LIST_HEAD(thermal_governor_list);
 static DEFINE_MUTEX(thermal_list_lock);
 static DEFINE_MUTEX(thermal_governor_lock);
 
+static struct thermal_governor *def_governor;
+
 static struct thermal_governor *__find_governor(const char *name)
 {
 	struct thermal_governor *pos;
 
+	if (!name || !name[0])
+		return def_governor;
+
 	list_for_each_entry(pos, &thermal_governor_list, governor_list)
 		if (!strnicmp(name, pos->name, THERMAL_NAME_LENGTH))
 			return pos;
@@ -82,17 +87,23 @@ int thermal_register_governor(struct thermal_governor *governor)
 	if (__find_governor(governor->name) == NULL) {
 		err = 0;
 		list_add(&governor->governor_list, &thermal_governor_list);
+		if (!def_governor && !strncmp(governor->name,
+			DEFAULT_THERMAL_GOVERNOR, THERMAL_NAME_LENGTH))
+			def_governor = governor;
 	}
 
 	mutex_lock(&thermal_list_lock);
 
 	list_for_each_entry(pos, &thermal_tz_list, node) {
+		/*
+		 * only thermal zones with specified tz->tzp->governor_name
+		 * may run with tz->govenor unset
+		 */
 		if (pos->governor)
 			continue;
-		if (pos->tzp)
-			name = pos->tzp->governor_name;
-		else
-			name = DEFAULT_THERMAL_GOVERNOR;
+
+		name = pos->tzp->governor_name;
+
 		if (!strnicmp(name, governor->name, THERMAL_NAME_LENGTH))
 			pos->governor = governor;
 	}
@@ -330,8 +341,8 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz)
 static void handle_non_critical_trips(struct thermal_zone_device *tz,
 			int trip, enum thermal_trip_type trip_type)
 {
-	if (tz->governor)
-		tz->governor->throttle(tz, trip);
+	tz->governor ? tz->governor->throttle(tz, trip) :
+		       def_governor->throttle(tz, trip);
 }
 
 static void handle_critical_trips(struct thermal_zone_device *tz,
@@ -1514,7 +1525,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 	if (tz->tzp)
 		tz->governor = __find_governor(tz->tzp->governor_name);
 	else
-		tz->governor = __find_governor(DEFAULT_THERMAL_GOVERNOR);
+		tz->governor = def_governor;
 
 	mutex_unlock(&thermal_governor_lock);
 

From ed835be313220380ccacadc959f7d4d64324d9ee Mon Sep 17 00:00:00 2001
From: Ni Wade <wni@nvidia.com>
Date: Mon, 17 Feb 2014 11:02:55 +0800
Subject: [PATCH 0411/1185] Thermal: Allow first update of cooling device state

During initialization, if the cooling device starts out at
its max cooling state and the thermal zone temperature is below
the first trip point, the cooling state can't be updated
to the right state until the first trip point is triggered.

To fix this issue, allow a first update of the cooling device state
during registration by initializing the "updated" device field to
"false" (instead of "true").

Signed-off-by: Wei Ni <wni@nvidia.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
(cherry picked from commit 5ca0cce5622bf476e3e6bf627fe8e9381d6ae174)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/thermal/thermal_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 561d61d72137..07d80cc0a91b 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1099,7 +1099,7 @@ __thermal_cooling_device_register(struct device_node *np,
 	INIT_LIST_HEAD(&cdev->thermal_instances);
 	cdev->np = np;
 	cdev->ops = ops;
-	cdev->updated = true;
+	cdev->updated = false;
 	cdev->device.class = &thermal_class;
 	cdev->devdata = devdata;
 	dev_set_name(&cdev->device, "cooling_device%d", cdev->id);

From f8b3519d0a85467970e662f75d1c12d39563c2de Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit.agrawal@arm.com>
Date: Tue, 3 Jun 2014 10:59:58 +0100
Subject: [PATCH 0412/1185] thermal: Bind cooling devices with the correct
 arguments

When binding cooling devices to thermal zones created from the device
tree, the minimum and maximum cooling states are passed in the wrong
order, leading to a failure to bind.

Swap the cooling state arguments in the call to
thermal_zone_bind_cooling_device() to fix this.

Cc:Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
(cherry picked from commit dd354b84d47ec8ca53686bdb3cc1aecdeb75bef5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/thermal/of-thermal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index 04b1be7fa018..97d312f3b670 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -156,8 +156,8 @@ static int of_thermal_bind(struct thermal_zone_device *thermal,
 
 			ret = thermal_zone_bind_cooling_device(thermal,
 						tbp->trip_id, cdev,
-						tbp->min,
-						tbp->max);
+						tbp->max,
+						tbp->min);
 			if (ret)
 				return ret;
 		}

From fca2d1a60af983180816385fc8238ce818cb72c3 Mon Sep 17 00:00:00 2001
From: Lan Tianyu <tianyu.lan@intel.com>
Date: Tue, 13 Aug 2013 10:07:28 +0800
Subject: [PATCH 0413/1185] Thermal/cpu_cooling: Return directly for the cpu
 out of allowed_cpus in the cpufreq_thermal_notifier()

cpufreq_thermal_notifier() changes the cpufreq of the cpus in the allowed_cpus
mask when the associated thermal-cpufreq cdev's cooling state is changed. It is
a cpufreq policy notifier handler, so it is also triggered when cpus outside of
allowed_cpus have their freq policy changed.

cpufreq_thermal_notifier() checks policy->cpu. If it belongs to allowed_cpus,
max_freq (0 by default) is set to the desired cpufreq value, and 0 and max_freq
are passed to cpufreq_verify_within_limits() as the cpufreq scope. If it does
not, nothing is done and max_freq remains 0, which causes the cpufreq scope to
become 0~0. This is not right. This patch returns directly when the cpu does
not belong to allowed_cpus.

Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
(cherry picked from commit 044d5c26da262fa433dacbe1c6962459050d6b06)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/thermal/cpu_cooling.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index e1f0830835e5..dc50315174d8 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -329,6 +329,8 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
 
 	if (cpumask_test_cpu(policy->cpu, &notify_device->allowed_cpus))
 		max_freq = notify_device->cpufreq_val;
+	else
+		return 0;
 
 	/* Never exceed user_policy.max */
 	if (max_freq > policy->user_policy.max)

From 3a68305f1f92040bc57d6ea94bedde1b694d0356 Mon Sep 17 00:00:00 2001
From: "lan,Tianyu" <tianyu.lan@intel.com>
Date: Thu, 2 Jan 2014 15:47:54 +0800
Subject: [PATCH 0414/1185] Thermal: update thermal zone device after setting
 emul_temp

This patch is to update thermal zone device after setting emul_temp
in order to make governor work according to input temperature immediately.

Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
(cherry picked from commit 800744bf31df54b0cd4d1104ccfa426d3f578f0e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/thermal/thermal_core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 07d80cc0a91b..edc0cb88f1d0 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -777,6 +777,9 @@ emul_temp_store(struct device *dev, struct device_attribute *attr,
 		ret = tz->ops->set_emul_temp(tz, temperature);
 	}
 
+	if (!ret)
+		thermal_zone_device_update(tz);
+
 	return ret ? ret : count;
 }
 static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store);

From c60fd301f395adc8f788be8b192a58f0246ddbae Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Wed, 18 Jun 2014 16:32:08 -0700
Subject: [PATCH 0415/1185] thermal: Add braces around suspect code

It looks like this code is missing braces, otherwise the if
statement shouldn't have been indented. Fix it.

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
(cherry picked from commit ca9521b770c988bb6bb8eea1241f7a487dab6ff1)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/thermal/of-thermal.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index 97d312f3b670..4b2b999b7611 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -712,11 +712,12 @@ thermal_of_build_thermal_zone(struct device_node *np)
 	}
 
 	i = 0;
-	for_each_child_of_node(child, gchild)
+	for_each_child_of_node(child, gchild) {
 		ret = thermal_of_populate_bind_params(gchild, &tz->tbps[i++],
 						      tz->trips, tz->ntrips);
 		if (ret)
 			goto free_tbps;
+	}
 
 finish:
 	of_node_put(child);

From e6ac9c53208c2871e3704e5b911971522cda8a56 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Fri, 25 Oct 2013 21:55:42 +0800
Subject: [PATCH 0416/1185] thermal: cpu_cooling: fix return value check in
 cpufreq_cooling_register()

In case of error, the function thermal_cooling_device_register() returns
ERR_PTR() and never returns NULL. The NULL test in the return value check
should be replaced with IS_ERR().

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
(cherry picked from commit 73b9bcd76d13716cc0e0ab053f8c1ae41f47636e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>

Conflicts:
	drivers/thermal/cpu_cooling.c
---
 drivers/thermal/cpu_cooling.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index dc50315174d8..8c1547424502 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -482,7 +482,7 @@ __cpufreq_cooling_register(struct device_node *np,
 	if (IS_ERR(cool_dev)) {
 		release_idr(&cpufreq_idr, cpufreq_dev->id);
 		kfree(cpufreq_dev);
-		return ERR_PTR(-EINVAL);
+		return cool_dev;
 	}
 	cpufreq_dev->cool_dev = cool_dev;
 	cpufreq_dev->cpufreq_state = 0;

From d49a5d426a7906927351ca58dd14c6870975b166 Mon Sep 17 00:00:00 2001
From: Eduardo Valentin <eduardo.valentin@ti.com>
Date: Thu, 15 Aug 2013 10:54:46 -0400
Subject: [PATCH 0417/1185] drivers: thermal: add check when unregistering cpu
 cooling

This patch avoids NULL pointer accesses while unregistering
cpu cooling devices, in case a NULL pointer is received.

Cc: Zhang Rui <rui.zhang@intel.com>
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Eduardo Valentin <eduardo.valentin@ti.com>
(cherry picked from commit 50e66c7ed8a1cd7e933628f9f5cf2617394adf5a)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/thermal/cpu_cooling.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 8c1547424502..4246262c4bd2 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -549,8 +549,12 @@ EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
  */
 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
 {
-	struct cpufreq_cooling_device *cpufreq_dev = cdev->devdata;
+	struct cpufreq_cooling_device *cpufreq_dev;
 
+	if (!cdev)
+		return;
+
+	cpufreq_dev = cdev->devdata;
 	mutex_lock(&cooling_cpufreq_lock);
 	cpufreq_dev_count--;
 

From 5c4e2425d94aa1545ce9d5feb9324ca85f62ef90 Mon Sep 17 00:00:00 2001
From: viresh kumar <viresh.kumar@linaro.org>
Date: Fri, 14 Mar 2014 12:10:55 +0530
Subject: [PATCH 0418/1185] cpufreq: arm_big_little: set 'physical_cluster' for
 each CPU

We have a per-CPU variable for managing which cluster a CPU belongs to.
Currently, physical_cluster is set for policy->cpu only which leads to
the following on some SoC's:

 - There are two clusters:
   - Cluster 0 has four ARM Cortex A7 CPUs (slower ones): 0,1,2,3
   - Cluster 1 has four ARM Cortex A15 CPUs (faster ones): 4,5,6,7
 - CPUs are booted in order 0,1..7 and so initially policy->cpu for A7 cluster
   would be 0 and for A15 cluster would be 4.
 - Now CPU4 (i.e. A15_0) is hotplugged out and so policy->cpu for A15 cluster
   becomes 5 (i.e. A15_1).
 - But physical cluster is only set for CPU0 and CPU4 in ARM big LITTLE driver
   and isn't updated.
 - Now freq change request comes for A15 cluster and we would try to update freq
   of physical_cluster of CPU5, i.e. A15_1. And it is currently set to zero
   (default value of uninitialized global variables).
 - And so we actually try to change freq of A7 cluster instead of A15.
 - This also results in a kernel crash, as sometimes we might request a freq
   above A7's limit and the CPU may behave badly.

Fix this by initializing physical_cluster for all CPUs of a policy.

Based on previous work by Xin Wang.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 8f3ba3d3257be80636ed15cc221d6a2efb6a6e82)
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/cpufreq/arm_big_little.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index 076f25a59e00..eaee6e222207 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -456,9 +456,12 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy)
 	cpufreq_frequency_table_get_attr(freq_table[cur_cluster], policy->cpu);
 
 	if (cur_cluster < MAX_CLUSTERS) {
+		int cpu;
+
 		cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu));
 
-		per_cpu(physical_cluster, policy->cpu) = cur_cluster;
+		for_each_cpu(cpu, policy->cpus)
+			per_cpu(physical_cluster, cpu) = cur_cluster;
 	} else {
 		/* Assumption: during init, we are always running on A15 */
 		per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER;

From 176c78de32186c9c8def04bc9054f2f60d5ef6a0 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Thu, 12 Apr 2012 01:40:31 -0400
Subject: [PATCH 0419/1185] ARM: gic: add CPU migration support

This is required by the big.LITTLE switcher code.

gic_migrate_target() changes the CPU interface mapping for the
current CPU to redirect SGIs to the specified interface, and it also
updates the target of each interrupt to that CPU interface
if it was targeting the current interface.  Finally, pending
SGIs for the current CPU are forwarded to the new interface.

Because Linux does not use it, the SGI source information for the
forwarded SGIs is not preserved.  Neither is the source information
for the SGIs sent by the current CPU to other CPUs adjusted to match
the new CPU interface mapping.  The required registers are banked so
only the target CPU could do it.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
(cherry picked from commit 1a6b69b6548cd0dd82549393f30dd982ceeb79d2)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 drivers/irqchip/irq-gic.c       | 87 +++++++++++++++++++++++++++++++--
 include/linux/irqchip/arm-gic.h |  4 ++
 2 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 19ceaa60e0f4..4f5741df74e7 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -253,10 +253,9 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 	if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids)
 		return -EINVAL;
 
+	raw_spin_lock(&irq_controller_lock);
 	mask = 0xff << shift;
 	bit = gic_cpu_map[cpu] << shift;
-
-	raw_spin_lock(&irq_controller_lock);
 	val = readl_relaxed(reg) & ~mask;
 	writel_relaxed(val | bit, reg);
 	raw_spin_unlock(&irq_controller_lock);
@@ -646,7 +645,9 @@ static void __init gic_pm_init(struct gic_chip_data *gic)
 void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 {
 	int cpu;
-	unsigned long map = 0;
+	unsigned long flags, map = 0;
+
+	raw_spin_lock_irqsave(&irq_controller_lock, flags);
 
 	/* Convert our logical CPU mask into a physical one. */
 	for_each_cpu(cpu, mask)
@@ -660,6 +661,86 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 
 	/* this always happens on GIC0 */
 	writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+
+	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+#endif
+
+#ifdef CONFIG_BL_SWITCHER
+/*
+ * gic_migrate_target - migrate IRQs to another CPU interface
+ *
+ * @new_cpu_id: the CPU target ID to migrate IRQs to
+ *
+ * Migrate all peripheral interrupts with a target matching the current CPU
+ * to the interface corresponding to @new_cpu_id.  The CPU interface mapping
+ * is also updated.  Targets to other CPU interfaces are unchanged.
+ * This must be called with IRQs locally disabled.
+ */
+void gic_migrate_target(unsigned int new_cpu_id)
+{
+	unsigned int cur_cpu_id, gic_irqs, gic_nr = 0;
+	void __iomem *dist_base;
+	int i, ror_val, cpu = smp_processor_id();
+	u32 val, cur_target_mask, active_mask;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	dist_base = gic_data_dist_base(&gic_data[gic_nr]);
+	if (!dist_base)
+		return;
+	gic_irqs = gic_data[gic_nr].gic_irqs;
+
+	cur_cpu_id = __ffs(gic_cpu_map[cpu]);
+	cur_target_mask = 0x01010101 << cur_cpu_id;
+	ror_val = (cur_cpu_id - new_cpu_id) & 31;
+
+	raw_spin_lock(&irq_controller_lock);
+
+	/* Update the target interface for this logical CPU */
+	gic_cpu_map[cpu] = 1 << new_cpu_id;
+
+	/*
+	 * Find all the peripheral interrupts targetting the current
+	 * CPU interface and migrate them to the new CPU interface.
+	 * We skip DIST_TARGET 0 to 7 as they are read-only.
+	 */
+	for (i = 8; i < DIV_ROUND_UP(gic_irqs, 4); i++) {
+		val = readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4);
+		active_mask = val & cur_target_mask;
+		if (active_mask) {
+			val &= ~active_mask;
+			val |= ror32(active_mask, ror_val);
+			writel_relaxed(val, dist_base + GIC_DIST_TARGET + i*4);
+		}
+	}
+
+	raw_spin_unlock(&irq_controller_lock);
+
+	/*
+	 * Now let's migrate and clear any potential SGIs that might be
+	 * pending for us (cur_cpu_id).  Since GIC_DIST_SGI_PENDING_SET
+	 * is a banked register, we can only forward the SGI using
+	 * GIC_DIST_SOFTINT.  The original SGI source is lost but Linux
+	 * doesn't use that information anyway.
+	 *
+	 * For the same reason we do not adjust SGI source information
+	 * for previously sent SGIs by us to other CPUs either.
+	 */
+	for (i = 0; i < 16; i += 4) {
+		int j;
+		val = readl_relaxed(dist_base + GIC_DIST_SGI_PENDING_SET + i);
+		if (!val)
+			continue;
+		writel_relaxed(val, dist_base + GIC_DIST_SGI_PENDING_CLEAR + i);
+		for (j = i; j < i + 4; j++) {
+			if (val & 0xff)
+				writel_relaxed((1 << (new_cpu_id + 16)) | j,
+						dist_base + GIC_DIST_SOFTINT);
+			val >>= 8;
+		}
+	}
 }
 #endif
 
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 3e203eb23cc7..40bfcac95940 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -31,6 +31,8 @@
 #define GIC_DIST_TARGET			0x800
 #define GIC_DIST_CONFIG			0xc00
 #define GIC_DIST_SOFTINT		0xf00
+#define GIC_DIST_SGI_PENDING_CLEAR	0xf10
+#define GIC_DIST_SGI_PENDING_SET	0xf20
 
 #define GICH_HCR			0x0
 #define GICH_VTR			0x4
@@ -73,6 +75,8 @@ static inline void gic_init(unsigned int nr, int start,
 	gic_init_bases(nr, start, dist, cpu, 0, NULL);
 }
 
+void gic_migrate_target(unsigned int new_cpu_id);
+
 #endif /* __ASSEMBLY */
 
 #endif

From 0284d57fefb977c52748176308d30d30a148d24c Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 2 Oct 2014 09:29:59 +0200
Subject: [PATCH 0420/1185] ARM: bL_switcher: do not hardcode GIC IDs in the
 code

Currently, GIC IDs are hardcoded making the code dependent on the 4+4 b.L
configuration.  Let's allow for GIC IDs to be discovered upon switcher
initialization to support other b.L configurations such as the 1+1 one,
or 2+3 as on the VExpress TC2.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
(cherry picked from commit ed96762e3241f57aa812977cf1920d3ee0363f4d)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 drivers/irqchip/irq-gic.c       | 21 +++++++++++++++++++++
 include/linux/irqchip/arm-gic.h |  1 +
 2 files changed, 22 insertions(+)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 4f5741df74e7..dbd26dae2e4b 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -667,6 +667,27 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 #endif
 
 #ifdef CONFIG_BL_SWITCHER
+/*
+ * gic_get_cpu_id - get the CPU interface ID for the specified CPU
+ *
+ * @cpu: the logical CPU number to get the GIC ID for.
+ *
+ * Return the CPU interface ID for the given logical CPU number,
+ * or -1 if the CPU number is too large or the interface ID is
+ * unknown (more than one bit set).
+ */
+int gic_get_cpu_id(unsigned int cpu)
+{
+	unsigned int cpu_bit;
+
+	if (cpu >= NR_GIC_CPU_IF)
+		return -1;
+	cpu_bit = gic_cpu_map[cpu];
+	if (cpu_bit & (cpu_bit - 1))
+		return -1;
+	return __ffs(cpu_bit);
+}
+
 /*
  * gic_migrate_target - migrate IRQs to another CPU interface
  *
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 40bfcac95940..2d7d47e8dfaf 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -75,6 +75,7 @@ static inline void gic_init(unsigned int nr, int start,
 	gic_init_bases(nr, start, dist, cpu, 0, NULL);
 }
 
+int gic_get_cpu_id(unsigned int cpu);
 void gic_migrate_target(unsigned int new_cpu_id);
 
 #endif /* __ASSEMBLY */

From 0f4e18f6c9cdc1246da523f25b83776ece5b4b8a Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 19 Mar 2013 23:59:04 -0400
Subject: [PATCH 0421/1185] drivers: irq-chip: irq-gic: introduce
 gic_cpu_if_down()

When processors are about to hit low power states, the assertion of
standbywfi signal, triggered by the wfi instruction, is essential to
entering low power modes. If an IRQ is pending on the processor at the
time wfi is issued, the wfi instruction completes and the processor
restarts execution without asserting the standbywfi signal. Depending
on the platform power controller HW this behaviour can be acceptable or
not; if this behaviour must be prevented software should be provided
with a way to disable the routing of interrupts to the core IRQ pins.

On systems where raw GIC distributor interrupts are connected to the power
controller as wake-up events (hence the power controller still senses
IRQs and can wake up cores upon IRQ pending), the GIC CPU interface can
be disabled on power down, so that the GIC CPU IF output is gated and wfi
cannot complete, thereby preventing the standbywfi issue.

This patch adds a simple function to the GIC driver that allows
disabling the GIC CPU IF from power down procedures.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
[rewrote commit log]

Signed-off-by: Olof Johansson <olof@lixom.net>
(cherry picked from commit 10d9eb8a17cfb697967928bde06f3e7e530b03ac)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 drivers/irqchip/irq-gic.c       | 6 ++++++
 include/linux/irqchip/arm-gic.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index dbd26dae2e4b..b8a35cc51a35 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -452,6 +452,12 @@ static void __cpuinit gic_cpu_init(struct gic_chip_data *gic)
 	writel_relaxed(1, base + GIC_CPU_CTRL);
 }
 
+void gic_cpu_if_down(void)
+{
+	void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]);
+	writel_relaxed(0, cpu_base + GIC_CPU_CTRL);
+}
+
 #ifdef CONFIG_CPU_PM
 /*
  * Saves the GIC distributor registers during suspend or idle.  Must be called
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 2d7d47e8dfaf..46544e381bf9 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -68,6 +68,7 @@ extern struct irq_chip gic_arch_extn;
 void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *,
 		    u32 offset, struct device_node *);
 void gic_cascade_irq(unsigned int gic_nr, unsigned int irq);
+void gic_cpu_if_down(void);
 
 static inline void gic_init(unsigned int nr, int start,
 			    void __iomem *dist , void __iomem *cpu)

From f4bf1bf03b1d82dea6f7f2f22dfbda1847933502 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Wed, 28 Nov 2012 18:17:25 -0500
Subject: [PATCH 0422/1185] ARM: GIC: function to retrieve the physical address
 of the SGIR

In order to have early assembly code signal other CPUs in the system,
we need to get the physical address for the SGIR register used to
send IPIs.  Because the register will be used with a precomputed CPU
interface ID number, there is no need for any locking in the assembly
code where this register is written to.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
(cherry picked from commit eeb446581ba23a5a36b4f5c7bfa2b1f8f7c9fb66)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 drivers/irqchip/irq-gic.c       | 29 +++++++++++++++++++++++++++++
 include/linux/irqchip/arm-gic.h |  1 +
 2 files changed, 30 insertions(+)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index b8a35cc51a35..0e8c7ef1fdcf 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -769,6 +769,33 @@ void gic_migrate_target(unsigned int new_cpu_id)
 		}
 	}
 }
+
+/*
+ * gic_get_sgir_physaddr - get the physical address for the SGI register
+ *
+ * REturn the physical address of the SGI register to be used
+ * by some early assembly code when the kernel is not yet available.
+ */
+static unsigned long gic_dist_physaddr;
+
+unsigned long gic_get_sgir_physaddr(void)
+{
+	if (!gic_dist_physaddr)
+		return 0;
+	return gic_dist_physaddr + GIC_DIST_SOFTINT;
+}
+
+void __init gic_init_physaddr(struct device_node *node)
+{
+	struct resource res;
+	if (of_address_to_resource(node, 0, &res) == 0) {
+		gic_dist_physaddr = res.start;
+		pr_info("GIC physical location is %#lx\n", gic_dist_physaddr);
+	}
+}
+
+#else
+#define gic_init_physaddr(node)  do { } while (0)
 #endif
 
 static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
@@ -952,6 +979,8 @@ int __init gic_of_init(struct device_node *node, struct device_node *parent)
 		percpu_offset = 0;
 
 	gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset, node);
+	if (!gic_cnt)
+		gic_init_physaddr(node);
 
 	if (parent) {
 		irq = irq_of_parse_and_map(node, 0);
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 46544e381bf9..dc30835099e3 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -78,6 +78,7 @@ static inline void gic_init(unsigned int nr, int start,
 
 int gic_get_cpu_id(unsigned int cpu);
 void gic_migrate_target(unsigned int new_cpu_id);
+unsigned long gic_get_sgir_physaddr(void);
 
 #endif /* __ASSEMBLY */
 

From 43eafb4c0fff64f9ec85fa98f2300c52b2c27aff Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Wed, 28 Nov 2012 18:48:19 -0500
Subject: [PATCH 0423/1185] ARM: GIC: interface to send a SGI directly

The regular gic_raise_softirq() takes as input a CPU mask which is not
adequate when we need to send an IPI to a CPU which is not represented
in the kernel to GIC mapping.  That is the case with the b.L switcher
when GIC migration to the inbound CPU has not yet occurred.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
(cherry picked from commit 14d2ca615a85e2dbc744c12c296affd35f119fa7)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 drivers/irqchip/irq-gic.c       | 14 ++++++++++++++
 include/linux/irqchip/arm-gic.h |  1 +
 2 files changed, 15 insertions(+)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 0e8c7ef1fdcf..25fe39d2d2bf 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -673,6 +673,20 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 #endif
 
 #ifdef CONFIG_BL_SWITCHER
+/*
+ * gic_send_sgi - send a SGI directly to given CPU interface number
+ *
+ * cpu_id: the ID for the destination CPU interface
+ * irq: the IPI number to send a SGI for
+ */
+void gic_send_sgi(unsigned int cpu_id, unsigned int irq)
+{
+	BUG_ON(cpu_id >= NR_GIC_CPU_IF);
+	cpu_id = 1 << cpu_id;
+	/* this always happens on GIC0 */
+	writel_relaxed((cpu_id << 16) | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+}
+
 /*
  * gic_get_cpu_id - get the CPU interface ID for the specified CPU
  *
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index dc30835099e3..cac496b1e279 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -76,6 +76,7 @@ static inline void gic_init(unsigned int nr, int start,
 	gic_init_bases(nr, start, dist, cpu, 0, NULL);
 }
 
+void gic_send_sgi(unsigned int cpu_id, unsigned int irq);
 int gic_get_cpu_id(unsigned int cpu);
 void gic_migrate_target(unsigned int new_cpu_id);
 unsigned long gic_get_sgir_physaddr(void);

From 893fe6dda7e382a2ee069c43417dd292ce422a63 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 28 Nov 2013 14:21:40 +0000
Subject: [PATCH 0424/1185] irqchip: Gic: fix boot for chained gics

As of c0114709ed: "irqchip: gic: Perform the gic_secondary_init() call
via CPU notifier", booting on a platform with chained gics (e.g.
Realview EB ARM11MPCore) will result in the gic_cpu_notifier being
registered twice, corrupting the cpu notifier list and rendering the
platform unbootable.

This patch ensures that we only register the notifier for the first
gic, allowing platforms with chained gics to boot. At the same time we
limit the pointlessly duplicated calls to set_smp_cross_call and
set_handle_irq to the first gic registered.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: marc.zyngier@arm.com
Cc: rob.herring@calxeda.com
Cc: olof@lixom.net
Link: http://lkml.kernel.org/r/1385648500-29048-1-git-send-email-mark.rutland@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
(cherry picked from commit 08332dff8adebb74171e98e008d6c20de6658c42)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 drivers/irqchip/irq-gic.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 25fe39d2d2bf..036bf9c352f2 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -957,12 +957,13 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
 	if (WARN_ON(!gic->domain))
 		return;
 
+	if (gic_nr == 0) {
 #ifdef CONFIG_SMP
-	set_smp_cross_call(gic_raise_softirq);
-	register_cpu_notifier(&gic_cpu_notifier);
+		set_smp_cross_call(gic_raise_softirq);
+		register_cpu_notifier(&gic_cpu_notifier);
 #endif
-
-	set_handle_irq(gic_handle_irq);
+		set_handle_irq(gic_handle_irq);
+	}
 
 	gic_chip.flags |= gic_arch_extn.flags;
 	gic_dist_init(gic);

From bd09b6492478715ef4ee191e9bd75a4ac98294a2 Mon Sep 17 00:00:00 2001
From: Sricharan R <r.sricharan@ti.com>
Date: Tue, 3 Dec 2013 15:57:22 +0530
Subject: [PATCH 0425/1185] DRIVERS: IRQCHIP: IRQ-GIC: Add support for routable
 irqs

In some socs the gic can be preceded by a crossbar IP which
routes the peripheral interrupts to the gic inputs. The peripheral
interrupts are associated with a fixed crossbar input line and the
crossbar routes that to one of the free gic input lines.

The DT entries for peripherals provide the fixed crossbar input line
as their interrupt number and the mapping code should associate this with
a free gic input line. This patch adds the support inside the gic irqchip
to handle such routable irqs. The routable irqs are registered in a linear
domain. The registered routable domain's callback should be implemented
to get a free irq and to configure the IP to route it.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Tony Lindgren <tony@atomide.com>
Cc: Rajendra Nayak <rnayak@ti.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Rob Herring <rob.herring@calxeda.com>
Signed-off-by: Sricharan R <r.sricharan@ti.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
(cherry picked from commit 006e983bbc805431c44e2135e13841f66059a045)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/devicetree/bindings/arm/gic.txt |  6 ++
 drivers/irqchip/irq-gic.c                     | 82 ++++++++++++++++---
 include/linux/irqchip/arm-gic.h               |  7 +-
 3 files changed, 84 insertions(+), 11 deletions(-)

diff --git a/Documentation/devicetree/bindings/arm/gic.txt b/Documentation/devicetree/bindings/arm/gic.txt
index 3dfb0c0384f5..535774577238 100644
--- a/Documentation/devicetree/bindings/arm/gic.txt
+++ b/Documentation/devicetree/bindings/arm/gic.txt
@@ -49,6 +49,11 @@ Optional
   regions, used when the GIC doesn't have banked registers. The offset is
   cpu-offset * cpu-nr.
 
+- arm,routable-irqs : Total number of gic irq inputs which are not directly
+		  connected from the peripherals, but are routed dynamically
+		  by a crossbar/multiplexer preceding the GIC. The GIC irq
+		  input line is assigned dynamically when the corresponding
+		  peripheral's crossbar line is mapped.
 Example:
 
 	intc: interrupt-controller@fff11000 {
@@ -56,6 +61,7 @@ Example:
 		#interrupt-cells = <3>;
 		#address-cells = <1>;
 		interrupt-controller;
+		arm,routable-irqs = <160>;
 		reg = <0xfff11000 0x1000>,
 		      <0xfff10100 0x100>;
 	};
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 036bf9c352f2..401078951dbb 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -824,16 +824,25 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 		irq_set_chip_and_handler(irq, &gic_chip,
 					 handle_fasteoi_irq);
 		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+
+		gic_routable_irq_domain_ops->map(d, irq, hw);
 	}
 	irq_set_chip_data(irq, d->host_data);
 	return 0;
 }
 
+static void gic_irq_domain_unmap(struct irq_domain *d, unsigned int irq)
+{
+	gic_routable_irq_domain_ops->unmap(d, irq);
+}
+
 static int gic_irq_domain_xlate(struct irq_domain *d,
 				struct device_node *controller,
 				const u32 *intspec, unsigned int intsize,
 				unsigned long *out_hwirq, unsigned int *out_type)
 {
+	unsigned long ret = 0;
+
 	if (d->of_node != controller)
 		return -EINVAL;
 	if (intsize < 3)
@@ -843,11 +852,20 @@ static int gic_irq_domain_xlate(struct irq_domain *d,
 	*out_hwirq = intspec[1] + 16;
 
 	/* For SPIs, we need to add 16 more to get the GIC irq ID number */
-	if (!intspec[0])
-		*out_hwirq += 16;
+	if (!intspec[0]) {
+		ret = gic_routable_irq_domain_ops->xlate(d, controller,
+							 intspec,
+							 intsize,
+							 out_hwirq,
+							 out_type);
+
+		if (IS_ERR_VALUE(ret))
+			return ret;
+	}
 
 	*out_type = intspec[2] & IRQ_TYPE_SENSE_MASK;
-	return 0;
+
+	return ret;
 }
 
 #ifdef CONFIG_SMP
@@ -871,9 +889,41 @@ static struct notifier_block __cpuinitdata gic_cpu_notifier = {
 
 const struct irq_domain_ops gic_irq_domain_ops = {
 	.map = gic_irq_domain_map,
+	.unmap = gic_irq_domain_unmap,
 	.xlate = gic_irq_domain_xlate,
 };
 
+/* Default functions for routable irq domain */
+static int gic_routable_irq_domain_map(struct irq_domain *d, unsigned int irq,
+			      irq_hw_number_t hw)
+{
+	return 0;
+}
+
+static void gic_routable_irq_domain_unmap(struct irq_domain *d,
+					  unsigned int irq)
+{
+}
+
+static int gic_routable_irq_domain_xlate(struct irq_domain *d,
+				struct device_node *controller,
+				const u32 *intspec, unsigned int intsize,
+				unsigned long *out_hwirq,
+				unsigned int *out_type)
+{
+	*out_hwirq += 16;
+	return 0;
+}
+
+const struct irq_domain_ops gic_default_routable_irq_domain_ops = {
+	.map = gic_routable_irq_domain_map,
+	.unmap = gic_routable_irq_domain_unmap,
+	.xlate = gic_routable_irq_domain_xlate,
+};
+
+const struct irq_domain_ops *gic_routable_irq_domain_ops =
+					&gic_default_routable_irq_domain_ops;
+
 void __init gic_init_bases(unsigned int gic_nr, int irq_start,
 			   void __iomem *dist_base, void __iomem *cpu_base,
 			   u32 percpu_offset, struct device_node *node)
@@ -881,6 +931,7 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
 	irq_hw_number_t hwirq_base;
 	struct gic_chip_data *gic;
 	int gic_irqs, irq_base, i;
+	int nr_routable_irqs;
 
 	BUG_ON(gic_nr >= MAX_GIC_NR);
 
@@ -946,14 +997,25 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
 	gic->gic_irqs = gic_irqs;
 
 	gic_irqs -= hwirq_base; /* calculate # of irqs to allocate */
-	irq_base = irq_alloc_descs(irq_start, 16, gic_irqs, numa_node_id());
-	if (IS_ERR_VALUE(irq_base)) {
-		WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n",
-		     irq_start);
-		irq_base = irq_start;
+
+	if (of_property_read_u32(node, "arm,routable-irqs",
+				 &nr_routable_irqs)) {
+		irq_base = irq_alloc_descs(irq_start, 16, gic_irqs,
+					   numa_node_id());
+		if (IS_ERR_VALUE(irq_base)) {
+			WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n",
+			     irq_start);
+			irq_base = irq_start;
+		}
+
+		gic->domain = irq_domain_add_legacy(node, gic_irqs, irq_base,
+					hwirq_base, &gic_irq_domain_ops, gic);
+	} else {
+		gic->domain = irq_domain_add_linear(node, nr_routable_irqs,
+						    &gic_irq_domain_ops,
+						    gic);
 	}
-	gic->domain = irq_domain_add_legacy(node, gic_irqs, irq_base,
-				    hwirq_base, &gic_irq_domain_ops, gic);
+
 	if (WARN_ON(!gic->domain))
 		return;
 
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index cac496b1e279..fcb02d7637ec 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -81,6 +81,11 @@ int gic_get_cpu_id(unsigned int cpu);
 void gic_migrate_target(unsigned int new_cpu_id);
 unsigned long gic_get_sgir_physaddr(void);
 
+extern const struct irq_domain_ops *gic_routable_irq_domain_ops;
+static inline void __init register_routable_domain_ops
+					(const struct irq_domain_ops *ops)
+{
+	gic_routable_irq_domain_ops = ops;
+}
 #endif /* __ASSEMBLY */
-
 #endif

From 32fd049c3a74b9f88c1adcc5f04becb9f55c9db1 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 20 Feb 2014 17:42:07 +0000
Subject: [PATCH 0426/1185] irqchip: gic: use dmb ishst instead of dsb when
 raising a softirq

When sending an SGI to another CPU, we require a barrier to ensure that
any pending stores to normal memory are made visible to the recipient
before the interrupt arrives.

Rather than use a vanilla dsb() (which will soon cause an assembly error
on arm64) before the writel_relaxed, we can instead use dsb(ishst),
since we just need to ensure that any pending normal writes are visible
within the inner-shareable domain before we poke the GIC.

With this observation, we can then further weaken the barrier to a
dmb(ishst), since other CPUs in the inner-shareable domain must observe
the write to the distributor before the SGI is generated.

Cc: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
(cherry picked from commit 8adbf57fc4294588e9785069215d445a98e6c23a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 drivers/irqchip/irq-gic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 401078951dbb..fe55ae6a658d 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -661,9 +661,9 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 
 	/*
 	 * Ensure that stores to Normal memory are visible to the
-	 * other CPUs before issuing the IPI.
+	 * other CPUs before they observe us issuing the IPI.
 	 */
-	dsb();
+	dmb(ishst);
 
 	/* this always happens on GIC0 */
 	writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);

From 7c2849557e72c7b13c3311c7eb5c96d489416e80 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Tue, 4 Mar 2014 17:02:01 -0800
Subject: [PATCH 0427/1185] irqchip: gic: Silence sparse warnings

drivers/irqchip/irq-gic.c:53:23: warning: duplicate [noderef]
drivers/irqchip/irq-gic.c:651:6: warning: symbol 'gic_raise_softirq' was not declared. Should it be static?
drivers/irqchip/irq-gic.c:872:29: warning: symbol 'gic_irq_domain_ops' was not declared. Should it be static?
drivers/irqchip/irq-gic.c:977:12: warning: symbol 'gic_of_init' was not declared. Should it be static?

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1393981321-25721-1-git-send-email-sboyd@codeaurora.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
(cherry picked from commit 6859358e4b0bf2e599027dc4c6317e0bc25ff339)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 drivers/irqchip/irq-gic.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index fe55ae6a658d..29b483f603d4 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -50,7 +50,7 @@
 
 union gic_base {
 	void __iomem *common_base;
-	void __percpu __iomem **percpu_base;
+	void __percpu * __iomem *percpu_base;
 };
 
 struct gic_chip_data {
@@ -648,7 +648,7 @@ static void __init gic_pm_init(struct gic_chip_data *gic)
 #endif
 
 #ifdef CONFIG_SMP
-void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
+static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 {
 	int cpu;
 	unsigned long flags, map = 0;
@@ -887,7 +887,7 @@ static struct notifier_block __cpuinitdata gic_cpu_notifier = {
 };
 #endif
 
-const struct irq_domain_ops gic_irq_domain_ops = {
+static const struct irq_domain_ops gic_irq_domain_ops = {
 	.map = gic_irq_domain_map,
 	.unmap = gic_irq_domain_unmap,
 	.xlate = gic_irq_domain_xlate,
@@ -1036,7 +1036,8 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
 #ifdef CONFIG_OF
 static int gic_cnt __initdata;
 
-int __init gic_of_init(struct device_node *node, struct device_node *parent)
+static int __init
+gic_of_init(struct device_node *node, struct device_node *parent)
 {
 	void __iomem *cpu_base;
 	void __iomem *dist_base;

From c0627e3f793af66835fb7440d13f70c23a8ec2fd Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Tue, 4 Mar 2014 16:40:30 -0800
Subject: [PATCH 0428/1185] irqchip: Remove asmlinkage from static functions

LTO patches add __visible to the asmlinkage define, causing
compilation warnings like:

  drivers/irqchip/irq-gic.c:283:1: warning: 'externally_visible'
  attribute have effect only on public objects [-Wattributes]

  [ Modified for LSK from its original commit (see below) to only touch
    the irq-gic.c file so that we have a natural progression of the code
    before factoring out code bits into a library file for GICv3 support
    later - Christoffer ]

Drop asmlinkage here to avoid such warnings.

Reported-by: Olof's autobuilder <build@lixom.net>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: khilman@linaro.org
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Josh Cartwright <joshc@codeaurora.org>
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/1393980030-17770-1-git-send-email-sboyd@codeaurora.org
(cherry picked from commit 8783dd3a37a5853689e1a8fa728827a50905b912)
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/irqchip/irq-gic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 29b483f603d4..922f7ba37d5c 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -279,7 +279,7 @@ static int gic_set_wake(struct irq_data *d, unsigned int on)
 #define gic_set_wake	NULL
 #endif
 
-static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
+static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
 {
 	u32 irqstat, irqnr;
 	struct gic_chip_data *gic = &gic_data[0];

From 03c2fd4d233df62d02ed2a6233f40a5fab27eeec Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@linaro.org>
Date: Sun, 11 May 2014 16:05:58 +0800
Subject: [PATCH 0429/1185] irqchip: gic: Use mask field in GICC_IAR

Bit[9:0] is interrupt ID field in GICC_IAR. Bit[12:10] is CPU ID field,
and others are reserved.

So we should use GICC_IAR_INT_ID_MASK to get the interrupt ID. Masking with
~0x1c00 (which only clears the CPU ID field) is not a good way to get it.

Signed-off-by: Haojian Zhuang <haojian.zhuang@linaro.org>
Link: https://lkml.kernel.org/r/1399795571-17231-3-git-send-email-haojian.zhuang@linaro.org
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
(cherry picked from commit b8802f76fe473d91886220498aeda157c492f2d1)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 drivers/irqchip/irq-gic.c       | 2 +-
 include/linux/irqchip/arm-gic.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 922f7ba37d5c..949a53d03a26 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -287,7 +287,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
 
 	do {
 		irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK);
-		irqnr = irqstat & ~0x1c00;
+		irqnr = irqstat & GICC_IAR_INT_ID_MASK;
 
 		if (likely(irqnr > 15 && irqnr < 1021)) {
 			irqnr = irq_find_mapping(gic->domain, irqnr);
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index fcb02d7637ec..14faee7c9714 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -18,6 +18,8 @@
 #define GIC_CPU_RUNNINGPRI		0x14
 #define GIC_CPU_HIGHPRI			0x18
 
+#define GICC_IAR_INT_ID_MASK		0x3ff
+
 #define GIC_DIST_CTRL			0x000
 #define GIC_DIST_CTR			0x004
 #define GIC_DIST_IGROUP			0x080

From 1fbada54b2805be1db64ce46ccb1f94976c1067a Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Wed, 5 Mar 2014 22:43:45 +0300
Subject: [PATCH 0430/1185] irq-gic: remove file name from heading comment

File names in the heading comments fell out of favor long ago, and this one
wasn't even changed when the driver was moved from arch/arm/common/, so remove
it at last...

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
(cherry picked from commit d31e373d077848f5d40abd8621b4ebd4d2179dd7)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 drivers/irqchip/irq-gic.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 949a53d03a26..01a623d8e59f 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1,6 +1,4 @@
 /*
- *  linux/arch/arm/common/gic.c
- *
  *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify

From a6c2e91f8d5db557bd90a3c384995ad20d15445a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 30 Jun 2014 16:01:30 +0100
Subject: [PATCH 0431/1185] irqchip: gic: Move some bits of GICv2 to a
 library-type file

A few GICv2 low-level functions are actually very useful to GICv3,
and it makes some sense to share them across the two drivers.
They end up in their own file, with an additional parameter used
to ensure an optional synchronization (unused on GICv2).

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Link: https://lkml.kernel.org/r/1404140510-5382-2-git-send-email-marc.zyngier@arm.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
(cherry picked from commit d51d0af43b30dcae1ca13ea67fd717e03b37f153)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 drivers/irqchip/Makefile         |   2 +-
 drivers/irqchip/irq-gic-common.c | 115 +++++++++++++++++++++++++++++++
 drivers/irqchip/irq-gic-common.h |  29 ++++++++
 drivers/irqchip/irq-gic.c        |  59 ++--------------
 4 files changed, 149 insertions(+), 56 deletions(-)
 create mode 100644 drivers/irqchip/irq-gic-common.c
 create mode 100644 drivers/irqchip/irq-gic-common.h

diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index cda4cb5f7327..b7c35766d2f2 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_METAG)			+= irq-metag-ext.o
 obj-$(CONFIG_METAG_PERFCOUNTER_IRQS)	+= irq-metag.o
 obj-$(CONFIG_ARCH_SUNXI)		+= irq-sun4i.o
 obj-$(CONFIG_ARCH_SPEAR3XX)		+= spear-shirq.o
-obj-$(CONFIG_ARM_GIC)			+= irq-gic.o
+obj-$(CONFIG_ARM_GIC)			+= irq-gic.o irq-gic-common.o
 obj-$(CONFIG_ARM_VIC)			+= irq-vic.o
 obj-$(CONFIG_SIRF_IRQ)			+= irq-sirfsoc.o
 obj-$(CONFIG_RENESAS_INTC_IRQPIN)	+= irq-renesas-intc-irqpin.o
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
new file mode 100644
index 000000000000..60ac704d2090
--- /dev/null
+++ b/drivers/irqchip/irq-gic-common.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include "irq-gic-common.h"
+
+void gic_configure_irq(unsigned int irq, unsigned int type,
+		       void __iomem *base, void (*sync_access)(void))
+{
+	u32 enablemask = 1 << (irq % 32);
+	u32 enableoff = (irq / 32) * 4;
+	u32 confmask = 0x2 << ((irq % 16) * 2);
+	u32 confoff = (irq / 16) * 4;
+	bool enabled = false;
+	u32 val;
+
+	/*
+	 * Read current configuration register, and insert the config
+	 * for "irq", depending on "type".
+	 */
+	val = readl_relaxed(base + GIC_DIST_CONFIG + confoff);
+	if (type == IRQ_TYPE_LEVEL_HIGH)
+		val &= ~confmask;
+	else if (type == IRQ_TYPE_EDGE_RISING)
+		val |= confmask;
+
+	/*
+	 * As recommended by the spec, disable the interrupt before changing
+	 * the configuration
+	 */
+	if (readl_relaxed(base + GIC_DIST_ENABLE_SET + enableoff) & enablemask) {
+		writel_relaxed(enablemask, base + GIC_DIST_ENABLE_CLEAR + enableoff);
+		if (sync_access)
+			sync_access();
+		enabled = true;
+	}
+
+	/*
+	 * Write back the new configuration, and possibly re-enable
+	 * the interrupt.
+	 */
+	writel_relaxed(val, base + GIC_DIST_CONFIG + confoff);
+
+	if (enabled)
+		writel_relaxed(enablemask, base + GIC_DIST_ENABLE_SET + enableoff);
+
+	if (sync_access)
+		sync_access();
+}
+
+void __init gic_dist_config(void __iomem *base, int gic_irqs,
+			    void (*sync_access)(void))
+{
+	unsigned int i;
+
+	/*
+	 * Set all global interrupts to be level triggered, active low.
+	 */
+	for (i = 32; i < gic_irqs; i += 16)
+		writel_relaxed(0, base + GIC_DIST_CONFIG + i / 4);
+
+	/*
+	 * Set priority on all global interrupts.
+	 */
+	for (i = 32; i < gic_irqs; i += 4)
+		writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i);
+
+	/*
+	 * Disable all interrupts.  Leave the PPI and SGIs alone
+	 * as they are enabled by redistributor registers.
+	 */
+	for (i = 32; i < gic_irqs; i += 32)
+		writel_relaxed(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i / 8);
+
+	if (sync_access)
+		sync_access();
+}
+
+void gic_cpu_config(void __iomem *base, void (*sync_access)(void))
+{
+	int i;
+
+	/*
+	 * Deal with the banked PPI and SGI interrupts - disable all
+	 * PPI interrupts, ensure all SGI interrupts are enabled.
+	 */
+	writel_relaxed(0xffff0000, base + GIC_DIST_ENABLE_CLEAR);
+	writel_relaxed(0x0000ffff, base + GIC_DIST_ENABLE_SET);
+
+	/*
+	 * Set priority on PPI and SGI interrupts
+	 */
+	for (i = 0; i < 32; i += 4)
+		writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4);
+
+	if (sync_access)
+		sync_access();
+}
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
new file mode 100644
index 000000000000..b41f02481c3a
--- /dev/null
+++ b/drivers/irqchip/irq-gic-common.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _IRQ_GIC_COMMON_H
+#define _IRQ_GIC_COMMON_H
+
+#include <linux/of.h>
+#include <linux/irqdomain.h>
+
+void gic_configure_irq(unsigned int irq, unsigned int type,
+                       void __iomem *base, void (*sync_access)(void));
+void gic_dist_config(void __iomem *base, int gic_irqs,
+		     void (*sync_access)(void));
+void gic_cpu_config(void __iomem *base, void (*sync_access)(void));
+
+#endif /* _IRQ_GIC_COMMON_H */
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 01a623d8e59f..0f60eea69a85 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -44,6 +44,7 @@
 #include <asm/exception.h>
 #include <asm/smp_plat.h>
 
+#include "irq-gic-common.h"
 #include "irqchip.h"
 
 union gic_base {
@@ -186,12 +187,6 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
 {
 	void __iomem *base = gic_dist_base(d);
 	unsigned int gicirq = gic_irq(d);
-	u32 enablemask = 1 << (gicirq % 32);
-	u32 enableoff = (gicirq / 32) * 4;
-	u32 confmask = 0x2 << ((gicirq % 16) * 2);
-	u32 confoff = (gicirq / 16) * 4;
-	bool enabled = false;
-	u32 val;
 
 	/* Interrupt configuration for SGIs can't be changed */
 	if (gicirq < 16)
@@ -205,25 +200,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
 	if (gic_arch_extn.irq_set_type)
 		gic_arch_extn.irq_set_type(d, type);
 
-	val = readl_relaxed(base + GIC_DIST_CONFIG + confoff);
-	if (type == IRQ_TYPE_LEVEL_HIGH)
-		val &= ~confmask;
-	else if (type == IRQ_TYPE_EDGE_RISING)
-		val |= confmask;
-
-	/*
-	 * As recommended by the spec, disable the interrupt before changing
-	 * the configuration
-	 */
-	if (readl_relaxed(base + GIC_DIST_ENABLE_SET + enableoff) & enablemask) {
-		writel_relaxed(enablemask, base + GIC_DIST_ENABLE_CLEAR + enableoff);
-		enabled = true;
-	}
-
-	writel_relaxed(val, base + GIC_DIST_CONFIG + confoff);
-
-	if (enabled)
-		writel_relaxed(enablemask, base + GIC_DIST_ENABLE_SET + enableoff);
+	gic_configure_irq(gicirq, type, base, NULL);
 
 	raw_spin_unlock(&irq_controller_lock);
 
@@ -380,12 +357,6 @@ static void __init gic_dist_init(struct gic_chip_data *gic)
 
 	writel_relaxed(0, base + GIC_DIST_CTRL);
 
-	/*
-	 * Set all global interrupts to be level triggered, active low.
-	 */
-	for (i = 32; i < gic_irqs; i += 16)
-		writel_relaxed(0, base + GIC_DIST_CONFIG + i * 4 / 16);
-
 	/*
 	 * Set all global interrupts to this CPU only.
 	 */
@@ -395,18 +366,7 @@ static void __init gic_dist_init(struct gic_chip_data *gic)
 	for (i = 32; i < gic_irqs; i += 4)
 		writel_relaxed(cpumask, base + GIC_DIST_TARGET + i * 4 / 4);
 
-	/*
-	 * Set priority on all global interrupts.
-	 */
-	for (i = 32; i < gic_irqs; i += 4)
-		writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4);
-
-	/*
-	 * Disable all interrupts.  Leave the PPI and SGIs alone
-	 * as these enables are banked registers.
-	 */
-	for (i = 32; i < gic_irqs; i += 32)
-		writel_relaxed(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i * 4 / 32);
+	gic_dist_config(base, gic_irqs, NULL);
 
 	writel_relaxed(1, base + GIC_DIST_CTRL);
 }
@@ -433,18 +393,7 @@ static void __cpuinit gic_cpu_init(struct gic_chip_data *gic)
 		if (i != cpu)
 			gic_cpu_map[i] &= ~cpu_mask;
 
-	/*
-	 * Deal with the banked PPI and SGI interrupts - disable all
-	 * PPI interrupts, ensure all SGI interrupts are enabled.
-	 */
-	writel_relaxed(0xffff0000, dist_base + GIC_DIST_ENABLE_CLEAR);
-	writel_relaxed(0x0000ffff, dist_base + GIC_DIST_ENABLE_SET);
-
-	/*
-	 * Set priority on PPI and SGI interrupts
-	 */
-	for (i = 0; i < 32; i += 4)
-		writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4 / 4);
+	gic_cpu_config(dist_base, NULL);
 
 	writel_relaxed(0xf0, base + GIC_CPU_PRIMASK);
 	writel_relaxed(1, base + GIC_CPU_CTRL);

From 6f4334fa426b09dbcec09c4b2f0f3aca34d5eae0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 30 Jun 2014 16:01:31 +0100
Subject: [PATCH 0432/1185] irqchip: gic-v3: Initial support for GICv3

The Generic Interrupt Controller (version 3) offers services that are
similar to GICv2, with a number of additional features:
- Affinity routing based on the CPU MPIDR (ARE)
- System register for the CPU interfaces (SRE)
- Support for more than 8 CPUs
- Locality-specific Peripheral Interrupts (LPIs)
- Interrupt Translation Services (ITS)

This patch adds preliminary support for GICv3 with ARE and SRE,
non-secure mode only. It relies on higher exception levels to grant ARE
and SRE access.

Support for LPI and ITS will be added at a later time.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Reviewed-by: Zi Shen Lim <zlim@broadcom.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Reviewed-by: Tirumalesh Chalamarla <tchalamarla@cavium.com>
Reviewed-by: Yun Wu <wuyun.wu@huawei.com>
Reviewed-by: Zhen Lei <thunder.leizhen@huawei.com>
Tested-by: Tirumalesh Chalamarla <tchalamarla@cavium.com>
Tested-by: Radha Mohan Chintakuntla <rchintakuntla@cavium.com>
Acked-by: Radha Mohan Chintakuntla <rchintakuntla@cavium.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lkml.kernel.org/r/1404140510-5382-3-git-send-email-marc.zyngier@arm.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
(cherry picked from commit 021f653791ad17e03f98aaa7fb933816ae16f161)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/Kconfig                 |   1 +
 arch/arm64/kernel/head.S           |  18 +
 arch/arm64/kernel/hyp-stub.S       |   1 +
 drivers/irqchip/Kconfig            |   5 +
 drivers/irqchip/Makefile           |   1 +
 drivers/irqchip/irq-gic-v3.c       | 692 +++++++++++++++++++++++++++++
 include/linux/irqchip/arm-gic-v3.h | 198 +++++++++
 7 files changed, 916 insertions(+)
 create mode 100644 drivers/irqchip/irq-gic-v3.c
 create mode 100644 include/linux/irqchip/arm-gic-v3.h

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 43068cf44c2d..4f10050a34d0 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -8,6 +8,7 @@ config ARM64
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
 	select ARM_GIC
+	select ARM_GIC_V3
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 39a8a83f1883..e18d04f3f2c6 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -22,6 +22,7 @@
 
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <linux/irqchip/arm-gic-v3.h>
 
 #include <asm/assembler.h>
 #include <asm/ptrace.h>
@@ -162,6 +163,23 @@ ENTRY(el2_setup)
 	msr	cnthctl_el2, x0
 	msr	cntvoff_el2, xzr		// Clear virtual offset
 
+#ifdef CONFIG_ARM_GIC_V3
+	/* GICv3 system register access */
+	mrs	x0, id_aa64pfr0_el1
+	ubfx	x0, x0, #24, #4
+	cmp	x0, #1
+	b.ne	3f
+
+	mrs	x0, ICC_SRE_EL2
+	orr	x0, x0, #ICC_SRE_EL2_SRE	// Set ICC_SRE_EL2.SRE==1
+	orr	x0, x0, #ICC_SRE_EL2_ENABLE	// Set ICC_SRE_EL2.Enable==1
+	msr	ICC_SRE_EL2, x0
+	isb					// Make sure SRE is now set
+	msr	ICH_HCR_EL2, xzr		// Reset ICH_HCR_EL2 to defaults
+
+3:
+#endif
+
 	/* Populate ID registers. */
 	mrs	x0, midr_el1
 	mrs	x1, mpidr_el1
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 0959611d9ff1..a272f335c289 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -19,6 +19,7 @@
 
 #include <linux/init.h>
 #include <linux/linkage.h>
+#include <linux/irqchip/arm-gic-v3.h>
 
 #include <asm/assembler.h>
 #include <asm/ptrace.h>
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 4a33351c25dc..7709b1dbf6cc 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -10,6 +10,11 @@ config ARM_GIC
 config GIC_NON_BANKED
 	bool
 
+config ARM_GIC_V3
+	bool
+	select IRQ_DOMAIN
+	select MULTI_IRQ_HANDLER
+
 config ARM_VIC
 	bool
 	select IRQ_DOMAIN
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index b7c35766d2f2..bf4667b34306 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_METAG_PERFCOUNTER_IRQS)	+= irq-metag.o
 obj-$(CONFIG_ARCH_SUNXI)		+= irq-sun4i.o
 obj-$(CONFIG_ARCH_SPEAR3XX)		+= spear-shirq.o
 obj-$(CONFIG_ARM_GIC)			+= irq-gic.o irq-gic-common.o
+obj-$(CONFIG_ARM_GIC_V3)		+= irq-gic-v3.o irq-gic-common.o
 obj-$(CONFIG_ARM_VIC)			+= irq-vic.o
 obj-$(CONFIG_SIRF_IRQ)			+= irq-sirfsoc.o
 obj-$(CONFIG_RENESAS_INTC_IRQPIN)	+= irq-renesas-intc-irqpin.o
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
new file mode 100644
index 000000000000..81519bae0453
--- /dev/null
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -0,0 +1,692 @@
+/*
+ * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/cputype.h>
+#include <asm/exception.h>
+#include <asm/smp_plat.h>
+
+#include "irq-gic-common.h"
+#include "irqchip.h"
+
+struct gic_chip_data {
+	void __iomem		*dist_base;
+	void __iomem		**redist_base;
+	void __percpu __iomem	**rdist;
+	struct irq_domain	*domain;
+	u64			redist_stride;
+	u32			redist_regions;
+	unsigned int		irq_nr;
+};
+
+static struct gic_chip_data gic_data __read_mostly;
+
+#define gic_data_rdist()		(this_cpu_ptr(gic_data.rdist))
+#define gic_data_rdist_rd_base()	(*gic_data_rdist())
+#define gic_data_rdist_sgi_base()	(gic_data_rdist_rd_base() + SZ_64K)
+
+/* Our default, arbitrary priority value. Linux only uses one anyway. */
+#define DEFAULT_PMR_VALUE	0xf0
+
+static inline unsigned int gic_irq(struct irq_data *d)
+{
+	return d->hwirq;
+}
+
+static inline int gic_irq_in_rdist(struct irq_data *d)
+{
+	return gic_irq(d) < 32;
+}
+
+static inline void __iomem *gic_dist_base(struct irq_data *d)
+{
+	if (gic_irq_in_rdist(d))	/* SGI+PPI -> SGI_base for this CPU */
+		return gic_data_rdist_sgi_base();
+
+	if (d->hwirq <= 1023)		/* SPI -> dist_base */
+		return gic_data.dist_base;
+
+	if (d->hwirq >= 8192)
+		BUG();		/* LPI Detected!!! */
+
+	return NULL;
+}
+
+static void gic_do_wait_for_rwp(void __iomem *base)
+{
+	u32 count = 1000000;	/* 1s! */
+
+	while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) {
+		count--;
+		if (!count) {
+			pr_err_ratelimited("RWP timeout, gone fishing\n");
+			return;
+		}
+		cpu_relax();
+		udelay(1);
+	};
+}
+
+/* Wait for completion of a distributor change */
+static void gic_dist_wait_for_rwp(void)
+{
+	gic_do_wait_for_rwp(gic_data.dist_base);
+}
+
+/* Wait for completion of a redistributor change */
+static void gic_redist_wait_for_rwp(void)
+{
+	gic_do_wait_for_rwp(gic_data_rdist_rd_base());
+}
+
+/* Low level accessors */
+static u64 gic_read_iar(void)
+{
+	u64 irqstat;
+
+	asm volatile("mrs %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
+	return irqstat;
+}
+
+static void gic_write_pmr(u64 val)
+{
+	asm volatile("msr " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val));
+}
+
+static void gic_write_ctlr(u64 val)
+{
+	asm volatile("msr " __stringify(ICC_CTLR_EL1) ", %0" : : "r" (val));
+	isb();
+}
+
+static void gic_write_grpen1(u64 val)
+{
+	asm volatile("msr " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" (val));
+	isb();
+}
+
+static void gic_write_sgi1r(u64 val)
+{
+	asm volatile("msr " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val));
+}
+
+static void gic_enable_sre(void)
+{
+	u64 val;
+
+	asm volatile("mrs %0, " __stringify(ICC_SRE_EL1) : "=r" (val));
+	val |= ICC_SRE_EL1_SRE;
+	asm volatile("msr " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val));
+	isb();
+
+	/*
+	 * Need to check that the SRE bit has actually been set. If
+	 * not, it means that SRE is disabled at EL2. We're going to
+	 * die painfully, and there is nothing we can do about it.
+	 *
+	 * Kindly inform the luser.
+	 */
+	asm volatile("mrs %0, " __stringify(ICC_SRE_EL1) : "=r" (val));
+	if (!(val & ICC_SRE_EL1_SRE))
+		pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n");
+}
+
+static void gic_enable_redist(void)
+{
+	void __iomem *rbase;
+	u32 count = 1000000;	/* 1s! */
+	u32 val;
+
+	rbase = gic_data_rdist_rd_base();
+
+	/* Wake up this CPU redistributor */
+	val = readl_relaxed(rbase + GICR_WAKER);
+	val &= ~GICR_WAKER_ProcessorSleep;
+	writel_relaxed(val, rbase + GICR_WAKER);
+
+	while (readl_relaxed(rbase + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) {
+		count--;
+		if (!count) {
+			pr_err_ratelimited("redist didn't wake up...\n");
+			return;
+		}
+		cpu_relax();
+		udelay(1);
+	};
+}
+
+/*
+ * Routines to disable, enable, EOI and route interrupts
+ */
+static void gic_poke_irq(struct irq_data *d, u32 offset)
+{
+	u32 mask = 1 << (gic_irq(d) % 32);
+	void (*rwp_wait)(void);
+	void __iomem *base;
+
+	if (gic_irq_in_rdist(d)) {
+		base = gic_data_rdist_sgi_base();
+		rwp_wait = gic_redist_wait_for_rwp;
+	} else {
+		base = gic_data.dist_base;
+		rwp_wait = gic_dist_wait_for_rwp;
+	}
+
+	writel_relaxed(mask, base + offset + (gic_irq(d) / 32) * 4);
+	rwp_wait();
+}
+
+static int gic_peek_irq(struct irq_data *d, u32 offset)
+{
+	u32 mask = 1 << (gic_irq(d) % 32);
+	void __iomem *base;
+
+	if (gic_irq_in_rdist(d))
+		base = gic_data_rdist_sgi_base();
+	else
+		base = gic_data.dist_base;
+
+	return !!(readl_relaxed(base + offset + (gic_irq(d) / 32) * 4) & mask);
+}
+
+static void gic_mask_irq(struct irq_data *d)
+{
+	gic_poke_irq(d, GICD_ICENABLER);
+}
+
+static void gic_unmask_irq(struct irq_data *d)
+{
+	gic_poke_irq(d, GICD_ISENABLER);
+}
+
+static void gic_eoi_irq(struct irq_data *d)
+{
+	gic_write_eoir(gic_irq(d));
+}
+
+static int gic_set_type(struct irq_data *d, unsigned int type)
+{
+	unsigned int irq = gic_irq(d);
+	void (*rwp_wait)(void);
+	void __iomem *base;
+
+	/* Interrupt configuration for SGIs can't be changed */
+	if (irq < 16)
+		return -EINVAL;
+
+	if (type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING)
+		return -EINVAL;
+
+	if (gic_irq_in_rdist(d)) {
+		base = gic_data_rdist_sgi_base();
+		rwp_wait = gic_redist_wait_for_rwp;
+	} else {
+		base = gic_data.dist_base;
+		rwp_wait = gic_dist_wait_for_rwp;
+	}
+
+	gic_configure_irq(irq, type, base, rwp_wait);
+
+	return 0;
+}
+
+static u64 gic_mpidr_to_affinity(u64 mpidr)
+{
+	u64 aff;
+
+	aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 32 |
+	       MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 |
+	       MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8  |
+	       MPIDR_AFFINITY_LEVEL(mpidr, 0));
+
+	return aff;
+}
+
+static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
+{
+	u64 irqnr;
+
+	do {
+		irqnr = gic_read_iar();
+
+		if (likely(irqnr > 15 && irqnr < 1020)) {
+			u64 irq = irq_find_mapping(gic_data.domain, irqnr);
+			if (likely(irq)) {
+				handle_IRQ(irq, regs);
+				continue;
+			}
+
+			WARN_ONCE(true, "Unexpected SPI received!\n");
+			gic_write_eoir(irqnr);
+		}
+		if (irqnr < 16) {
+			gic_write_eoir(irqnr);
+#ifdef CONFIG_SMP
+			handle_IPI(irqnr, regs);
+#else
+			WARN_ONCE(true, "Unexpected SGI received!\n");
+#endif
+			continue;
+		}
+	} while (irqnr != ICC_IAR1_EL1_SPURIOUS);
+}
+
+static void __init gic_dist_init(void)
+{
+	unsigned int i;
+	u64 affinity;
+	void __iomem *base = gic_data.dist_base;
+
+	/* Disable the distributor */
+	writel_relaxed(0, base + GICD_CTLR);
+	gic_dist_wait_for_rwp();
+
+	gic_dist_config(base, gic_data.irq_nr, gic_dist_wait_for_rwp);
+
+	/* Enable distributor with ARE, Group1 */
+	writel_relaxed(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1,
+		       base + GICD_CTLR);
+
+	/*
+	 * Set all global interrupts to the boot CPU only. ARE must be
+	 * enabled.
+	 */
+	affinity = gic_mpidr_to_affinity(cpu_logical_map(smp_processor_id()));
+	for (i = 32; i < gic_data.irq_nr; i++)
+		writeq_relaxed(affinity, base + GICD_IROUTER + i * 8);
+}
+
+static int gic_populate_rdist(void)
+{
+	u64 mpidr = cpu_logical_map(smp_processor_id());
+	u64 typer;
+	u32 aff;
+	int i;
+
+	/*
+	 * Convert affinity to a 32bit value that can be matched to
+	 * GICR_TYPER bits [63:32].
+	 */
+	aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24 |
+	       MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 |
+	       MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 |
+	       MPIDR_AFFINITY_LEVEL(mpidr, 0));
+
+	for (i = 0; i < gic_data.redist_regions; i++) {
+		void __iomem *ptr = gic_data.redist_base[i];
+		u32 reg;
+
+		reg = readl_relaxed(ptr + GICR_PIDR2) & GIC_PIDR2_ARCH_MASK;
+		if (reg != GIC_PIDR2_ARCH_GICv3 &&
+		    reg != GIC_PIDR2_ARCH_GICv4) { /* We're in trouble... */
+			pr_warn("No redistributor present @%p\n", ptr);
+			break;
+		}
+
+		do {
+			typer = readq_relaxed(ptr + GICR_TYPER);
+			if ((typer >> 32) == aff) {
+				gic_data_rdist_rd_base() = ptr;
+				pr_info("CPU%d: found redistributor %llx @%p\n",
+					smp_processor_id(),
+					(unsigned long long)mpidr, ptr);
+				return 0;
+			}
+
+			if (gic_data.redist_stride) {
+				ptr += gic_data.redist_stride;
+			} else {
+				ptr += SZ_64K * 2; /* Skip RD_base + SGI_base */
+				if (typer & GICR_TYPER_VLPIS)
+					ptr += SZ_64K * 2; /* Skip VLPI_base + reserved page */
+			}
+		} while (!(typer & GICR_TYPER_LAST));
+	}
+
+	/* We couldn't even deal with ourselves... */
+	WARN(true, "CPU%d: mpidr %llx has no re-distributor!\n",
+	     smp_processor_id(), (unsigned long long)mpidr);
+	return -ENODEV;
+}
+
+static void gic_cpu_init(void)
+{
+	void __iomem *rbase;
+
+	/* Register ourselves with the rest of the world */
+	if (gic_populate_rdist())
+		return;
+
+	gic_enable_redist();
+
+	rbase = gic_data_rdist_sgi_base();
+
+	gic_cpu_config(rbase, gic_redist_wait_for_rwp);
+
+	/* Enable system registers */
+	gic_enable_sre();
+
+	/* Set priority mask register */
+	gic_write_pmr(DEFAULT_PMR_VALUE);
+
+	/* EOI deactivates interrupt too (mode 0) */
+	gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir);
+
+	/* ... and let's hit the road... */
+	gic_write_grpen1(1);
+}
+
+#ifdef CONFIG_SMP
+static int gic_secondary_init(struct notifier_block *nfb,
+			      unsigned long action, void *hcpu)
+{
+	if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
+		gic_cpu_init();
+	return NOTIFY_OK;
+}
+
+/*
+ * Notifier for enabling the GIC CPU interface. Set an arbitrarily high
+ * priority because the GIC needs to be up before the ARM generic timers.
+ */
+static struct notifier_block gic_cpu_notifier = {
+	.notifier_call = gic_secondary_init,
+	.priority = 100,
+};
+
+static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask,
+				   u64 cluster_id)
+{
+	int cpu = *base_cpu;
+	u64 mpidr = cpu_logical_map(cpu);
+	u16 tlist = 0;
+
+	while (cpu < nr_cpu_ids) {
+		/*
+		 * If we ever get a cluster of more than 16 CPUs, just
+		 * scream and skip that CPU.
+		 */
+		if (WARN_ON((mpidr & 0xff) >= 16))
+			goto out;
+
+		tlist |= 1 << (mpidr & 0xf);
+
+		cpu = cpumask_next(cpu, mask);
+		if (cpu == nr_cpu_ids)
+			goto out;
+
+		mpidr = cpu_logical_map(cpu);
+
+		if (cluster_id != (mpidr & ~0xffUL)) {
+			cpu--;
+			goto out;
+		}
+	}
+out:
+	*base_cpu = cpu;
+	return tlist;
+}
+
+static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq)
+{
+	u64 val;
+
+	val = (MPIDR_AFFINITY_LEVEL(cluster_id, 3) << 48	|
+	       MPIDR_AFFINITY_LEVEL(cluster_id, 2) << 32	|
+	       irq << 24			    		|
+	       MPIDR_AFFINITY_LEVEL(cluster_id, 1) << 16	|
+	       tlist);
+
+	pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
+	gic_write_sgi1r(val);
+}
+
+static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
+{
+	int cpu;
+
+	if (WARN_ON(irq >= 16))
+		return;
+
+	/*
+	 * Ensure that stores to Normal memory are visible to the
+	 * other CPUs before issuing the IPI.
+	 */
+	smp_wmb();
+
+	for_each_cpu_mask(cpu, *mask) {
+		u64 cluster_id = cpu_logical_map(cpu) & ~0xffUL;
+		u16 tlist;
+
+		tlist = gic_compute_target_list(&cpu, mask, cluster_id);
+		gic_send_sgi(cluster_id, tlist, irq);
+	}
+
+	/* Force the above writes to ICC_SGI1R_EL1 to be executed */
+	isb();
+}
+
+static void gic_smp_init(void)
+{
+	set_smp_cross_call(gic_raise_softirq);
+	register_cpu_notifier(&gic_cpu_notifier);
+}
+
+/*
+ * Route SPI @d to one online CPU of @mask_val (SGIs/PPIs: -EINVAL).
+ */
+static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
+			    bool force)
+{
+	unsigned int cpu = cpumask_any_and(mask_val, cpu_online_mask);
+	void __iomem *reg;
+	int enabled;
+	u64 val;
+
+	/* cpu >= nr_cpu_ids: no online CPU in the mask, nothing to look up */
+	if (gic_irq_in_rdist(d) || cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	/* If interrupt was enabled, disable it first */
+	enabled = gic_peek_irq(d, GICD_ISENABLER);
+	if (enabled)
+		gic_mask_irq(d);
+
+	reg = gic_dist_base(d) + GICD_IROUTER + (gic_irq(d) * 8);
+	val = gic_mpidr_to_affinity(cpu_logical_map(cpu));
+	writeq_relaxed(val, reg);
+
+	/* Re-enable if it was enabled; otherwise wait for the distributor */
+	if (enabled)
+		gic_unmask_irq(d);
+	else
+		gic_dist_wait_for_rwp();
+
+	return IRQ_SET_MASK_OK;
+}
+#else
+#define gic_set_affinity	NULL
+#define gic_smp_init()		do { } while(0)
+#endif
+
+static struct irq_chip gic_chip = {
+	.name			= "GICv3",
+	.irq_mask		= gic_mask_irq,
+	.irq_unmask		= gic_unmask_irq,
+	.irq_eoi		= gic_eoi_irq,
+	.irq_set_type		= gic_set_type,
+	.irq_set_affinity	= gic_set_affinity,
+};
+
+static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
+			      irq_hw_number_t hw)
+{
+	/* SGIs are private to the core kernel */
+	if (hw < 16)
+		return -EPERM;
+	/* PPIs */
+	if (hw < 32) {
+		irq_set_percpu_devid(irq);
+		irq_set_chip_and_handler(irq, &gic_chip,
+					 handle_percpu_devid_irq);
+		set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN);
+	}
+	/* SPIs */
+	if (hw >= 32 && hw < gic_data.irq_nr) {
+		irq_set_chip_and_handler(irq, &gic_chip,
+					 handle_fasteoi_irq);
+		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+	}
+	irq_set_chip_data(irq, d->host_data);
+	return 0;
+}
+
+static int gic_irq_domain_xlate(struct irq_domain *d,
+				struct device_node *controller,
+				const u32 *intspec, unsigned int intsize,
+				unsigned long *out_hwirq, unsigned int *out_type)
+{
+	if (d->of_node != controller)
+		return -EINVAL;
+	if (intsize < 3)
+		return -EINVAL;
+
+	switch(intspec[0]) {
+	case 0:			/* SPI */
+		*out_hwirq = intspec[1] + 32;
+		break;
+	case 1:			/* PPI */
+		*out_hwirq = intspec[1] + 16;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	*out_type = intspec[2] & IRQ_TYPE_SENSE_MASK;
+	return 0;
+}
+
+static const struct irq_domain_ops gic_irq_domain_ops = {
+	.map = gic_irq_domain_map,
+	.xlate = gic_irq_domain_xlate,
+};
+
+static int __init gic_of_init(struct device_node *node, struct device_node *parent)
+{
+	void __iomem *dist_base;
+	void __iomem **redist_base;
+	u64 redist_stride;
+	u32 redist_regions;
+	u32 reg;
+	int gic_irqs;
+	int err;
+	int i;
+
+	dist_base = of_iomap(node, 0);
+	if (!dist_base) {
+		pr_err("%s: unable to map gic dist registers\n",
+			node->full_name);
+		return -ENXIO;
+	}
+
+	reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK;
+	if (reg != GIC_PIDR2_ARCH_GICv3 && reg != GIC_PIDR2_ARCH_GICv4) {
+		pr_err("%s: no distributor detected, giving up\n",
+			node->full_name);
+		err = -ENODEV;
+		goto out_unmap_dist;
+	}
+
+	if (of_property_read_u32(node, "#redistributor-regions", &redist_regions))
+		redist_regions = 1;
+
+	redist_base = kzalloc(sizeof(*redist_base) * redist_regions, GFP_KERNEL);
+	if (!redist_base) {
+		err = -ENOMEM;
+		goto out_unmap_dist;
+	}
+
+	for (i = 0; i < redist_regions; i++) {
+		redist_base[i] = of_iomap(node, 1 + i);
+		if (!redist_base[i]) {
+			pr_err("%s: couldn't map region %d\n",
+			       node->full_name, i);
+			err = -ENODEV;
+			goto out_unmap_rdist;
+		}
+	}
+
+	if (of_property_read_u64(node, "redistributor-stride", &redist_stride))
+		redist_stride = 0;
+
+	gic_data.dist_base = dist_base;
+	gic_data.redist_base = redist_base;
+	gic_data.redist_regions = redist_regions;
+	gic_data.redist_stride = redist_stride;
+
+	/*
+	 * Find out how many interrupts are supported.
+	 * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI)
+	 */
+	gic_irqs = readl_relaxed(gic_data.dist_base + GICD_TYPER) & 0x1f;
+	gic_irqs = (gic_irqs + 1) * 32;
+	if (gic_irqs > 1020)
+		gic_irqs = 1020;
+	gic_data.irq_nr = gic_irqs;
+
+	gic_data.domain = irq_domain_add_tree(node, &gic_irq_domain_ops,
+					      &gic_data);
+	gic_data.rdist = alloc_percpu(typeof(*gic_data.rdist));
+
+	if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdist)) {
+		err = -ENOMEM;
+		goto out_free;
+	}
+
+	set_handle_irq(gic_handle_irq);
+
+	gic_smp_init();
+	gic_dist_init();
+	gic_cpu_init();
+
+	return 0;
+
+out_free:
+	if (gic_data.domain)
+		irq_domain_remove(gic_data.domain);
+	free_percpu(gic_data.rdist);
+out_unmap_rdist:
+	for (i = 0; i < redist_regions; i++)
+		if (redist_base[i])
+			iounmap(redist_base[i]);
+	kfree(redist_base);
+out_unmap_dist:
+	iounmap(dist_base);
+	return err;
+}
+
+IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init);
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
new file mode 100644
index 000000000000..30cb7556d43f
--- /dev/null
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __LINUX_IRQCHIP_ARM_GIC_V3_H
+#define __LINUX_IRQCHIP_ARM_GIC_V3_H
+
+/*
+ * Distributor registers. We assume we're running non-secure, with ARE
+ * being set. Secure-only and non-ARE registers are not described.
+ */
+#define GICD_CTLR			0x0000
+#define GICD_TYPER			0x0004
+#define GICD_IIDR			0x0008
+#define GICD_STATUSR			0x0010
+#define GICD_SETSPI_NSR			0x0040
+#define GICD_CLRSPI_NSR			0x0048
+#define GICD_SETSPI_SR			0x0050
+#define GICD_CLRSPI_SR			0x0058
+#define GICD_SEIR			0x0068
+#define GICD_ISENABLER			0x0100
+#define GICD_ICENABLER			0x0180
+#define GICD_ISPENDR			0x0200
+#define GICD_ICPENDR			0x0280
+#define GICD_ISACTIVER			0x0300
+#define GICD_ICACTIVER			0x0380
+#define GICD_IPRIORITYR			0x0400
+#define GICD_ICFGR			0x0C00
+#define GICD_IROUTER			0x6000
+#define GICD_PIDR2			0xFFE8
+
+#define GICD_CTLR_RWP			(1U << 31)
+#define GICD_CTLR_ARE_NS		(1U << 4)
+#define GICD_CTLR_ENABLE_G1A		(1U << 1)
+#define GICD_CTLR_ENABLE_G1		(1U << 0)
+
+#define GICD_IROUTER_SPI_MODE_ONE	(0U << 31)
+#define GICD_IROUTER_SPI_MODE_ANY	(1U << 31)
+
+#define GIC_PIDR2_ARCH_MASK		0xf0
+#define GIC_PIDR2_ARCH_GICv3		0x30
+#define GIC_PIDR2_ARCH_GICv4		0x40
+
+/*
+ * Re-Distributor registers, offsets from RD_base
+ */
+#define GICR_CTLR			GICD_CTLR
+#define GICR_IIDR			0x0004
+#define GICR_TYPER			0x0008
+#define GICR_STATUSR			GICD_STATUSR
+#define GICR_WAKER			0x0014
+#define GICR_SETLPIR			0x0040
+#define GICR_CLRLPIR			0x0048
+#define GICR_SEIR			GICD_SEIR
+#define GICR_PROPBASER			0x0070
+#define GICR_PENDBASER			0x0078
+#define GICR_INVLPIR			0x00A0
+#define GICR_INVALLR			0x00B0
+#define GICR_SYNCR			0x00C0
+#define GICR_MOVLPIR			0x0100
+#define GICR_MOVALLR			0x0110
+#define GICR_PIDR2			GICD_PIDR2
+
+#define GICR_WAKER_ProcessorSleep	(1U << 1)
+#define GICR_WAKER_ChildrenAsleep	(1U << 2)
+
+/*
+ * Re-Distributor registers, offsets from SGI_base
+ */
+#define GICR_ISENABLER0			GICD_ISENABLER
+#define GICR_ICENABLER0			GICD_ICENABLER
+#define GICR_ISPENDR0			GICD_ISPENDR
+#define GICR_ICPENDR0			GICD_ICPENDR
+#define GICR_ISACTIVER0			GICD_ISACTIVER
+#define GICR_ICACTIVER0			GICD_ICACTIVER
+#define GICR_IPRIORITYR0		GICD_IPRIORITYR
+#define GICR_ICFGR0			GICD_ICFGR
+
+#define GICR_TYPER_VLPIS		(1U << 1)
+#define GICR_TYPER_LAST			(1U << 4)
+
+/*
+ * CPU interface registers
+ */
+#define ICC_CTLR_EL1_EOImode_drop_dir	(0U << 1)
+#define ICC_CTLR_EL1_EOImode_drop	(1U << 1)
+#define ICC_SRE_EL1_SRE			(1U << 0)
+
+/*
+ * Hypervisor interface registers (SRE only)
+ */
+#define ICH_LR_VIRTUAL_ID_MASK		((1UL << 32) - 1)
+
+#define ICH_LR_EOI			(1UL << 41)
+#define ICH_LR_GROUP			(1UL << 60)
+#define ICH_LR_STATE			(3UL << 62)
+#define ICH_LR_PENDING_BIT		(1UL << 62)
+#define ICH_LR_ACTIVE_BIT		(1UL << 63)
+
+#define ICH_MISR_EOI			(1 << 0)
+#define ICH_MISR_U			(1 << 1)
+
+#define ICH_HCR_EN			(1 << 0)
+#define ICH_HCR_UIE			(1 << 1)
+
+#define ICH_VMCR_CTLR_SHIFT		0
+#define ICH_VMCR_CTLR_MASK		(0x21f << ICH_VMCR_CTLR_SHIFT)
+#define ICH_VMCR_BPR1_SHIFT		18
+#define ICH_VMCR_BPR1_MASK		(7 << ICH_VMCR_BPR1_SHIFT)
+#define ICH_VMCR_BPR0_SHIFT		21
+#define ICH_VMCR_BPR0_MASK		(7 << ICH_VMCR_BPR0_SHIFT)
+#define ICH_VMCR_PMR_SHIFT		24
+#define ICH_VMCR_PMR_MASK		(0xffUL << ICH_VMCR_PMR_SHIFT)
+
+#define ICC_EOIR1_EL1			S3_0_C12_C12_1
+#define ICC_IAR1_EL1			S3_0_C12_C12_0
+#define ICC_SGI1R_EL1			S3_0_C12_C11_5
+#define ICC_PMR_EL1			S3_0_C4_C6_0
+#define ICC_CTLR_EL1			S3_0_C12_C12_4
+#define ICC_SRE_EL1			S3_0_C12_C12_5
+#define ICC_GRPEN1_EL1			S3_0_C12_C12_7
+
+#define ICC_IAR1_EL1_SPURIOUS		0x3ff
+
+#define ICC_SRE_EL2			S3_4_C12_C9_5
+
+#define ICC_SRE_EL2_SRE			(1 << 0)
+#define ICC_SRE_EL2_ENABLE		(1 << 3)
+
+/*
+ * System register definitions
+ */
+#define ICH_VSEIR_EL2			S3_4_C12_C9_4
+#define ICH_HCR_EL2			S3_4_C12_C11_0
+#define ICH_VTR_EL2			S3_4_C12_C11_1
+#define ICH_MISR_EL2			S3_4_C12_C11_2
+#define ICH_EISR_EL2			S3_4_C12_C11_3
+#define ICH_ELSR_EL2			S3_4_C12_C11_5
+#define ICH_VMCR_EL2			S3_4_C12_C11_7
+
+#define __LR0_EL2(x)			S3_4_C12_C12_ ## x
+#define __LR8_EL2(x)			S3_4_C12_C13_ ## x
+
+#define ICH_LR0_EL2			__LR0_EL2(0)
+#define ICH_LR1_EL2			__LR0_EL2(1)
+#define ICH_LR2_EL2			__LR0_EL2(2)
+#define ICH_LR3_EL2			__LR0_EL2(3)
+#define ICH_LR4_EL2			__LR0_EL2(4)
+#define ICH_LR5_EL2			__LR0_EL2(5)
+#define ICH_LR6_EL2			__LR0_EL2(6)
+#define ICH_LR7_EL2			__LR0_EL2(7)
+#define ICH_LR8_EL2			__LR8_EL2(0)
+#define ICH_LR9_EL2			__LR8_EL2(1)
+#define ICH_LR10_EL2			__LR8_EL2(2)
+#define ICH_LR11_EL2			__LR8_EL2(3)
+#define ICH_LR12_EL2			__LR8_EL2(4)
+#define ICH_LR13_EL2			__LR8_EL2(5)
+#define ICH_LR14_EL2			__LR8_EL2(6)
+#define ICH_LR15_EL2			__LR8_EL2(7)
+
+#define __AP0Rx_EL2(x)			S3_4_C12_C8_ ## x
+#define ICH_AP0R0_EL2			__AP0Rx_EL2(0)
+#define ICH_AP0R1_EL2			__AP0Rx_EL2(1)
+#define ICH_AP0R2_EL2			__AP0Rx_EL2(2)
+#define ICH_AP0R3_EL2			__AP0Rx_EL2(3)
+
+#define __AP1Rx_EL2(x)			S3_4_C12_C9_ ## x
+#define ICH_AP1R0_EL2			__AP1Rx_EL2(0)
+#define ICH_AP1R1_EL2			__AP1Rx_EL2(1)
+#define ICH_AP1R2_EL2			__AP1Rx_EL2(2)
+#define ICH_AP1R3_EL2			__AP1Rx_EL2(3)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stringify.h>
+
+static inline void gic_write_eoir(u64 irq)
+{
+	asm volatile("msr " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" (irq));
+	isb();
+}
+
+#endif
+
+#endif

From 765a2fa0989f7686f75a6e98f4e246a86f33ed0f Mon Sep 17 00:00:00 2001
From: Matthias Brugger <matthias.bgg@gmail.com>
Date: Thu, 3 Jul 2014 13:58:52 +0200
Subject: [PATCH 0433/1185] irqchip: gic: Add support for cortex a7 compatible
 string

Patch 0a68214b "ARM: DT: Add binding for GIC virtualization extentions (VGIC)" added
the "arm,cortex-a7-gic" compatible string, but the corresponding IRQCHIP_DECLARE
was never added to the gic driver.

To let real Cortex-A7 SoCs use it, add the necessary declaration to the device driver.

Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
Link: https://lkml.kernel.org/r/1404388732-28890-1-git-send-email-matthias.bgg@gmail.com
Fixes: 0a68214b76ca ("ARM: DT: Add binding for GIC virtualization extentions (VGIC)")
Cc: <stable@vger.kernel.org> # v3.5+
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
(cherry picked from commit a97e8027b1d28eafe6bafe062556c1ec926a49c6)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 drivers/irqchip/irq-gic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 0f60eea69a85..e667778cdb4b 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1016,6 +1016,7 @@ gic_of_init(struct device_node *node, struct device_node *parent)
 }
 IRQCHIP_DECLARE(cortex_a15_gic, "arm,cortex-a15-gic", gic_of_init);
 IRQCHIP_DECLARE(cortex_a9_gic, "arm,cortex-a9-gic", gic_of_init);
+IRQCHIP_DECLARE(cortex_a7_gic, "arm,cortex-a7-gic", gic_of_init);
 IRQCHIP_DECLARE(msm_8660_qgic, "qcom,msm-8660-qgic", gic_of_init);
 IRQCHIP_DECLARE(msm_qgic2, "qcom,msm-qgic2", gic_of_init);
 

From 9d918bade7346bb73e5c61574372b7662e7c8448 Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
Date: Tue, 15 Jul 2014 00:03:03 +0200
Subject: [PATCH 0434/1185] irqchip: gic: Add binding probe for ARM GIC400

Commit 3ab72f9156bb "dt-bindings: add GIC-400 binding" added the
"arm,gic-400" compatible string, but the corresponding IRQCHIP_DECLARE
was never added to the gic driver.

Therefore add the missing irqchip declaration for it.

Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>

Removed additional empty line and adapted commit message to mark it
as fixing an issue.
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Will Deacon <will.deacon@arm.com>
Fixes: 3ab72f9156bb ("dt-bindings: add GIC-400 binding")
Cc: <stable@vger.kernel.org> # v3.14+
Link: https://lkml.kernel.org/r/2621565.f5eISveXXJ@diego
Signed-off-by: Jason Cooper <jason@lakedaemon.net>

(cherry picked from commit 144cb08864ed44be52d8634ac69cd98e5efcf527)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 drivers/irqchip/irq-gic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index e667778cdb4b..b21171946960 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1014,6 +1014,7 @@ gic_of_init(struct device_node *node, struct device_node *parent)
 	gic_cnt++;
 	return 0;
 }
+IRQCHIP_DECLARE(gic_400, "arm,gic-400", gic_of_init);
 IRQCHIP_DECLARE(cortex_a15_gic, "arm,cortex-a15-gic", gic_of_init);
 IRQCHIP_DECLARE(cortex_a9_gic, "arm,cortex-a9-gic", gic_of_init);
 IRQCHIP_DECLARE(cortex_a7_gic, "arm,cortex-a7-gic", gic_of_init);

From e0e343996ea16cde57b70980befbc25037dbd619 Mon Sep 17 00:00:00 2001
From: Tomasz Figa <t.figa@samsung.com>
Date: Thu, 17 Jul 2014 17:23:44 +0200
Subject: [PATCH 0435/1185] irqchip: gic: Fix core ID calculation when topology
 is read from DT

Certain GIC implementations, namely those found on earlier, single
cluster, Exynos SoCs, have registers mapped without per-CPU banking,
which means that the driver needs to use a different offset for each CPU.

Currently the driver calculates the offset by multiplying value returned
by cpu_logical_map() by CPU offset parsed from DT. This is correct when
CPU topology is not specified in DT and aforementioned function returns
core ID alone. However when DT contains CPU topology, the function
changes to return cluster ID as well, which is non-zero on mentioned
SoCs and so breaks the calculation in GIC driver.

This patch fixes this by masking out cluster ID in CPU offset
calculation so that only core ID is considered. Multi-cluster Exynos
SoCs already have banked GIC implementations, so this simple fix should
be enough.

Reported-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reported-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Tomasz Figa <t.figa@samsung.com>
Fixes: db0d4db22a78d ("ARM: gic: allow GIC to support non-banked setups")
Cc: <stable@vger.kernel.org> # v3.3+
Link: https://lkml.kernel.org/r/1405610624-18722-1-git-send-email-t.figa@samsung.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
(cherry picked from commit 29e697b11853d3f83b1864ae385abdad4aa2c361)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 drivers/irqchip/irq-gic.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index b21171946960..7a17951a9d66 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -40,6 +40,7 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/irqchip/arm-gic.h>
 
+#include <asm/cputype.h>
 #include <asm/irq.h>
 #include <asm/exception.h>
 #include <asm/smp_plat.h>
@@ -897,7 +898,9 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
 		}
 
 		for_each_possible_cpu(cpu) {
-			unsigned long offset = percpu_offset * cpu_logical_map(cpu);
+			u32 mpidr = cpu_logical_map(cpu);
+			u32 core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+			unsigned long offset = percpu_offset * core_id;
 			*per_cpu_ptr(gic->dist_base.percpu_base, cpu) = dist_base + offset;
 			*per_cpu_ptr(gic->cpu_base.percpu_base, cpu) = cpu_base + offset;
 		}

From bc48de9c9ed83296b420c47f9057f3fe219f7ef9 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 24 Jul 2014 14:14:42 +0100
Subject: [PATCH 0436/1185] arm64: gicv3: Allow GICv3 compilation with older
 binutils

GICv3 introduces new system registers accessible with the full msr/mrs
syntax (e.g. mrs x0, Sop0_op1_CRm_CRn_op2). However, only recent
binutils understand the new syntax. This patch introduces msr_s/mrs_s
assembly macros which generate the equivalent instructions above and
converts the existing GICv3 code (both drivers/irqchip/ and
arch/arm64/kernel/).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Olof Johansson <olof@lixom.net>
Tested-by: Olof Johansson <olof@lixom.net>
Suggested-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Jason Cooper <jason@lakedaemon.net>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 72c5839515260dce966cd24f54436e6583288e6c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/sysreg.h    | 60 ++++++++++++++++++++++++++++++
 arch/arm64/kernel/head.S           |  6 +--
 drivers/irqchip/irq-gic-v3.c       | 16 ++++----
 include/linux/irqchip/arm-gic-v3.h | 42 +++++++++++----------
 4 files changed, 93 insertions(+), 31 deletions(-)
 create mode 100644 arch/arm64/include/asm/sysreg.h

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
new file mode 100644
index 000000000000..5c89df0acbcb
--- /dev/null
+++ b/arch/arm64/include/asm/sysreg.h
@@ -0,0 +1,60 @@
+/*
+ * Macros for accessing system registers with older binutils.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_SYSREG_H
+#define __ASM_SYSREG_H
+
+#define sys_reg(op0, op1, crn, crm, op2) \
+	((((op0)-2)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5))
+
+#ifdef __ASSEMBLY__
+
+	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
+	.equ	__reg_num_x\num, \num
+	.endr
+	.equ	__reg_num_xzr, 31
+
+	.macro	mrs_s, rt, sreg
+	.inst	0xd5300000|(\sreg)|(__reg_num_\rt)
+	.endm
+
+	.macro	msr_s, sreg, rt
+	.inst	0xd5100000|(\sreg)|(__reg_num_\rt)
+	.endm
+
+#else
+
+asm(
+"	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
+"	.equ	__reg_num_x\\num, \\num\n"
+"	.endr\n"
+"	.equ	__reg_num_xzr, 31\n"
+"\n"
+"	.macro	mrs_s, rt, sreg\n"
+"	.inst	0xd5300000|(\\sreg)|(__reg_num_\\rt)\n"
+"	.endm\n"
+"\n"
+"	.macro	msr_s, sreg, rt\n"
+"	.inst	0xd5100000|(\\sreg)|(__reg_num_\\rt)\n"
+"	.endm\n"
+);
+
+#endif
+
+#endif	/* __ASM_SYSREG_H */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index e18d04f3f2c6..5ef6f9b99ed0 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -170,12 +170,12 @@ ENTRY(el2_setup)
 	cmp	x0, #1
 	b.ne	3f
 
-	mrs	x0, ICC_SRE_EL2
+	mrs_s	x0, ICC_SRE_EL2
 	orr	x0, x0, #ICC_SRE_EL2_SRE	// Set ICC_SRE_EL2.SRE==1
 	orr	x0, x0, #ICC_SRE_EL2_ENABLE	// Set ICC_SRE_EL2.Enable==1
-	msr	ICC_SRE_EL2, x0
+	msr_s	ICC_SRE_EL2, x0
 	isb					// Make sure SRE is now set
-	msr	ICH_HCR_EL2, xzr		// Reset ICC_HCR_EL2 to defaults
+	msr_s	ICH_HCR_EL2, xzr		// Reset ICH_HCR_EL2 to defaults
 
 3:
 #endif
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 81519bae0453..57eaa5a0b1e3 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -108,39 +108,39 @@ static u64 gic_read_iar(void)
 {
 	u64 irqstat;
 
-	asm volatile("mrs %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
+	asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
 	return irqstat;
 }
 
 static void gic_write_pmr(u64 val)
 {
-	asm volatile("msr " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val));
+	asm volatile("msr_s " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val));
 }
 
 static void gic_write_ctlr(u64 val)
 {
-	asm volatile("msr " __stringify(ICC_CTLR_EL1) ", %0" : : "r" (val));
+	asm volatile("msr_s " __stringify(ICC_CTLR_EL1) ", %0" : : "r" (val));
 	isb();
 }
 
 static void gic_write_grpen1(u64 val)
 {
-	asm volatile("msr " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" (val));
+	asm volatile("msr_s " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" (val));
 	isb();
 }
 
 static void gic_write_sgi1r(u64 val)
 {
-	asm volatile("msr " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val));
+	asm volatile("msr_s " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val));
 }
 
 static void gic_enable_sre(void)
 {
 	u64 val;
 
-	asm volatile("mrs %0, " __stringify(ICC_SRE_EL1) : "=r" (val));
+	asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val));
 	val |= ICC_SRE_EL1_SRE;
-	asm volatile("msr " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val));
+	asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val));
 	isb();
 
 	/*
@@ -150,7 +150,7 @@ static void gic_enable_sre(void)
 	 *
 	 * Kindly inform the luser.
 	 */
-	asm volatile("mrs %0, " __stringify(ICC_SRE_EL1) : "=r" (val));
+	asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val));
 	if (!(val & ICC_SRE_EL1_SRE))
 		pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n");
 }
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 30cb7556d43f..03a4ea37ba86 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -18,6 +18,8 @@
 #ifndef __LINUX_IRQCHIP_ARM_GIC_V3_H
 #define __LINUX_IRQCHIP_ARM_GIC_V3_H
 
+#include <asm/sysreg.h>
+
 /*
  * Distributor registers. We assume we're running non-secure, with ARE
  * being set. Secure-only and non-ARE registers are not described.
@@ -125,17 +127,17 @@
 #define ICH_VMCR_PMR_SHIFT		24
 #define ICH_VMCR_PMR_MASK		(0xffUL << ICH_VMCR_PMR_SHIFT)
 
-#define ICC_EOIR1_EL1			S3_0_C12_C12_1
-#define ICC_IAR1_EL1			S3_0_C12_C12_0
-#define ICC_SGI1R_EL1			S3_0_C12_C11_5
-#define ICC_PMR_EL1			S3_0_C4_C6_0
-#define ICC_CTLR_EL1			S3_0_C12_C12_4
-#define ICC_SRE_EL1			S3_0_C12_C12_5
-#define ICC_GRPEN1_EL1			S3_0_C12_C12_7
+#define ICC_EOIR1_EL1			sys_reg(3, 0, 12, 12, 1)
+#define ICC_IAR1_EL1			sys_reg(3, 0, 12, 12, 0)
+#define ICC_SGI1R_EL1			sys_reg(3, 0, 12, 11, 5)
+#define ICC_PMR_EL1			sys_reg(3, 0, 4, 6, 0)
+#define ICC_CTLR_EL1			sys_reg(3, 0, 12, 12, 4)
+#define ICC_SRE_EL1			sys_reg(3, 0, 12, 12, 5)
+#define ICC_GRPEN1_EL1			sys_reg(3, 0, 12, 12, 7)
 
 #define ICC_IAR1_EL1_SPURIOUS		0x3ff
 
-#define ICC_SRE_EL2			S3_4_C12_C9_5
+#define ICC_SRE_EL2			sys_reg(3, 4, 12, 9, 5)
 
 #define ICC_SRE_EL2_SRE			(1 << 0)
 #define ICC_SRE_EL2_ENABLE		(1 << 3)
@@ -143,16 +145,16 @@
 /*
  * System register definitions
  */
-#define ICH_VSEIR_EL2			S3_4_C12_C9_4
-#define ICH_HCR_EL2			S3_4_C12_C11_0
-#define ICH_VTR_EL2			S3_4_C12_C11_1
-#define ICH_MISR_EL2			S3_4_C12_C11_2
-#define ICH_EISR_EL2			S3_4_C12_C11_3
-#define ICH_ELSR_EL2			S3_4_C12_C11_5
-#define ICH_VMCR_EL2			S3_4_C12_C11_7
+#define ICH_VSEIR_EL2			sys_reg(3, 4, 12, 9, 4)
+#define ICH_HCR_EL2			sys_reg(3, 4, 12, 11, 0)
+#define ICH_VTR_EL2			sys_reg(3, 4, 12, 11, 1)
+#define ICH_MISR_EL2			sys_reg(3, 4, 12, 11, 2)
+#define ICH_EISR_EL2			sys_reg(3, 4, 12, 11, 3)
+#define ICH_ELSR_EL2			sys_reg(3, 4, 12, 11, 5)
+#define ICH_VMCR_EL2			sys_reg(3, 4, 12, 11, 7)
 
-#define __LR0_EL2(x)			S3_4_C12_C12_ ## x
-#define __LR8_EL2(x)			S3_4_C12_C13_ ## x
+#define __LR0_EL2(x)			sys_reg(3, 4, 12, 12, x)
+#define __LR8_EL2(x)			sys_reg(3, 4, 12, 13, x)
 
 #define ICH_LR0_EL2			__LR0_EL2(0)
 #define ICH_LR1_EL2			__LR0_EL2(1)
@@ -171,13 +173,13 @@
 #define ICH_LR14_EL2			__LR8_EL2(6)
 #define ICH_LR15_EL2			__LR8_EL2(7)
 
-#define __AP0Rx_EL2(x)			S3_4_C12_C8_ ## x
+#define __AP0Rx_EL2(x)			sys_reg(3, 4, 12, 8, x)
 #define ICH_AP0R0_EL2			__AP0Rx_EL2(0)
 #define ICH_AP0R1_EL2			__AP0Rx_EL2(1)
 #define ICH_AP0R2_EL2			__AP0Rx_EL2(2)
 #define ICH_AP0R3_EL2			__AP0Rx_EL2(3)
 
-#define __AP1Rx_EL2(x)			S3_4_C12_C9_ ## x
+#define __AP1Rx_EL2(x)			sys_reg(3, 4, 12, 9, x)
 #define ICH_AP1R0_EL2			__AP1Rx_EL2(0)
 #define ICH_AP1R1_EL2			__AP1Rx_EL2(1)
 #define ICH_AP1R2_EL2			__AP1Rx_EL2(2)
@@ -189,7 +191,7 @@
 
 static inline void gic_write_eoir(u64 irq)
 {
-	asm volatile("msr " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" (irq));
+	asm volatile("msr_s " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" (irq));
 	isb();
 }
 

From 5cb100b38a7bf5bc5e892fa46998ebcbab925f37 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Sun, 5 May 2013 20:03:40 +0800
Subject: [PATCH 0437/1185] KVM: add missing misc_deregister() on error in
 kvm_init()

Add the missing misc_deregister() before return from kvm_init()
in the debugfs init error handling case.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit afc2f792cdcb67f4257f0e68d10ee4a7b7eae57a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 302681c4aa44..b547a1ceecbc 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3181,6 +3181,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 
 out_undebugfs:
 	unregister_syscore_ops(&kvm_syscore_ops);
+	misc_deregister(&kvm_dev);
 out_unreg:
 	kvm_async_pf_deinit();
 out_free:

From 86882a4950ff8f054d0b89bb36655446a04b89c0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 May 2013 14:31:01 +0100
Subject: [PATCH 0438/1185] ARM: KVM: move GIC/timer code to a common location

As KVM/arm64 is looming on the horizon, it makes sense to move some
of the common code to a single location in order to reduce duplication.

The code could live anywhere. Actually, most of KVM is already built
with a bunch of ugly ../../.. hacks in the various Makefiles, so we're
not exactly talking about style here. But maybe it is time to start
moving into a less ugly direction.

The include files must be in a "public" location, as they are accessed
from non-KVM files (arch/arm/kernel/asm-offsets.c).

For this purpose, introduce two new locations:
- virt/kvm/arm/ : x86 and ia64 already share the ioapic code in
  virt/kvm, so this could be seen as a (very ugly) precedent.
- include/kvm/  : there is already an include/xen, and while the
  intent is slightly different, this seems as good a location as
  any

Eventually, we should probably have independent Makefiles at every
level (just like everywhere else in the kernel), but this is just
the first step.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 7275acdfe29ba03ad2f6e150386900c4e2d43fb1)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h                            | 4 ++--
 arch/arm/kvm/Makefile                                      | 7 ++++---
 .../asm/kvm_arch_timer.h => include/kvm/arm_arch_timer.h   | 0
 arch/arm/include/asm/kvm_vgic.h => include/kvm/arm_vgic.h  | 0
 {arch/arm/kvm => virt/kvm/arm}/arch_timer.c                | 4 ++--
 {arch/arm/kvm => virt/kvm/arm}/vgic.c                      | 0
 6 files changed, 8 insertions(+), 7 deletions(-)
 rename arch/arm/include/asm/kvm_arch_timer.h => include/kvm/arm_arch_timer.h (100%)
 rename arch/arm/include/asm/kvm_vgic.h => include/kvm/arm_vgic.h (100%)
 rename {arch/arm/kvm => virt/kvm/arm}/arch_timer.c (99%)
 rename {arch/arm/kvm => virt/kvm/arm}/vgic.c (100%)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 57cb786a6203..ff5aaf10e6ec 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -23,7 +23,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 #include <asm/fpstate.h>
-#include <asm/kvm_arch_timer.h>
+#include <kvm/arm_arch_timer.h>
 
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
 #define KVM_USER_MEM_SLOTS 32
@@ -38,7 +38,7 @@
 #define KVM_NR_PAGE_SIZES	1
 #define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
 
-#include <asm/kvm_vgic.h>
+#include <kvm/arm_vgic.h>
 
 struct kvm_vcpu;
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 53c5ed83d16f..9184a491d172 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -14,10 +14,11 @@ CFLAGS_mmu.o := -I.
 AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
 AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
 
-kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+KVM := ../../../virt/kvm
+kvm-arm-y = $(addprefix $(KVM)/, kvm_main.o coalesced_mmio.o)
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
-obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
-obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
+obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/include/asm/kvm_arch_timer.h b/include/kvm/arm_arch_timer.h
similarity index 100%
rename from arch/arm/include/asm/kvm_arch_timer.h
rename to include/kvm/arm_arch_timer.h
diff --git a/arch/arm/include/asm/kvm_vgic.h b/include/kvm/arm_vgic.h
similarity index 100%
rename from arch/arm/include/asm/kvm_vgic.h
rename to include/kvm/arm_vgic.h
diff --git a/arch/arm/kvm/arch_timer.c b/virt/kvm/arm/arch_timer.c
similarity index 99%
rename from arch/arm/kvm/arch_timer.c
rename to virt/kvm/arm/arch_timer.c
index c55b6089e923..2d00b2925780 100644
--- a/arch/arm/kvm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -25,8 +25,8 @@
 #include <clocksource/arm_arch_timer.h>
 #include <asm/arch_timer.h>
 
-#include <asm/kvm_vgic.h>
-#include <asm/kvm_arch_timer.h>
+#include <kvm/arm_vgic.h>
+#include <kvm/arm_arch_timer.h>
 
 static struct timecounter *timecounter;
 static struct workqueue_struct *wqueue;
diff --git a/arch/arm/kvm/vgic.c b/virt/kvm/arm/vgic.c
similarity index 100%
rename from arch/arm/kvm/vgic.c
rename to virt/kvm/arm/vgic.c

From 7ce8a35ca784be99a50520476c1a34aad2755b7a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 May 2013 14:31:02 +0100
Subject: [PATCH 0439/1185] KVM: get rid of $(addprefix ../../../virt/kvm/,
 ...) in Makefiles

As requested by the KVM maintainers, remove the addprefix used to
refer to the main KVM code from the arch code, and replace it with
a KVM variable that does the same thing.

Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: Christoffer Dall <cdall@cs.columbia.edu>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Alexander Graf <agraf@suse.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 535cf7b3b13c7faed3dfabafb6598417de1129ca)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/Makefile     |  2 +-
 arch/ia64/kvm/Makefile    |  7 ++++---
 arch/powerpc/kvm/Makefile | 13 +++++++------
 arch/s390/kvm/Makefile    |  3 ++-
 arch/x86/kvm/Makefile     | 13 +++++++------
 5 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 9184a491d172..d99bee4950e5 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -15,7 +15,7 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
 AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
 
 KVM := ../../../virt/kvm
-kvm-arm-y = $(addprefix $(KVM)/, kvm_main.o coalesced_mmio.o)
+kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 1a4053789d01..18e45ec49bbf 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -47,12 +47,13 @@ FORCE : $(obj)/$(offsets-file)
 
 ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
 asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
+KVM := ../../../virt/kvm
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-		coalesced_mmio.o irq_comm.o)
+common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \
+		$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o
 
 ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
-common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o)
+common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o
 endif
 
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 422de3f4d46c..008cd856c5b5 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -5,9 +5,10 @@
 subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
 ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
+KVM := ../../../virt/kvm
 
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
-						eventfd.o)
+common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
+		$(KVM)/eventfd.o
 
 CFLAGS_44x_tlb.o  := -I.
 CFLAGS_e500_mmu.o := -I.
@@ -53,7 +54,7 @@ kvm-e500mc-objs := \
 kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
 
 kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
-	../../../virt/kvm/coalesced_mmio.o \
+	$(KVM)/coalesced_mmio.o \
 	fpu.o \
 	book3s_paired_singles.o \
 	book3s_pr.o \
@@ -86,8 +87,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
 	book3s_xics.o
 
 kvm-book3s_64-module-objs := \
-	../../../virt/kvm/kvm_main.o \
-	../../../virt/kvm/eventfd.o \
+	$(KVM)/kvm_main.o \
+	$(KVM)/eventfd.o \
 	powerpc.o \
 	emulate.o \
 	book3s.o \
@@ -111,7 +112,7 @@ kvm-book3s_32-objs := \
 kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
 
 kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
-kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o)
+kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
 
 kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
 
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 8fe9d65a4585..40b4c6470f88 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -6,7 +6,8 @@
 # it under the terms of the GNU General Public License (version 2 only)
 # as published by the Free Software Foundation.
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o)
+KVM := ../../../virt/kvm
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d609e1d84048..bf4fb04d0112 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -5,12 +5,13 @@ CFLAGS_x86.o := -I.
 CFLAGS_svm.o := -I.
 CFLAGS_vmx.o := -I.
 
-kvm-y			+= $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-				coalesced_mmio.o irq_comm.o eventfd.o \
-				irqchip.o)
-kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(addprefix ../../../virt/kvm/, \
-				assigned-dev.o iommu.o)
-kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(addprefix ../../../virt/kvm/, async_pf.o)
+KVM := ../../../virt/kvm
+
+kvm-y			+= $(KVM)/kvm_main.o $(KVM)/ioapic.o \
+				$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
+				$(KVM)/eventfd.o $(KVM)/irqchip.o
+kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(KVM)/assigned-dev.o $(KVM)/iommu.o
+kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
 			   i8254.o cpuid.o pmu.o

From 9861d210e02d5fb6ffcd5ea992f62043105813b3 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 26 Mar 2013 13:41:35 +0000
Subject: [PATCH 0440/1185] ARM: KVM: arch_timers: zero CNTVOFF upon return to
 host

To use the virtual counters from the host, we need to ensure that
CNTVOFF doesn't change unexpectedly. When we change to a guest, we
replace the host's CNTVOFF, but we don't restore it when returning to
the host.

As the host sets CNTVOFF to zero, and never changes it, we can simply
zero CNTVOFF when returning to the host. This patch adds said zeroing to
the return to host path.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Christoffer Dall <cdall@cs.columbia.edu>
(cherry picked from commit f793c23ebbe5afd1cabf4a42a3a297022213756f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/interrupts_head.S | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 2b44b95a86dd..6f18695a09cb 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -503,6 +503,10 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	add	r5, vcpu, r4
 	strd	r2, r3, [r5]
 
+	@ Ensure host CNTVCT == CNTPCT
+	mov	r2, #0
+	mcrr	p15, 4, r2, r2, c14	@ CNTVOFF
+
 1:
 #endif
 	@ Allow physical timer/counter access for the host

From 16d32e6b5aad12c2566cd8c6918e1be3ecac06f5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 7 Dec 2012 18:40:43 +0000
Subject: [PATCH 0441/1185] arm64: KVM: HYP mode idmap support

Add the necessary infrastructure for identity-mapped HYP page
tables. Idmap-ed code must be in the ".hyp.idmap.text" linker
section.

The rest of the HYP ends up in ".hyp.text".

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 2240bbb697354f5617d95e3ee104ca61bb812507)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kernel/vmlinux.lds.S | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index ce2d97255ba9..55d0e035205f 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -17,6 +17,19 @@ ENTRY(stext)
 
 jiffies = jiffies_64;
 
+#define HYPERVISOR_TEXT					\
+	/*						\
+	 * Force the alignment to be compatible with	\
+	 * the vectors requirements			\
+	 */						\
+	. = ALIGN(2048);				\
+	VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;	\
+	*(.hyp.idmap.text)				\
+	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;	\
+	VMLINUX_SYMBOL(__hyp_text_start) = .;		\
+	*(.hyp.text)					\
+	VMLINUX_SYMBOL(__hyp_text_end) = .;
+
 SECTIONS
 {
 	/*
@@ -48,6 +61,7 @@ SECTIONS
 			TEXT_TEXT
 			SCHED_TEXT
 			LOCK_TEXT
+			HYPERVISOR_TEXT
 			*(.fixup)
 			*(.gnu.warning)
 		. = ALIGN(16);
@@ -102,3 +116,9 @@ SECTIONS
 	STABS_DEBUG
 	.comment 0 : { *(.comment) }
 }
+
+/*
+ * The HYP init code can't be more than a page long.
+ */
+ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end),
+       "HYP init code too big")

From a51ca39794b462370c0b4f46b360370f55303454 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 10:46:47 +0000
Subject: [PATCH 0442/1185] arm64: KVM: EL2 register definitions

Define all the useful bitfields for EL2 registers.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 0369f6a34b9facd16eea4236518ca6f9cbc9e5ef)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_arm.h | 245 +++++++++++++++++++++++++++++++
 1 file changed, 245 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_arm.h

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
new file mode 100644
index 000000000000..a5f28e2720c7
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -0,0 +1,245 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_ARM_H__
+#define __ARM64_KVM_ARM_H__
+
+#include <asm/types.h>
+
+/* Hyp Configuration Register (HCR) bits */
+#define HCR_ID		(UL(1) << 33)
+#define HCR_CD		(UL(1) << 32)
+#define HCR_RW_SHIFT	31
+#define HCR_RW		(UL(1) << HCR_RW_SHIFT)
+#define HCR_TRVM	(UL(1) << 30)
+#define HCR_HCD		(UL(1) << 29)
+#define HCR_TDZ		(UL(1) << 28)
+#define HCR_TGE		(UL(1) << 27)
+#define HCR_TVM		(UL(1) << 26)
+#define HCR_TTLB	(UL(1) << 25)
+#define HCR_TPU		(UL(1) << 24)
+#define HCR_TPC		(UL(1) << 23)
+#define HCR_TSW		(UL(1) << 22)
+#define HCR_TAC		(UL(1) << 21)
+#define HCR_TIDCP	(UL(1) << 20)
+#define HCR_TSC		(UL(1) << 19)
+#define HCR_TID3	(UL(1) << 18)
+#define HCR_TID2	(UL(1) << 17)
+#define HCR_TID1	(UL(1) << 16)
+#define HCR_TID0	(UL(1) << 15)
+#define HCR_TWE		(UL(1) << 14)
+#define HCR_TWI		(UL(1) << 13)
+#define HCR_DC		(UL(1) << 12)
+#define HCR_BSU		(3 << 10)
+#define HCR_BSU_IS	(UL(1) << 10)
+#define HCR_FB		(UL(1) << 9)
+#define HCR_VA		(UL(1) << 8)
+#define HCR_VI		(UL(1) << 7)
+#define HCR_VF		(UL(1) << 6)
+#define HCR_AMO		(UL(1) << 5)
+#define HCR_IMO		(UL(1) << 4)
+#define HCR_FMO		(UL(1) << 3)
+#define HCR_PTW		(UL(1) << 2)
+#define HCR_SWIO	(UL(1) << 1)
+#define HCR_VM		(UL(1) << 0)
+
+/*
+ * The bits we set in HCR:
+ * RW:		64bit by default, can be overridden for 32bit VMs
+ * TAC:		Trap ACTLR
+ * TSC:		Trap SMC
+ * TSW:		Trap cache operations by set/way
+ * TWI:		Trap WFI
+ * TIDCP:	Trap L2CTLR/L2ECTLR
+ * BSU_IS:	Upgrade barriers to the inner shareable domain
+ * FB:		Force broadcast of all maintenance operations
+ * AMO:		Override CPSR.A and enable signaling with VA
+ * IMO:		Override CPSR.I and enable signaling with VI
+ * FMO:		Override CPSR.F and enable signaling with VF
+ * SWIO:	Turn set/way invalidates into set/way clean+invalidate
+ */
+#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
+			 HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
+			 HCR_SWIO | HCR_TIDCP | HCR_RW)
+#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+
+/* Hyp System Control Register (SCTLR_EL2) bits */
+#define SCTLR_EL2_EE	(1 << 25)
+#define SCTLR_EL2_WXN	(1 << 19)
+#define SCTLR_EL2_I	(1 << 12)
+#define SCTLR_EL2_SA	(1 << 3)
+#define SCTLR_EL2_C	(1 << 2)
+#define SCTLR_EL2_A	(1 << 1)
+#define SCTLR_EL2_M	1
+#define SCTLR_EL2_FLAGS	(SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C |	\
+			 SCTLR_EL2_SA | SCTLR_EL2_I)
+
+/* TCR_EL2 register bits */
+#define TCR_EL2_TBI	(1 << 20)
+#define TCR_EL2_PS	(7 << 16)
+#define TCR_EL2_PS_40B	(2 << 16)
+#define TCR_EL2_TG0	(1 << 14)
+#define TCR_EL2_SH0	(3 << 12)
+#define TCR_EL2_ORGN0	(3 << 10)
+#define TCR_EL2_IRGN0	(3 << 8)
+#define TCR_EL2_T0SZ	0x3f
+#define TCR_EL2_MASK	(TCR_EL2_TG0 | TCR_EL2_SH0 | \
+			 TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
+
+#define TCR_EL2_FLAGS	(TCR_EL2_PS_40B)
+
+/* VTCR_EL2 register bits */
+#define VTCR_EL2_PS_MASK	(7 << 16)
+#define VTCR_EL2_PS_40B		(2 << 16)
+#define VTCR_EL2_TG0_MASK	(1 << 14)
+#define VTCR_EL2_TG0_4K		(0 << 14)
+#define VTCR_EL2_TG0_64K	(1 << 14)
+#define VTCR_EL2_SH0_MASK	(3 << 12)
+#define VTCR_EL2_SH0_INNER	(3 << 12)
+#define VTCR_EL2_ORGN0_MASK	(3 << 10)
+#define VTCR_EL2_ORGN0_WBWA	(1 << 10)
+#define VTCR_EL2_IRGN0_MASK	(3 << 8)
+#define VTCR_EL2_IRGN0_WBWA	(1 << 8)
+#define VTCR_EL2_SL0_MASK	(3 << 6)
+#define VTCR_EL2_SL0_LVL1	(1 << 6)
+#define VTCR_EL2_T0SZ_MASK	0x3f
+#define VTCR_EL2_T0SZ_40B	24
+
+#ifdef CONFIG_ARM64_64K_PAGES
+/*
+ * Stage2 translation configuration:
+ * 40bits output (PS = 2)
+ * 40bits input  (T0SZ = 24)
+ * 64kB pages (TG0 = 1)
+ * 2 level page tables (SL = 1)
+ */
+#define VTCR_EL2_FLAGS		(VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \
+				 VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
+				 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
+				 VTCR_EL2_T0SZ_40B)
+#define VTTBR_X		(38 - VTCR_EL2_T0SZ_40B)
+#else
+/*
+ * Stage2 translation configuration:
+ * 40bits output (PS = 2)
+ * 40bits input  (T0SZ = 24)
+ * 4kB pages (TG0 = 0)
+ * 3 level page tables (SL = 1)
+ */
+#define VTCR_EL2_FLAGS		(VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \
+				 VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
+				 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
+				 VTCR_EL2_T0SZ_40B)
+#define VTTBR_X		(37 - VTCR_EL2_T0SZ_40B)
+#endif
+
+#define VTTBR_BADDR_SHIFT (VTTBR_X)	/* lowest bit of the base address field */
+#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_VMID_SHIFT  (48LLU)
+#define VTTBR_VMID_MASK	  (0xffLLU << VTTBR_VMID_SHIFT)
+
+/* Hyp System Trap Register */
+#define HSTR_EL2_TTEE	(1 << 16)
+#define HSTR_EL2_T(x)	(1 << (x))
+
+/* Hyp Coprocessor Trap Register */
+#define CPTR_EL2_TCPAC	(1 << 31)
+#define CPTR_EL2_TTA	(1 << 20)
+#define CPTR_EL2_TFP	(1 << 10)
+
+/* Hyp Debug Configuration Register bits */
+#define MDCR_EL2_TDRA		(1 << 11)
+#define MDCR_EL2_TDOSA		(1 << 10)
+#define MDCR_EL2_TDA		(1 << 9)
+#define MDCR_EL2_TDE		(1 << 8)
+#define MDCR_EL2_HPME		(1 << 7)
+#define MDCR_EL2_TPM		(1 << 6)
+#define MDCR_EL2_TPMCR		(1 << 5)
+#define MDCR_EL2_HPMN_MASK	(0x1F)
+
+/* Exception Syndrome Register (ESR) bits */
+#define ESR_EL2_EC_SHIFT	(26)
+#define ESR_EL2_EC		(0x3fU << ESR_EL2_EC_SHIFT)
+#define ESR_EL2_IL		(1U << 25)
+#define ESR_EL2_ISS		(ESR_EL2_IL - 1)
+#define ESR_EL2_ISV_SHIFT	(24)
+#define ESR_EL2_ISV		(1U << ESR_EL2_ISV_SHIFT)
+#define ESR_EL2_SAS_SHIFT	(22)
+#define ESR_EL2_SAS		(3U << ESR_EL2_SAS_SHIFT)
+#define ESR_EL2_SSE		(1 << 21)
+#define ESR_EL2_SRT_SHIFT	(16)
+#define ESR_EL2_SRT_MASK	(0x1f << ESR_EL2_SRT_SHIFT)
+#define ESR_EL2_SF 		(1 << 15)
+#define ESR_EL2_AR 		(1 << 14)
+#define ESR_EL2_EA 		(1 << 9)
+#define ESR_EL2_CM 		(1 << 8)
+#define ESR_EL2_S1PTW 		(1 << 7)
+#define ESR_EL2_WNR		(1 << 6)
+#define ESR_EL2_FSC		(0x3f)
+#define ESR_EL2_FSC_TYPE	(0x3c)
+
+#define ESR_EL2_CV_SHIFT	(24)
+#define ESR_EL2_CV		(1U << ESR_EL2_CV_SHIFT)
+#define ESR_EL2_COND_SHIFT	(20)
+#define ESR_EL2_COND		(0xfU << ESR_EL2_COND_SHIFT)
+
+
+#define FSC_FAULT	(0x04)
+#define FSC_PERM	(0x0c)
+
+/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
+#define HPFAR_MASK	(~0xFUL)
+
+#define ESR_EL2_EC_UNKNOWN	(0x00)
+#define ESR_EL2_EC_WFI		(0x01)
+#define ESR_EL2_EC_CP15_32	(0x03)
+#define ESR_EL2_EC_CP15_64	(0x04)
+#define ESR_EL2_EC_CP14_MR	(0x05)
+#define ESR_EL2_EC_CP14_LS	(0x06)
+#define ESR_EL2_EC_FP_ASIMD	(0x07)
+#define ESR_EL2_EC_CP10_ID	(0x08)
+#define ESR_EL2_EC_CP14_64	(0x0C)
+#define ESR_EL2_EC_ILL_ISS	(0x0E)
+#define ESR_EL2_EC_SVC32	(0x11)
+#define ESR_EL2_EC_HVC32	(0x12)
+#define ESR_EL2_EC_SMC32	(0x13)
+#define ESR_EL2_EC_SVC64	(0x15)
+#define ESR_EL2_EC_HVC64	(0x16)
+#define ESR_EL2_EC_SMC64	(0x17)
+#define ESR_EL2_EC_SYS64	(0x18)
+#define ESR_EL2_EC_IABT		(0x20)
+#define ESR_EL2_EC_IABT_HYP	(0x21)
+#define ESR_EL2_EC_PC_ALIGN	(0x22)
+#define ESR_EL2_EC_DABT		(0x24)
+#define ESR_EL2_EC_DABT_HYP	(0x25)
+#define ESR_EL2_EC_SP_ALIGN	(0x26)
+#define ESR_EL2_EC_FP_EXC32	(0x28)
+#define ESR_EL2_EC_FP_EXC64	(0x2C)
+#define ESR_EL2_EC_SERRROR	(0x2F)
+#define ESR_EL2_EC_BREAKPT	(0x30)
+#define ESR_EL2_EC_BREAKPT_HYP	(0x31)
+#define ESR_EL2_EC_SOFTSTP	(0x32)
+#define ESR_EL2_EC_SOFTSTP_HYP	(0x33)
+#define ESR_EL2_EC_WATCHPT	(0x34)
+#define ESR_EL2_EC_WATCHPT_HYP	(0x35)
+#define ESR_EL2_EC_BKPT32	(0x38)
+#define ESR_EL2_EC_VECTOR32	(0x3A)
+#define ESR_EL2_EC_BRK64	(0x3C)
+
+#define ESR_EL2_EC_xABT_xFSR_EXTABT	0x10
+
+#endif /* __ARM64_KVM_ARM_H__ */

From 44848d746b7f29803f7ac9579d594f368a1cad11 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 11:16:40 +0000
Subject: [PATCH 0443/1185] arm64: KVM: system register definitions for 64bit
 guests

Define the saved/restored registers for 64bit guests.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit fd9fc9f73cc2070d2637a7ee082800a817fd45f3)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h | 68 ++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_asm.h

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
new file mode 100644
index 000000000000..591ac219964a
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KVM_ASM_H__
+#define __ARM_KVM_ASM_H__
+
+/*
+ * 0 is reserved as an invalid value.
+ * Order *must* be kept in sync with the hyp switch code.
+ */
+#define	MPIDR_EL1	1	/* MultiProcessor Affinity Register */
+#define	CSSELR_EL1	2	/* Cache Size Selection Register */
+#define	SCTLR_EL1	3	/* System Control Register */
+#define	ACTLR_EL1	4	/* Auxiliary Control Register */
+#define	CPACR_EL1	5	/* Coprocessor Access Control */
+#define	TTBR0_EL1	6	/* Translation Table Base Register 0 */
+#define	TTBR1_EL1	7	/* Translation Table Base Register 1 */
+#define	TCR_EL1		8	/* Translation Control Register */
+#define	ESR_EL1		9	/* Exception Syndrome Register */
+#define	AFSR0_EL1	10	/* Auxiliary Fault Status Register 0 */
+#define	AFSR1_EL1	11	/* Auxiliary Fault Status Register 1 */
+#define	FAR_EL1		12	/* Fault Address Register */
+#define	MAIR_EL1	13	/* Memory Attribute Indirection Register */
+#define	VBAR_EL1	14	/* Vector Base Address Register */
+#define	CONTEXTIDR_EL1	15	/* Context ID Register */
+#define	TPIDR_EL0	16	/* Thread ID, User R/W */
+#define	TPIDRRO_EL0	17	/* Thread ID, User R/O */
+#define	TPIDR_EL1	18	/* Thread ID, Privileged */
+#define	AMAIR_EL1	19	/* Aux Memory Attribute Indirection Register */
+#define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
+#define	NR_SYS_REGS	21
+
+#define ARM_EXCEPTION_IRQ	  0
+#define ARM_EXCEPTION_TRAP	  1
+
+#ifndef __ASSEMBLY__
+struct kvm;
+struct kvm_vcpu;
+
+extern char __kvm_hyp_init[];
+extern char __kvm_hyp_init_end[];
+
+extern char __kvm_hyp_vector[];
+
+extern char __kvm_hyp_code_start[];
+extern char __kvm_hyp_code_end[];
+
+extern void __kvm_flush_vm_context(void);
+extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+
+extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+#endif
+
+#endif /* __ARM_KVM_ASM_H__ */

From b04c4cdfff4b701f01f1dae2ada92b580cc281d5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 13:27:52 +0000
Subject: [PATCH 0444/1185] arm64: KVM: Basic ESR_EL2 helpers and vcpu register
 access

Implements helpers for dealing with the EL2 syndrome register as
well as accessing the vcpu registers.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 83a4979483c8e597b69d4403794f87fea51fa549)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_emulate.h | 158 +++++++++++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_emulate.h

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
new file mode 100644
index 000000000000..6c1725e93b0b
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/asm/kvm_emulate.h
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_EMULATE_H__
+#define __ARM64_KVM_EMULATE_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmio.h>
+#include <asm/ptrace.h>
+
+void kvm_inject_undefined(struct kvm_vcpu *vcpu);
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+
+static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
+{
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
+}
+
+static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu)
+{
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1;
+}
+
+static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
+{
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate;
+}
+
+static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
+{
+	return false;	/* 32bit? Bahhh... */
+}
+
+static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
+{
+	return true;	/* No conditionals on arm64 */
+}
+
+static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
+{
+	*vcpu_pc(vcpu) += 4;
+}
+
+static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num)
+{
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num];
+}
+
+/* Get vcpu SPSR for current mode */
+static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
+{
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+}
+
+static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
+{
+	u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
+
+	return mode != PSR_MODE_EL0t;
+}
+
+static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.fault.esr_el2;
+}
+
+static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.fault.far_el2;
+}
+
+static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
+{
+	return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
+}
+
+static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_ISV);
+}
+
+static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_WNR);
+}
+
+static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SSE);
+}
+
+static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
+{
+	return (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SRT_MASK) >> ESR_EL2_SRT_SHIFT;
+}
+
+static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EA);
+}
+
+static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_S1PTW);
+}
+
+static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
+{
+	return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SAS) >> ESR_EL2_SAS_SHIFT);
+}
+
+/* This one is not specific to Data Abort */
+static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_IL);
+}
+
+static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) >> ESR_EL2_EC_SHIFT;
+}
+
+static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_trap_get_class(vcpu) == ESR_EL2_EC_IABT;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE;
+}
+
+#endif /* __ARM64_KVM_EMULATE_H__ */

From fbd17d89d0b567e7c5c12955648cf728f0c04169 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 17 Dec 2012 12:27:42 +0000
Subject: [PATCH 0445/1185] arm64: KVM: fault injection into a guest

Implement the injection of a fault (undefined, data abort or
prefetch abort) into a 64bit guest.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit aa8eff9bfbd531e0fcc8e68052f4ac545cd004c5)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/inject_fault.c | 126 ++++++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 arch/arm64/kvm/inject_fault.c

diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
new file mode 100644
index 000000000000..54f656271266
--- /dev/null
+++ b/arch/arm64/kvm/inject_fault.c
@@ -0,0 +1,126 @@
+/*
+ * Fault injection for 64bit guests.
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/esr.h>
+
+#define PSTATE_FAULT_BITS_64 	(PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \
+				 PSR_I_BIT | PSR_D_BIT)
+#define EL1_EXCEPT_SYNC_OFFSET	0x200
+
+/* Inject an instruction (is_iabt) or data abort for addr into the guest. */
+static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
+{
+	unsigned long cpsr = *vcpu_cpsr(vcpu);
+	bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
+	u32 esr = 0;
+
+	*vcpu_spsr(vcpu) = cpsr;
+	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+
+	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+	*vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
+
+	vcpu_sys_reg(vcpu, FAR_EL1) = addr;
+
+	/*
+	 * Build an {i,d}abort, depending on the level and the
+	 * instruction set. Report an external synchronous abort.
+	 */
+	if (kvm_vcpu_trap_il_is32bit(vcpu))
+		esr |= ESR_EL1_IL;
+
+	/*
+	 * Here, the guest runs in AArch64 mode when in EL1. If we get
+	 * an AArch32 fault, it means we managed to trap an EL0 fault.
+	 */
+	if (is_aarch32 || (cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t)
+		esr |= (ESR_EL1_EC_IABT_EL0 << ESR_EL1_EC_SHIFT);
+	else
+		esr |= (ESR_EL1_EC_IABT_EL1 << ESR_EL1_EC_SHIFT);
+
+	/* Data abort: the EC value must land in the EC field, not in the ISS */
+	if (!is_iabt)
+		esr |= (ESR_EL1_EC_DABT_EL0 << ESR_EL1_EC_SHIFT);
+
+	vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_EL2_EC_xABT_xFSR_EXTABT;
+}
+
+static void inject_undef64(struct kvm_vcpu *vcpu)
+{
+	unsigned long cpsr = *vcpu_cpsr(vcpu);
+	u32 esr = (ESR_EL1_EC_UNKNOWN << ESR_EL1_EC_SHIFT);
+
+	*vcpu_spsr(vcpu) = cpsr;
+	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+
+	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+	*vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
+
+	/*
+	 * Build an unknown exception, depending on the instruction
+	 * set.
+	 */
+	if (kvm_vcpu_trap_il_is32bit(vcpu))
+		esr |= ESR_EL1_IL;
+
+	vcpu_sys_reg(vcpu, ESR_EL1) = esr;
+}
+
+/**
+ * kvm_inject_dabt - inject a data abort into the guest
+ * @vcpu: The VCPU to receive the data abort
+ * @addr: The address to report in the guest's FAR_EL1
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	inject_abt64(vcpu, false, addr);
+}
+
+/**
+ * kvm_inject_pabt - inject a prefetch abort into the guest
+ * @vcpu: The VCPU to receive the prefetch abort
+ * @addr: The address to report in the guest's FAR_EL1
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	inject_abt64(vcpu, true, addr);
+}
+
+/**
+ * kvm_inject_undefined - inject an undefined instruction into the guest
+ * @vcpu: The VCPU to receive the undefined exception
+ *
+ * Must be called from the VCPU thread, while it is not executing guest code.
+ */
+void kvm_inject_undefined(struct kvm_vcpu *vcpu)
+{
+	inject_undef64(vcpu);
+}

From bef26a85389996c6f9c79f09d2ba09da0cd46351 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 15:35:24 +0000
Subject: [PATCH 0446/1185] arm64: KVM: architecture specific MMU backend

Define the arm64 specific MMU backend:
- HYP/kernel VA offset
- S2 4/64kB definitions
- S2 page table populating and flushing
- icache cleaning

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 37c437532b0126d1df5685080db9cecf3d918175)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_mmu.h | 135 +++++++++++++++++++++++++++++++
 1 file changed, 135 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_mmu.h

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
new file mode 100644
index 000000000000..efe609c6a3c9
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_MMU_H__
+#define __ARM64_KVM_MMU_H__
+
+#include <asm/page.h>
+#include <asm/memory.h>
+
+/*
+ * As we only have the TTBR0_EL2 register, we cannot express
+ * "negative" addresses. This makes it impossible to directly share
+ * mappings with the kernel.
+ *
+ * Instead, give the HYP mode its own VA region at a fixed offset from
+ * the kernel by just masking the top bits (which are all ones for a
+ * kernel address).
+ */
+#define HYP_PAGE_OFFSET_SHIFT	VA_BITS
+#define HYP_PAGE_OFFSET_MASK	((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1)
+#define HYP_PAGE_OFFSET		(PAGE_OFFSET & HYP_PAGE_OFFSET_MASK)
+
+/*
+ * Our virtual mapping for the idmap-ed MMU-enable code. Must be
+ * shared across all the page-tables. Conveniently, we use the last
+ * possible page, where no kernel mapping will ever exist.
+ */
+#define TRAMPOLINE_VA		(HYP_PAGE_OFFSET_MASK & PAGE_MASK)
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Convert a kernel VA into a HYP VA.
+ * reg: VA to be converted.
+ */
+.macro kern_hyp_va	reg
+	and	\reg, \reg, #HYP_PAGE_OFFSET_MASK
+.endm
+
+#else
+
+#include <asm/cachetype.h>
+#include <asm/cacheflush.h>
+
+#define KERN_TO_HYP(kva)	((unsigned long)(kva) - PAGE_OFFSET + HYP_PAGE_OFFSET)
+
+/*
+ * Align KVM with the kernel's view of physical memory. Should be
+ * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration.
+ */
+#define KVM_PHYS_SHIFT	PHYS_MASK_SHIFT
+#define KVM_PHYS_SIZE	(1UL << KVM_PHYS_SHIFT)
+#define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1UL)
+
+/* Make sure we get the right size, and thus the right alignment */
+#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - PGDIR_SHIFT))
+#define S2_PGD_ORDER	get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
+
+int create_hyp_mappings(void *from, void *to);
+int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
+void free_boot_hyp_pgd(void);
+void free_hyp_pgds(void);
+
+int kvm_alloc_stage2_pgd(struct kvm *kvm);
+void kvm_free_stage2_pgd(struct kvm *kvm);
+int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
+			  phys_addr_t pa, unsigned long size);
+
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
+
+phys_addr_t kvm_mmu_get_httbr(void);
+phys_addr_t kvm_mmu_get_boot_httbr(void);
+phys_addr_t kvm_get_idmap_vector(void);
+int kvm_mmu_init(void);
+void kvm_clear_hyp_idmap(void);
+
+#define	kvm_set_pte(ptep, pte)		set_pte(ptep, pte)
+
+static inline bool kvm_is_write_fault(unsigned long esr)
+{
+	unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT;
+
+	if (esr_ec == ESR_EL2_EC_IABT)
+		return false;
+
+	if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR))
+		return false;
+
+	return true;
+}
+
+static inline void kvm_clean_dcache_area(void *addr, size_t size) {}
+static inline void kvm_clean_pgd(pgd_t *pgd) {}
+static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
+static inline void kvm_clean_pte(pte_t *pte) {}
+static inline void kvm_clean_pte_entry(pte_t *pte) {}
+
+static inline void kvm_set_s2pte_writable(pte_t *pte)
+{
+	pte_val(*pte) |= PTE_S2_RDWR;
+}
+
+struct kvm;
+
+static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+{
+	if (!icache_is_aliasing()) {		/* PIPT */
+		unsigned long hva = gfn_to_hva(kvm, gfn);
+		flush_icache_range(hva, hva + PAGE_SIZE);
+	} else if (!icache_is_aivivt()) {	/* non ASID-tagged VIVT */
+		/* any kind of VIPT cache */
+		__flush_icache_all();
+	}
+}
+
+#define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ARM64_KVM_MMU_H__ */

From becdc5b63f089f0664fc1381fa168b7a7dce09dc Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:29:28 +0000
Subject: [PATCH 0447/1185] arm64: KVM: user space interface

Provide the kvm.h file that defines the user space visible
interface.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 54f81d0eb93896da73d1636bca84cf90f52cabdf)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/uapi/asm/kvm.h | 117 ++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 arch/arm64/include/uapi/asm/kvm.h

diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
new file mode 100644
index 000000000000..4e64570a20c9
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/uapi/asm/kvm.h:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KVM_H__
+#define __ARM_KVM_H__
+
+#define KVM_SPSR_EL1	0
+#define KVM_NR_SPSR	1
+
+#ifndef __ASSEMBLY__
+#include <asm/types.h>
+#include <asm/ptrace.h>
+
+#define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_IRQ_LINE
+
+#define KVM_REG_SIZE(id)						\
+	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+struct kvm_regs {
+	struct user_pt_regs regs;	/* sp = sp_el0 */
+
+	__u64	sp_el1;
+	__u64	elr_el1;
+
+	__u64	spsr[KVM_NR_SPSR];
+
+	struct user_fpsimd_state fp_regs;
+};
+
+/* Supported Processor Types */
+#define KVM_ARM_TARGET_AEM_V8		0
+#define KVM_ARM_TARGET_FOUNDATION_V8	1
+#define KVM_ARM_TARGET_CORTEX_A57	2
+
+#define KVM_ARM_NUM_TARGETS		3
+
+/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
+#define KVM_ARM_DEVICE_TYPE_SHIFT	0
+#define KVM_ARM_DEVICE_TYPE_MASK	(0xffff << KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_ID_SHIFT		16
+#define KVM_ARM_DEVICE_ID_MASK		(0xffff << KVM_ARM_DEVICE_ID_SHIFT)
+
+/* Supported device IDs */
+#define KVM_ARM_DEVICE_VGIC_V2		0
+
+/* Supported VGIC address types  */
+#define KVM_VGIC_V2_ADDR_TYPE_DIST	0
+#define KVM_VGIC_V2_ADDR_TYPE_CPU	1
+
+#define KVM_VGIC_V2_DIST_SIZE		0x1000
+#define KVM_VGIC_V2_CPU_SIZE		0x2000
+
+struct kvm_vcpu_init {
+	__u32 target;
+	__u32 features[7];
+};
+
+struct kvm_sregs {
+};
+
+struct kvm_fpu {
+};
+
+struct kvm_guest_debug_arch {
+};
+
+struct kvm_debug_exit_arch {
+};
+
+struct kvm_sync_regs {
+};
+
+struct kvm_arch_memory_slot {
+};
+
+/* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_TYPE_SHIFT		24
+#define KVM_ARM_IRQ_TYPE_MASK		0xff
+#define KVM_ARM_IRQ_VCPU_SHIFT		16
+#define KVM_ARM_IRQ_VCPU_MASK		0xff
+#define KVM_ARM_IRQ_NUM_SHIFT		0
+#define KVM_ARM_IRQ_NUM_MASK		0xffff
+
+/* irq_type field */
+#define KVM_ARM_IRQ_TYPE_CPU		0
+#define KVM_ARM_IRQ_TYPE_SPI		1
+#define KVM_ARM_IRQ_TYPE_PPI		2
+
+/* out-of-kernel GIC cpu interrupt injection irq_number field */
+#define KVM_ARM_IRQ_CPU_IRQ		0
+#define KVM_ARM_IRQ_CPU_FIQ		1
+
+/* Highest supported SPI, from VGIC_NR_IRQS */
+#define KVM_ARM_IRQ_GIC_MAX		127
+
+#endif
+
+#endif /* __ARM_KVM_H__ */

From 068d803462e03f87953424ea6f3910578f3f293e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:15:34 +0000
Subject: [PATCH 0448/1185] arm64: KVM: system register handling

Provide 64bit system register handling, modeled after the cp15
handling for ARM.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 7c8c5e6a9101ea57a1c2c9faff0917e79251a21e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_coproc.h |  51 ++
 arch/arm64/include/uapi/asm/kvm.h   |  29 +
 arch/arm64/kvm/sys_regs.c           | 883 ++++++++++++++++++++++++++++
 arch/arm64/kvm/sys_regs.h           | 138 +++++
 include/uapi/linux/kvm.h            |   1 +
 5 files changed, 1102 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_coproc.h
 create mode 100644 arch/arm64/kvm/sys_regs.c
 create mode 100644 arch/arm64/kvm/sys_regs.h

diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h
new file mode 100644
index 000000000000..9b4477acb554
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_coproc.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/asm/kvm_coproc.h
+ * Copyright (C) 2012 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_COPROC_H__
+#define __ARM64_KVM_COPROC_H__
+
+#include <linux/kvm_host.h>
+
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
+
+struct kvm_sys_reg_table {
+	const struct sys_reg_desc *table;
+	size_t num;
+};
+
+struct kvm_sys_reg_target_table {
+	struct kvm_sys_reg_table table64;
+};
+
+void kvm_register_target_sys_reg_table(unsigned int target,
+				       struct kvm_sys_reg_target_table *table);
+
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+#define kvm_coproc_table_init kvm_sys_reg_table_init
+void kvm_sys_reg_table_init(void);
+
+struct kvm_one_reg;
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
+
+#endif /* __ARM64_KVM_COPROC_H__ */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 4e64570a20c9..ebac919dc0ca 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -92,6 +92,35 @@ struct kvm_sync_regs {
 struct kvm_arch_memory_slot {
 };
 
+/* If you need to interpret the index values, here is the key: */
+#define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
+#define KVM_REG_ARM_COPROC_SHIFT	16
+
+/* Normal registers are mapped as coprocessor 16. */
+#define KVM_REG_ARM_CORE		(0x0010 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_CORE_REG(name)	(offsetof(struct kvm_regs, name) / sizeof(__u32))
+
+/* Some registers need more space to represent values. */
+#define KVM_REG_ARM_DEMUX		(0x0011 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_DEMUX_ID_MASK	0x000000000000FF00
+#define KVM_REG_ARM_DEMUX_ID_SHIFT	8
+#define KVM_REG_ARM_DEMUX_ID_CCSIDR	(0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT)
+#define KVM_REG_ARM_DEMUX_VAL_MASK	0x00000000000000FF
+#define KVM_REG_ARM_DEMUX_VAL_SHIFT	0
+
+/* AArch64 system registers */
+#define KVM_REG_ARM64_SYSREG		(0x0013 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM64_SYSREG_OP0_MASK	0x000000000000c000
+#define KVM_REG_ARM64_SYSREG_OP0_SHIFT	14
+#define KVM_REG_ARM64_SYSREG_OP1_MASK	0x0000000000003800
+#define KVM_REG_ARM64_SYSREG_OP1_SHIFT	11
+#define KVM_REG_ARM64_SYSREG_CRN_MASK	0x0000000000000780
+#define KVM_REG_ARM64_SYSREG_CRN_SHIFT	7
+#define KVM_REG_ARM64_SYSREG_CRM_MASK	0x0000000000000078
+#define KVM_REG_ARM64_SYSREG_CRM_SHIFT	3
+#define KVM_REG_ARM64_SYSREG_OP2_MASK	0x0000000000000007
+#define KVM_REG_ARM64_SYSREG_OP2_SHIFT	0
+
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
 #define KVM_ARM_IRQ_TYPE_MASK		0xff
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
new file mode 100644
index 000000000000..52fff0ae3442
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs.c
@@ -0,0 +1,883 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/coproc.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Rusty Russell <rusty@rustcorp.com.au>
+ *          Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/mm.h>
+#include <linux/kvm_host.h>
+#include <linux/uaccess.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <trace/events/kvm.h>
+
+#include "sys_regs.h"
+
+/*
+ * All of this file is extremely similar to the ARM coproc.c, but the
+ * types are different. My gut feeling is that it should be pretty
+ * easy to merge, but that would be an ABI breakage -- again. VFP
+ * would also need to be abstracted.
+ */
+
+/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
+static u32 cache_levels;
+
+/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
+#define CSSELR_MAX 12
+
+/* Which cache CCSIDR represents depends on CSSELR value. */
+static u32 get_ccsidr(u32 csselr)
+{
+	u32 ccsidr;
+
+	/* Make sure no one else changes CSSELR during this! */
+	local_irq_disable();
+	/* Put value into CSSELR */
+	asm volatile("msr csselr_el1, %x0" : : "r" (csselr));
+	isb();
+	/* Read result out of CCSIDR */
+	asm volatile("mrs %0, ccsidr_el1" : "=r" (ccsidr));
+	local_irq_enable();
+
+	return ccsidr;
+}
+
+static void do_dc_cisw(u32 val)
+{
+	asm volatile("dc cisw, %x0" : : "r" (val));
+	dsb();
+}
+
+static void do_dc_csw(u32 val)
+{
+	asm volatile("dc csw, %x0" : : "r" (val));
+	dsb();
+}
+
+/* See note at ARM ARM B1.14.4 */
+static bool access_dcsw(struct kvm_vcpu *vcpu,
+			const struct sys_reg_params *p,
+			const struct sys_reg_desc *r)
+{
+	unsigned long val;
+	int cpu;
+
+	if (!p->is_write)
+		return read_from_write_only(vcpu, p);
+
+	cpu = get_cpu();
+
+	cpumask_setall(&vcpu->arch.require_dcache_flush);
+	cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
+
+	/* If we were already preempted, take the long way around */
+	if (cpu != vcpu->arch.last_pcpu) {
+		flush_cache_all();
+		goto done;
+	}
+
+	val = *vcpu_reg(vcpu, p->Rt);
+
+	switch (p->CRm) {
+	case 6:			/* Upgrade DCISW to DCCISW, as per HCR.SWIO */
+	case 14:		/* DCCISW */
+		do_dc_cisw(val);
+		break;
+
+	case 10:		/* DCCSW */
+		do_dc_csw(val);
+		break;
+	}
+
+done:
+	put_cpu();
+
+	return true;
+}
+
+/*
+ * We could trap ID_DFR0 and tell the guest we don't support performance
+ * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
+ * NAKed, so it will read the PMCR anyway.
+ *
+ * Therefore we tell the guest we have 0 counters.  Unfortunately, we
+ * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
+ * all PM registers, which doesn't crash the guest kernel at least.
+ */
+static bool pm_fake(struct kvm_vcpu *vcpu,
+		    const struct sys_reg_params *p,
+		    const struct sys_reg_desc *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+	else
+		return read_zero(vcpu, p);
+}
+
+static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	u64 amair;
+
+	asm volatile("mrs %0, amair_el1\n" : "=r" (amair));
+	vcpu_sys_reg(vcpu, AMAIR_EL1) = amair;
+}
+
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	/*
+	 * Simply map the vcpu_id into the Aff0 field of the MPIDR.
+	 */
+	vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff);
+}
+
+/*
+ * Architected system registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ */
+static const struct sys_reg_desc sys_reg_descs[] = {
+	/* DC ISW */
+	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b0110), Op2(0b010),
+	  access_dcsw },
+	/* DC CSW */
+	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1010), Op2(0b010),
+	  access_dcsw },
+	/* DC CISW */
+	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
+	  access_dcsw },
+
+	/* MPIDR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101),
+	  NULL, reset_mpidr, MPIDR_EL1 },
+	/* SCTLR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
+	  NULL, reset_val, SCTLR_EL1, 0x00C50078 },
+	/* CPACR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
+	  NULL, reset_val, CPACR_EL1, 0 },
+	/* TTBR0_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000),
+	  NULL, reset_unknown, TTBR0_EL1 },
+	/* TTBR1_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001),
+	  NULL, reset_unknown, TTBR1_EL1 },
+	/* TCR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010),
+	  NULL, reset_val, TCR_EL1, 0 },
+
+	/* AFSR0_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000),
+	  NULL, reset_unknown, AFSR0_EL1 },
+	/* AFSR1_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001),
+	  NULL, reset_unknown, AFSR1_EL1 },
+	/* ESR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000),
+	  NULL, reset_unknown, ESR_EL1 },
+	/* FAR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
+	  NULL, reset_unknown, FAR_EL1 },
+
+	/* PMINTENSET_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
+	  pm_fake },
+	/* PMINTENCLR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
+	  pm_fake },
+
+	/* MAIR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
+	  NULL, reset_unknown, MAIR_EL1 },
+	/* AMAIR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000),
+	  NULL, reset_amair_el1, AMAIR_EL1 },
+
+	/* VBAR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000),
+	  NULL, reset_val, VBAR_EL1, 0 },
+	/* CONTEXTIDR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001),
+	  NULL, reset_val, CONTEXTIDR_EL1, 0 },
+	/* TPIDR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100),
+	  NULL, reset_unknown, TPIDR_EL1 },
+
+	/* CNTKCTL_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1110), CRm(0b0001), Op2(0b000),
+	  NULL, reset_val, CNTKCTL_EL1, 0},
+
+	/* CSSELR_EL1 */
+	{ Op0(0b11), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
+	  NULL, reset_unknown, CSSELR_EL1 },
+
+	/* PMCR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
+	  pm_fake },
+	/* PMCNTENSET_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
+	  pm_fake },
+	/* PMCNTENCLR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
+	  pm_fake },
+	/* PMOVSCLR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
+	  pm_fake },
+	/* PMSWINC_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
+	  pm_fake },
+	/* PMSELR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
+	  pm_fake },
+	/* PMCEID0_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
+	  pm_fake },
+	/* PMCEID1_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
+	  pm_fake },
+	/* PMCCNTR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
+	  pm_fake },
+	/* PMXEVTYPER_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
+	  pm_fake },
+	/* PMXEVCNTR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
+	  pm_fake },
+	/* PMUSERENR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
+	  pm_fake },
+	/* PMOVSSET_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
+	  pm_fake },
+
+	/* TPIDR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010),
+	  NULL, reset_unknown, TPIDR_EL0 },
+	/* TPIDRRO_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011),
+	  NULL, reset_unknown, TPIDRRO_EL0 },
+};
+
+/* Target specific emulation tables */
+static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS];
+
+void kvm_register_target_sys_reg_table(unsigned int target,
+				       struct kvm_sys_reg_target_table *table)
+{
+	target_tables[target] = table;
+}
+
+/* Get specific register table for this target. */
+static const struct sys_reg_desc *get_target_table(unsigned target, size_t *num)
+{
+	struct kvm_sys_reg_target_table *table;
+
+	table = target_tables[target];
+	*num = table->table64.num;
+	return table->table64.table;
+}
+
+static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
+					 const struct sys_reg_desc table[],
+					 unsigned int num)
+{
+	unsigned int i;
+
+	for (i = 0; i < num; i++) {
+		const struct sys_reg_desc *r = &table[i];
+
+		if (params->Op0 != r->Op0)
+			continue;
+		if (params->Op1 != r->Op1)
+			continue;
+		if (params->CRn != r->CRn)
+			continue;
+		if (params->CRm != r->CRm)
+			continue;
+		if (params->Op2 != r->Op2)
+			continue;
+
+		return r;
+	}
+	return NULL;
+}
+
+static int emulate_sys_reg(struct kvm_vcpu *vcpu,
+			   const struct sys_reg_params *params)
+{
+	size_t num;
+	const struct sys_reg_desc *table, *r;
+
+	table = get_target_table(vcpu->arch.target, &num);
+
+	/* Search target-specific then generic table. */
+	r = find_reg(params, table, num);
+	if (!r)
+		r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+	if (likely(r)) {
+		/*
+		 * Not having an accessor means that we have
+		 * configured a trap that we don't know how to
+		 * handle. This certainly qualifies as a gross bug
+		 * that should be fixed right away.
+		 */
+		BUG_ON(!r->access);
+
+		if (likely(r->access(vcpu, params, r))) {
+			/* Skip instruction, since it was emulated */
+			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+			return 1;
+		}
+		/* If access function fails, it should complain. */
+	} else {
+		kvm_err("Unsupported guest sys_reg access at: %lx\n",
+			*vcpu_pc(vcpu));
+		print_sys_reg_instr(params);
+	}
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
+			      const struct sys_reg_desc *table, size_t num)
+{
+	unsigned long i;
+
+	for (i = 0; i < num; i++)
+		if (table[i].reset)
+			table[i].reset(vcpu, &table[i]);
+}
+
+/**
+ * kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct sys_reg_params params;
+	unsigned long esr = kvm_vcpu_get_hsr(vcpu);
+
+	params.Op0 = (esr >> 20) & 3;
+	params.Op1 = (esr >> 14) & 0x7;
+	params.CRn = (esr >> 10) & 0xf;
+	params.CRm = (esr >> 1) & 0xf;
+	params.Op2 = (esr >> 17) & 0x7;
+	params.Rt = (esr >> 5) & 0x1f;
+	params.is_write = !(esr & 1);
+
+	return emulate_sys_reg(vcpu, &params);
+}
+
+/******************************************************************************
+ * Userspace API
+ *****************************************************************************/
+
+static bool index_to_params(u64 id, struct sys_reg_params *params)
+{
+	switch (id & KVM_REG_SIZE_MASK) {
+	case KVM_REG_SIZE_U64:
+		/* Any unused index bits means it's not valid. */
+		if (id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK
+			      | KVM_REG_ARM_COPROC_MASK
+			      | KVM_REG_ARM64_SYSREG_OP0_MASK
+			      | KVM_REG_ARM64_SYSREG_OP1_MASK
+			      | KVM_REG_ARM64_SYSREG_CRN_MASK
+			      | KVM_REG_ARM64_SYSREG_CRM_MASK
+			      | KVM_REG_ARM64_SYSREG_OP2_MASK))
+			return false;
+		params->Op0 = ((id & KVM_REG_ARM64_SYSREG_OP0_MASK)
+			       >> KVM_REG_ARM64_SYSREG_OP0_SHIFT);
+		params->Op1 = ((id & KVM_REG_ARM64_SYSREG_OP1_MASK)
+			       >> KVM_REG_ARM64_SYSREG_OP1_SHIFT);
+		params->CRn = ((id & KVM_REG_ARM64_SYSREG_CRN_MASK)
+			       >> KVM_REG_ARM64_SYSREG_CRN_SHIFT);
+		params->CRm = ((id & KVM_REG_ARM64_SYSREG_CRM_MASK)
+			       >> KVM_REG_ARM64_SYSREG_CRM_SHIFT);
+		params->Op2 = ((id & KVM_REG_ARM64_SYSREG_OP2_MASK)
+			       >> KVM_REG_ARM64_SYSREG_OP2_SHIFT);
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* Decode an index value, and find the sys_reg_desc entry. */
+static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
+						    u64 id)
+{
+	size_t num;
+	const struct sys_reg_desc *table, *r;
+	struct sys_reg_params params;
+
+	/* We only do sys_reg for now. */
+	if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
+		return NULL;
+
+	if (!index_to_params(id, &params))
+		return NULL;
+
+	table = get_target_table(vcpu->arch.target, &num);
+	r = find_reg(&params, table, num);
+	if (!r)
+		r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+	/* Not saved in the sys_reg array? */
+	if (r && !r->reg)
+		r = NULL;
+
+	return r;
+}
+
+/*
+ * These are the invariant sys_reg registers: we let the guest see the
+ * host versions of these, so they're part of the guest state.
+ *
+ * A future CPU may provide a mechanism to present different values to
+ * the guest, or a future kvm may trap them.
+ */
+
+#define FUNCTION_INVARIANT(reg)						\
+	static void get_##reg(struct kvm_vcpu *v,			\
+			      const struct sys_reg_desc *r)		\
+	{								\
+		u64 val;						\
+									\
+		asm volatile("mrs %0, " __stringify(reg) "\n"		\
+			     : "=r" (val));				\
+		((struct sys_reg_desc *)r)->val = val;			\
+	}
+
+FUNCTION_INVARIANT(midr_el1)
+FUNCTION_INVARIANT(ctr_el0)
+FUNCTION_INVARIANT(revidr_el1)
+FUNCTION_INVARIANT(id_pfr0_el1)
+FUNCTION_INVARIANT(id_pfr1_el1)
+FUNCTION_INVARIANT(id_dfr0_el1)
+FUNCTION_INVARIANT(id_afr0_el1)
+FUNCTION_INVARIANT(id_mmfr0_el1)
+FUNCTION_INVARIANT(id_mmfr1_el1)
+FUNCTION_INVARIANT(id_mmfr2_el1)
+FUNCTION_INVARIANT(id_mmfr3_el1)
+FUNCTION_INVARIANT(id_isar0_el1)
+FUNCTION_INVARIANT(id_isar1_el1)
+FUNCTION_INVARIANT(id_isar2_el1)
+FUNCTION_INVARIANT(id_isar3_el1)
+FUNCTION_INVARIANT(id_isar4_el1)
+FUNCTION_INVARIANT(id_isar5_el1)
+FUNCTION_INVARIANT(clidr_el1)
+FUNCTION_INVARIANT(aidr_el1)
+
+/* ->val is filled in by kvm_sys_reg_table_init() */
+static struct sys_reg_desc invariant_sys_regs[] = {
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b000),
+	  NULL, get_midr_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b110),
+	  NULL, get_revidr_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b000),
+	  NULL, get_id_pfr0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b001),
+	  NULL, get_id_pfr1_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b010),
+	  NULL, get_id_dfr0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b011),
+	  NULL, get_id_afr0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b100),
+	  NULL, get_id_mmfr0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b101),
+	  NULL, get_id_mmfr1_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b110),
+	  NULL, get_id_mmfr2_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b111),
+	  NULL, get_id_mmfr3_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
+	  NULL, get_id_isar0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b001),
+	  NULL, get_id_isar1_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
+	  NULL, get_id_isar2_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b011),
+	  NULL, get_id_isar3_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b100),
+	  NULL, get_id_isar4_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b101),
+	  NULL, get_id_isar5_el1 },
+	{ Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b001),
+	  NULL, get_clidr_el1 },
+	{ Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b111),
+	  NULL, get_aidr_el1 },
+	{ Op0(0b11), Op1(0b011), CRn(0b0000), CRm(0b0000), Op2(0b001),
+	  NULL, get_ctr_el0 },
+};
+
+static int reg_from_user(void *val, const void __user *uaddr, u64 id)
+{
+	/* This Just Works because we are little endian. */
+	if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
+		return -EFAULT;
+	return 0;
+}
+
+static int reg_to_user(void __user *uaddr, const void *val, u64 id)
+{
+	/* This Just Works because we are little endian. */
+	if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
+		return -EFAULT;
+	return 0;
+}
+
+static int get_invariant_sys_reg(u64 id, void __user *uaddr)
+{
+	struct sys_reg_params params;
+	const struct sys_reg_desc *r;
+
+	if (!index_to_params(id, &params))
+		return -ENOENT;
+
+	r = find_reg(&params, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
+	if (!r)
+		return -ENOENT;
+
+	return reg_to_user(uaddr, &r->val, id);
+}
+
+static int set_invariant_sys_reg(u64 id, void __user *uaddr)
+{
+	struct sys_reg_params params;
+	const struct sys_reg_desc *r;
+	int err;
+	u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
+
+	if (!index_to_params(id, &params))
+		return -ENOENT;
+	r = find_reg(&params, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
+	if (!r)
+		return -ENOENT;
+
+	err = reg_from_user(&val, uaddr, id);
+	if (err)
+		return err;
+
+	/* This is what we mean by invariant: you can't change it. */
+	if (r->val != val)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* True iff CSSELR value val names a cache recorded in cache_levels. */
+static bool is_valid_cache(u32 val)
+{
+	u32 level, ctype;
+
+	if (val >= CSSELR_MAX)
+		return false;	/* -ENOENT would convert to true */
+
+	/* Bottom bit is Instruction or Data bit.  Next 3 bits are level. */
+	level = (val >> 1);
+	ctype = (cache_levels >> (level * 3)) & 7;
+
+	switch (ctype) {
+	case 1: /* Instruction cache only */
+		return (val & 1);
+	case 2: /* Data cache only */
+	case 4: /* Unified cache */
+		return !(val & 1);
+	case 3: /* Separate instruction and data caches */
+		return true;
+	case 0: /* No cache */
+	default: /* Reserved: we can't know instruction or data. */
+		return false;
+	}
+}
+
+static int demux_c15_get(u64 id, void __user *uaddr)
+{
+	u32 val;
+	u32 __user *uval = uaddr;
+
+	/* Fail if we have unknown bits set. */
+	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
+		   | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
+		return -ENOENT;
+
+	switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
+	case KVM_REG_ARM_DEMUX_ID_CCSIDR:
+		if (KVM_REG_SIZE(id) != 4)
+			return -ENOENT;
+		val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
+			>> KVM_REG_ARM_DEMUX_VAL_SHIFT;
+		if (!is_valid_cache(val))
+			return -ENOENT;
+
+		return put_user(get_ccsidr(val), uval);
+	default:
+		return -ENOENT;
+	}
+}
+
+static int demux_c15_set(u64 id, void __user *uaddr)
+{
+	u32 val, newval;
+	u32 __user *uval = uaddr;
+
+	/* Fail if we have unknown bits set. */
+	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
+		   | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
+		return -ENOENT;
+
+	switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
+	case KVM_REG_ARM_DEMUX_ID_CCSIDR:
+		if (KVM_REG_SIZE(id) != 4)
+			return -ENOENT;
+		val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
+			>> KVM_REG_ARM_DEMUX_VAL_SHIFT;
+		if (!is_valid_cache(val))
+			return -ENOENT;
+
+		if (get_user(newval, uval))
+			return -EFAULT;
+
+		/* This is also invariant: you can't change it. */
+		if (newval != get_ccsidr(val))
+			return -EINVAL;
+		return 0;
+	default:
+		return -ENOENT;
+	}
+}
+
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	const struct sys_reg_desc *r;
+	void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+
+	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+		return demux_c15_get(reg->id, uaddr);
+
+	if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
+		return -ENOENT;
+
+	r = index_to_sys_reg_desc(vcpu, reg->id);
+	if (!r)
+		return get_invariant_sys_reg(reg->id, uaddr);
+
+	return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id);
+}
+
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	const struct sys_reg_desc *r;
+	void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+
+	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+		return demux_c15_set(reg->id, uaddr);
+
+	if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
+		return -ENOENT;
+
+	r = index_to_sys_reg_desc(vcpu, reg->id);
+	if (!r)
+		return set_invariant_sys_reg(reg->id, uaddr);
+
+	return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
+}
+
+static unsigned int num_demux_regs(void)
+{
+	unsigned int i, count = 0;
+
+	for (i = 0; i < CSSELR_MAX; i++)
+		if (is_valid_cache(i))
+			count++;
+
+	return count;
+}
+
+static int write_demux_regids(u64 __user *uindices)
+{
+	u64 val = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
+	unsigned int i;
+
+	val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;
+	for (i = 0; i < CSSELR_MAX; i++) {
+		if (!is_valid_cache(i))
+			continue;
+		if (put_user(val | i, uindices))
+			return -EFAULT;
+		uindices++;
+	}
+	return 0;
+}
+
+static u64 sys_reg_to_index(const struct sys_reg_desc *reg)
+{
+	return (KVM_REG_ARM64 | KVM_REG_SIZE_U64 |
+		KVM_REG_ARM64_SYSREG |
+		(reg->Op0 << KVM_REG_ARM64_SYSREG_OP0_SHIFT) |
+		(reg->Op1 << KVM_REG_ARM64_SYSREG_OP1_SHIFT) |
+		(reg->CRn << KVM_REG_ARM64_SYSREG_CRN_SHIFT) |
+		(reg->CRm << KVM_REG_ARM64_SYSREG_CRM_SHIFT) |
+		(reg->Op2 << KVM_REG_ARM64_SYSREG_OP2_SHIFT));
+}
+
+static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind)
+{
+	if (!*uind)
+		return true;
+
+	if (put_user(sys_reg_to_index(reg), *uind))
+		return false;
+
+	(*uind)++;
+	return true;
+}
+
+/* Assumed ordered tables, see kvm_sys_reg_table_init. */
+static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
+{
+	const struct sys_reg_desc *i1, *i2, *end1, *end2;
+	unsigned int total = 0;
+	size_t num;
+
+	/* We check for duplicates here, to allow arch-specific overrides. */
+	i1 = get_target_table(vcpu->arch.target, &num);
+	end1 = i1 + num;
+	i2 = sys_reg_descs;
+	end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs);
+
+	BUG_ON(i1 == end1 || i2 == end2);
+
+	/* Walk carefully, as both tables may refer to the same register. */
+	while (i1 || i2) {
+		int cmp = cmp_sys_reg(i1, i2);
+		/* target-specific overrides generic entry. */
+		if (cmp <= 0) {
+			/* Ignore registers we trap but don't save. */
+			if (i1->reg) {
+				if (!copy_reg_to_user(i1, &uind))
+					return -EFAULT;
+				total++;
+			}
+		} else {
+			/* Ignore registers we trap but don't save. */
+			if (i2->reg) {
+				if (!copy_reg_to_user(i2, &uind))
+					return -EFAULT;
+				total++;
+			}
+		}
+
+		if (cmp <= 0 && ++i1 == end1)
+			i1 = NULL;
+		if (cmp >= 0 && ++i2 == end2)
+			i2 = NULL;
+	}
+	return total;
+}
+
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu)
+{
+	return ARRAY_SIZE(invariant_sys_regs)
+		+ num_demux_regs()
+		+ walk_sys_regs(vcpu, (u64 __user *)NULL);
+}
+
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	unsigned int i;
+	int err;
+
+	/* Then give them all the invariant registers' indices. */
+	for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) {
+		if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices))
+			return -EFAULT;
+		uindices++;
+	}
+
+	err = walk_sys_regs(vcpu, uindices);
+	if (err < 0)
+		return err;
+	uindices += err;
+
+	return write_demux_regids(uindices);
+}
+
+void kvm_sys_reg_table_init(void)
+{
+	unsigned int i;
+	struct sys_reg_desc clidr;
+
+	/* Make sure tables are unique and in order. */
+	for (i = 1; i < ARRAY_SIZE(sys_reg_descs); i++)
+		BUG_ON(cmp_sys_reg(&sys_reg_descs[i-1], &sys_reg_descs[i]) >= 0);
+
+	/* We abuse the reset function to overwrite the table itself. */
+	for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
+		invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]);
+
+	/*
+	 * CLIDR format is awkward, so clean it up.  See ARM B4.1.20:
+	 *
+	 *   If software reads the Cache Type fields from Ctype1
+	 *   upwards, once it has seen a value of 0b000, no caches
+	 *   exist at further-out levels of the hierarchy. So, for
+	 *   example, if Ctype3 is the first Cache Type field with a
+	 *   value of 0b000, the values of Ctype4 to Ctype7 must be
+	 *   ignored.
+	 */
+	get_clidr_el1(NULL, &clidr); /* Ugly... */
+	cache_levels = clidr.val;
+	for (i = 0; i < 7; i++)
+		if (((cache_levels >> (i*3)) & 7) == 0)
+			break;
+	/* Clear all higher bits. */
+	cache_levels &= (1 << (i*3))-1;
+}
+
+/**
+ * kvm_reset_sys_regs - sets system registers to reset value
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on the
+ * virtual CPU struct to their architecturally defined reset values.
+ */
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
+{
+	size_t num;
+	const struct sys_reg_desc *table;
+
+	/* Catch someone adding a register without putting in reset entry. */
+	memset(&vcpu->arch.ctxt.sys_regs, 0x42, sizeof(vcpu->arch.ctxt.sys_regs));
+
+	/* Generic chip reset first (so target could override). */
+	reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+	table = get_target_table(vcpu->arch.target, &num);
+	reset_sys_reg_descs(vcpu, table, num);
+
+	for (num = 1; num < NR_SYS_REGS; num++)
+		if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242)
+			panic("Didn't reset vcpu_sys_reg(%zi)", num);
+}
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
new file mode 100644
index 000000000000..d50d3722998e
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/coproc.h
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_SYS_REGS_LOCAL_H__
+#define __ARM64_KVM_SYS_REGS_LOCAL_H__
+
+struct sys_reg_params {
+	u8	Op0;
+	u8	Op1;
+	u8	CRn;
+	u8	CRm;
+	u8	Op2;
+	u8	Rt;
+	bool	is_write;
+};
+
+struct sys_reg_desc {
+	/* MRS/MSR instruction which accesses it. */
+	u8	Op0;
+	u8	Op1;
+	u8	CRn;
+	u8	CRm;
+	u8	Op2;
+
+	/* Trapped access from guest, if non-NULL. */
+	bool (*access)(struct kvm_vcpu *,
+		       const struct sys_reg_params *,
+		       const struct sys_reg_desc *);
+
+	/* Initialization for vcpu. */
+	void (*reset)(struct kvm_vcpu *, const struct sys_reg_desc *);
+
+	/* Index into sys_reg[], or 0 if we don't need to save it. */
+	int reg;
+
+	/* Value (usually reset value) */
+	u64 val;
+};
+
+static inline void print_sys_reg_instr(const struct sys_reg_params *p)
+{
+	/* Look, we even formatted it for you to paste into the table! */
+	kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
+		      p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read");
+}
+
+static inline bool ignore_write(struct kvm_vcpu *vcpu,
+				const struct sys_reg_params *p)
+{
+	return true;
+}
+
+static inline bool read_zero(struct kvm_vcpu *vcpu,
+			     const struct sys_reg_params *p)
+{
+	*vcpu_reg(vcpu, p->Rt) = 0;
+	return true;
+}
+
+static inline bool write_to_read_only(struct kvm_vcpu *vcpu,
+				      const struct sys_reg_params *params)
+{
+	kvm_debug("sys_reg write to read-only register at: %lx\n",
+		  *vcpu_pc(vcpu));
+	print_sys_reg_instr(params);
+	return false;
+}
+
+static inline bool read_from_write_only(struct kvm_vcpu *vcpu,
+					const struct sys_reg_params *params)
+{
+	kvm_debug("sys_reg read to write-only register at: %lx\n",
+		  *vcpu_pc(vcpu));
+	print_sys_reg_instr(params);
+	return false;
+}
+
+/* Reset functions */
+static inline void reset_unknown(struct kvm_vcpu *vcpu,
+				 const struct sys_reg_desc *r)
+{
+	BUG_ON(!r->reg);
+	BUG_ON(r->reg >= NR_SYS_REGS);
+	vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
+}
+
+static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	BUG_ON(!r->reg);
+	BUG_ON(r->reg >= NR_SYS_REGS);
+	vcpu_sys_reg(vcpu, r->reg) = r->val;
+}
+
+static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
+			      const struct sys_reg_desc *i2)
+{
+	BUG_ON(i1 == i2);
+	if (!i1)
+		return 1;
+	else if (!i2)
+		return -1;
+	if (i1->Op0 != i2->Op0)
+		return i1->Op0 - i2->Op0;
+	if (i1->Op1 != i2->Op1)
+		return i1->Op1 - i2->Op1;
+	if (i1->CRn != i2->CRn)
+		return i1->CRn - i2->CRn;
+	if (i1->CRm != i2->CRm)
+		return i1->CRm - i2->CRm;
+	return i1->Op2 - i2->Op2;
+}
+
+
+#define Op0(_x) 	.Op0 = _x
+#define Op1(_x) 	.Op1 = _x
+#define CRn(_x)		.CRn = _x
+#define CRm(_x) 	.CRm = _x
+#define Op2(_x) 	.Op2 = _x
+
+#endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d88c8ee00c8b..97277d333e82 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -783,6 +783,7 @@ struct kvm_dirty_tlb {
 #define KVM_REG_IA64		0x3000000000000000ULL
 #define KVM_REG_ARM		0x4000000000000000ULL
 #define KVM_REG_S390		0x5000000000000000ULL
+#define KVM_REG_ARM64		0x6000000000000000ULL
 #define KVM_REG_MIPS		0x7000000000000000ULL
 
 #define KVM_REG_SIZE_SHIFT	52

From 3107348ebf0b00722a27b7a4a32c1f4c34fdb1e2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 6 Feb 2013 17:30:48 +0000
Subject: [PATCH 0449/1185] arm64: KVM: CPU specific system registers handling

Add the support code for CPU specific system registers. Not much
here yet.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit b990a9d3152bddca62cc1f8bf80518430b98737b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs_generic_v8.c | 85 ++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 arch/arm64/kvm/sys_regs_generic_v8.c

diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
new file mode 100644
index 000000000000..d4e803907312
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Based on arch/arm/kvm/coproc_a15.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Rusty Russell <rusty@rustcorp.au>
+ *          Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kvm_host.h>
+#include <asm/cputype.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <linux/init.h>
+
+#include "sys_regs.h"
+
+static bool access_actlr(struct kvm_vcpu *vcpu,
+			 const struct sys_reg_params *p,
+			 const struct sys_reg_desc *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+
+	*vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, ACTLR_EL1);
+	return true;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	u64 actlr;
+
+	asm volatile("mrs %0, actlr_el1\n" : "=r" (actlr));
+	vcpu_sys_reg(vcpu, ACTLR_EL1) = actlr;
+}
+
+/*
+ * Implementation specific sys-reg registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ */
+static const struct sys_reg_desc genericv8_sys_regs[] = {
+	/* ACTLR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
+	  access_actlr, reset_actlr, ACTLR_EL1 },
+};
+
+static struct kvm_sys_reg_target_table genericv8_target_table = {
+	.table64 = {
+		.table = genericv8_sys_regs,
+		.num = ARRAY_SIZE(genericv8_sys_regs),
+	},
+};
+
+static int __init sys_reg_genericv8_init(void)
+{
+	unsigned int i;
+
+	for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++)
+		BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1],
+			       &genericv8_sys_regs[i]) >= 0);
+
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8,
+					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8,
+					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
+					  &genericv8_target_table);
+	return 0;
+}
+late_initcall(sys_reg_genericv8_init);

From cac664305a439463255eb7954ecd375b39f4299f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:23:59 +0000
Subject: [PATCH 0450/1185] arm64: KVM: virtual CPU reset

Provide the reset code for a virtual CPU booted in 64bit mode.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit f4672752c321ea36ce099cebdd7a082a8f327505)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/reset.c | 76 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 arch/arm64/kvm/reset.c

diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
new file mode 100644
index 000000000000..f6536a06231a
--- /dev/null
+++ b/arch/arm64/kvm/reset.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/reset.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+
+#include <asm/cputype.h>
+#include <asm/ptrace.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_coproc.h>
+
+/*
+ * ARMv8 Reset Values
+ */
+static const struct kvm_regs default_regs_reset = {
+	.regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT |
+			PSR_F_BIT | PSR_D_BIT),
+};
+
+int kvm_arch_dev_ioctl_check_extension(long ext)
+{
+	int r;
+
+	switch (ext) {
+	default:
+		r = 0;
+	}
+
+	return r;
+}
+
+/**
+ * kvm_reset_vcpu - sets core registers and sys_regs to reset value
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on
+ * the virtual CPU struct to their architecturally defined reset
+ * values.
+ */
+int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
+{
+	const struct kvm_regs *cpu_reset;
+
+	switch (vcpu->arch.target) {
+	default:
+		cpu_reset = &default_regs_reset;
+		break;
+	}
+
+	/* Reset core registers */
+	memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset));
+
+	/* Reset system registers */
+	kvm_reset_sys_regs(vcpu);
+
+	return 0;
+}

From 913d79134dee46fc407ad2f3819bb5dce2f47e38 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:29:28 +0000
Subject: [PATCH 0451/1185] arm64: KVM: kvm_arch and kvm_vcpu_arch definitions

Provide the architecture dependent structures for VM and
vcpu abstractions.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 4f8d6632ec71372a3b8dbb4775662c2c9025d173)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h | 186 ++++++++++++++++++++++++++++++
 1 file changed, 186 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_host.h

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
new file mode 100644
index 000000000000..4a2622f5e81f
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/asm/kvm_host.h:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_HOST_H__
+#define __ARM64_KVM_HOST_H__
+
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmio.h>
+
+#define KVM_MAX_VCPUS 4
+#define KVM_USER_MEM_SLOTS 32
+#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+#include <kvm/arm_vgic.h>
+#include <kvm/arm_arch_timer.h>
+
+#define KVM_VCPU_MAX_FEATURES 0
+
+/* We don't currently support large pages. */
+#define KVM_HPAGE_GFN_SHIFT(x)	0
+#define KVM_NR_PAGE_SIZES	1
+#define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
+
+struct kvm_vcpu;
+int kvm_target_cpu(void);
+int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+int kvm_arch_dev_ioctl_check_extension(long ext);
+
+struct kvm_arch {
+	/* The VMID generation used for the virt. memory system */
+	u64    vmid_gen;
+	u32    vmid;
+
+	/* 1-level 2nd stage table and lock */
+	spinlock_t pgd_lock;
+	pgd_t *pgd;
+
+	/* VTTBR value associated with above pgd and vmid */
+	u64    vttbr;
+
+	/* Interrupt controller */
+	struct vgic_dist	vgic;
+
+	/* Timer */
+	struct arch_timer_kvm	timer;
+};
+
+#define KVM_NR_MEM_OBJS     40
+
+/*
+ * We don't want allocation failures within the mmu code, so we preallocate
+ * enough memory for a single page fault in a cache.
+ */
+struct kvm_mmu_memory_cache {
+	int nobjs;
+	void *objects[KVM_NR_MEM_OBJS];
+};
+
+struct kvm_vcpu_fault_info {
+	u32 esr_el2;		/* Hyp Syndrome Register */
+	u64 far_el2;		/* Hyp Fault Address Register */
+	u64 hpfar_el2;		/* Hyp IPA Fault Address Register */
+};
+
+struct kvm_cpu_context {
+	struct kvm_regs	gp_regs;
+	u64 sys_regs[NR_SYS_REGS];
+};
+
+typedef struct kvm_cpu_context kvm_cpu_context_t;
+
+struct kvm_vcpu_arch {
+	struct kvm_cpu_context ctxt;
+
+	/* HYP configuration */
+	u64 hcr_el2;
+
+	/* Exception Information */
+	struct kvm_vcpu_fault_info fault;
+
+	/* Pointer to host CPU context */
+	kvm_cpu_context_t *host_cpu_context;
+
+	/* VGIC state */
+	struct vgic_cpu vgic_cpu;
+	struct arch_timer_cpu timer_cpu;
+
+	/*
+	 * Anything that is not used directly from assembly code goes
+	 * here.
+	 */
+	/* dcache set/way operation pending */
+	int last_pcpu;
+	cpumask_t require_dcache_flush;
+
+	/* Don't run the guest */
+	bool pause;
+
+	/* IO related fields */
+	struct kvm_decode mmio_decode;
+
+	/* Interrupt related fields */
+	u64 irq_lines;		/* IRQ and FIQ levels */
+
+	/* Cache some mmu pages needed inside spinlock regions */
+	struct kvm_mmu_memory_cache mmu_page_cache;
+
+	/* Target CPU and feature flags */
+	u32 target;
+	DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
+
+	/* Detect first run of a vcpu */
+	bool has_run_once;
+};
+
+#define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
+#define vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
+#define vcpu_cp15(v,r)		((v)->arch.ctxt.cp15[(r)])
+
+struct kvm_vm_stat {
+	u32 remote_tlb_flush;
+};
+
+struct kvm_vcpu_stat {
+	u32 halt_wakeup;
+};
+
+struct kvm_vcpu_init;
+int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+			const struct kvm_vcpu_init *init);
+unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
+int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
+struct kvm_one_reg;
+int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+struct kvm;
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm,
+			unsigned long start, unsigned long end);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+
+/* We do not have shadow page tables, hence the empty hooks */
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return 0;
+}
+
+static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return 0;
+}
+
+struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
+struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
+
+u64 kvm_call_hyp(void *hypfn, ...);
+
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		int exception_index);
+
+int kvm_perf_init(void);
+int kvm_perf_teardown(void);
+
+#endif /* __ARM64_KVM_HOST_H__ */

From 56a8fbf4b8d368562273336cf8f43068aaed91a3 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:29:50 +0000
Subject: [PATCH 0452/1185] arm64: KVM: MMIO access backend

Define the necessary structures to perform an MMIO access.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit d7246bf3571a82834984a42db52261525bc11159)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_mmio.h | 59 +++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_mmio.h

diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
new file mode 100644
index 000000000000..fc2f689c0694
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_MMIO_H__
+#define __ARM64_KVM_MMIO_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+/*
+ * This is annoying. The mmio code requires this, even if we don't
+ * need any decoding. To be fixed.
+ */
+struct kvm_decode {
+	unsigned long rt;
+	bool sign_extend;
+};
+
+/*
+ * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
+ * which is an anonymous type. Use our own type instead.
+ */
+struct kvm_exit_mmio {
+	phys_addr_t	phys_addr;
+	u8		data[8];
+	u32		len;
+	bool		is_write;
+};
+
+static inline void kvm_prepare_mmio(struct kvm_run *run,
+				    struct kvm_exit_mmio *mmio)
+{
+	run->mmio.phys_addr	= mmio->phys_addr;
+	run->mmio.len		= mmio->len;
+	run->mmio.is_write	= mmio->is_write;
+	memcpy(run->mmio.data, mmio->data, mmio->len);
+	run->exit_reason	= KVM_EXIT_MMIO;
+}
+
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		 phys_addr_t fault_ipa);
+
+#endif	/* __ARM64_KVM_MMIO_H__ */

From f98733fb49495d20251f55dc1ddff88f5761a797 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:37:02 +0000
Subject: [PATCH 0453/1185] arm64: KVM: guest one-reg interface

Let userspace play with the guest registers.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 2f4a07c5f9fe4a5cdb9867e1e2fcab3165846ea7)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/guest.c | 259 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 259 insertions(+)
 create mode 100644 arch/arm64/kvm/guest.c

diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
new file mode 100644
index 000000000000..3d7518a7ebaa
--- /dev/null
+++ b/arch/arm64/kvm/guest.c
@@ -0,0 +1,259 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/guest.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <asm/cputype.h>
+#include <asm/uaccess.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	{ NULL }
+};
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+	return 0;
+}
+
+static u64 core_reg_offset_from_id(u64 id)
+{
+	return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
+}
+
+static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	/*
+	 * Because the kvm_regs structure is a mix of 32, 64 and
+	 * 128bit fields, we index it as if it was a 32bit
+	 * array. Hence below, nr_regs is the number of entries, and
+	 * off the index in the "array".
+	 */
+	__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
+	struct kvm_regs *regs = vcpu_gp_regs(vcpu);
+	int nr_regs = sizeof(*regs) / sizeof(__u32);
+	u32 off;
+
+	/* Our ID is an index into the kvm_regs struct. */
+	off = core_reg_offset_from_id(reg->id);
+	if (off >= nr_regs ||
+	    (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
+		return -ENOENT;
+
+	if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
+	struct kvm_regs *regs = vcpu_gp_regs(vcpu);
+	int nr_regs = sizeof(*regs) / sizeof(__u32);
+	__uint128_t tmp;
+	void *valp = &tmp;
+	u64 off;
+	int err = 0;
+
+	/* Our ID is an index into the kvm_regs struct. */
+	off = core_reg_offset_from_id(reg->id);
+	if (off >= nr_regs ||
+	    (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
+		return -ENOENT;
+
+	if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
+		return -EINVAL;
+
+	if (copy_from_user(valp, uaddr, KVM_REG_SIZE(reg->id))) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) {
+		u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK;
+		switch (mode) {
+		case PSR_MODE_EL0t:
+		case PSR_MODE_EL1t:
+		case PSR_MODE_EL1h:
+			break;
+		default:
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id));
+out:
+	return err;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	return -EINVAL;
+}
+
+static unsigned long num_core_regs(void)
+{
+	return sizeof(struct kvm_regs) / sizeof(__u32);
+}
+
+/**
+ * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
+ *
+ * This is for all registers.
+ */
+unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
+{
+	return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu);
+}
+
+/**
+ * kvm_arm_copy_reg_indices - get indices of all registers.
+ *
+ * We do core registers right here, then we append system regs.
+ */
+int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	unsigned int i;
+	const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE;
+
+	for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
+		if (put_user(core_reg | i, uindices))
+			return -EFAULT;
+		uindices++;
+	}
+
+	return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
+}
+
+int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	/* We currently use nothing arch-specific in upper 32 bits */
+	if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
+		return -EINVAL;
+
+	/* Register group 16 means we want a core register. */
+	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
+		return get_core_reg(vcpu, reg);
+
+	return kvm_arm_sys_reg_get_reg(vcpu, reg);
+}
+
+int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	/* We currently use nothing arch-specific in upper 32 bits */
+	if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
+		return -EINVAL;
+
+	/* Register group 16 means we set a core register. */
+	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
+		return set_core_reg(vcpu, reg);
+
+	return kvm_arm_sys_reg_set_reg(vcpu, reg);
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	return -EINVAL;
+}
+
+int __attribute_const__ kvm_target_cpu(void)
+{
+	unsigned long implementor = read_cpuid_implementor();
+	unsigned long part_number = read_cpuid_part_number();
+
+	if (implementor != ARM_CPU_IMP_ARM)
+		return -EINVAL;
+
+	switch (part_number) {
+	case ARM_CPU_PART_AEM_V8:
+		return KVM_ARM_TARGET_AEM_V8;
+	case ARM_CPU_PART_FOUNDATION:
+		return KVM_ARM_TARGET_FOUNDATION_V8;
+	case ARM_CPU_PART_CORTEX_A57:
+		/* Currently handled by the generic backend */
+		return KVM_ARM_TARGET_CORTEX_A57;
+	default:
+		return -EINVAL;
+	}
+}
+
+int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+			const struct kvm_vcpu_init *init)
+{
+	unsigned int i;
+	int phys_target = kvm_target_cpu();	/* negative errno if the host CPU is unknown */
+
+	if (phys_target < 0 || init->target != phys_target)
+		return -EINVAL;	/* unsupported host or mismatched target */
+
+	vcpu->arch.target = phys_target;
+	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
+
+	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
+	for (i = 0; i < sizeof(init->features) * 8; i++) {
+		if (init->features[i / 32] & (1U << (i % 32))) { /* 1U: no signed shift */
+			if (i >= KVM_VCPU_MAX_FEATURES)
+				return -ENOENT;
+			set_bit(i, vcpu->arch.features);
+		}
+	}
+
+	/* Now we know what it is, we can reset it. */
+	return kvm_reset_vcpu(vcpu);
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+				  struct kvm_translation *tr)
+{
+	return -EINVAL;
+}

From f756a17fc0ab844b88c1282cf5df2ffb03185825 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 17 Dec 2012 17:07:52 +0000
Subject: [PATCH 0454/1185] arm64: KVM: hypervisor initialization code

Provide EL2 with page tables and stack, and set the vectors
to point to the full blown world-switch code.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 092bd143cbb481b4ce1d55247a2987eaaf61f967)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h |  13 ++++
 arch/arm64/kvm/hyp-init.S         | 107 ++++++++++++++++++++++++++++++
 2 files changed, 120 insertions(+)
 create mode 100644 arch/arm64/kvm/hyp-init.S

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 4a2622f5e81f..2500eb6a4d2a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -183,4 +183,17 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
+static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
+				       phys_addr_t pgd_ptr,
+				       unsigned long hyp_stack_ptr,
+				       unsigned long vector_ptr)
+{
+	/*
+	 * Call initialization code, and switch to the full blown
+	 * HYP code.
+	 */
+	kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
+		     hyp_stack_ptr, vector_ptr);
+}
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
new file mode 100644
index 000000000000..ba84e6705e20
--- /dev/null
+++ b/arch/arm64/kvm/hyp-init.S
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+	.text
+	.pushsection	.hyp.idmap.text, "ax"
+
+	.align	11
+
+ENTRY(__kvm_hyp_init)
+	ventry	__invalid		// Synchronous EL2t
+	ventry	__invalid		// IRQ EL2t
+	ventry	__invalid		// FIQ EL2t
+	ventry	__invalid		// Error EL2t
+
+	ventry	__invalid		// Synchronous EL2h
+	ventry	__invalid		// IRQ EL2h
+	ventry	__invalid		// FIQ EL2h
+	ventry	__invalid		// Error EL2h
+
+	ventry	__do_hyp_init		// Synchronous 64-bit EL1
+	ventry	__invalid		// IRQ 64-bit EL1
+	ventry	__invalid		// FIQ 64-bit EL1
+	ventry	__invalid		// Error 64-bit EL1
+
+	ventry	__invalid		// Synchronous 32-bit EL1
+	ventry	__invalid		// IRQ 32-bit EL1
+	ventry	__invalid		// FIQ 32-bit EL1
+	ventry	__invalid		// Error 32-bit EL1
+
+__invalid:
+	b	.
+
+	/*
+	 * x0: HYP boot pgd
+	 * x1: HYP pgd
+	 * x2: HYP stack
+	 * x3: HYP vectors
+	 */
+__do_hyp_init:
+
+	msr	ttbr0_el2, x0
+
+	mrs	x4, tcr_el1
+	ldr	x5, =TCR_EL2_MASK
+	and	x4, x4, x5
+	ldr	x5, =TCR_EL2_FLAGS
+	orr	x4, x4, x5
+	msr	tcr_el2, x4
+
+	ldr	x4, =VTCR_EL2_FLAGS
+	msr	vtcr_el2, x4
+
+	mrs	x4, mair_el1
+	msr	mair_el2, x4
+	isb
+
+	mov	x4, #SCTLR_EL2_FLAGS
+	msr	sctlr_el2, x4
+	isb
+
+	/* MMU is now enabled. Get ready for the trampoline dance */
+	ldr	x4, =TRAMPOLINE_VA
+	adr	x5, target
+	bfi	x4, x5, #0, #PAGE_SHIFT
+	br	x4
+
+target: /* We're now in the trampoline code, switch page tables */
+	msr	ttbr0_el2, x1
+	isb
+
+	/* Invalidate the old TLBs */
+	tlbi	alle2
+	dsb	sy
+
+	/* Set the stack and new vectors */
+	kern_hyp_va	x2
+	mov	sp, x2
+	kern_hyp_va	x3
+	msr	vbar_el2, x3
+
+	/* Hello, World! */
+	eret
+ENDPROC(__kvm_hyp_init)
+
+	.ltorg
+
+	.popsection

From 066bddb132227937d1f68dc5ec4554786e393345 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:40:18 +0000
Subject: [PATCH 0455/1185] arm64: KVM: HYP mode world switch implementation

The HYP mode world switch in all its glory.

Implements save/restore of host/guest registers, EL2 trapping,
IPA resolution, and additional services (tlb invalidation).

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 55c7401d92e16360e0987afe39355f1eb6300f31)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kernel/asm-offsets.c |  33 ++
 arch/arm64/kvm/hyp.S            | 617 ++++++++++++++++++++++++++++++++
 2 files changed, 650 insertions(+)
 create mode 100644 arch/arm64/kvm/hyp.S

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index a2a4d810bea3..49c162c03b69 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -104,5 +104,38 @@ int main(void)
   BLANK();
   DEFINE(TZ_MINWEST,		offsetof(struct timezone, tz_minuteswest));
   DEFINE(TZ_DSTTIME,		offsetof(struct timezone, tz_dsttime));
+  BLANK();
+#ifdef CONFIG_KVM_ARM_HOST
+  DEFINE(VCPU_CONTEXT,		offsetof(struct kvm_vcpu, arch.ctxt));
+  DEFINE(CPU_GP_REGS,		offsetof(struct kvm_cpu_context, gp_regs));
+  DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_regs, regs));
+  DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
+  DEFINE(CPU_SP_EL1,		offsetof(struct kvm_regs, sp_el1));
+  DEFINE(CPU_ELR_EL1,		offsetof(struct kvm_regs, elr_el1));
+  DEFINE(CPU_SPSR,		offsetof(struct kvm_regs, spsr));
+  DEFINE(CPU_SYSREGS,		offsetof(struct kvm_cpu_context, sys_regs));
+  DEFINE(VCPU_ESR_EL2,		offsetof(struct kvm_vcpu, arch.fault.esr_el2));
+  DEFINE(VCPU_FAR_EL2,		offsetof(struct kvm_vcpu, arch.fault.far_el2));
+  DEFINE(VCPU_HPFAR_EL2,	offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
+  DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
+  DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
+  DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
+  DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
+  DEFINE(VCPU_TIMER_CNTV_CVAL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
+  DEFINE(KVM_TIMER_CNTVOFF,	offsetof(struct kvm, arch.timer.cntvoff));
+  DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
+  DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
+  DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
+  DEFINE(VGIC_CPU_HCR,		offsetof(struct vgic_cpu, vgic_hcr));
+  DEFINE(VGIC_CPU_VMCR,		offsetof(struct vgic_cpu, vgic_vmcr));
+  DEFINE(VGIC_CPU_MISR,		offsetof(struct vgic_cpu, vgic_misr));
+  DEFINE(VGIC_CPU_EISR,		offsetof(struct vgic_cpu, vgic_eisr));
+  DEFINE(VGIC_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_elrsr));
+  DEFINE(VGIC_CPU_APR,		offsetof(struct vgic_cpu, vgic_apr));
+  DEFINE(VGIC_CPU_LR,		offsetof(struct vgic_cpu, vgic_lr));
+  DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
+  DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
+  DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
+#endif
   return 0;
 }
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
new file mode 100644
index 000000000000..0b18c2e1e043
--- /dev/null
+++ b/arch/arm64/kvm/hyp.S
@@ -0,0 +1,617 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/fpsimdmacros.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+#define CPU_GP_REG_OFFSET(x)	(CPU_GP_REGS + x)
+#define CPU_XREG_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
+#define CPU_SPSR_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_SPSR + 8*x)
+#define CPU_SYSREG_OFFSET(x)	(CPU_SYSREGS + 8*x)
+
+	.text
+	.pushsection	.hyp.text, "ax"
+	.align	PAGE_SHIFT
+
+__kvm_hyp_code_start:
+	.globl __kvm_hyp_code_start
+
+.macro save_common_regs
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	add	x3, x2, #CPU_XREG_OFFSET(19)
+	stp	x19, x20, [x3]
+	stp	x21, x22, [x3, #16]
+	stp	x23, x24, [x3, #32]
+	stp	x25, x26, [x3, #48]
+	stp	x27, x28, [x3, #64]
+	stp	x29, lr, [x3, #80]
+
+	mrs	x19, sp_el0
+	mrs	x20, elr_el2		// EL1 PC
+	mrs	x21, spsr_el2		// EL1 pstate
+
+	stp	x19, x20, [x3, #96]
+	str	x21, [x3, #112]
+
+	mrs	x22, sp_el1
+	mrs	x23, elr_el1
+	mrs	x24, spsr_el1
+
+	str	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
+	str	x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
+	str	x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
+.endm
+
+.macro restore_common_regs
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	ldr	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
+	ldr	x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
+	ldr	x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
+
+	msr	sp_el1, x22
+	msr	elr_el1, x23
+	msr	spsr_el1, x24
+
+	add	x3, x2, #CPU_XREG_OFFSET(31)    // SP_EL0
+	ldp	x19, x20, [x3]
+	ldr	x21, [x3, #16]
+
+	msr	sp_el0, x19
+	msr	elr_el2, x20 				// EL1 PC
+	msr	spsr_el2, x21 				// EL1 pstate
+
+	add	x3, x2, #CPU_XREG_OFFSET(19)
+	ldp	x19, x20, [x3]
+	ldp	x21, x22, [x3, #16]
+	ldp	x23, x24, [x3, #32]
+	ldp	x25, x26, [x3, #48]
+	ldp	x27, x28, [x3, #64]
+	ldp	x29, lr, [x3, #80]
+.endm
+
+.macro save_host_regs
+	save_common_regs
+.endm
+
+.macro restore_host_regs
+	restore_common_regs
+.endm
+
+.macro save_fpsimd
+	// x2: cpu context address
+	// x3, x4: tmp regs
+	add	x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
+	fpsimd_save x3, 4
+.endm
+
+.macro restore_fpsimd
+	// x2: cpu context address
+	// x3, x4: tmp regs
+	add	x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
+	fpsimd_restore x3, 4
+.endm
+
+.macro save_guest_regs
+	// x0 is the vcpu address
+	// x1 is the return code, do not corrupt!
+	// x2 is the cpu context
+	// x3 is a tmp register
+	// Guest's x0-x3 are on the stack
+
+	// Compute base to save registers
+	add	x3, x2, #CPU_XREG_OFFSET(4)
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+	stp	x8, x9, [x3, #32]
+	stp	x10, x11, [x3, #48]
+	stp	x12, x13, [x3, #64]
+	stp	x14, x15, [x3, #80]
+	stp	x16, x17, [x3, #96]
+	str	x18, [x3, #112]
+
+	pop	x6, x7			// x2, x3
+	pop	x4, x5			// x0, x1
+
+	add	x3, x2, #CPU_XREG_OFFSET(0)
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+
+	save_common_regs
+.endm
+
+.macro restore_guest_regs
+	// x0 is the vcpu address.
+	// x2 is the cpu context
+	// x3 is a tmp register
+
+	// Prepare x0-x3 for later restore
+	add	x3, x2, #CPU_XREG_OFFSET(0)
+	ldp	x4, x5, [x3]
+	ldp	x6, x7, [x3, #16]
+	push	x4, x5		// Push x0-x3 on the stack
+	push	x6, x7
+
+	// x4-x18
+	ldp	x4, x5, [x3, #32]
+	ldp	x6, x7, [x3, #48]
+	ldp	x8, x9, [x3, #64]
+	ldp	x10, x11, [x3, #80]
+	ldp	x12, x13, [x3, #96]
+	ldp	x14, x15, [x3, #112]
+	ldp	x16, x17, [x3, #128]
+	ldr	x18, [x3, #144]
+
+	// x19-x29, lr, sp*, elr*, spsr*
+	restore_common_regs
+
+	// Last bits of the 64bit state
+	pop	x2, x3
+	pop	x0, x1
+
+	// Do not touch any register after this!
+.endm
+
+/*
+ * Macros to perform system register save/restore.
+ *
+ * Ordering here is absolutely critical, and must be kept consistent
+ * in {save,restore}_sysregs, {save,restore}_guest_32bit_state,
+ * and in kvm_asm.h.
+ *
+ * In other words, don't touch any of these unless you know what
+ * you are doing.
+ */
+.macro save_sysregs
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1)
+
+	mrs	x4,	vmpidr_el2
+	mrs	x5,	csselr_el1
+	mrs	x6,	sctlr_el1
+	mrs	x7,	actlr_el1
+	mrs	x8,	cpacr_el1
+	mrs	x9,	ttbr0_el1
+	mrs	x10,	ttbr1_el1
+	mrs	x11,	tcr_el1
+	mrs	x12,	esr_el1
+	mrs	x13, 	afsr0_el1
+	mrs	x14,	afsr1_el1
+	mrs	x15,	far_el1
+	mrs	x16,	mair_el1
+	mrs	x17,	vbar_el1
+	mrs	x18,	contextidr_el1
+	mrs	x19,	tpidr_el0
+	mrs	x20,	tpidrro_el0
+	mrs	x21,	tpidr_el1
+	mrs	x22, 	amair_el1
+	mrs	x23, 	cntkctl_el1
+
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+	stp	x8, x9, [x3, #32]
+	stp	x10, x11, [x3, #48]
+	stp	x12, x13, [x3, #64]
+	stp	x14, x15, [x3, #80]
+	stp	x16, x17, [x3, #96]
+	stp	x18, x19, [x3, #112]
+	stp	x20, x21, [x3, #128]
+	stp	x22, x23, [x3, #144]
+.endm
+
+.macro restore_sysregs
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1)
+
+	ldp	x4, x5, [x3]
+	ldp	x6, x7, [x3, #16]
+	ldp	x8, x9, [x3, #32]
+	ldp	x10, x11, [x3, #48]
+	ldp	x12, x13, [x3, #64]
+	ldp	x14, x15, [x3, #80]
+	ldp	x16, x17, [x3, #96]
+	ldp	x18, x19, [x3, #112]
+	ldp	x20, x21, [x3, #128]
+	ldp	x22, x23, [x3, #144]
+
+	msr	vmpidr_el2,	x4
+	msr	csselr_el1,	x5
+	msr	sctlr_el1,	x6
+	msr	actlr_el1,	x7
+	msr	cpacr_el1,	x8
+	msr	ttbr0_el1,	x9
+	msr	ttbr1_el1,	x10
+	msr	tcr_el1,	x11
+	msr	esr_el1,	x12
+	msr	afsr0_el1,	x13
+	msr	afsr1_el1,	x14
+	msr	far_el1,	x15
+	msr	mair_el1,	x16
+	msr	vbar_el1,	x17
+	msr	contextidr_el1,	x18
+	msr	tpidr_el0,	x19
+	msr	tpidrro_el0,	x20
+	msr	tpidr_el1,	x21
+	msr	amair_el1,	x22
+	msr	cntkctl_el1,	x23
+.endm
+
+.macro activate_traps
+	ldr	x2, [x0, #VCPU_IRQ_LINES]
+	ldr	x1, [x0, #VCPU_HCR_EL2]
+	orr	x2, x2, x1
+	msr	hcr_el2, x2
+
+	ldr	x2, =(CPTR_EL2_TTA)
+	msr	cptr_el2, x2
+
+	ldr	x2, =(1 << 15)	// Trap CP15 Cr=15
+	msr	hstr_el2, x2
+
+	mrs	x2, mdcr_el2
+	and	x2, x2, #MDCR_EL2_HPMN_MASK
+	orr	x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR)
+	msr	mdcr_el2, x2
+.endm
+
+.macro deactivate_traps
+	mov	x2, #HCR_RW
+	msr	hcr_el2, x2
+	msr	cptr_el2, xzr
+	msr	hstr_el2, xzr
+
+	mrs	x2, mdcr_el2
+	and	x2, x2, #MDCR_EL2_HPMN_MASK
+	msr	mdcr_el2, x2
+.endm
+
+.macro activate_vm
+	ldr	x1, [x0, #VCPU_KVM]
+	kern_hyp_va	x1
+	ldr	x2, [x1, #KVM_VTTBR]
+	msr	vttbr_el2, x2
+.endm
+
+.macro deactivate_vm
+	msr	vttbr_el2, xzr
+.endm
+
+__save_sysregs:
+	save_sysregs
+	ret
+
+__restore_sysregs:
+	restore_sysregs
+	ret
+
+__save_fpsimd:
+	save_fpsimd
+	ret
+
+__restore_fpsimd:
+	restore_fpsimd
+	ret
+
+/*
+ * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+ *
+ * This is the world switch. The first half of the function
+ * deals with entering the guest, and anything from __kvm_vcpu_return
+ * to the end of the function deals with reentering the host.
+ * On the enter path, only x0 (vcpu pointer) must be preserved until
+ * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception
+ * code) must both be preserved until the epilogue.
+ * In both cases, x2 points to the CPU context we're saving/restoring from/to.
+ */
+ENTRY(__kvm_vcpu_run)
+	kern_hyp_va	x0
+	msr	tpidr_el2, x0	// Save the vcpu register
+
+	// Host context
+	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
+	kern_hyp_va x2
+
+	save_host_regs
+	bl __save_fpsimd
+	bl __save_sysregs
+
+	activate_traps
+	activate_vm
+
+	// Guest context
+	add	x2, x0, #VCPU_CONTEXT
+
+	bl __restore_sysregs
+	bl __restore_fpsimd
+	restore_guest_regs
+
+	// That's it, no more messing around.
+	eret
+
+__kvm_vcpu_return:
+	// Assume x0 is the vcpu pointer, x1 the return code
+	// Guest's x0-x3 are on the stack
+
+	// Guest context
+	add	x2, x0, #VCPU_CONTEXT
+
+	save_guest_regs
+	bl __save_fpsimd
+	bl __save_sysregs
+
+	deactivate_traps
+	deactivate_vm
+
+	// Host context
+	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
+	kern_hyp_va x2
+
+	bl __restore_sysregs
+	bl __restore_fpsimd
+	restore_host_regs
+
+	mov	x0, x1
+	ret
+END(__kvm_vcpu_run)
+
+// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+ENTRY(__kvm_tlb_flush_vmid_ipa)
+	kern_hyp_va	x0
+	ldr	x2, [x0, #KVM_VTTBR]
+	msr	vttbr_el2, x2
+	isb
+
+	/*
+	 * We could do so much better if we had the VA as well.
+	 * Instead, we invalidate Stage-2 for this IPA, and the
+	 * whole of Stage-1. Weep...
+	 */
+	tlbi	ipas2e1is, x1
+	dsb	sy
+	tlbi	vmalle1is
+	dsb	sy
+	isb
+
+	msr	vttbr_el2, xzr
+	ret
+ENDPROC(__kvm_tlb_flush_vmid_ipa)
+
+ENTRY(__kvm_flush_vm_context)
+	tlbi	alle1is
+	ic	ialluis
+	dsb	sy
+	ret
+ENDPROC(__kvm_flush_vm_context)
+
+__kvm_hyp_panic:
+	// Guess the context by looking at VTTBR:
+	// If zero, then we're already a host.
+	// Otherwise restore a minimal host context before panicking.
+	mrs	x0, vttbr_el2
+	cbz	x0, 1f
+
+	mrs	x0, tpidr_el2
+
+	deactivate_traps
+	deactivate_vm
+
+	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
+	kern_hyp_va x2
+
+	bl __restore_sysregs
+
+1:	adr	x0, __hyp_panic_str
+	adr	x1, 2f
+	ldp	x2, x3, [x1]
+	sub	x0, x0, x2
+	add	x0, x0, x3
+	mrs	x1, spsr_el2
+	mrs	x2, elr_el2
+	mrs	x3, esr_el2
+	mrs	x4, far_el2
+	mrs	x5, hpfar_el2
+	mrs	x6, par_el1
+	mrs	x7, tpidr_el2
+
+	mov	lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
+		      PSR_MODE_EL1h)
+	msr	spsr_el2, lr
+	ldr	lr, =panic
+	msr	elr_el2, lr
+	eret
+
+	.align	3
+2:	.quad	HYP_PAGE_OFFSET
+	.quad	PAGE_OFFSET
+ENDPROC(__kvm_hyp_panic)
+
+__hyp_panic_str:
+	.ascii	"HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0"
+
+	.align	2
+
+ENTRY(kvm_call_hyp)
+	hvc	#0
+	ret
+ENDPROC(kvm_call_hyp)
+
+.macro invalid_vector	label, target
+	.align	2
+\label:
+	b \target
+ENDPROC(\label)
+.endm
+
+	/* None of these should ever happen */
+	invalid_vector	el2t_sync_invalid, __kvm_hyp_panic
+	invalid_vector	el2t_irq_invalid, __kvm_hyp_panic
+	invalid_vector	el2t_fiq_invalid, __kvm_hyp_panic
+	invalid_vector	el2t_error_invalid, __kvm_hyp_panic
+	invalid_vector	el2h_sync_invalid, __kvm_hyp_panic
+	invalid_vector	el2h_irq_invalid, __kvm_hyp_panic
+	invalid_vector	el2h_fiq_invalid, __kvm_hyp_panic
+	invalid_vector	el2h_error_invalid, __kvm_hyp_panic
+	invalid_vector	el1_sync_invalid, __kvm_hyp_panic
+	invalid_vector	el1_irq_invalid, __kvm_hyp_panic
+	invalid_vector	el1_fiq_invalid, __kvm_hyp_panic
+	invalid_vector	el1_error_invalid, __kvm_hyp_panic
+
+el1_sync:					// Guest trapped into EL2
+	push	x0, x1
+	push	x2, x3
+
+	mrs	x1, esr_el2
+	lsr	x2, x1, #ESR_EL2_EC_SHIFT
+
+	cmp	x2, #ESR_EL2_EC_HVC64
+	b.ne	el1_trap
+
+	mrs	x3, vttbr_el2			// If vttbr is valid, the 64bit guest
+	cbnz	x3, el1_trap			// called HVC
+
+	/* Here, we're pretty sure the host called HVC. */
+	pop	x2, x3
+	pop	x0, x1
+
+	push	lr, xzr
+
+	/*
+	 * Compute the function address in EL2, and shuffle the parameters.
+	 */
+	kern_hyp_va	x0
+	mov	lr, x0
+	mov	x0, x1
+	mov	x1, x2
+	mov	x2, x3
+	blr	lr
+
+	pop	lr, xzr
+	eret
+
+el1_trap:
+	/*
+	 * x1: ESR
+	 * x2: ESR_EC
+	 */
+	cmp	x2, #ESR_EL2_EC_DABT
+	mov	x0, #ESR_EL2_EC_IABT
+	ccmp	x2, x0, #4, ne
+	b.ne	1f		// Not an abort we care about
+
+	/* This is an abort. Check for permission fault */
+	and	x2, x1, #ESR_EL2_FSC_TYPE
+	cmp	x2, #FSC_PERM
+	b.ne	1f		// Not a permission fault
+
+	/*
+	 * Check for Stage-1 page table walk, which is guaranteed
+	 * to give a valid HPFAR_EL2.
+	 */
+	tbnz	x1, #7, 1f	// S1PTW is set
+
+	/*
+	 * Permission fault, HPFAR_EL2 is invalid.
+	 * Resolve the IPA the hard way using the guest VA.
+	 * Stage-1 translation already validated the memory access rights.
+	 * As such, we can use the EL1 translation regime, and don't have
+	 * to distinguish between EL0 and EL1 access.
+	 */
+	mrs	x2, far_el2
+	at	s1e1r, x2
+	isb
+
+	/* Read result */
+	mrs	x3, par_el1
+	tbnz	x3, #0, 3f		// Bail out if we failed the translation
+	ubfx	x3, x3, #12, #36	// Extract IPA
+	lsl	x3, x3, #4		// and present it like HPFAR
+	b	2f
+
+1:	mrs	x3, hpfar_el2
+	mrs	x2, far_el2
+
+2:	mrs	x0, tpidr_el2
+	str	x1, [x0, #VCPU_ESR_EL2]
+	str	x2, [x0, #VCPU_FAR_EL2]
+	str	x3, [x0, #VCPU_HPFAR_EL2]
+
+	mov	x1, #ARM_EXCEPTION_TRAP
+	b	__kvm_vcpu_return
+
+	/*
+	 * Translation failed. Just return to the guest and
+	 * let it fault again. Another CPU is probably playing
+	 * behind our back.
+	 */
+3:	pop	x2, x3
+	pop	x0, x1
+
+	eret
+
+el1_irq:
+	push	x0, x1
+	push	x2, x3
+	mrs	x0, tpidr_el2
+	mov	x1, #ARM_EXCEPTION_IRQ
+	b	__kvm_vcpu_return
+
+	.ltorg
+
+	.align 11
+
+ENTRY(__kvm_hyp_vector)
+	ventry	el2t_sync_invalid		// Synchronous EL2t
+	ventry	el2t_irq_invalid		// IRQ EL2t
+	ventry	el2t_fiq_invalid		// FIQ EL2t
+	ventry	el2t_error_invalid		// Error EL2t
+
+	ventry	el2h_sync_invalid		// Synchronous EL2h
+	ventry	el2h_irq_invalid		// IRQ EL2h
+	ventry	el2h_fiq_invalid		// FIQ EL2h
+	ventry	el2h_error_invalid		// Error EL2h
+
+	ventry	el1_sync			// Synchronous 64-bit EL1
+	ventry	el1_irq				// IRQ 64-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 64-bit EL1
+	ventry	el1_error_invalid		// Error 64-bit EL1
+
+	ventry	el1_sync			// Synchronous 32-bit EL1
+	ventry	el1_irq				// IRQ 32-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 32-bit EL1
+	ventry	el1_error_invalid		// Error 32-bit EL1
+ENDPROC(__kvm_hyp_vector)
+
+__kvm_hyp_code_end:
+	.globl	__kvm_hyp_code_end
+
+	.popsection

From b0353aa3722552c9058737452c9bec86d73ccdad Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:40:41 +0000
Subject: [PATCH 0456/1185] arm64: KVM: Exit handling

Handle the exit of a VM, decoding the exit reason from HYP mode
and calling the corresponding handler.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit c4b1afd022e93eada6ee4b209be37101cd4b3494)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/handle_exit.c | 119 +++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 arch/arm64/kvm/handle_exit.c

diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
new file mode 100644
index 000000000000..c65d1154f969
--- /dev/null
+++ b/arch/arm64/kvm/handle_exit.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/handle_exit.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
+
+typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
+
+static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	/*
+	 * Guest called HVC instruction:
+	 * Let it know we don't want that by injecting an undefined exception.
+	 */
+	kvm_debug("hvc: %x (at %08lx)", kvm_vcpu_get_hsr(vcpu) & ((1 << 16) - 1),
+		  *vcpu_pc(vcpu));
+	kvm_debug("         HSR: %8x", kvm_vcpu_get_hsr(vcpu));
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	/* We don't support SMC; don't do that. */
+	kvm_debug("smc: at %08lx", *vcpu_pc(vcpu));
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+/**
+ * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * @vcpu:	the vcpu pointer
+ *
+ * Simply call kvm_vcpu_block(), which will halt execution of
+ * world-switches and schedule other host processes until there is an
+ * incoming IRQ or FIQ to the VM.
+ */
+static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	kvm_vcpu_block(vcpu);
+	return 1;
+}
+
+static exit_handle_fn arm_exit_handlers[] = {
+	[ESR_EL2_EC_WFI]	= kvm_handle_wfi,
+	[ESR_EL2_EC_HVC64]	= handle_hvc,
+	[ESR_EL2_EC_SMC64]	= handle_smc,
+	[ESR_EL2_EC_SYS64]	= kvm_handle_sys_reg,
+	[ESR_EL2_EC_IABT]	= kvm_handle_guest_abort,
+	[ESR_EL2_EC_DABT]	= kvm_handle_guest_abort,
+};
+
+static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
+{
+	u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+
+	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
+	    !arm_exit_handlers[hsr_ec]) {
+		kvm_err("Unknown exception class: hsr: %#08x\n",
+			(unsigned int)kvm_vcpu_get_hsr(vcpu));
+		BUG();
+	}
+
+	return arm_exit_handlers[hsr_ec];
+}
+
+/*
+ * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
+ * proper exit to userspace.
+ */
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		       int exception_index)
+{
+	exit_handle_fn exit_handler;
+
+	switch (exception_index) {
+	case ARM_EXCEPTION_IRQ:
+		return 1;
+	case ARM_EXCEPTION_TRAP:
+		/*
+		 * See ARM ARM B1.14.1: "Hyp traps on instructions
+		 * that fail their condition code check"
+		 */
+		if (!kvm_condition_valid(vcpu)) {
+			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+			return 1;
+		}
+
+		exit_handler = kvm_get_exit_handler(vcpu);
+
+		return exit_handler(vcpu, run);
+	default:
+		kvm_pr_unimpl("Unsupported exception type: %d\n",
+			      exception_index);
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		return 0;
+	}
+}

From 4f0c6d89a863d5a86d2dea50a9e114559e73d97f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 7 Dec 2012 17:54:54 +0000
Subject: [PATCH 0457/1185] arm64: KVM: Plug the VGIC

Add support for the in-kernel GIC emulation.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 1f17f3b6044d8a81a74dc6c962b3b38a7336106b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp.S | 88 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 0b18c2e1e043..8dc27a367d77 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -306,6 +306,90 @@ __kvm_hyp_code_start:
 	msr	vttbr_el2, xzr
 .endm
 
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+.macro save_vgic_state
+	/* Get VGIC VCTRL base into x2 */
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va	x2
+	ldr	x2, [x2, #KVM_VGIC_VCTRL]
+	kern_hyp_va	x2
+	cbz	x2, 2f		// disabled
+
+	/* Compute the address of struct vgic_cpu */
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	/* Save all interesting registers */
+	ldr	w4, [x2, #GICH_HCR]
+	ldr	w5, [x2, #GICH_VMCR]
+	ldr	w6, [x2, #GICH_MISR]
+	ldr	w7, [x2, #GICH_EISR0]
+	ldr	w8, [x2, #GICH_EISR1]
+	ldr	w9, [x2, #GICH_ELRSR0]
+	ldr	w10, [x2, #GICH_ELRSR1]
+	ldr	w11, [x2, #GICH_APR]
+
+	str	w4, [x3, #VGIC_CPU_HCR]
+	str	w5, [x3, #VGIC_CPU_VMCR]
+	str	w6, [x3, #VGIC_CPU_MISR]
+	str	w7, [x3, #VGIC_CPU_EISR]
+	str	w8, [x3, #(VGIC_CPU_EISR + 4)]
+	str	w9, [x3, #VGIC_CPU_ELRSR]
+	str	w10, [x3, #(VGIC_CPU_ELRSR + 4)]
+	str	w11, [x3, #VGIC_CPU_APR]
+
+	/* Clear GICH_HCR */
+	str	wzr, [x2, #GICH_HCR]
+
+	/* Save list registers */
+	add	x2, x2, #GICH_LR0
+	ldr	w4, [x3, #VGIC_CPU_NR_LR]
+	add	x3, x3, #VGIC_CPU_LR
+1:	ldr	w5, [x2], #4
+	str	w5, [x3], #4
+	sub	w4, w4, #1
+	cbnz	w4, 1b
+2:
+.endm
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ */
+.macro restore_vgic_state
+	/* Get VGIC VCTRL base into x2 */
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va	x2
+	ldr	x2, [x2, #KVM_VGIC_VCTRL]
+	kern_hyp_va	x2
+	cbz	x2, 2f		// disabled
+
+	/* Compute the address of struct vgic_cpu */
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	/* We only restore a minimal set of registers */
+	ldr	w4, [x3, #VGIC_CPU_HCR]
+	ldr	w5, [x3, #VGIC_CPU_VMCR]
+	ldr	w6, [x3, #VGIC_CPU_APR]
+
+	str	w4, [x2, #GICH_HCR]
+	str	w5, [x2, #GICH_VMCR]
+	str	w6, [x2, #GICH_APR]
+
+	/* Restore list registers */
+	add	x2, x2, #GICH_LR0
+	ldr	w4, [x3, #VGIC_CPU_NR_LR]
+	add	x3, x3, #VGIC_CPU_LR
+1:	ldr	w5, [x3], #4
+	str	w5, [x2], #4
+	sub	w4, w4, #1
+	cbnz	w4, 1b
+2:
+.endm
+
 __save_sysregs:
 	save_sysregs
 	ret
@@ -348,6 +432,8 @@ ENTRY(__kvm_vcpu_run)
 	activate_traps
 	activate_vm
 
+	restore_vgic_state
+
 	// Guest context
 	add	x2, x0, #VCPU_CONTEXT
 
@@ -369,6 +455,8 @@ __kvm_vcpu_return:
 	bl __save_fpsimd
 	bl __save_sysregs
 
+	save_vgic_state
+
 	deactivate_traps
 	deactivate_vm
 

From 615fd459aa42eb0450667fb191c1931a1da749b3 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 30 May 2013 18:31:28 +0100
Subject: [PATCH 0458/1185] ARM: KVM: timer: allow DT matching for ARMv8 cores

ARMv8 cores have the exact same timer as ARMv7 cores. Make sure the
KVM timer code can match it in the device tree.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit f61701e0a24a09aa4a44baf24e57dcc5e706afa8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/arch_timer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 2d00b2925780..6f485eaf643b 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -195,6 +195,7 @@ static struct notifier_block kvm_timer_cpu_nb = {
 
 static const struct of_device_id arch_timer_of_match[] = {
 	{ .compatible	= "arm,armv7-timer",	},
+	{ .compatible	= "arm,armv8-timer",	},
 	{},
 };
 

From 5b12bf6aa91a284eb32fdb5f02ebc770149dbf50 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 7 Dec 2012 17:52:03 +0000
Subject: [PATCH 0459/1185] arm64: KVM: Plug the arch timer

Add support for the in-kernel timer emulation.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 003300de6c3e51934fb52eb2677f6f4fb4996cbd)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp.S   | 56 ++++++++++++++++++++++++++++++++++++++++++
 arch/arm64/kvm/reset.c | 12 +++++++++
 2 files changed, 68 insertions(+)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 8dc27a367d77..8b510835b440 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -390,6 +390,60 @@ __kvm_hyp_code_start:
 2:
 .endm
 
+.macro save_timer_state
+	// x0: vcpu pointer
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va x2
+	ldr	w3, [x2, #KVM_TIMER_ENABLED]
+	cbz	w3, 1f
+
+	mrs	x3, cntv_ctl_el0
+	and	x3, x3, #3
+	str	w3, [x0, #VCPU_TIMER_CNTV_CTL]
+	bic	x3, x3, #1		// Clear Enable
+	msr	cntv_ctl_el0, x3
+
+	isb
+
+	mrs	x3, cntv_cval_el0
+	str	x3, [x0, #VCPU_TIMER_CNTV_CVAL]
+
+1:
+	// Allow physical timer/counter access for the host
+	mrs	x2, cnthctl_el2
+	orr	x2, x2, #3
+	msr	cnthctl_el2, x2
+
+	// Clear cntvoff for the host
+	msr	cntvoff_el2, xzr
+.endm
+
+.macro restore_timer_state
+	// x0: vcpu pointer
+	// Disallow physical timer access for the guest
+	// Physical counter access is allowed
+	mrs	x2, cnthctl_el2
+	orr	x2, x2, #1
+	bic	x2, x2, #2
+	msr	cnthctl_el2, x2
+
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va x2
+	ldr	w3, [x2, #KVM_TIMER_ENABLED]
+	cbz	w3, 1f
+
+	ldr	x3, [x2, #KVM_TIMER_CNTVOFF]
+	msr	cntvoff_el2, x3
+	ldr	x2, [x0, #VCPU_TIMER_CNTV_CVAL]
+	msr	cntv_cval_el0, x2
+	isb
+
+	ldr	w2, [x0, #VCPU_TIMER_CNTV_CTL]
+	and	x2, x2, #3
+	msr	cntv_ctl_el0, x2
+1:
+.endm
+
 __save_sysregs:
 	save_sysregs
 	ret
@@ -433,6 +487,7 @@ ENTRY(__kvm_vcpu_run)
 	activate_vm
 
 	restore_vgic_state
+	restore_timer_state
 
 	// Guest context
 	add	x2, x0, #VCPU_CONTEXT
@@ -455,6 +510,7 @@ __kvm_vcpu_return:
 	bl __save_fpsimd
 	bl __save_sysregs
 
+	save_timer_state
 	save_vgic_state
 
 	deactivate_traps
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index f6536a06231a..766150ac76ed 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -23,6 +23,8 @@
 #include <linux/kvm_host.h>
 #include <linux/kvm.h>
 
+#include <kvm/arm_arch_timer.h>
+
 #include <asm/cputype.h>
 #include <asm/ptrace.h>
 #include <asm/kvm_arm.h>
@@ -36,6 +38,11 @@ static const struct kvm_regs default_regs_reset = {
 			PSR_F_BIT | PSR_D_BIT),
 };
 
+static const struct kvm_irq_level default_vtimer_irq = {
+	.irq	= 27,
+	.level	= 1,
+};
+
 int kvm_arch_dev_ioctl_check_extension(long ext)
 {
 	int r;
@@ -58,11 +65,13 @@ int kvm_arch_dev_ioctl_check_extension(long ext)
  */
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 {
+	const struct kvm_irq_level *cpu_vtimer_irq;
 	const struct kvm_regs *cpu_reset;
 
 	switch (vcpu->arch.target) {
 	default:
 		cpu_reset = &default_regs_reset;
+		cpu_vtimer_irq = &default_vtimer_irq;
 		break;
 	}
 
@@ -72,5 +81,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	/* Reset system registers */
 	kvm_reset_sys_regs(vcpu);
 
+	/* Reset timer */
+	kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
+
 	return 0;
 }

From 5732aca89a79f4f9446145c8d921963c81b4157f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 12 Dec 2012 18:52:05 +0000
Subject: [PATCH 0460/1185] arm64: KVM: PSCI implementation

Wire the PSCI backend into the exit handling code.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit dcd2e40c1e1cce302498d16d095b0f8a30326f74)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h |  2 +-
 arch/arm64/include/asm/kvm_psci.h | 23 +++++++++++++++++++++++
 arch/arm64/include/uapi/asm/kvm.h | 16 ++++++++++++++++
 arch/arm64/kvm/handle_exit.c      | 16 +++++++---------
 4 files changed, 47 insertions(+), 10 deletions(-)
 create mode 100644 arch/arm64/include/asm/kvm_psci.h

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2500eb6a4d2a..2fdeb326c3ee 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -34,7 +34,7 @@
 #include <kvm/arm_vgic.h>
 #include <kvm/arm_arch_timer.h>
 
-#define KVM_VCPU_MAX_FEATURES 0
+#define KVM_VCPU_MAX_FEATURES 1
 
 /* We don't currently support large pages. */
 #define KVM_HPAGE_GFN_SHIFT(x)	0
diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
new file mode 100644
index 000000000000..e301a4816355
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_psci.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_PSCI_H__
+#define __ARM64_KVM_PSCI_H__
+
+bool kvm_psci_call(struct kvm_vcpu *vcpu);
+
+#endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index ebac919dc0ca..fb60f9037057 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -69,6 +69,8 @@ struct kvm_regs {
 #define KVM_VGIC_V2_DIST_SIZE		0x1000
 #define KVM_VGIC_V2_CPU_SIZE		0x2000
 
+#define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
+
 struct kvm_vcpu_init {
 	__u32 target;
 	__u32 features[7];
@@ -141,6 +143,20 @@ struct kvm_arch_memory_slot {
 /* Highest supported SPI, from VGIC_NR_IRQS */
 #define KVM_ARM_IRQ_GIC_MAX		127
 
+/* PSCI interface */
+#define KVM_PSCI_FN_BASE		0x95c1ba5e
+#define KVM_PSCI_FN(n)			(KVM_PSCI_FN_BASE + (n))
+
+#define KVM_PSCI_FN_CPU_SUSPEND		KVM_PSCI_FN(0)
+#define KVM_PSCI_FN_CPU_OFF		KVM_PSCI_FN(1)
+#define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
+#define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
+
+#define KVM_PSCI_RET_SUCCESS		0
+#define KVM_PSCI_RET_NI			((unsigned long)-1)
+#define KVM_PSCI_RET_INVAL		((unsigned long)-2)
+#define KVM_PSCI_RET_DENIED		((unsigned long)-3)
+
 #endif
 
 #endif /* __ARM_KVM_H__ */
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index c65d1154f969..4766b7f3515e 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -24,26 +24,24 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_psci.h>
 
 typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
 
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	/*
-	 * Guest called HVC instruction:
-	 * Let it know we don't want that by injecting an undefined exception.
-	 */
-	kvm_debug("hvc: %x (at %08lx)", kvm_vcpu_get_hsr(vcpu) & ((1 << 16) - 1),
-		  *vcpu_pc(vcpu));
-	kvm_debug("         HSR: %8x", kvm_vcpu_get_hsr(vcpu));
+	if (kvm_psci_call(vcpu))
+		return 1;
+
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	/* We don't support SMC; don't do that. */
-	kvm_debug("smc: at %08lx", *vcpu_pc(vcpu));
+	if (kvm_psci_call(vcpu))
+		return 1;
+
 	kvm_inject_undefined(vcpu);
 	return 1;
 }

From ea4ebae1594250431731dcdacd6da33b865fd3b1 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:41:44 +0000
Subject: [PATCH 0461/1185] arm64: KVM: Build system integration

Only the Makefile is plugged in. The Kconfig stuff is in a separate
patch to allow for an easier merge process.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 6211753fdfd05af9e08f54c8d0ba3ee516034878)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/Makefile     |  1 +
 arch/arm64/kvm/Makefile | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+)
 create mode 100644 arch/arm64/kvm/Makefile

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index b6ccf8a36e2d..7ab6b358cc35 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -37,6 +37,7 @@ TEXT_OFFSET := 0x00080000
 export	TEXT_OFFSET GZFLAGS
 
 core-y		+= arch/arm64/kernel/ arch/arm64/mm/
+core-$(CONFIG_KVM) += arch/arm64/kvm/
 libs-y		:= arch/arm64/lib/ $(libs-y)
 libs-y		+= $(LIBGCC)
 
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
new file mode 100644
index 000000000000..dca110556683
--- /dev/null
+++ b/arch/arm64/kvm/Makefile
@@ -0,0 +1,23 @@
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm
+CFLAGS_arm.o := -I.
+CFLAGS_mmu.o := -I.
+
+KVM=../../../virt/kvm
+ARM=../../../arch/arm/kvm
+
+obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
+
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
+
+kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o
+kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
+kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
+
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o

From 8ee55043e2fc5e6c90209f478c70b600dce523a9 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 6 Feb 2013 19:17:50 +0000
Subject: [PATCH 0462/1185] arm64: KVM: define 32bit specific registers

Define the 32bit specific registers (SPSRs, cp15...).

Most CPU registers are directly mapped to a 64bit register
(r0->x0...). Only the SPSRs have separate registers.

cp15 registers are also mapped into their 64bit counterpart in most
cases.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 40033a614ea3db196d57c477ca328f44eb1e4df0)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h  | 38 ++++++++++++++++++++++++++++++-
 arch/arm64/include/asm/kvm_host.h |  5 +++-
 arch/arm64/include/uapi/asm/kvm.h |  7 +++++-
 3 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 591ac219964a..c92de4163eba 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -42,7 +42,43 @@
 #define	TPIDR_EL1	18	/* Thread ID, Privileged */
 #define	AMAIR_EL1	19	/* Aux Memory Attribute Indirection Register */
 #define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
-#define	NR_SYS_REGS	21
+/* 32bit specific registers. Keep them at the end of the range */
+#define	DACR32_EL2	21	/* Domain Access Control Register */
+#define	IFSR32_EL2	22	/* Instruction Fault Status Register */
+#define	FPEXC32_EL2	23	/* Floating-Point Exception Control Register */
+#define	DBGVCR32_EL2	24	/* Debug Vector Catch Register */
+#define	TEECR32_EL1	25	/* ThumbEE Configuration Register */
+#define	TEEHBR32_EL1	26	/* ThumbEE Handler Base Register */
+#define	NR_SYS_REGS	27
+
+/* 32bit mapping */
+#define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
+#define c0_CSSELR	(CSSELR_EL1 * 2)/* Cache Size Selection Register */
+#define c1_SCTLR	(SCTLR_EL1 * 2)	/* System Control Register */
+#define c1_ACTLR	(ACTLR_EL1 * 2)	/* Auxiliary Control Register */
+#define c1_CPACR	(CPACR_EL1 * 2)	/* Coprocessor Access Control */
+#define c2_TTBR0	(TTBR0_EL1 * 2)	/* Translation Table Base Register 0 */
+#define c2_TTBR0_high	(c2_TTBR0 + 1)	/* TTBR0 top 32 bits */
+#define c2_TTBR1	(TTBR1_EL1 * 2)	/* Translation Table Base Register 1 */
+#define c2_TTBR1_high	(c2_TTBR1 + 1)	/* TTBR1 top 32 bits */
+#define c2_TTBCR	(TCR_EL1 * 2)	/* Translation Table Base Control R. */
+#define c3_DACR		(DACR32_EL2 * 2)/* Domain Access Control Register */
+#define c5_DFSR		(ESR_EL1 * 2)	/* Data Fault Status Register */
+#define c5_IFSR		(IFSR32_EL2 * 2)/* Instruction Fault Status Register */
+#define c5_ADFSR	(AFSR0_EL1 * 2)	/* Auxiliary Data Fault Status R */
+#define c5_AIFSR	(AFSR1_EL1 * 2)	/* Auxiliary Instr Fault Status R */
+#define c6_DFAR		(FAR_EL1 * 2)	/* Data Fault Address Register */
+#define c6_IFAR		(c6_DFAR + 1)	/* Instruction Fault Address Register */
+#define c10_PRRR	(MAIR_EL1 * 2)	/* Primary Region Remap Register */
+#define c10_NMRR	(c10_PRRR + 1)	/* Normal Memory Remap Register */
+#define c12_VBAR	(VBAR_EL1 * 2)	/* Vector Base Address Register */
+#define c13_CID		(CONTEXTIDR_EL1 * 2)	/* Context ID Register */
+#define c13_TID_URW	(TPIDR_EL0 * 2)	/* Thread ID, User R/W */
+#define c13_TID_URO	(TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
+#define c13_TID_PRIV	(TPIDR_EL1 * 2)	/* Thread ID, Privileged */
+#define c10_AMAIR	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
+#define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
+#define NR_CP15_REGS	(NR_SYS_REGS * 2)
 
 #define ARM_EXCEPTION_IRQ	  0
 #define ARM_EXCEPTION_TRAP	  1
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2fdeb326c3ee..3f5830b3ca3f 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -84,7 +84,10 @@ struct kvm_vcpu_fault_info {
 
 struct kvm_cpu_context {
 	struct kvm_regs	gp_regs;
-	u64 sys_regs[NR_SYS_REGS];
+	union {
+		u64 sys_regs[NR_SYS_REGS];
+		u32 cp15[NR_CP15_REGS];
+	};
 };
 
 typedef struct kvm_cpu_context kvm_cpu_context_t;
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index fb60f9037057..5b1110c49df5 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -23,7 +23,12 @@
 #define __ARM_KVM_H__
 
 #define KVM_SPSR_EL1	0
-#define KVM_NR_SPSR	1
+#define KVM_SPSR_SVC	KVM_SPSR_EL1
+#define KVM_SPSR_ABT	1
+#define KVM_SPSR_UND	2
+#define KVM_SPSR_IRQ	3
+#define KVM_SPSR_FIQ	4
+#define KVM_NR_SPSR	5
 
 #ifndef __ASSEMBLY__
 #include <asm/types.h>

From 934f190b989d879c8ef59e26c9d6eb2d10632f0a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 6 Feb 2013 19:40:29 +0000
Subject: [PATCH 0463/1185] arm64: KVM: 32bit GP register access

Allow access to the 32bit register file through the usual API.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit b547631fc64e249a3c507e6ce854642507fa7c1c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_emulate.h |  15 ++-
 arch/arm64/kvm/Makefile              |   2 +-
 arch/arm64/kvm/regmap.c              | 168 +++++++++++++++++++++++++++
 3 files changed, 183 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/kvm/regmap.c

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 6c1725e93b0b..20a1a3931d8d 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -28,6 +28,9 @@
 #include <asm/kvm_mmio.h>
 #include <asm/ptrace.h>
 
+unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
+unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
+
 void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
@@ -49,7 +52,7 @@ static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
 
 static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
 {
-	return false;	/* 32bit? Bahhh... */
+	return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT);
 }
 
 static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
@@ -64,16 +67,23 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
 
 static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
 {
+	*vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT;
 }
 
 static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num)
 {
+	if (vcpu_mode_is_32bit(vcpu))
+		return vcpu_reg32(vcpu, reg_num);
+
 	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num];
 }
 
 /* Get vcpu SPSR for current mode */
 static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
 {
+	if (vcpu_mode_is_32bit(vcpu))
+		return vcpu_spsr32(vcpu);
+
 	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
 }
 
@@ -81,6 +91,9 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
 {
 	u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
 
+	if (vcpu_mode_is_32bit(vcpu))
+		return mode > COMPAT_PSR_MODE_USR;
+
 	return mode != PSR_MODE_EL0t;
 }
 
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index dca110556683..a2169ec8d93b 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -15,7 +15,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
 
-kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o
+kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
new file mode 100644
index 000000000000..bbc6ae32e4af
--- /dev/null
+++ b/arch/arm64/kvm/regmap.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/emulate.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/mm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/ptrace.h>
+
+#define VCPU_NR_MODES 6
+#define REG_OFFSET(_reg) \
+	(offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long))
+
+#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R))
+
+static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = {
+	/* USR Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12), USR_REG_OFFSET(13),	USR_REG_OFFSET(14),
+		REG_OFFSET(pc)
+	},
+
+	/* FIQ Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7),
+		REG_OFFSET(compat_r8_fiq),  /* r8 */
+		REG_OFFSET(compat_r9_fiq),  /* r9 */
+		REG_OFFSET(compat_r10_fiq), /* r10 */
+		REG_OFFSET(compat_r11_fiq), /* r11 */
+		REG_OFFSET(compat_r12_fiq), /* r12 */
+		REG_OFFSET(compat_sp_fiq),  /* r13 */
+		REG_OFFSET(compat_lr_fiq),  /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* IRQ Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_irq), /* r13 */
+		REG_OFFSET(compat_lr_irq), /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* SVC Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_svc), /* r13 */
+		REG_OFFSET(compat_lr_svc), /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* ABT Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_abt), /* r13 */
+		REG_OFFSET(compat_lr_abt), /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* UND Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_und), /* r13 */
+		REG_OFFSET(compat_lr_und), /* r14 */
+		REG_OFFSET(pc)
+	},
+};
+
+/*
+ * Return a pointer to the register number valid in the current mode of
+ * the virtual CPU.
+ */
+unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
+{
+	unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs;
+	unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
+
+	switch (mode) {
+	case COMPAT_PSR_MODE_USR ... COMPAT_PSR_MODE_SVC:
+		mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */
+		break;
+
+	case COMPAT_PSR_MODE_ABT:
+		mode = 4;
+		break;
+
+	case COMPAT_PSR_MODE_UND:
+		mode = 5;
+		break;
+
+	case COMPAT_PSR_MODE_SYS:
+		mode = 0;	/* SYS maps to USR */
+		break;
+
+	default:
+		BUG();
+	}
+
+	return reg_array + vcpu_reg_offsets[mode][reg_num];
+}
+
+/*
+ * Return the SPSR for the current mode of the virtual CPU.
+ */
+unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu)
+{
+	unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
+	switch (mode) {
+	case COMPAT_PSR_MODE_SVC:
+		mode = KVM_SPSR_SVC;
+		break;
+	case COMPAT_PSR_MODE_ABT:
+		mode = KVM_SPSR_ABT;
+		break;
+	case COMPAT_PSR_MODE_UND:
+		mode = KVM_SPSR_UND;
+		break;
+	case COMPAT_PSR_MODE_IRQ:
+		mode = KVM_SPSR_IRQ;
+		break;
+	case COMPAT_PSR_MODE_FIQ:
+		mode = KVM_SPSR_FIQ;
+		break;
+	default:
+		BUG();
+	}
+
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode];
+}

From 4129306976df31cf80f13a85d6886e2f1245662b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 6 Feb 2013 19:54:04 +0000
Subject: [PATCH 0464/1185] arm64: KVM: 32bit conditional execution emulation

As conditional instructions can trap on AArch32, add the thinnest
possible emulation layer to keep 32bit guests happy.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 27b190bd9fbfee34536cb858f0b5924d294aac38)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_emulate.h |  13 ++-
 arch/arm64/kvm/Makefile              |   2 +-
 arch/arm64/kvm/emulate.c             | 158 +++++++++++++++++++++++++++
 3 files changed, 170 insertions(+), 3 deletions(-)
 create mode 100644 arch/arm64/kvm/emulate.c

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 20a1a3931d8d..eec073875218 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -31,6 +31,9 @@
 unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
 unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
 
+bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
+void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
+
 void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
@@ -57,12 +60,18 @@ static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
 
 static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
 {
-	return true;	/* No conditionals on arm64 */
+	if (vcpu_mode_is_32bit(vcpu))
+		return kvm_condition_valid32(vcpu);
+
+	return true;
 }
 
 static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
 {
-	*vcpu_pc(vcpu) += 4;
+	if (vcpu_mode_is_32bit(vcpu))
+		kvm_skip_instr32(vcpu, is_wide_instr);
+	else
+		*vcpu_pc(vcpu) += 4;
 }
 
 static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index a2169ec8d93b..72a9fd583ad3 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -15,7 +15,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
 
-kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o
+kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
diff --git a/arch/arm64/kvm/emulate.c b/arch/arm64/kvm/emulate.c
new file mode 100644
index 000000000000..124418d17049
--- /dev/null
+++ b/arch/arm64/kvm/emulate.c
@@ -0,0 +1,158 @@
+/*
+ * (not much of an) Emulation layer for 32bit guests.
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+
+/*
+ * stolen from arch/arm/kernel/opcodes.c
+ *
+ * condition code lookup table
+ * index into the table is test code: EQ, NE, ... LT, GT, AL, NV
+ *
+ * bit position in short is condition code: NZCV
+ */
+static const unsigned short cc_map[16] = {
+	0xF0F0,			/* EQ == Z set            */
+	0x0F0F,			/* NE                     */
+	0xCCCC,			/* CS == C set            */
+	0x3333,			/* CC                     */
+	0xFF00,			/* MI == N set            */
+	0x00FF,			/* PL                     */
+	0xAAAA,			/* VS == V set            */
+	0x5555,			/* VC                     */
+	0x0C0C,			/* HI == C set && Z clear */
+	0xF3F3,			/* LS == C clear || Z set */
+	0xAA55,			/* GE == (N==V)           */
+	0x55AA,			/* LT == (N!=V)           */
+	0x0A05,			/* GT == (!Z && (N==V))   */
+	0xF5FA,			/* LE == (Z || (N!=V))    */
+	0xFFFF,			/* AL always              */
+	0			/* NV                     */
+};
+
+static int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
+{
+	u32 esr = kvm_vcpu_get_hsr(vcpu);
+
+	if (esr & ESR_EL2_CV)
+		return (esr & ESR_EL2_COND) >> ESR_EL2_COND_SHIFT;
+
+	return -1;
+}
+
+/*
+ * Check if a trapped instruction should have been executed or not.
+ */
+bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
+{
+	unsigned long cpsr;
+	u32 cpsr_cond;
+	int cond;
+
+	/* Top two bits non-zero?  Unconditional. */
+	if (kvm_vcpu_get_hsr(vcpu) >> 30)
+		return true;
+
+	/* Is condition field valid? */
+	cond = kvm_vcpu_get_condition(vcpu);
+	if (cond == 0xE)
+		return true;
+
+	cpsr = *vcpu_cpsr(vcpu);
+
+	if (cond < 0) {
+		/* This can happen in Thumb mode: examine IT state. */
+		unsigned long it;
+
+		it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3);
+
+		/* it == 0 => unconditional. */
+		if (it == 0)
+			return true;
+
+		/* The cond for this insn works out as the top 4 bits. */
+		cond = (it >> 4);
+	}
+
+	cpsr_cond = cpsr >> 28;
+
+	if (!((cc_map[cond] >> cpsr_cond) & 1))
+		return false;
+
+	return true;
+}
+
+/**
+ * kvm_adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
+ * @vcpu:	The VCPU pointer
+ *
+ * When exceptions occur while instructions are executed in Thumb IF-THEN
+ * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have
+ * to do this little bit of work manually. The fields map like this:
+ *
+ * IT[7:0] -> CPSR[26:25],CPSR[15:10]
+ */
+static void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
+{
+	unsigned long itbits, cond;
+	unsigned long cpsr = *vcpu_cpsr(vcpu);
+	bool is_arm = !(cpsr & COMPAT_PSR_T_BIT);
+
+	BUG_ON(is_arm && (cpsr & COMPAT_PSR_IT_MASK));
+
+	if (!(cpsr & COMPAT_PSR_IT_MASK))
+		return;
+
+	cond = (cpsr & 0xe000) >> 13;
+	itbits = (cpsr & 0x1c00) >> (10 - 2);
+	itbits |= (cpsr & (0x3 << 25)) >> 25;
+
+	/* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */
+	if ((itbits & 0x7) == 0)
+		itbits = cond = 0;
+	else
+		itbits = (itbits << 1) & 0x1f;
+
+	cpsr &= ~COMPAT_PSR_IT_MASK;
+	cpsr |= cond << 13;
+	cpsr |= (itbits & 0x1c) << (10 - 2);
+	cpsr |= (itbits & 0x3) << 25;
+	*vcpu_cpsr(vcpu) = cpsr;
+}
+
+/**
+ * kvm_skip_instr32 - skip a trapped instruction and proceed to the next
+ * @vcpu: The vcpu pointer
+ */
+void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
+{
+	bool is_thumb;
+
+	is_thumb = !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_T_BIT);
+	if (is_thumb && !is_wide_instr)
+		*vcpu_pc(vcpu) += 2;
+	else
+		*vcpu_pc(vcpu) += 4;
+	kvm_adjust_itstate(vcpu);
+}

From 33056d384f68642d62ba6dc6fe0a17ef92473135 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 7 Feb 2013 10:32:33 +0000
Subject: [PATCH 0465/1185] arm64: KVM: 32bit handling of coprocessor traps

Provide the necessary infrastructure to trap coprocessor accesses that
occur when running 32bit guests.

Also wire SMC and HVC trapped in 32bit mode while we're at it.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 62a89c44954f09072bf07a714c8f68bda14ab87e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_coproc.h |   5 +
 arch/arm64/kvm/handle_exit.c        |   7 ++
 arch/arm64/kvm/sys_regs.c           | 181 ++++++++++++++++++++++++++--
 3 files changed, 186 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h
index 9b4477acb554..9a59301cd014 100644
--- a/arch/arm64/include/asm/kvm_coproc.h
+++ b/arch/arm64/include/asm/kvm_coproc.h
@@ -32,11 +32,16 @@ struct kvm_sys_reg_table {
 
 struct kvm_sys_reg_target_table {
 	struct kvm_sys_reg_table table64;
+	struct kvm_sys_reg_table table32;
 };
 
 void kvm_register_target_sys_reg_table(unsigned int target,
 				       struct kvm_sys_reg_target_table *table);
 
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
 
 #define kvm_coproc_table_init kvm_sys_reg_table_init
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 4766b7f3515e..9beaca033437 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -62,6 +62,13 @@ static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 static exit_handle_fn arm_exit_handlers[] = {
 	[ESR_EL2_EC_WFI]	= kvm_handle_wfi,
+	[ESR_EL2_EC_CP15_32]	= kvm_handle_cp15_32,
+	[ESR_EL2_EC_CP15_64]	= kvm_handle_cp15_64,
+	[ESR_EL2_EC_CP14_MR]	= kvm_handle_cp14_access,
+	[ESR_EL2_EC_CP14_LS]	= kvm_handle_cp14_load_store,
+	[ESR_EL2_EC_CP14_64]	= kvm_handle_cp14_access,
+	[ESR_EL2_EC_HVC32]	= handle_hvc,
+	[ESR_EL2_EC_SMC32]	= handle_smc,
 	[ESR_EL2_EC_HVC64]	= handle_hvc,
 	[ESR_EL2_EC_SMC64]	= handle_smc,
 	[ESR_EL2_EC_SYS64]	= kvm_handle_sys_reg,
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 52fff0ae3442..94923609753b 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -38,6 +38,10 @@
  * types are different. My gut feeling is that it should be pretty
  * easy to merge, but that would be an ABI breakage -- again. VFP
  * would also need to be abstracted.
+ *
+ * For AArch32, we only take care of what is being trapped. Anything
+ * that has to do with init and userspace access has to go via the
+ * 64bit interface.
  */
 
 /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
@@ -166,6 +170,16 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
 	  access_dcsw },
 
+	/* TEECR32_EL1 */
+	{ Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
+	  NULL, reset_val, TEECR32_EL1, 0 },
+	/* TEEHBR32_EL1 */
+	{ Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000),
+	  NULL, reset_val, TEEHBR32_EL1, 0 },
+	/* DBGVCR32_EL2 */
+	{ Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000),
+	  NULL, reset_val, DBGVCR32_EL2, 0 },
+
 	/* MPIDR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101),
 	  NULL, reset_mpidr, MPIDR_EL1 },
@@ -276,6 +290,39 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	/* TPIDRRO_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011),
 	  NULL, reset_unknown, TPIDRRO_EL0 },
+
+	/* DACR32_EL2 */
+	{ Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000),
+	  NULL, reset_unknown, DACR32_EL2 },
+	/* IFSR32_EL2 */
+	{ Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0000), Op2(0b001),
+	  NULL, reset_unknown, IFSR32_EL2 },
+	/* FPEXC32_EL2 */
+	{ Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0011), Op2(0b000),
+	  NULL, reset_val, FPEXC32_EL2, 0x70 },
+};
+
+/* Trapped cp15 registers */
+static const struct sys_reg_desc cp15_regs[] = {
+	/*
+	 * DC{C,I,CI}SW operations:
+	 */
+	{ Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw },
+	{ Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
+	{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 3), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 5), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 6), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 7), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 0), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 1), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 2), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake },
 };
 
 /* Target specific emulation tables */
@@ -288,13 +335,20 @@ void kvm_register_target_sys_reg_table(unsigned int target,
 }
 
 /* Get specific register table for this target. */
-static const struct sys_reg_desc *get_target_table(unsigned target, size_t *num)
+static const struct sys_reg_desc *get_target_table(unsigned target,
+						   bool mode_is_64,
+						   size_t *num)
 {
 	struct kvm_sys_reg_target_table *table;
 
 	table = target_tables[target];
-	*num = table->table64.num;
-	return table->table64.table;
+	if (mode_is_64) {
+		*num = table->table64.num;
+		return table->table64.table;
+	} else {
+		*num = table->table32.num;
+		return table->table32.table;
+	}
 }
 
 static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
@@ -322,13 +376,126 @@ static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
 	return NULL;
 }
 
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+static void emulate_cp15(struct kvm_vcpu *vcpu,
+			 const struct sys_reg_params *params)
+{
+	size_t num;
+	const struct sys_reg_desc *table, *r;
+
+	table = get_target_table(vcpu->arch.target, false, &num);
+
+	/* Search target-specific then generic table. */
+	r = find_reg(params, table, num);
+	if (!r)
+		r = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs));
+
+	if (likely(r)) {
+		/*
+		 * Not having an accessor means that we have
+		 * configured a trap that we don't know how to
+		 * handle. This certainly qualifies as a gross bug
+		 * that should be fixed right away.
+		 */
+		BUG_ON(!r->access);
+
+		if (likely(r->access(vcpu, params, r))) {
+			/* Skip instruction, since it was emulated */
+			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+			return;
+		}
+		/* If access function fails, it should complain. */
+	}
+
+	kvm_err("Unsupported guest CP15 access at: %08lx\n", *vcpu_pc(vcpu));
+	print_sys_reg_instr(params);
+	kvm_inject_undefined(vcpu);
+}
+
+/**
+ * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct sys_reg_params params;
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+	int Rt2 = (hsr >> 10) & 0xf;
+
+	params.CRm = (hsr >> 1) & 0xf;
+	params.Rt = (hsr >> 5) & 0xf;
+	params.is_write = ((hsr & 1) == 0);
+
+	params.Op0 = 0;
+	params.Op1 = (hsr >> 16) & 0xf;
+	params.Op2 = 0;
+	params.CRn = 0;
+
+	/*
+	 * Massive hack here. Store Rt2 in the top 32bits so we only
+	 * have one register to deal with. As we use the same trap
+	 * backends between AArch32 and AArch64, we get away with it.
+	 */
+	if (params.is_write) {
+		u64 val = *vcpu_reg(vcpu, params.Rt);
+		val &= 0xffffffff;
+		val |= *vcpu_reg(vcpu, Rt2) << 32;
+		*vcpu_reg(vcpu, params.Rt) = val;
+	}
+
+	emulate_cp15(vcpu, &params);
+
+	/* Do the opposite hack for the read side */
+	if (!params.is_write) {
+		u64 val = *vcpu_reg(vcpu, params.Rt);
+		val >>= 32;
+		*vcpu_reg(vcpu, Rt2) = val;
+	}
+
+	return 1;
+}
+
+/**
+ * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct sys_reg_params params;
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+	params.CRm = (hsr >> 1) & 0xf;
+	params.Rt  = (hsr >> 5) & 0xf;
+	params.is_write = ((hsr & 1) == 0);
+	params.CRn = (hsr >> 10) & 0xf;
+	params.Op0 = 0;
+	params.Op1 = (hsr >> 14) & 0x7;
+	params.Op2 = (hsr >> 17) & 0x7;
+
+	emulate_cp15(vcpu, &params);
+	return 1;
+}
+
 static int emulate_sys_reg(struct kvm_vcpu *vcpu,
 			   const struct sys_reg_params *params)
 {
 	size_t num;
 	const struct sys_reg_desc *table, *r;
 
-	table = get_target_table(vcpu->arch.target, &num);
+	table = get_target_table(vcpu->arch.target, true, &num);
 
 	/* Search target-specific then generic table. */
 	r = find_reg(params, table, num);
@@ -438,7 +605,7 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
 	if (!index_to_params(id, &params))
 		return NULL;
 
-	table = get_target_table(vcpu->arch.target, &num);
+	table = get_target_table(vcpu->arch.target, true, &num);
 	r = find_reg(&params, table, num);
 	if (!r)
 		r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
@@ -762,7 +929,7 @@ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
 	size_t num;
 
 	/* We check for duplicates here, to allow arch-specific overrides. */
-	i1 = get_target_table(vcpu->arch.target, &num);
+	i1 = get_target_table(vcpu->arch.target, true, &num);
 	end1 = i1 + num;
 	i2 = sys_reg_descs;
 	end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs);
@@ -874,7 +1041,7 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
 	/* Generic chip reset first (so target could override). */
 	reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
 
-	table = get_target_table(vcpu->arch.target, &num);
+	table = get_target_table(vcpu->arch.target, true, &num);
 	reset_sys_reg_descs(vcpu, table, num);
 
 	for (num = 1; num < NR_SYS_REGS; num++)

From a6df8b5dd61c84f691c41663a1d2bb0e05342053 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 7 Feb 2013 10:50:18 +0000
Subject: [PATCH 0466/1185] arm64: KVM: CPU specific 32bit coprocessor access

Enable handling of CPU specific 32bit coprocessor access. Not much
here either.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 06c7654d2fb8bac7b1af4340ad59434a5d89b86a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs_generic_v8.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index d4e803907312..4268ab9356b1 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -59,11 +59,21 @@ static const struct sys_reg_desc genericv8_sys_regs[] = {
 	  access_actlr, reset_actlr, ACTLR_EL1 },
 };
 
+static const struct sys_reg_desc genericv8_cp15_regs[] = {
+	/* ACTLR */
+	{ Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
+	  access_actlr },
+};
+
 static struct kvm_sys_reg_target_table genericv8_target_table = {
 	.table64 = {
 		.table = genericv8_sys_regs,
 		.num = ARRAY_SIZE(genericv8_sys_regs),
 	},
+	.table32 = {
+		.table = genericv8_cp15_regs,
+		.num = ARRAY_SIZE(genericv8_cp15_regs),
+	},
 };
 
 static int __init sys_reg_genericv8_init(void)

From ad0ed2f67ccf23f49c201cb05c542d46e04fb9c1 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 7 Feb 2013 10:52:10 +0000
Subject: [PATCH 0467/1185] arm64: KVM: 32bit specific register world switch

Allow registers specific to 32bit guests to be saved/restored
during the world switch.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit b4afad06c19e3489767532f86ff453a1d1e28b8c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp.S | 70 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 8b510835b440..ff985e3d8b72 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -266,6 +266,74 @@ __kvm_hyp_code_start:
 	msr	cntkctl_el1,	x23
 .endm
 
+.macro skip_32bit_state tmp, target
+	// Skip 32bit state if not needed
+	mrs	\tmp, hcr_el2
+	tbnz	\tmp, #HCR_RW_SHIFT, \target
+.endm
+
+.macro skip_tee_state tmp, target
+	// Skip ThumbEE state if not needed
+	mrs	\tmp, id_pfr0_el1
+	tbz	\tmp, #12, \target
+.endm
+
+.macro save_guest_32bit_state
+	skip_32bit_state x3, 1f
+
+	add	x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT)
+	mrs	x4, spsr_abt
+	mrs	x5, spsr_und
+	mrs	x6, spsr_irq
+	mrs	x7, spsr_fiq
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
+	mrs	x4, dacr32_el2
+	mrs	x5, ifsr32_el2
+	mrs	x6, fpexc32_el2
+	mrs	x7, dbgvcr32_el2
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+
+	skip_tee_state x8, 1f
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
+	mrs	x4, teecr32_el1
+	mrs	x5, teehbr32_el1
+	stp	x4, x5, [x3]
+1:
+.endm
+
+.macro restore_guest_32bit_state
+	skip_32bit_state x3, 1f
+
+	add	x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT)
+	ldp	x4, x5, [x3]
+	ldp	x6, x7, [x3, #16]
+	msr	spsr_abt, x4
+	msr	spsr_und, x5
+	msr	spsr_irq, x6
+	msr	spsr_fiq, x7
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
+	ldp	x4, x5, [x3]
+	ldp	x6, x7, [x3, #16]
+	msr	dacr32_el2, x4
+	msr	ifsr32_el2, x5
+	msr	fpexc32_el2, x6
+	msr	dbgvcr32_el2, x7
+
+	skip_tee_state x8, 1f
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
+	ldp	x4, x5, [x3]
+	msr	teecr32_el1, x4
+	msr	teehbr32_el1, x5
+1:
+.endm
+
 .macro activate_traps
 	ldr	x2, [x0, #VCPU_IRQ_LINES]
 	ldr	x1, [x0, #VCPU_HCR_EL2]
@@ -494,6 +562,7 @@ ENTRY(__kvm_vcpu_run)
 
 	bl __restore_sysregs
 	bl __restore_fpsimd
+	restore_guest_32bit_state
 	restore_guest_regs
 
 	// That's it, no more messing around.
@@ -509,6 +578,7 @@ __kvm_vcpu_return:
 	save_guest_regs
 	bl __save_fpsimd
 	bl __save_sysregs
+	save_guest_32bit_state
 
 	save_timer_state
 	save_vgic_state

From 31960c92ba1763c3c5db1d09fb0f4547a39c8d4d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 6 Feb 2013 11:29:35 +0000
Subject: [PATCH 0468/1185] arm64: KVM: 32bit guest fault injection

Add fault injection capability for 32bit guests.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit e82e030556e42e823e174e0c3bd97988d1a09d1f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/inject_fault.c | 79 ++++++++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 54f656271266..81a02a8762b0 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -1,5 +1,5 @@
 /*
- * Fault injection for 64bit guests.
+ * Fault injection for both 32 and 64bit guests.
  *
  * Copyright (C) 2012,2013 - ARM Ltd
  * Author: Marc Zyngier <marc.zyngier@arm.com>
@@ -29,6 +29,74 @@
 				 PSR_I_BIT | PSR_D_BIT)
 #define EL1_EXCEPT_SYNC_OFFSET	0x200
 
+static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
+{
+	unsigned long cpsr;
+	unsigned long new_spsr_value = *vcpu_cpsr(vcpu);
+	bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT);
+	u32 return_offset = (is_thumb) ? 4 : 0;
+	u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
+
+	cpsr = mode | COMPAT_PSR_I_BIT;
+
+	if (sctlr & (1 << 30))
+		cpsr |= COMPAT_PSR_T_BIT;
+	if (sctlr & (1 << 25))
+		cpsr |= COMPAT_PSR_E_BIT;
+
+	*vcpu_cpsr(vcpu) = cpsr;
+
+	/* Note: These now point to the banked copies */
+	*vcpu_spsr(vcpu) = new_spsr_value;
+	*vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
+
+	/* Branch to exception vector */
+	if (sctlr & (1 << 13))
+		vect_offset += 0xffff0000;
+	else /* always have security exceptions */
+		vect_offset += vcpu_cp15(vcpu, c12_VBAR);
+
+	*vcpu_pc(vcpu) = vect_offset;
+}
+
+static void inject_undef32(struct kvm_vcpu *vcpu)
+{
+	prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4);
+}
+
+/*
+ * Modelled after TakeDataAbortException() and TakePrefetchAbortException
+ * pseudocode.
+ */
+static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
+			 unsigned long addr)
+{
+	u32 vect_offset;
+	u32 *far, *fsr;
+	bool is_lpae;
+
+	if (is_pabt) {
+		vect_offset = 12;
+		far = &vcpu_cp15(vcpu, c6_IFAR);
+		fsr = &vcpu_cp15(vcpu, c5_IFSR);
+	} else { /* !iabt */
+		vect_offset = 16;
+		far = &vcpu_cp15(vcpu, c6_DFAR);
+		fsr = &vcpu_cp15(vcpu, c5_DFSR);
+	}
+
+	prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vect_offset);
+
+	*far = addr;
+
+	/* Give the guest an IMPLEMENTATION DEFINED exception */
+	is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
+	if (is_lpae)
+		*fsr = 1 << 9 | 0x34;
+	else
+		*fsr = 0x14;
+}
+
 static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
 {
 	unsigned long cpsr = *vcpu_cpsr(vcpu);
@@ -98,6 +166,9 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
  */
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
 {
-	inject_abt64(vcpu, false, addr);
+	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+		inject_abt32(vcpu, false, addr);
+	else
+		inject_abt64(vcpu, false, addr);
 }
 
@@ -111,6 +182,9 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
  */
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
 {
-	inject_abt64(vcpu, true, addr);
+	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+		inject_abt32(vcpu, true, addr);
+	else
+		inject_abt64(vcpu, true, addr);
 }
 
@@ -122,5 +196,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
  */
 void kvm_inject_undefined(struct kvm_vcpu *vcpu)
 {
-	inject_undef64(vcpu);
+	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+		inject_undef32(vcpu);
+	else
+		inject_undef64(vcpu);
 }

From 80e531580f7bbb453119afd17bc752d9763676b5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 7 Feb 2013 10:46:46 +0000
Subject: [PATCH 0469/1185] arm64: KVM: enable initialization of a 32bit vcpu

Wire the init of a 32bit vcpu by allowing 32bit modes in pstate,
and providing sensible defaults out of reset state.

This feature is of course conditioned by the presence of 32bit
capability on the physical CPU, and is checked by the KVM_CAP_ARM_EL1_32BIT
capability.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 0d854a60b1d7d39a37b25dd28f63cfa0df637b91)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h |  2 +-
 arch/arm64/include/uapi/asm/kvm.h |  1 +
 arch/arm64/kvm/guest.c            |  6 ++++++
 arch/arm64/kvm/reset.c            | 26 +++++++++++++++++++++++++-
 include/uapi/linux/kvm.h          |  1 +
 5 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 3f5830b3ca3f..644d73956864 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -34,7 +34,7 @@
 #include <kvm/arm_vgic.h>
 #include <kvm/arm_arch_timer.h>
 
-#define KVM_VCPU_MAX_FEATURES 1
+#define KVM_VCPU_MAX_FEATURES 2
 
 /* We don't currently support large pages. */
 #define KVM_HPAGE_GFN_SHIFT(x)	0
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 5b1110c49df5..5031f4263937 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -75,6 +75,7 @@ struct kvm_regs {
 #define KVM_VGIC_V2_CPU_SIZE		0x2000
 
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
 
 struct kvm_vcpu_init {
 	__u32 target;
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 3d7518a7ebaa..2c3ff67a8ecb 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -99,6 +99,12 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) {
 		u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK;
 		switch (mode) {
+		case COMPAT_PSR_MODE_USR:
+		case COMPAT_PSR_MODE_FIQ:
+		case COMPAT_PSR_MODE_IRQ:
+		case COMPAT_PSR_MODE_SVC:
+		case COMPAT_PSR_MODE_ABT:
+		case COMPAT_PSR_MODE_UND:
 		case PSR_MODE_EL0t:
 		case PSR_MODE_EL1t:
 		case PSR_MODE_EL1h:
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 766150ac76ed..70a7816535cd 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -38,16 +38,32 @@ static const struct kvm_regs default_regs_reset = {
 			PSR_F_BIT | PSR_D_BIT),
 };
 
+static const struct kvm_regs default_regs_reset32 = {
+	.regs.pstate = (COMPAT_PSR_MODE_SVC | COMPAT_PSR_A_BIT |
+			COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT),
+};
+
 static const struct kvm_irq_level default_vtimer_irq = {
 	.irq	= 27,
 	.level	= 1,
 };
 
+static bool cpu_has_32bit_el1(void)
+{
+	u64 pfr0;
+
+	pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+	return !!(pfr0 & 0x20);
+}
+
 int kvm_arch_dev_ioctl_check_extension(long ext)
 {
 	int r;
 
 	switch (ext) {
+	case KVM_CAP_ARM_EL1_32BIT:
+		r = cpu_has_32bit_el1();
+		break;
 	default:
 		r = 0;
 	}
@@ -70,7 +86,15 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 
 	switch (vcpu->arch.target) {
 	default:
-		cpu_reset = &default_regs_reset;
+		if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
+			if (!cpu_has_32bit_el1())
+				return -EINVAL;
+			cpu_reset = &default_regs_reset32;
+			vcpu->arch.hcr_el2 &= ~HCR_RW;
+		} else {
+			cpu_reset = &default_regs_reset;
+		}
+
 		cpu_vtimer_irq = &default_vtimer_irq;
 		break;
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 97277d333e82..acccd08be6c7 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -666,6 +666,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_IRQ_MPIC 90
 #define KVM_CAP_PPC_RTAS 91
 #define KVM_CAP_IRQ_XICS 92
+#define KVM_CAP_ARM_EL1_32BIT 93
 
 #ifdef KVM_CAP_IRQ_ROUTING
 

From d3845bf4d5f6b104ec346dabf58817aa9d66d740 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 2 Apr 2013 17:46:31 +0100
Subject: [PATCH 0470/1185] arm64: KVM: userspace API documentation

Unsurprisingly, the arm64 userspace API is extremely similar to
the 32bit one, the only significant difference being the ONE_REG
register mapping.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 379e04c79e8a9ded8a202f1e266f0c5830185bea)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/api.txt | 58 +++++++++++++++++++++----------
 1 file changed, 39 insertions(+), 19 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 5f91eda91647..9bfadeb8be31 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -280,7 +280,7 @@ kvm_run' (see below).
 4.11 KVM_GET_REGS
 
 Capability: basic
-Architectures: all except ARM
+Architectures: all except ARM, arm64
 Type: vcpu ioctl
 Parameters: struct kvm_regs (out)
 Returns: 0 on success, -1 on error
@@ -301,7 +301,7 @@ struct kvm_regs {
 4.12 KVM_SET_REGS
 
 Capability: basic
-Architectures: all except ARM
+Architectures: all except ARM, arm64
 Type: vcpu ioctl
 Parameters: struct kvm_regs (in)
 Returns: 0 on success, -1 on error
@@ -587,7 +587,7 @@ struct kvm_fpu {
 4.24 KVM_CREATE_IRQCHIP
 
 Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, ARM
+Architectures: x86, ia64, ARM, arm64
 Type: vm ioctl
 Parameters: none
 Returns: 0 on success, -1 on error
@@ -595,14 +595,14 @@ Returns: 0 on success, -1 on error
 Creates an interrupt controller model in the kernel.  On x86, creates a virtual
 ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
 local APIC.  IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
-only go to the IOAPIC.  On ia64, a IOSAPIC is created. On ARM, a GIC is
+only go to the IOAPIC.  On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is
 created.
 
 
 4.25 KVM_IRQ_LINE
 
 Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, arm
+Architectures: x86, ia64, arm, arm64
 Type: vm ioctl
 Parameters: struct kvm_irq_level
 Returns: 0 on success, -1 on error
@@ -612,9 +612,10 @@ On some architectures it is required that an interrupt controller model has
 been previously created with KVM_CREATE_IRQCHIP.  Note that edge-triggered
 interrupts require the level to be set to 1 and then back to 0.
 
-ARM can signal an interrupt either at the CPU level, or at the in-kernel irqchip
-(GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for
-specific cpus.  The irq field is interpreted like this:
+ARM/arm64 can signal an interrupt either at the CPU level, or at the
+in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
+use PPIs designated for specific cpus.  The irq field is interpreted
+like this:
 
   bits:  | 31 ... 24 | 23  ... 16 | 15    ...    0 |
   field: | irq_type  | vcpu_index |     irq_id     |
@@ -1831,6 +1832,22 @@ ARM 32-bit VFP control registers have the following id bit patterns:
 ARM 64-bit FP registers have the following id bit patterns:
   0x4030 0000 0012 0 <regno:12>
 
+
+arm64 registers are mapped using the lower 32 bits. The upper 16 of
+that is the register group type, or coprocessor number:
+
+arm64 core/FP-SIMD registers have the following id bit patterns. Note
+that the size of the access is variable, as the kvm_regs structure
+contains elements ranging from 32 to 128 bits. The index is a 32bit
+value in the kvm_regs structure seen as a 32bit array.
+  0x60x0 0000 0010 <index into the kvm_regs struct:16>
+
+arm64 CCSIDR registers are demultiplexed by CSSELR value:
+  0x6020 0000 0011 00 <csselr:8>
+
+arm64 system registers have the following id bit patterns:
+  0x6030 0000 0013 <op0:2> <op1:3> <crn:4> <crm:4> <op2:3>
+
 4.69 KVM_GET_ONE_REG
 
 Capability: KVM_CAP_ONE_REG
@@ -2264,7 +2281,7 @@ current state.  "addr" is ignored.
 4.77 KVM_ARM_VCPU_INIT
 
 Capability: basic
-Architectures: arm
+Architectures: arm, arm64
 Type: vcpu ioctl
 Parameters: struct struct kvm_vcpu_init (in)
 Returns: 0 on success; -1 on error
@@ -2283,12 +2300,14 @@ should be created before this ioctl is invoked.
 Possible features:
 	- KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state.
 	  Depends on KVM_CAP_ARM_PSCI.
+	- KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
+	  Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
 
 
 4.78 KVM_GET_REG_LIST
 
 Capability: basic
-Architectures: arm
+Architectures: arm, arm64
 Type: vcpu ioctl
 Parameters: struct kvm_reg_list (in/out)
 Returns: 0 on success; -1 on error
@@ -2308,7 +2327,7 @@ KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
 4.80 KVM_ARM_SET_DEVICE_ADDR
 
 Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
-Architectures: arm
+Architectures: arm, arm64
 Type: vm ioctl
 Parameters: struct kvm_arm_device_address (in)
 Returns: 0 on success, -1 on error
@@ -2329,18 +2348,19 @@ can access emulated or directly exposed devices, which the host kernel needs
 to know about. The id field is an architecture specific identifier for a
 specific device.
 
-ARM divides the id field into two parts, a device id and an address type id
-specific to the individual device.
+ARM/arm64 divides the id field into two parts, a device id and an
+address type id specific to the individual device.
 
   bits:  | 63        ...       32 | 31    ...    16 | 15    ...    0 |
   field: |        0x00000000      |     device id   |  addr type id  |
 
-ARM currently only require this when using the in-kernel GIC support for the
-hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2 as the device id.  When
-setting the base address for the guest's mapping of the VGIC virtual CPU
-and distributor interface, the ioctl must be called after calling
-KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs.  Calling
-this ioctl twice for any of the base addresses will return -EEXIST.
+ARM/arm64 currently only require this when using the in-kernel GIC
+support for the hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2
+as the device id.  When setting the base address for the guest's
+mapping of the VGIC virtual CPU and distributor interface, the ioctl
+must be called after calling KVM_CREATE_IRQCHIP, but before calling
+KVM_RUN on any of the VCPUs.  Calling this ioctl twice for any of the
+base addresses will return -EEXIST.
 
 4.82 KVM_PPC_RTAS_DEFINE_TOKEN
 

From 1a12f6e7bcaaf0ec1a6a01fe5fb9341eb2f74ffe Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 2 Apr 2013 17:49:40 +0100
Subject: [PATCH 0471/1185] arm64: KVM: MAINTAINERS update

Elect myself as the KVM/arm64 maintainer.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 6394a3ec02ab39147aab9ea56d0dabafd3dcae60)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 MAINTAINERS | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 48c748080c96..823010fce98a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4719,6 +4719,15 @@ F:	arch/arm/include/uapi/asm/kvm*
 F:	arch/arm/include/asm/kvm*
 F:	arch/arm/kvm/
 
+KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
+M:	Marc Zyngier <marc.zyngier@arm.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:	kvmarm@lists.cs.columbia.edu
+S:	Maintained
+F:	arch/arm64/include/uapi/asm/kvm*
+F:	arch/arm64/include/asm/kvm*
+F:	arch/arm64/kvm/
+
 KEXEC
 M:	Eric Biederman <ebiederm@xmission.com>
 W:	http://kernel.org/pub/linux/utils/kernel/kexec/

From 092f9fbf3845c1c6e0bb14f3eae1be583c071a7d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 2 May 2013 14:31:03 +0100
Subject: [PATCH 0472/1185] arm64: KVM: document kernel object mappings in HYP

HYP mode has access to some of the kernel pages. Document the
memory mapping and the offset between kernel VA and HYP VA.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit aa4a73a0a23a65a2f531d01f1865d1e61c6acb55)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/arm64/memory.txt | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
index c6941f815f15..d50fa618371b 100644
--- a/Documentation/arm64/memory.txt
+++ b/Documentation/arm64/memory.txt
@@ -102,3 +102,10 @@ Translation table lookup with 64KB pages:
  |                 |    +--------------------------> [41:29] L2 index (only 38:29 used)
  |                 +-------------------------------> [47:42] L1 index (not used)
  +-------------------------------------------------> [63] TTBR0/1
+
+When using KVM, the hypervisor maps kernel pages in EL2, at a fixed
+offset from the kernel VA (top 24bits of the kernel VA set to zero):
+
+Start			End			Size		Use
+-----------------------------------------------------------------------
+0000004000000000	0000007fffffffff	 256GB		kernel objects mapped in HYP

From 146844e0e0dc7bcb2a080a2fdfd792674bff105b Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 30 Apr 2013 12:02:15 +0530
Subject: [PATCH 0473/1185] ARM: KVM: Allow host virt timer irq to be different
 from guest timer virt irq

The arch_timer irq numbers (or PPI numbers) are implementation dependent,
so the host virtual timer irq number can be different from guest virtual
timer irq number.

This patch ensures that host virtual timer irq number is read from DTB and
guest virtual timer irq is determined based on vcpu target type.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Christoffer Dall <cdall@cs.columbia.edu>
(cherry picked from commit 5ae7f87a56fab10b8f9b135a8377c144397293ca)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/reset.c         | 12 ++++++++++++
 include/kvm/arm_arch_timer.h |  4 ++++
 virt/kvm/arm/arch_timer.c    | 29 ++++++++++++++++++++---------
 3 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index b80256b554cd..b7840e7aa452 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -27,6 +27,8 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_coproc.h>
 
+#include <kvm/arm_arch_timer.h>
+
 /******************************************************************************
  * Cortex-A15 Reset Values
  */
@@ -37,6 +39,11 @@ static struct kvm_regs a15_regs_reset = {
 	.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
 };
 
+static const struct kvm_irq_level a15_vtimer_irq = {
+	.irq = 27,
+	.level = 1,
+};
+
 
 /*******************************************************************************
  * Exported reset function
@@ -52,6 +59,7 @@ static struct kvm_regs a15_regs_reset = {
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct kvm_regs *cpu_reset;
+	const struct kvm_irq_level *cpu_vtimer_irq;
 
 	switch (vcpu->arch.target) {
 	case KVM_ARM_TARGET_CORTEX_A15:
@@ -59,6 +67,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 			return -EINVAL;
 		cpu_reset = &a15_regs_reset;
 		vcpu->arch.midr = read_cpuid_id();
+		cpu_vtimer_irq = &a15_vtimer_irq;
 		break;
 	default:
 		return -ENODEV;
@@ -70,5 +79,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	/* Reset CP15 registers */
 	kvm_reset_coprocs(vcpu);
 
+	/* Reset arch_timer context */
+	kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
+
 	return 0;
 }
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 68cb9e1dfb81..6d9aeddc09bf 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -61,6 +61,8 @@ struct arch_timer_cpu {
 #ifdef CONFIG_KVM_ARM_TIMER
 int kvm_timer_hyp_init(void);
 int kvm_timer_init(struct kvm *kvm);
+void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+			  const struct kvm_irq_level *irq);
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
@@ -76,6 +78,8 @@ static inline int kvm_timer_init(struct kvm *kvm)
 	return 0;
 }
 
+static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+					const struct kvm_irq_level *irq) {}
 static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
 static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {}
 static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {}
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 6f485eaf643b..c2e1ef4604e8 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -30,9 +30,7 @@
 
 static struct timecounter *timecounter;
 static struct workqueue_struct *wqueue;
-static struct kvm_irq_level timer_irq = {
-	.level	= 1,
-};
+static unsigned int host_vtimer_irq;
 
 static cycle_t kvm_phys_timer_read(void)
 {
@@ -67,8 +65,8 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
 
 	timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
 	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
-			    vcpu->arch.timer_cpu.irq->irq,
-			    vcpu->arch.timer_cpu.irq->level);
+			    timer->irq->irq,
+			    timer->irq->level);
 }
 
 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
@@ -156,6 +154,20 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 	timer_arm(timer, ns);
 }
 
+void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+			  const struct kvm_irq_level *irq)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	/*
+	 * The vcpu timer irq number cannot be determined in
+	 * kvm_timer_vcpu_init() because it is called much before
+	 * kvm_vcpu_set_target(). To handle this, we determine
+	 * vcpu timer irq number when the vcpu is reset.
+	 */
+	timer->irq = irq;
+}
+
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -163,12 +175,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 	INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
 	hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	timer->timer.function = kvm_timer_expire;
-	timer->irq = &timer_irq;
 }
 
 static void kvm_timer_init_interrupt(void *info)
 {
-	enable_percpu_irq(timer_irq.irq, 0);
+	enable_percpu_irq(host_vtimer_irq, 0);
 }
 
 
@@ -182,7 +193,7 @@ static int kvm_timer_cpu_notify(struct notifier_block *self,
 		break;
 	case CPU_DYING:
 	case CPU_DYING_FROZEN:
-		disable_percpu_irq(timer_irq.irq);
+		disable_percpu_irq(host_vtimer_irq);
 		break;
 	}
 
@@ -230,7 +241,7 @@ int kvm_timer_hyp_init(void)
 		goto out;
 	}
 
-	timer_irq.irq = ppi;
+	host_vtimer_irq = ppi;
 
 	err = register_cpu_notifier(&kvm_timer_cpu_nb);
 	if (err) {

From 2da084abe92d8aa40c8972c461114528c336ab54 Mon Sep 17 00:00:00 2001
From: Dave P Martin <Dave.Martin@arm.com>
Date: Wed, 1 May 2013 17:49:28 +0100
Subject: [PATCH 0474/1185] ARM: KVM: Don't handle PSCI calls via SMC

Currently, kvmtool unconditionally declares that HVC should be used
to call PSCI, so the function numbers in the DT tell the guest
nothing about the function ID namespace or calling convention for
SMC.

We already assume that the guest will examine and honour the DT,
since there is no way it could possibly guess the KVM-specific PSCI
function IDs otherwise.  So let's not encourage guests to violate
what's specified in the DT by using SMC to make the call.

[ Modified to apply to top of kvm/arm tree - Christoffer ]

Signed-off-by: Dave P Martin <Dave.Martin@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@cs.columbia.edu>
(cherry picked from commit 24a7f675752e06729589d40a5256970998a21502)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/handle_exit.c | 3 ---
 arch/arm/kvm/psci.c        | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 3d74a0be47db..df4c82d47ad7 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -52,9 +52,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	if (kvm_psci_call(vcpu))
-		return 1;
-
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 7ee5bb7a3667..86a693a02ba3 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -75,7 +75,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
  * kvm_psci_call - handle PSCI call if r0 value is in range
  * @vcpu: Pointer to the VCPU struct
  *
- * Handle PSCI calls from guests through traps from HVC or SMC instructions.
+ * Handle PSCI calls from guests through traps from HVC instructions.
  * The calling convention is similar to SMC calls to the secure world where
  * the function number is placed in r0 and this function returns true if the
  * function number specified in r0 is withing the PSCI range, and false

From fd741ad3dd186a0df45994059de3a08517a856bc Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 May 2013 12:11:35 +0100
Subject: [PATCH 0475/1185] ARM: KVM: remove dead prototype for
 __kvm_tlb_flush_vmid

__kvm_tlb_flush_vmid has been renamed to __kvm_tlb_flush_vmid_ipa,
and the old prototype should have been removed when the code was
modified.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@cs.columbia.edu>
(cherry picked from commit 368074d908b785588778f00b4384376cd636f4a1)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_asm.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 4bb08e3e52bc..a2f43ddcc300 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -74,8 +74,6 @@ extern char __kvm_hyp_vector[];
 extern char __kvm_hyp_code_start[];
 extern char __kvm_hyp_code_end[];
 
-extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
-
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 

From 3be39e7f5aaca996a006717658c517650f345476 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 May 2013 12:11:37 +0100
Subject: [PATCH 0476/1185] ARM: KVM: use phys_addr_t instead of unsigned long
 long for HYP PGDs

HYP PGDs are passed around as phys_addr_t, except just before calling
into the hypervisor init code, where they are cast to a rather weird
unsigned long long.

Just keep them around as phys_addr_t, which is what makes the most
sense.

Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@cs.columbia.edu>
(cherry picked from commit dac288f7b38a7439502b77dabcdf8a9a5c4ae721)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h | 4 ++--
 arch/arm/kvm/arm.c              | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index ff5aaf10e6ec..1f3cee2e210e 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -190,8 +190,8 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index);
 
-static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr,
-				       unsigned long long pgd_ptr,
+static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
+				       phys_addr_t pgd_ptr,
 				       unsigned long hyp_stack_ptr,
 				       unsigned long vector_ptr)
 {
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index ef1703b9587b..741f66a2edbd 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -800,8 +800,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 static void cpu_init_hyp_mode(void *dummy)
 {
-	unsigned long long boot_pgd_ptr;
-	unsigned long long pgd_ptr;
+	phys_addr_t boot_pgd_ptr;
+	phys_addr_t pgd_ptr;
 	unsigned long hyp_stack_ptr;
 	unsigned long stack_page;
 	unsigned long vector_ptr;
@@ -809,8 +809,8 @@ static void cpu_init_hyp_mode(void *dummy)
 	/* Switch from the HYP stub to our own HYP init vector */
 	__hyp_set_vectors(kvm_get_idmap_vector());
 
-	boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr();
-	pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
+	boot_pgd_ptr = kvm_mmu_get_boot_httbr();
+	pgd_ptr = kvm_mmu_get_httbr();
 	stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
 	hyp_stack_ptr = stack_page + PAGE_SIZE;
 	vector_ptr = (unsigned long)__kvm_hyp_vector;

From a7232858be9e29371ef8ed2a39658443344b7765 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 May 2013 12:11:38 +0100
Subject: [PATCH 0477/1185] ARM: KVM: don't special case PC when doing an MMIO

Admittedly, reading a MMIO register to load PC is very weird.
Writing PC to a MMIO register is probably even worse. But
the architecture doesn't forbid any of these, and injecting
a Prefetch Abort is the wrong thing to do anyway.

Remove this check altogether, and let the adventurous guest
wander into LaLaLand if they feel compelled to do so.

Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@cs.columbia.edu>
(cherry picked from commit 8734f16fb2aa4ff0bb57ad6532661a38bc8ff957)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_emulate.h | 5 -----
 arch/arm/kvm/mmio.c                | 6 ------
 2 files changed, 11 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 82b4babead2c..a464e8d7b6c5 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -65,11 +65,6 @@ static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
 	return cpsr_mode > USR_MODE;;
 }
 
-static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg)
-{
-	return reg == 15;
-}
-
 static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.fault.hsr;
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 72a12f2171b2..b8e06b7a2833 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -86,12 +86,6 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	sign_extend = kvm_vcpu_dabt_issext(vcpu);
 	rt = kvm_vcpu_dabt_get_rd(vcpu);
 
-	if (kvm_vcpu_reg_is_pc(vcpu, rt)) {
-		/* IO memory trying to read/write pc */
-		kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
-		return 1;
-	}
-
 	mmio->is_write = is_write;
 	mmio->phys_addr = fault_ipa;
 	mmio->len = len;

From 99f2cf12576cffa1a0168b0cc05216de54ca12f4 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 May 2013 12:11:39 +0100
Subject: [PATCH 0478/1185] ARM: KVM: get rid of S2_PGD_SIZE

S2_PGD_SIZE defines the number of pages used by a stage-2 PGD
and is unused, except for a VM_BUG_ON check that misuses the
define.

As the check is very unlikely to ever be triggered except in
circumstances where KVM is the least of our worries, just kill
both the define and the VM_BUG_ON check.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@cs.columbia.edu>
(cherry picked from commit 4db845c3d8e2f8a219e8ac48834dd4fe085e5d63)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_arm.h | 1 -
 arch/arm/kvm/mmu.c             | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 124623e5ef14..64e96960de29 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -135,7 +135,6 @@
 #define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1ULL)
 #define PTRS_PER_S2_PGD	(1ULL << (KVM_PHYS_SHIFT - 30))
 #define S2_PGD_ORDER	get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
-#define S2_PGD_SIZE	(1 << S2_PGD_ORDER)
 
 /* Virtualization Translation Control Register (VTCR) bits */
 #define VTCR_SH0	(3 << 12)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 84ba67b982c0..ca6bea4859b4 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -382,9 +382,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 	if (!pgd)
 		return -ENOMEM;
 
-	/* stage-2 pgd must be aligned to its size */
-	VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
-
 	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;

From d144c1d2cd86381c80187f5ccee5eb27dad6b21b Mon Sep 17 00:00:00 2001
From: Geoff Levand <geoff@infradead.org>
Date: Thu, 6 Jun 2013 18:02:54 -0700
Subject: [PATCH 0479/1185] arm/kvm: Cleanup KVM_ARM_MAX_VCPUS logic

Commit d21a1c83c7595e387545632e44cd7797b76e19cc (ARM: KVM: define KVM_ARM_MAX_VCPUS
unconditionally) changed the Kconfig logic for KVM_ARM_MAX_VCPUS to work around a
build error arising from the use of KVM_ARM_MAX_VCPUS when CONFIG_KVM=n.  The
resulting Kconfig logic is a bit awkward and leaves a KVM_ARM_MAX_VCPUS always
defined in the kernel config file.

This change reverts the Kconfig logic back and adds a simple preprocessor
conditional in kvm_host.h to handle when CONFIG_KVM_ARM_MAX_VCPUS is undefined.

Signed-off-by: Geoff Levand <geoff@infradead.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit f2dda9d829818b055510187059cdfa4ece10c82d)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h | 5 +++++
 arch/arm/kvm/Kconfig            | 6 +++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 1f3cee2e210e..7d22517d8071 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -25,7 +25,12 @@
 #include <asm/fpstate.h>
 #include <kvm/arm_arch_timer.h>
 
+#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
+#else
+#define KVM_MAX_VCPUS 0
+#endif
+
 #define KVM_USER_MEM_SLOTS 32
 #define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 370e1a8af6ac..49dd64e579c2 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -41,9 +41,9 @@ config KVM_ARM_HOST
 	  Provides host support for ARM processors.
 
 config KVM_ARM_MAX_VCPUS
-	int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST
-	default 4 if KVM_ARM_HOST
-	default 0
+	int "Number maximum supported virtual CPUs per VM"
+	depends on KVM_ARM_HOST
+	default 4
 	help
 	  Static number of max supported virtual CPUs per VM.
 

From 8847470177f0d453fe41bc8de72b89a41a264830 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 21 Jun 2013 22:33:22 +0200
Subject: [PATCH 0480/1185] ARM: kvm: don't include drivers/virtio/Kconfig

The virtio configuration has recently moved and is now visible everywhere.
Including the file again from KVM as we used to need earlier now causes
dependency problems:

warning: (CAIF_VIRTIO && VIRTIO_PCI && VIRTIO_MMIO && REMOTEPROC && RPMSG)
selects VIRTIO which has unmet direct dependencies (VIRTUALIZATION)

Cc: Christoffer Dall <cdall@cs.columbia.edu>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 8bd4ffd6b3a98f00267051dc095076ea2ff06ea8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/Kconfig | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 49dd64e579c2..ebf5015508b5 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -67,6 +67,4 @@ config KVM_ARM_TIMER
 	---help---
 	  Adds support for the Architected Timers in virtual machines
 
-source drivers/virtio/Kconfig
-
 endif # VIRTUALIZATION

From 54993349e584212be7d554a2188f6fef3efeb701 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 4 Jul 2013 13:34:32 +0100
Subject: [PATCH 0481/1185] arm64: KVM: Kconfig integration

Finally plug KVM/arm64 into the config system, making it possible
to enable KVM support on AArch64 CPUs.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c3eb5b14449a0949e9764d39374a2ea63faae14f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/Kconfig              |  2 ++
 arch/arm64/kernel/asm-offsets.c |  1 +
 arch/arm64/kvm/Kconfig          | 51 +++++++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+)
 create mode 100644 arch/arm64/kvm/Kconfig

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 43068cf44c2d..de9eeb43f622 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -275,6 +275,8 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
+source "arch/arm64/kvm/Kconfig"
+
 source "arch/arm64/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 49c162c03b69..666e231d410b 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -21,6 +21,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
+#include <linux/kvm_host.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/cputable.h>
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
new file mode 100644
index 000000000000..21e90820bd23
--- /dev/null
+++ b/arch/arm64/kvm/Kconfig
@@ -0,0 +1,51 @@
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+	bool "Virtualization"
+	---help---
+	  Say Y here to get to see options for using your Linux host to run
+	  other operating systems inside virtual machines (guests).
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and
+	  disabled.
+
+if VIRTUALIZATION
+
+config KVM
+	bool "Kernel-based Virtual Machine (KVM) support"
+	select MMU_NOTIFIER
+	select PREEMPT_NOTIFIERS
+	select ANON_INODES
+	select KVM_MMIO
+	select KVM_ARM_HOST
+	select KVM_ARM_VGIC
+	select KVM_ARM_TIMER
+	---help---
+	  Support hosting virtualized guest machines.
+
+	  If unsure, say N.
+
+config KVM_ARM_HOST
+	bool
+	---help---
+	  Provides host support for ARM processors.
+
+config KVM_ARM_VGIC
+	bool
+	depends on KVM_ARM_HOST && OF
+	select HAVE_KVM_IRQCHIP
+	---help---
+	  Adds support for a hardware assisted, in-kernel GIC emulation.
+
+config KVM_ARM_TIMER
+	bool
+	depends on KVM_ARM_VGIC
+	---help---
+	  Adds support for the Architected Timers in virtual machines.
+
+endif # VIRTUALIZATION

From a043deb0317044bdeda876d2170de0b24c2bfba8 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
Date: Thu, 4 Jul 2013 13:40:29 +0900
Subject: [PATCH 0482/1185] KVM: Introduce kvm_arch_memslots_updated()

This is called right after the memslots is updated, i.e. when the result
of update_memslots() gets installed in install_new_memslots().  Since
the memslots needs to be updated twice when we delete or move a memslot,
kvm_arch_commit_memory_region() does not correspond to this exactly.

In the following patch, x86 will use this new API to check if the mmio
generation has reached its maximum value, in which case mmio sptes need
to be flushed out.

Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
Acked-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e59dbe09f8e6fb8f6ee19dc79d1a2f14299e4cd2)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c         | 4 ++++
 arch/ia64/kvm/kvm-ia64.c   | 4 ++++
 arch/mips/kvm/kvm_mips.c   | 4 ++++
 arch/powerpc/kvm/powerpc.c | 4 ++++
 arch/s390/kvm/kvm-s390.c   | 4 ++++
 arch/x86/kvm/x86.c         | 4 ++++
 include/linux/kvm_host.h   | 1 +
 virt/kvm/kvm_main.c        | 5 ++++-
 8 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 741f66a2edbd..9c697db2787e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -219,6 +219,10 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
 				   struct kvm_userspace_memory_region *mem,
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 5b2dc0d10c8f..bdfd8789b376 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1560,6 +1560,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 	return 0;
 }
 
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		struct kvm_memory_slot *memslot,
 		struct kvm_userspace_memory_region *mem,
diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c
index dd203e59e6fd..a7b044536de4 100644
--- a/arch/mips/kvm/kvm_mips.c
+++ b/arch/mips/kvm/kvm_mips.c
@@ -208,6 +208,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 	return 0;
 }
 
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                 struct kvm_memory_slot *memslot,
                                 struct kvm_userspace_memory_region *mem,
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6316ee336e88..ae63ae4a1a5f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -420,6 +420,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 	return kvmppc_core_create_memslot(slot, npages);
 }
 
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
 				   struct kvm_userspace_memory_region *mem,
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 698fb826e149..e515b2d4a947 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -981,6 +981,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 	return 0;
 }
 
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
 /* Section: memory related */
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e8ba99c34180..894003d79d22 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6976,6 +6976,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 	return -ENOMEM;
 }
 
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
 				struct kvm_userspace_memory_region *mem,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8db53cfaccdb..15018c572ac5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -497,6 +497,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont);
 int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
+void kvm_arch_memslots_updated(struct kvm *kvm);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
 				struct kvm_userspace_memory_region *mem,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b547a1ceecbc..7ca9939ee9a7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -731,7 +731,10 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 	update_memslots(slots, new, kvm->memslots->generation);
 	rcu_assign_pointer(kvm->memslots, slots);
 	synchronize_srcu_expedited(&kvm->srcu);
-	return old_memslots; 
+
+	kvm_arch_memslots_updated(kvm);
+
+	return old_memslots;
 }
 
 /*

From 3b180da00d212c6968ab821dcf85f3776929a44d Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 6 Aug 2013 13:50:54 -0700
Subject: [PATCH 0483/1185] ARM: KVM: Fix unaligned unmap_range leak

The unmap_range function did not properly cover the case when the start
address was not aligned to PMD_SIZE or PUD_SIZE and an entire pte table
or pmd table was cleared, causing us to leak memory when incrementing
the addr.

The fix is to always move onto the next page table entry boundary
instead of adding the full size of the VA range covered by the
corresponding table level entry.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit d3840b26614d8ce3db53c98061d9fcb1b9ccb0dd)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index ca6bea4859b4..80a83ec4a9ae 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -132,37 +132,37 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned long long addr = start, end = start + size;
-	u64 range;
+	u64 next;
 
 	while (addr < end) {
 		pgd = pgdp + pgd_index(addr);
 		pud = pud_offset(pgd, addr);
 		if (pud_none(*pud)) {
-			addr += PUD_SIZE;
+			addr = pud_addr_end(addr, end);
 			continue;
 		}
 
 		pmd = pmd_offset(pud, addr);
 		if (pmd_none(*pmd)) {
-			addr += PMD_SIZE;
+			addr = pmd_addr_end(addr, end);
 			continue;
 		}
 
 		pte = pte_offset_kernel(pmd, addr);
 		clear_pte_entry(kvm, pte, addr);
-		range = PAGE_SIZE;
+		next = addr + PAGE_SIZE;
 
 		/* If we emptied the pte, walk back up the ladder */
 		if (pte_empty(pte)) {
 			clear_pmd_entry(kvm, pmd, addr);
-			range = PMD_SIZE;
+			next = pmd_addr_end(addr, end);
 			if (pmd_empty(pmd)) {
 				clear_pud_entry(kvm, pud, addr);
-				range = PUD_SIZE;
+				next = pud_addr_end(addr, end);
 			}
 		}
 
-		addr += range;
+		addr = next;
 	}
 }
 

From ce7fc7403b536adb6b0871f3e4d07a4da08ad63b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 6 Aug 2013 13:05:48 +0100
Subject: [PATCH 0484/1185] arm64: KVM: fix 2-level page tables unmapping

When using 64kB pages, we only have two levels of page tables,
meaning that PGD, PUD and PMD are fused. In this case, trying
to refcount PUDs and PMDs independently is a complete disaster,
as they are the same.

We manage to get it right for the allocation (stage2_set_pte uses
{pmd,pud}_none), but the unmapping path clears both pud and pmd
refcounts, which fails spectacularly with 2-level page tables.

The fix is to avoid calling clear_pud_entry when both the pmd and
pud pages are empty. For this, and instead of introducing another
pud_empty function, consolidate both pte_empty and pmd_empty into
page_empty (the code is actually identical) and use that to also
test the validity of the pud.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 979acd5e18c3e5cb7e3308c699d79553af5af8c6)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 80a83ec4a9ae..0988d9e04dd4 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -85,6 +85,12 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 	return p;
 }
 
+static bool page_empty(void *ptr)
+{
+	struct page *ptr_page = virt_to_page(ptr);
+	return page_count(ptr_page) == 1;
+}
+
 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
 	pmd_t *pmd_table = pmd_offset(pud, 0);
@@ -103,12 +109,6 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 	put_page(virt_to_page(pmd));
 }
 
-static bool pmd_empty(pmd_t *pmd)
-{
-	struct page *pmd_page = virt_to_page(pmd);
-	return page_count(pmd_page) == 1;
-}
-
 static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
 {
 	if (pte_present(*pte)) {
@@ -118,12 +118,6 @@ static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
 	}
 }
 
-static bool pte_empty(pte_t *pte)
-{
-	struct page *pte_page = virt_to_page(pte);
-	return page_count(pte_page) == 1;
-}
-
 static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 			unsigned long long start, u64 size)
 {
@@ -153,10 +147,10 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 		next = addr + PAGE_SIZE;
 
 		/* If we emptied the pte, walk back up the ladder */
-		if (pte_empty(pte)) {
+		if (page_empty(pte)) {
 			clear_pmd_entry(kvm, pmd, addr);
 			next = pmd_addr_end(addr, end);
-			if (pmd_empty(pmd)) {
+			if (page_empty(pmd) && !page_empty(pud)) {
 				clear_pud_entry(kvm, pud, addr);
 				next = pud_addr_end(addr, end);
 			}

From ebd362b7533b7866928c9bca7727c48558aedbd2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 7 Jun 2013 11:02:34 +0100
Subject: [PATCH 0485/1185] arm64: KVM: perform save/restore of PAR_EL1

Not saving PAR_EL1 is an unfortunate oversight. If the guest
performs an AT* operation and gets scheduled out before reading
the result of the translation from PAR_EL1, it could become
corrupted by another guest or the host.

Saving this register is made slightly more complicated as KVM also
uses it on the permission fault handling path, leading to an ugly
"stash and restore" sequence. Fortunately, this is already a slow
path so we don't really care. Also, Linux doesn't do any AT*
operation, so Linux guests are not impacted by this bug.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 1bbd80549810637b7381ab0649ba7c7d62f1342a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h | 17 ++++++++++-------
 arch/arm64/kvm/hyp.S             | 10 ++++++++++
 arch/arm64/kvm/sys_regs.c        |  3 +++
 3 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index c92de4163eba..b25763bc0ec4 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -42,14 +42,15 @@
 #define	TPIDR_EL1	18	/* Thread ID, Privileged */
 #define	AMAIR_EL1	19	/* Aux Memory Attribute Indirection Register */
 #define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
+#define	PAR_EL1		21	/* Physical Address Register */
 /* 32bit specific registers. Keep them at the end of the range */
-#define	DACR32_EL2	21	/* Domain Access Control Register */
-#define	IFSR32_EL2	22	/* Instruction Fault Status Register */
-#define	FPEXC32_EL2	23	/* Floating-Point Exception Control Register */
-#define	DBGVCR32_EL2	24	/* Debug Vector Catch Register */
-#define	TEECR32_EL1	25	/* ThumbEE Configuration Register */
-#define	TEEHBR32_EL1	26	/* ThumbEE Handler Base Register */
-#define	NR_SYS_REGS	27
+#define	DACR32_EL2	22	/* Domain Access Control Register */
+#define	IFSR32_EL2	23	/* Instruction Fault Status Register */
+#define	FPEXC32_EL2	24	/* Floating-Point Exception Control Register */
+#define	DBGVCR32_EL2	25	/* Debug Vector Catch Register */
+#define	TEECR32_EL1	26	/* ThumbEE Configuration Register */
+#define	TEEHBR32_EL1	27	/* ThumbEE Handler Base Register */
+#define	NR_SYS_REGS	28
 
 /* 32bit mapping */
 #define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
@@ -69,6 +70,8 @@
 #define c5_AIFSR	(AFSR1_EL1 * 2)	/* Auxiliary Instr Fault Status R */
 #define c6_DFAR		(FAR_EL1 * 2)	/* Data Fault Address Register */
 #define c6_IFAR		(c6_DFAR + 1)	/* Instruction Fault Address Register */
+#define c7_PAR		(PAR_EL1 * 2)	/* Physical Address Register */
+#define c7_PAR_high	(c7_PAR + 1)	/* PAR top 32 bits */
 #define c10_PRRR	(MAIR_EL1 * 2)	/* Primary Region Remap Register */
 #define c10_NMRR	(c10_PRRR + 1)	/* Normal Memory Remap Register */
 #define c12_VBAR	(VBAR_EL1 * 2)	/* Vector Base Address Register */
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index ff985e3d8b72..218802f68b20 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -214,6 +214,7 @@ __kvm_hyp_code_start:
 	mrs	x21,	tpidr_el1
 	mrs	x22, 	amair_el1
 	mrs	x23, 	cntkctl_el1
+	mrs	x24,	par_el1
 
 	stp	x4, x5, [x3]
 	stp	x6, x7, [x3, #16]
@@ -225,6 +226,7 @@ __kvm_hyp_code_start:
 	stp	x18, x19, [x3, #112]
 	stp	x20, x21, [x3, #128]
 	stp	x22, x23, [x3, #144]
+	str	x24, [x3, #160]
 .endm
 
 .macro restore_sysregs
@@ -243,6 +245,7 @@ __kvm_hyp_code_start:
 	ldp	x18, x19, [x3, #112]
 	ldp	x20, x21, [x3, #128]
 	ldp	x22, x23, [x3, #144]
+	ldr	x24, [x3, #160]
 
 	msr	vmpidr_el2,	x4
 	msr	csselr_el1,	x5
@@ -264,6 +267,7 @@ __kvm_hyp_code_start:
 	msr	tpidr_el1,	x21
 	msr	amair_el1,	x22
 	msr	cntkctl_el1,	x23
+	msr	par_el1,	x24
 .endm
 
 .macro skip_32bit_state tmp, target
@@ -753,6 +757,10 @@ el1_trap:
 	 */
 	tbnz	x1, #7, 1f	// S1PTW is set
 
+	/* Preserve PAR_EL1 */
+	mrs	x3, par_el1
+	push	x3, xzr
+
 	/*
 	 * Permission fault, HPFAR_EL2 is invalid.
 	 * Resolve the IPA the hard way using the guest VA.
@@ -766,6 +774,8 @@ el1_trap:
 
 	/* Read result */
 	mrs	x3, par_el1
+	pop	x0, xzr			// Restore PAR_EL1 from the stack
+	msr	par_el1, x0
 	tbnz	x3, #0, 3f		// Bail out if we failed the translation
 	ubfx	x3, x3, #12, #36	// Extract IPA
 	lsl	x3, x3, #4		// and present it like HPFAR
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 94923609753b..02e9d09e1d80 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -211,6 +211,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	/* FAR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
 	  NULL, reset_unknown, FAR_EL1 },
+	/* PAR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000),
+	  NULL, reset_unknown, PAR_EL1 },
 
 	/* PMINTENSET_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),

From e336bcc2613d7332083819ac8f148313e9471f59 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 11 Jun 2013 18:05:25 +0100
Subject: [PATCH 0486/1185] arm64: KVM: add missing dsb before invalidating
 Stage-2 TLBs

When performing a Stage-2 TLB invalidation, it is necessary to
make sure the write to the page tables is observable by all CPUs.

For this purpose, add dsb instructions to __kvm_tlb_flush_vmid_ipa
and __kvm_flush_vm_context before doing the TLB invalidation itself.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit f142e5eeb724cfbedd203b32b3b542d78dbe2545)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp.S | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 218802f68b20..1ac0bbbdddb2 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -604,6 +604,8 @@ END(__kvm_vcpu_run)
 
 // void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 ENTRY(__kvm_tlb_flush_vmid_ipa)
+	dsb	ishst
+
 	kern_hyp_va	x0
 	ldr	x2, [x0, #KVM_VTTBR]
 	msr	vttbr_el2, x2
@@ -625,6 +627,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
 ENDPROC(__kvm_tlb_flush_vmid_ipa)
 
 ENTRY(__kvm_flush_vm_context)
+	dsb	ishst
 	tlbi	alle1is
 	ic	ialluis
 	dsb	sy

From fbcac5446f1c5be6e54e282e1dbed74c58671a72 Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen@asianux.com>
Date: Mon, 22 Jul 2013 04:40:38 +0100
Subject: [PATCH 0487/1185] arm64: KVM: use 'int' instead of 'u32' for variable
 'target' in kvm_host.h.

'target' is set to '-1' in kvm_arch_vcpu_init(), and
kvm_vcpu_initialized() needs to check whether 'target' is less than zero.

So define 'target' as 'int' instead of 'u32', just like ARM has done.

The related warning:

  arch/arm64/kvm/../../../arch/arm/kvm/arm.c:497:2: warning: comparison of unsigned expression >= 0 is always true [-Wtype-limits]

Signed-off-by: Chen Gang <gang.chen@asianux.com>
[Marc: reformatted the Subject line to fit the series]
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 6c8c0c4dc0e98ee2191211d66e9f876e95787073)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 644d73956864..0859a4ddd1e7 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -129,7 +129,7 @@ struct kvm_vcpu_arch {
 	struct kvm_mmu_memory_cache mmu_page_cache;
 
 	/* Target CPU and feature flags */
-	u32 target;
+	int target;
 	DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
 
 	/* Detect first run of a vcpu */

From 54118be422ac3ff0af613676d47f8d46cc9f8801 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 29 Jul 2013 20:46:04 -0700
Subject: [PATCH 0488/1185] KVM: ARM: Squash len warning

The 'len' variable was declared an unsigned and then checked for less
than 0, which results in warnings on some compilers.  Since len is
assigned an int, make it an int.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 2184a60de26b94bc5a88de3e5a960ef9ff54ba5a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index b8e06b7a2833..0c25d9487d53 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -63,7 +63,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		      struct kvm_exit_mmio *mmio)
 {
-	unsigned long rt, len;
+	unsigned long rt;
+	int len;
 	bool is_write, sign_extend;
 
 	if (kvm_vcpu_dabt_isextabt(vcpu)) {

From d829a739332e33ffc9753c607325f461e9c994d8 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 13 May 2013 12:08:06 +0100
Subject: [PATCH 0489/1185] ARM: kvm: use inner-shareable barriers after TLB
 flushing

When flushing the TLB at PL2 in response to remapping at stage-2 or VMID
rollover, we have a dsb instruction to ensure completion of the command
before continuing.

Since we only care about other processors for TLB invalidation, use the
inner-shareable variant of the dsb instruction instead.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit e3ab547f57bd626201d4b715b696c80ad1ef4ba2)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/init.S       | 2 +-
 arch/arm/kvm/interrupts.S | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index f048338135f7..1b9844d369cc 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -142,7 +142,7 @@ target:	@ We're now in the trampoline code, switch page tables
 
 	@ Invalidate the old TLBs
 	mcr	p15, 4, r0, c8, c7, 0	@ TLBIALLH
-	dsb
+	dsb	ish
 
 	eret
 
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 16cd4ba5d7fd..f85052facffc 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -55,7 +55,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
 	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
 	isb
 	mcr     p15, 0, r0, c8, c3, 0	@ TLBIALLIS (rt ignored)
-	dsb
+	dsb	ish
 	isb
 	mov	r2, #0
 	mov	r3, #0
@@ -79,7 +79,7 @@ ENTRY(__kvm_flush_vm_context)
 	mcr     p15, 4, r0, c8, c3, 4
 	/* Invalidate instruction caches Inner Shareable (ICIALLUIS) */
 	mcr     p15, 0, r0, c7, c1, 0
-	dsb
+	dsb	ish
 	isb				@ Not necessary if followed by eret
 
 	bx	lr

From 678b5999d8ded1bff60ef8e3bfb95c93d63c5ebc Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 6 Aug 2013 05:34:16 +0100
Subject: [PATCH 0490/1185] ARM: 7808/1: KVM: mm: Get rid of L_PTE_USER ref
 from PAGE_S2_DEVICE

The L_PTE_USER actually has nothing to do with stage 2 mappings and the
L_PTE_S2_RDWR value sets the readable bit, which was what L_PTE_USER
was used for before proper handling of stage 2 memory defines.

Changelog:
  [v3]: Drop call to kvm_set_s2pte_writable in mmu.c
  [v2]: Change default mappings to be r/w instead of r/o, as per Marc
     Zyngier's suggestion.

Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
(cherry picked from commit 8947c09d05da9f0436f423518f449beaa5ea1bdc)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/pgtable.h | 2 +-
 arch/arm/kvm/mmu.c             | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 9bcd262a9008..8afc60c55e82 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -97,7 +97,7 @@ extern pgprot_t		pgprot_s2_device;
 #define PAGE_HYP		_MOD_PROT(pgprot_kernel, L_PTE_HYP)
 #define PAGE_HYP_DEVICE		_MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
 #define PAGE_S2			_MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE		_MOD_PROT(pgprot_s2_device, L_PTE_USER | L_PTE_S2_RDONLY)
+#define PAGE_S2_DEVICE		_MOD_PROT(pgprot_s2_device, L_PTE_S2_RDWR)
 
 #define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
 #define __PAGE_SHARED		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 0988d9e04dd4..b0de86b56c13 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -489,7 +489,6 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 
 	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
 		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
-		kvm_set_s2pte_writable(&pte);
 
 		ret = mmu_topup_memory_cache(&cache, 2, 2);
 		if (ret)

From effab3928a14e5574f32801eb665ce7c6a98ef14 Mon Sep 17 00:00:00 2001
From: Yann Droneaud <ydroneaud@opteya.com>
Date: Sat, 24 Aug 2013 22:14:07 +0200
Subject: [PATCH 0491/1185] kvm: use anon_inode_getfd() with O_CLOEXEC flag

KVM uses anon_inode_getfd() to allocate file descriptors as part
of some of its ioctls. But those ioctls are lacking a flag argument
allowing userspace to choose options for the newly opened file descriptor.

In such case it's advised to use O_CLOEXEC by default so that
userspace is allowed to choose, without race, if the file descriptor
is going to be inherited across exec().

This patch sets the O_CLOEXEC flag on all file descriptors created
with anon_inode_getfd() to not leak file descriptors across exec().

Signed-off-by: Yann Droneaud <ydroneaud@opteya.com>
Link: http://lkml.kernel.org/r/cover.1377372576.git.ydroneaud@opteya.com
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 24009b0549de563006705b9af8694fc8fc9a5aa1)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7ca9939ee9a7..b5afee319073 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1896,7 +1896,7 @@ static struct file_operations kvm_vcpu_fops = {
  */
 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 {
-	return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR);
+	return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
 }
 
 /*
@@ -2305,7 +2305,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 		return ret;
 	}
 
-	ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR);
+	ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
 	if (ret < 0) {
 		ops->destroy(dev);
 		return ret;
@@ -2589,7 +2589,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 		return r;
 	}
 #endif
-	r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
+	r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC);
 	if (r < 0)
 		kvm_put_kvm(kvm);
 

From 4ace5f4542de34fb99068fa2285947d6bbfee26a Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 25 Jul 2013 03:04:38 +0200
Subject: [PATCH 0492/1185] kvm: optimize away THP checks in kvm_is_mmio_pfn()

The checks on PG_reserved in the page structure on head and tail pages
aren't necessary because split_huge_page wouldn't transfer the
PG_reserved bit from head to tail anyway.

This was a forward-thinking check done in the case PageReserved was
set by a driver-owned page mapped in userland with something like
remap_pfn_range in a VM_PFNMAP region, but using hugepmds (not
possible right now). It was meant to be very safe, but it's overkill
as it's unlikely split_huge_page could ever run without the driver
noticing and tearing down the hugepage itself.

And if a driver in the future really wants to map a reserved
hugepage in userland using a huge pmd, it should simply take care of
marking all subpages reserved too to keep KVM safe. This of course
would require such a hypothetical driver to tear down the huge pmd
itself and split the hugepage itself, instead of relying on
split_huge_page, but that sounds very reasonable, especially
considering split_huge_page wouldn't currently transfer the reserved
bit anyway.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 11feeb498086a3a5907b8148bdf1786a9b18fc55)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 24 ++----------------------
 1 file changed, 2 insertions(+), 22 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b5afee319073..6c9130bd16c8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -102,28 +102,8 @@ static bool largepages_enabled = true;
 
 bool kvm_is_mmio_pfn(pfn_t pfn)
 {
-	if (pfn_valid(pfn)) {
-		int reserved;
-		struct page *tail = pfn_to_page(pfn);
-		struct page *head = compound_trans_head(tail);
-		reserved = PageReserved(head);
-		if (head != tail) {
-			/*
-			 * "head" is not a dangling pointer
-			 * (compound_trans_head takes care of that)
-			 * but the hugepage may have been splitted
-			 * from under us (and we may not hold a
-			 * reference count on the head page so it can
-			 * be reused before we run PageReferenced), so
-			 * we've to check PageTail before returning
-			 * what we just read.
-			 */
-			smp_rmb();
-			if (PageTail(tail))
-				return reserved;
-		}
-		return PageReserved(tail);
-	}
+	if (pfn_valid(pfn))
+		return PageReserved(pfn_to_page(pfn));
 
 	return true;
 }

From 1752d0c7e51f1a5e8d1f021d6b4eef1845c6da06 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 29 Aug 2013 11:08:22 +0100
Subject: [PATCH 0493/1185] ARM: KVM: vgic: simplify vgic_get_target_reg

vgic_get_target_reg is quite complicated, for no good reason.
Actually, it is fairly easy to write it in a much more efficient
way by using the target CPU array instead of the bitmap.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 986af8e0789a41ac4844e6eefed4a33e86524918)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 17c5ac7d10ed..a2d478aec046 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -432,19 +432,13 @@ static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
 static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
-	struct kvm_vcpu *vcpu;
-	int i, c;
-	unsigned long *bmap;
+	int i;
 	u32 val = 0;
 
 	irq -= VGIC_NR_PRIVATE_IRQS;
 
-	kvm_for_each_vcpu(c, vcpu, kvm) {
-		bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
-		for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
-			if (test_bit(irq + i, bmap))
-				val |= 1 << (c + i * 8);
-	}
+	for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
+		val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8);
 
 	return val;
 }

From 635c887f2931a200a52694170122e2b62d1b6218 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 29 Aug 2013 11:08:23 +0100
Subject: [PATCH 0494/1185] ARM: KVM: vgic: fix GICD_ICFGRn access

All the code in handle_mmio_cfg_reg() assumes the offset has
been shifted right to accommodate the 2:1 bit compression,
but this is only done when getting the register address.

Shift the offset early so the code works mostly unchanged.

Reported-by: Zhaobo (Bob, ERC) <zhaobo@huawei.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 6545eae3d7a1b6dc2edb8ede9107998aee1207ef)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index a2d478aec046..902789ff4abb 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -541,8 +541,12 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
 				struct kvm_exit_mmio *mmio, phys_addr_t offset)
 {
 	u32 val;
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
-				       vcpu->vcpu_id, offset >> 1);
+	u32 *reg;
+
+	offset >>= 1;
+	reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
+				  vcpu->vcpu_id, offset);
+
 	if (offset & 2)
 		val = *reg >> 16;
 	else

From a144ec826c81facd70fd157aef6f7b7030687f68 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 29 Aug 2013 11:08:24 +0100
Subject: [PATCH 0495/1185] ARM: KVM: Bugfix: vgic_bytemap_get_reg per cpu regs

For bytemaps each IRQ field is 1 byte wide, so we pack 4 irq fields in
one word and since there are 32 private (per cpu) irqs, we have 8
private u32 fields on the vgic_bytemap struct.  We shift the offset from
the base of the register group right by 2, giving us the word index
instead of the field index.  But then there are 8 private words, not 4,
which is also why we subtract 8 words from the offset of the shared
words.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 8d98915b6bda499e47d19166101d0bbcfd409c80)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 902789ff4abb..685fc72fc751 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -149,7 +149,7 @@ static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
 {
 	offset >>= 2;
 	BUG_ON(offset > (VGIC_NR_IRQS / 4));
-	if (offset < 4)
+	if (offset < 8)
 		return x->percpu[cpuid] + offset;
 	else
 		return x->shared + offset - 8;

From de5324b84480246ad6064cd3469d362995c2e929 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 29 Aug 2013 11:08:25 +0100
Subject: [PATCH 0496/1185] ARM: KVM: vgic: Bump VGIC_NR_IRQS to 256

The Versatile Express TC2 board, which we use as our main emulated
platform in QEMU, defines 160+32 == 192 interrupts, so limiting the
number of interrupts to 128 is not quite going to cut it for real board
emulation.

Note that this didn't use to be a problem because QEMU was buggy and
only defined 128 interrupts until recently.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 9b2d2e0df8a49414b1e5bc89148c9984dd87782a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 343744e4809c..7e2d15837b02 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -26,7 +26,7 @@
 #include <linux/types.h>
 #include <linux/irqchip/arm-gic.h>
 
-#define VGIC_NR_IRQS		128
+#define VGIC_NR_IRQS		256
 #define VGIC_NR_SGIS		16
 #define VGIC_NR_PPIS		16
 #define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)

From 07557caabbdc70c2644e6f195fd07ca8be7d16b2 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 8 Aug 2013 20:35:07 -0700
Subject: [PATCH 0497/1185] ARM: KVM: Fix kvm_set_pte assignment

The kvm_set_pte function was actually assigning the entire struct to the
structure member, which should work because the structure only has that
one member, but it is still not very nice.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 0963e5d0f22f9d197dbf206d8b5b2a150722cf5e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 472ac7091003..9b28c41f4ba9 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -64,7 +64,7 @@ void kvm_clear_hyp_idmap(void);
 
 static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
 {
-	pte_val(*pte) = new_pte;
+	*pte = new_pte;
 	/*
 	 * flush_pmd_entry just takes a void pointer and cleans the necessary
 	 * cache entries, so we can reuse the function for ptes.

From 14cffe44b8aad9a839fcbcf35f9d2bbd90f572c0 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 8 Aug 2013 20:34:22 -0700
Subject: [PATCH 0498/1185] ARM: KVM: Simplify tracepoint text

The tracepoint for kvm_guest_fault was extremely long; make it
slightly shorter.

Cc: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 6e72cc5700fe6b8776d537b736dab64b21ae0f1f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/trace.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index a8e73ed5ad5b..b1d640f78623 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -59,10 +59,9 @@ TRACE_EVENT(kvm_guest_fault,
 		__entry->ipa			= ipa;
 	),
 
-	TP_printk("guest fault at PC %#08lx (hxfar %#08lx, "
-		  "ipa %#16llx, hsr %#08lx",
-		  __entry->vcpu_pc, __entry->hxfar,
-		  __entry->ipa, __entry->hsr)
+	TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx",
+		  __entry->ipa, __entry->hsr,
+		  __entry->hxfar, __entry->vcpu_pc)
 );
 
 TRACE_EVENT(kvm_irq_line,

From 411b0c990125d54d48314d8ecd6e800bc9660509 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 19 Aug 2013 14:16:57 -0700
Subject: [PATCH 0499/1185] ARM: KVM: Work around older compiler bug

GCC versions before 4.6 do not behave well with unnamed fields in structure
initializers and therefore produce build errors:
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10676

By referring to the unnamed union using braces, both older and newer
compilers produce the same result.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reported-by: Russell King <linux@arm.linux.org.uk>
Tested-by: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 6833d83891140aedab7841589b7c7dbd7b600235)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/reset.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index b7840e7aa452..71e08baee209 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -40,7 +40,7 @@ static struct kvm_regs a15_regs_reset = {
 };
 
 static const struct kvm_irq_level a15_vtimer_irq = {
-	.irq = 27,
+	{ .irq = 27 },
 	.level = 1,
 };
 

From 215ed7558daf77d296ee388f64df34b0d479ea07 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Wed, 14 Aug 2013 12:33:48 -0700
Subject: [PATCH 0500/1185] ARM: KVM: Add newlines to panic strings

The panic strings are hard to read and on narrow terminals some
characters are simply truncated off the panic message.

Make it slightly prettier with a newline in the Hyp panic strings.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 1fe40f6d39d23f39e643607a3e1883bfc74f1244)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/interrupts.S | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index f85052facffc..ddc15539bad2 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -492,10 +492,10 @@ __kvm_hyp_code_end:
 	.section ".rodata"
 
 und_die_str:
-	.ascii	"unexpected undefined exception in Hyp mode at: %#08x"
+	.ascii	"unexpected undefined exception in Hyp mode at: %#08x\n"
 pabt_die_str:
-	.ascii	"unexpected prefetch abort in Hyp mode at: %#08x"
+	.ascii	"unexpected prefetch abort in Hyp mode at: %#08x\n"
 dabt_die_str:
-	.ascii	"unexpected data abort in Hyp mode at: %#08x"
+	.ascii	"unexpected data abort in Hyp mode at: %#08x\n"
 svc_die_str:
-	.ascii	"unexpected HVC/SVC trap in Hyp mode at: %#08x"
+	.ascii	"unexpected HVC/SVC trap in Hyp mode at: %#08x\n"

From 02ef4f0c0d0930d9d44e9929caaee12043447010 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 9 Sep 2013 13:52:33 +0200
Subject: [PATCH 0501/1185] KVM: mmu: allow page tables to be in read-only
 slots

Page tables in a read-only memory slot will currently cause a triple
fault because the page walker uses gfn_to_hva and it fails on such a slot.

OVMF uses such a page table; however, real hardware seems to be fine with
that as long as the accessed/dirty bits are set.  Save whether the slot
is readonly, and later check it when updating the accessed and dirty bits.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ba6a3541545542721ce821d1e7e5ce35752e6fdf)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/x86/kvm/paging_tmpl.h | 20 +++++++++++++++++++-
 include/linux/kvm_host.h   |  1 +
 virt/kvm/kvm_main.c        | 14 +++++++++-----
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index da20860b457a..e1af2394a23f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -69,6 +69,7 @@ struct guest_walker {
 	pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
 	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
 	pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
+	bool pte_writable[PT_MAX_FULL_LEVELS];
 	unsigned pt_access;
 	unsigned pte_access;
 	gfn_t gfn;
@@ -130,6 +131,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
 		if (pte == orig_pte)
 			continue;
 
+		/*
+		 * If the slot is read-only, simply do not process the accessed
+		 * and dirty bits.  This is the correct thing to do if the slot
+		 * is ROM, and page tables in read-as-ROM/write-as-MMIO slots
+		 * are only supported if the accessed and dirty bits are already
+		 * set in the ROM (so that MMIO writes are never needed).
+		 *
+		 * Note that NPT does not allow this at all and faults, since
+		 * it always wants nested page table entries for the guest
+		 * page tables to be writable.  And EPT works but will simply
+		 * overwrite the read-only memory to set the accessed and dirty
+		 * bits.
+		 */
+		if (unlikely(!walker->pte_writable[level - 1]))
+			continue;
+
 		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
 		if (ret)
 			return ret;
@@ -204,7 +221,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 			goto error;
 		real_gfn = gpa_to_gfn(real_gfn);
 
-		host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
+		host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,
+					    &walker->pte_writable[walker->level - 1]);
 		if (unlikely(kvm_is_error_hva(host_addr)))
 			goto error;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 15018c572ac5..e2befc2b1647 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -519,6 +519,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
 
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6c9130bd16c8..fcbcf5312f93 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1058,11 +1058,15 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
 /*
- * The hva returned by this function is only allowed to be read.
- * It should pair with kvm_read_hva() or kvm_read_hva_atomic().
+ * If writable is set to false, the hva returned by this function is only
+ * allowed to be read.
  */
-static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn)
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
 {
+	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+	if (writable)
+		*writable = !memslot_is_readonly(slot);
+
 	return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
 }
 
@@ -1430,7 +1434,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 	int r;
 	unsigned long addr;
 
-	addr = gfn_to_hva_read(kvm, gfn);
+	addr = gfn_to_hva_prot(kvm, gfn, NULL);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
 	r = kvm_read_hva(data, (void __user *)addr + offset, len);
@@ -1468,7 +1472,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	int offset = offset_in_page(gpa);
 
-	addr = gfn_to_hva_read(kvm, gfn);
+	addr = gfn_to_hva_prot(kvm, gfn, NULL);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
 	pagefault_disable();

From ed363014a225b257a32ad69b1ef0da615aecb50a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Wed, 4 Sep 2013 22:32:23 +0200
Subject: [PATCH 0502/1185] kvm: free resources after canceling async_pf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When we cancel 'async_pf_execute()', we should behave as if the work was
never scheduled in 'kvm_setup_async_pf()'.
Fixes a bug when we can't unload module because the vm wasn't destroyed.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 28b441e24088081c1e213139d1303b451a34a4f4)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/async_pf.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index ea475cd03511..8a39dda7a325 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -101,8 +101,11 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 				   typeof(*work), queue);
 		cancel_work_sync(&work->work);
 		list_del(&work->queue);
-		if (!work->done) /* work was canceled */
+		if (!work->done) { /* work was canceled */
+			mmdrop(work->mm);
+			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
 			kmem_cache_free(async_pf_cache, work);
+		}
 	}
 
 	spin_lock(&vcpu->async_pf.lock);

From 78169fda11a0b68310885d8991399bc3a51e5b2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Wed, 4 Sep 2013 22:32:24 +0200
Subject: [PATCH 0503/1185] kvm: remove .done from struct kvm_async_pf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

'.done' is used to mark the completion of 'async_pf_execute()', but
'cancel_work_sync()' returns true when the work was canceled, so we
use it instead.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 98fda169290b3b28c0f2db2b8f02290c13da50ef)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h | 1 -
 virt/kvm/async_pf.c      | 5 +----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e2befc2b1647..dbbd78215204 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -176,7 +176,6 @@ struct kvm_async_pf {
 	unsigned long addr;
 	struct kvm_arch_async_pf arch;
 	struct page *page;
-	bool done;
 };
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 8a39dda7a325..b197950ac4d5 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -75,7 +75,6 @@ static void async_pf_execute(struct work_struct *work)
 	spin_lock(&vcpu->async_pf.lock);
 	list_add_tail(&apf->link, &vcpu->async_pf.done);
 	apf->page = page;
-	apf->done = true;
 	spin_unlock(&vcpu->async_pf.lock);
 
 	/*
@@ -99,9 +98,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 		struct kvm_async_pf *work =
 			list_entry(vcpu->async_pf.queue.next,
 				   typeof(*work), queue);
-		cancel_work_sync(&work->work);
 		list_del(&work->queue);
-		if (!work->done) { /* work was canceled */
+		if (cancel_work_sync(&work->work)) {
 			mmdrop(work->mm);
 			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
 			kmem_cache_free(async_pf_cache, work);
@@ -166,7 +164,6 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 		return 0;
 
 	work->page = NULL;
-	work->done = false;
 	work->vcpu = vcpu;
 	work->gva = gva;
 	work->addr = gfn_to_hva(vcpu->kvm, gfn);

From c3832c083abf356e0df1b581df22ee8a21034841 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Wed, 11 Sep 2013 15:27:41 -0700
Subject: [PATCH 0504/1185] ARM: kvm: rename cpu_reset to avoid name clash

cpu_reset is already #defined in <asm/proc-fns.h> as processor.reset,
so it expands here and causes problems.

Cc: <stable@vger.kernel.org>
Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit ac570e0493815e0b41681c89cb50d66421429d27)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/reset.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index 71e08baee209..c02ba4af599f 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -58,14 +58,14 @@ static const struct kvm_irq_level a15_vtimer_irq = {
  */
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 {
-	struct kvm_regs *cpu_reset;
+	struct kvm_regs *reset_regs;
 	const struct kvm_irq_level *cpu_vtimer_irq;
 
 	switch (vcpu->arch.target) {
 	case KVM_ARM_TARGET_CORTEX_A15:
 		if (vcpu->vcpu_id > a15_max_cpu_idx)
 			return -EINVAL;
-		cpu_reset = &a15_regs_reset;
+		reset_regs = &a15_regs_reset;
 		vcpu->arch.midr = read_cpuid_id();
 		cpu_vtimer_irq = &a15_vtimer_irq;
 		break;
@@ -74,7 +74,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	}
 
 	/* Reset core registers */
-	memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs));
+	memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs));
 
 	/* Reset CP15 registers */
 	kvm_reset_coprocs(vcpu);

From ae0e4b34f8d96028faefddb83994cfd2c8cb8681 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 10 Sep 2013 12:57:17 +0200
Subject: [PATCH 0505/1185] KVM: cleanup (physical) CPU hotplug

Remove the useless argument, and do not do anything if there are no
VMs running at the time of the hotplug.

Cc: kvm@vger.kernel.org
Cc: gleb@redhat.com
Cc: jan.kiszka@siemens.com
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 4fa92fb25ae5a2d79d872ab54df511c831b1f363)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fcbcf5312f93..7103b989ba54 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2681,10 +2681,11 @@ static void hardware_enable_nolock(void *junk)
 	}
 }
 
-static void hardware_enable(void *junk)
+static void hardware_enable(void)
 {
 	raw_spin_lock(&kvm_lock);
-	hardware_enable_nolock(junk);
+	if (kvm_usage_count)
+		hardware_enable_nolock(NULL);
 	raw_spin_unlock(&kvm_lock);
 }
 
@@ -2698,10 +2699,11 @@ static void hardware_disable_nolock(void *junk)
 	kvm_arch_hardware_disable(NULL);
 }
 
-static void hardware_disable(void *junk)
+static void hardware_disable(void)
 {
 	raw_spin_lock(&kvm_lock);
-	hardware_disable_nolock(junk);
+	if (kvm_usage_count)
+		hardware_disable_nolock(NULL);
 	raw_spin_unlock(&kvm_lock);
 }
 
@@ -2748,20 +2750,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 {
 	int cpu = (long)v;
 
-	if (!kvm_usage_count)
-		return NOTIFY_OK;
-
 	val &= ~CPU_TASKS_FROZEN;
 	switch (val) {
 	case CPU_DYING:
 		printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
 		       cpu);
-		hardware_disable(NULL);
+		hardware_disable();
 		break;
 	case CPU_STARTING:
 		printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
 		       cpu);
-		hardware_enable(NULL);
+		hardware_enable();
 		break;
 	}
 	return NOTIFY_OK;

From 753f251708b5322430cf4902264d432a5daf06e4 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 10 Sep 2013 12:58:35 +0200
Subject: [PATCH 0506/1185] KVM: protect kvm_usage_count with its own spinlock

The VM list need not be protected by a raw spinlock.  Separate the
two so that kvm_lock can be made non-raw.

Cc: kvm@vger.kernel.org
Cc: gleb@redhat.com
Cc: jan.kiszka@siemens.com
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 4a937f96f3a29c58b7edd349d2e4dfac371efdf2)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/locking.txt |  6 +++++-
 virt/kvm/kvm_main.c                   | 19 ++++++++++---------
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index 41b7ac9884b5..b1f5de22d090 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -135,7 +135,11 @@ Name:		kvm_lock
 Type:		raw_spinlock
 Arch:		any
 Protects:	- vm_list
-		- hardware virtualization enable/disable
+
+Name:		kvm_count_lock
+Type:		raw_spinlock_t
+Arch:		any
+Protects:	- hardware virtualization enable/disable
 Comment:	'raw' because hardware enabling/disabling must be atomic /wrt
 		migration.
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7103b989ba54..6bc0481bf8e5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -71,6 +71,7 @@ MODULE_LICENSE("GPL");
  */
 
 DEFINE_RAW_SPINLOCK(kvm_lock);
+static DEFINE_RAW_SPINLOCK(kvm_count_lock);
 LIST_HEAD(vm_list);
 
 static cpumask_var_t cpus_hardware_enabled;
@@ -2683,10 +2684,10 @@ static void hardware_enable_nolock(void *junk)
 
 static void hardware_enable(void)
 {
-	raw_spin_lock(&kvm_lock);
+	raw_spin_lock(&kvm_count_lock);
 	if (kvm_usage_count)
 		hardware_enable_nolock(NULL);
-	raw_spin_unlock(&kvm_lock);
+	raw_spin_unlock(&kvm_count_lock);
 }
 
 static void hardware_disable_nolock(void *junk)
@@ -2701,10 +2702,10 @@ static void hardware_disable_nolock(void *junk)
 
 static void hardware_disable(void)
 {
-	raw_spin_lock(&kvm_lock);
+	raw_spin_lock(&kvm_count_lock);
 	if (kvm_usage_count)
 		hardware_disable_nolock(NULL);
-	raw_spin_unlock(&kvm_lock);
+	raw_spin_unlock(&kvm_count_lock);
 }
 
 static void hardware_disable_all_nolock(void)
@@ -2718,16 +2719,16 @@ static void hardware_disable_all_nolock(void)
 
 static void hardware_disable_all(void)
 {
-	raw_spin_lock(&kvm_lock);
+	raw_spin_lock(&kvm_count_lock);
 	hardware_disable_all_nolock();
-	raw_spin_unlock(&kvm_lock);
+	raw_spin_unlock(&kvm_count_lock);
 }
 
 static int hardware_enable_all(void)
 {
 	int r = 0;
 
-	raw_spin_lock(&kvm_lock);
+	raw_spin_lock(&kvm_count_lock);
 
 	kvm_usage_count++;
 	if (kvm_usage_count == 1) {
@@ -2740,7 +2741,7 @@ static int hardware_enable_all(void)
 		}
 	}
 
-	raw_spin_unlock(&kvm_lock);
+	raw_spin_unlock(&kvm_count_lock);
 
 	return r;
 }
@@ -3050,7 +3051,7 @@ static int kvm_suspend(void)
 static void kvm_resume(void)
 {
 	if (kvm_usage_count) {
-		WARN_ON(raw_spin_is_locked(&kvm_lock));
+		WARN_ON(raw_spin_is_locked(&kvm_count_lock));
 		hardware_enable_nolock(NULL);
 	}
 }

From 9c904866d4424d18852872878a57a899e48f3838 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 25 Sep 2013 13:53:07 +0200
Subject: [PATCH 0507/1185] KVM: Convert kvm_lock back to non-raw spinlock

In commit e935b8372cf8 ("KVM: Convert kvm_lock to raw_spinlock"),
the kvm_lock was made a raw lock.  However, the kvm mmu_shrink()
function tries to grab the (non-raw) mmu_lock within the scope of
the raw locked kvm_lock being held.  This leads to the following:

BUG: sleeping function called from invalid context at kernel/rtmutex.c:659
in_atomic(): 1, irqs_disabled(): 0, pid: 55, name: kswapd0
Preemption disabled at:[<ffffffffa0376eac>] mmu_shrink+0x5c/0x1b0 [kvm]

Pid: 55, comm: kswapd0 Not tainted 3.4.34_preempt-rt
Call Trace:
 [<ffffffff8106f2ad>] __might_sleep+0xfd/0x160
 [<ffffffff817d8d64>] rt_spin_lock+0x24/0x50
 [<ffffffffa0376f3c>] mmu_shrink+0xec/0x1b0 [kvm]
 [<ffffffff8111455d>] shrink_slab+0x17d/0x3a0
 [<ffffffff81151f00>] ? mem_cgroup_iter+0x130/0x260
 [<ffffffff8111824a>] balance_pgdat+0x54a/0x730
 [<ffffffff8111fe47>] ? set_pgdat_percpu_threshold+0xa7/0xd0
 [<ffffffff811185bf>] kswapd+0x18f/0x490
 [<ffffffff81070961>] ? get_parent_ip+0x11/0x50
 [<ffffffff81061970>] ? __init_waitqueue_head+0x50/0x50
 [<ffffffff81118430>] ? balance_pgdat+0x730/0x730
 [<ffffffff81060d2b>] kthread+0xdb/0xe0
 [<ffffffff8106e122>] ? finish_task_switch+0x52/0x100
 [<ffffffff817e1e94>] kernel_thread_helper+0x4/0x10
 [<ffffffff81060c50>] ? __init_kthread_worker+0x

After the previous patch, kvm_lock need not be a raw spinlock anymore,
so change it back.

Reported-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: kvm@vger.kernel.org
Cc: gleb@redhat.com
Cc: jan.kiszka@siemens.com
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 2f303b74a62fb74983c0a66e2df353be963c527c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/locking.txt |  2 +-
 arch/x86/kvm/mmu.c                    |  4 ++--
 arch/x86/kvm/x86.c                    |  8 ++++----
 include/linux/kvm_host.h              |  2 +-
 virt/kvm/kvm_main.c                   | 18 +++++++++---------
 5 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index b1f5de22d090..ba035c33d01c 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -132,7 +132,7 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update().
 ------------
 
 Name:		kvm_lock
-Type:		raw_spinlock
+Type:		spinlock_t
 Arch:		any
 Protects:	- vm_list
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 004cc87b781c..3c1877bbfe6a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4220,7 +4220,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
 	if (nr_to_scan == 0)
 		goto out;
 
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		int idx;
@@ -4256,7 +4256,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
 		break;
 	}
 
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 
 out:
 	return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 894003d79d22..96765c116c26 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5104,7 +5104,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 
 	smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
 
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			if (vcpu->cpu != freq->cpu)
@@ -5114,7 +5114,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 				send_ipi = 1;
 		}
 	}
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 
 	if (freq->old < freq->new && send_ipi) {
 		/*
@@ -5261,12 +5261,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
 	struct kvm_vcpu *vcpu;
 	int i;
 
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
 		kvm_for_each_vcpu(i, vcpu, kvm)
 			set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
 	atomic_set(&kvm_guest_has_master_clock, 0);
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 }
 
 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index dbbd78215204..97e39fc02020 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -133,7 +133,7 @@ struct kvm;
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
-extern raw_spinlock_t kvm_lock;
+extern spinlock_t kvm_lock;
 extern struct list_head vm_list;
 
 struct kvm_io_range {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6bc0481bf8e5..8b47fd241a61 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -70,7 +70,7 @@ MODULE_LICENSE("GPL");
  * 		kvm->lock --> kvm->slots_lock --> kvm->irq_lock
  */
 
-DEFINE_RAW_SPINLOCK(kvm_lock);
+DEFINE_SPINLOCK(kvm_lock);
 static DEFINE_RAW_SPINLOCK(kvm_count_lock);
 LIST_HEAD(vm_list);
 
@@ -491,9 +491,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	if (r)
 		goto out_err;
 
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 
 	return kvm;
 
@@ -582,9 +582,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	struct mm_struct *mm = kvm->mm;
 
 	kvm_arch_sync_events(kvm);
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_del(&kvm->vm_list);
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kvm_io_bus_destroy(kvm->buses[i]);
@@ -2974,10 +2974,10 @@ static int vm_stat_get(void *_offset, u64 *val)
 	struct kvm *kvm;
 
 	*val = 0;
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
 		*val += *(u32 *)((void *)kvm + offset);
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 	return 0;
 }
 
@@ -2991,12 +2991,12 @@ static int vcpu_stat_get(void *_offset, u64 *val)
 	int i;
 
 	*val = 0;
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
 		kvm_for_each_vcpu(i, vcpu, kvm)
 			*val += *(u32 *)((void *)vcpu + offset);
 
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 	return 0;
 }
 

From 5c1d6aafed853c88a645ac52134e217389c2cc8a Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Wed, 11 Sep 2013 18:34:22 +0530
Subject: [PATCH 0508/1185] KVM: ARM: Fix typo in comments of inject_abt()

Fix a very minor typo in the comments of inject_abt(), where we update the
fault status register when injecting a prefetch abort.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit b373e492f3a3469c615c2ae218d2f723900bf981)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/emulate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index bdede9e7da51..d6c005283678 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -354,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
 	*vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
 
 	if (is_pabt) {
-		/* Set DFAR and DFSR */
+		/* Set IFAR and IFSR */
 		vcpu->arch.cp15[c6_IFAR] = addr;
 		is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
 		/* Always give debug fault for now - should give guest a clue */

From f166457e087b577af2ee72ec843d43ef6653e0f8 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Mon, 30 Sep 2013 14:20:05 +0530
Subject: [PATCH 0509/1185] ARM: KVM: Implement kvm_vcpu_preferred_target()
 function

This patch implements the kvm_vcpu_preferred_target() function for
KVM ARM, which will help us implement the KVM_ARM_PREFERRED_TARGET ioctl
for user space.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 4a6fee805d5e278e4733bf933cb5b184b7a8be1f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h |  1 +
 arch/arm/kvm/guest.c            | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 7d22517d8071..76f3c1978442 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -154,6 +154,7 @@ struct kvm_vcpu_stat {
 struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 			const struct kvm_vcpu_init *init);
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 struct kvm_one_reg;
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 152d03612181..ec98209fda71 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -222,6 +222,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 	return kvm_reset_vcpu(vcpu);
 }
 
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+	int target = kvm_target_cpu();
+
+	if (target < 0)
+		return -ENODEV;
+
+	memset(init, 0, sizeof(*init));
+
+	/*
+	 * For now, we don't return any features.
+	 * In future, we might use features to return target
+	 * specific features available for the preferred
+	 * target type.
+	 */
+	init->target = (__u32)target;
+
+	return 0;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	return -EINVAL;

From 9778336fe11f088c3ddbd12714ab88f5760a1de4 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Mon, 30 Sep 2013 14:20:06 +0530
Subject: [PATCH 0510/1185] ARM64: KVM: Implement kvm_vcpu_preferred_target()
 function

This patch implements the kvm_vcpu_preferred_target() function for
KVM ARM64, which will help us implement the KVM_ARM_PREFERRED_TARGET
ioctl for user space.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 473bdc0e6565ebb22455657a40daa21b6b4ee16b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h |  1 +
 arch/arm64/kvm/guest.c            | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0859a4ddd1e7..4cc8c7078f39 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -151,6 +151,7 @@ struct kvm_vcpu_stat {
 struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 			const struct kvm_vcpu_init *init);
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 struct kvm_one_reg;
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 2c3ff67a8ecb..3f0731e53274 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -248,6 +248,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 	return kvm_reset_vcpu(vcpu);
 }
 
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+	int target = kvm_target_cpu();
+
+	if (target < 0)
+		return -ENODEV;
+
+	memset(init, 0, sizeof(*init));
+
+	/*
+	 * For now, we don't return any features.
+	 * In future, we might use features to return target
+	 * specific features available for the preferred
+	 * target type.
+	 */
+	init->target = (__u32)target;
+
+	return 0;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	return -EINVAL;

From df50e5da2c1c65f1db1e9186112b57a8dd0b41ae Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Mon, 30 Sep 2013 14:20:07 +0530
Subject: [PATCH 0511/1185] ARM/ARM64: KVM: Implement KVM_ARM_PREFERRED_TARGET
 ioctl

To implement CPU=host, we need a mechanism for querying the
preferred VCPU target type on the underlying host.

This patch implements the KVM_ARM_PREFERRED_TARGET vm ioctl, which
returns a struct kvm_vcpu_init instance containing information
about the preferred VCPU target type and the target-specific features
available for it.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 42c4e0c77ac91505ab94284b14025e3a0865c0a5)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c       | 13 +++++++++++++
 include/uapi/linux/kvm.h |  1 +
 2 files changed, 14 insertions(+)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9c697db2787e..cc5adb9349ef 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -797,6 +797,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			return -EFAULT;
 		return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
 	}
+	case KVM_ARM_PREFERRED_TARGET: {
+		int err;
+		struct kvm_vcpu_init init;
+
+		err = kvm_vcpu_preferred_target(&init);
+		if (err)
+			return err;
+
+		if (copy_to_user(argp, &init, sizeof(init)))
+			return -EFAULT;
+
+		return 0;
+	}
 	default:
 		return -EINVAL;
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index acccd08be6c7..cba62019348d 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1011,6 +1011,7 @@ struct kvm_s390_ucas_mapping {
 /* VM is being stopped by host */
 #define KVM_KVMCLOCK_CTRL	  _IO(KVMIO,   0xad)
 #define KVM_ARM_VCPU_INIT	  _IOW(KVMIO,  0xae, struct kvm_vcpu_init)
+#define KVM_ARM_PREFERRED_TARGET  _IOR(KVMIO,  0xaf, struct kvm_vcpu_init)
 #define KVM_GET_REG_LIST	  _IOWR(KVMIO, 0xb0, struct kvm_reg_list)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)

From 89c774beb24924963aa146784a149e47efb29d82 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 1 Oct 2013 19:58:36 +0300
Subject: [PATCH 0512/1185] Fix NULL dereference in gfn_to_hva_prot()

gfn_to_memslot() can return NULL or an invalid slot. We need to check the
slot's validity before accessing it.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit a2ac07fe292ea41296049dfdbfeed203e2467ee7)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8b47fd241a61..c5bc5aef11f5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1065,10 +1065,12 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
 unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
 {
 	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
-	if (writable)
+	unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
+
+	if (!kvm_is_error_hva(hva) && writable)
 		*writable = !memslot_is_readonly(slot);
 
-	return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
+	return hva;
 }
 
 static int kvm_read_hva(void *data, void __user *hva, int len)

From 708854902d50f40783a81a629c4131383249236d Mon Sep 17 00:00:00 2001
From: Andre Richter <andre.o.richter@gmail.com>
Date: Wed, 2 Oct 2013 12:23:26 +0200
Subject: [PATCH 0513/1185] virt/kvm/iommu.c: Add leading zeros to device's BDF
 notation in debug messages

When KVM (de)assigns PCI(e) devices to VMs, a debug message is printed
including the BDF notation of the respective device. Currently, the BDF
notation does not have the commonly used leading zeros. This produces
messages like "assign device 0:1:8.0", which look strange at first sight.

The patch fixes this by exchanging the printk(KERN_DEBUG ...) with dev_info()
and also inserts "kvm" into the debug message, so that it is obvious where
the message comes from. Also reduces LoC.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Andre Richter <andre.o.richter@gmail.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 29242cb5c63b1f8e12e8055ba1a6c3e0004fa86d)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/iommu.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 72a130bc448a..a3b14109049b 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -190,11 +190,7 @@ int kvm_assign_device(struct kvm *kvm,
 
 	pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
 
-	printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
-		assigned_dev->host_segnr,
-		assigned_dev->host_busnr,
-		PCI_SLOT(assigned_dev->host_devfn),
-		PCI_FUNC(assigned_dev->host_devfn));
+	dev_info(&pdev->dev, "kvm assign device\n");
 
 	return 0;
 out_unmap:
@@ -220,11 +216,7 @@ int kvm_deassign_device(struct kvm *kvm,
 
 	pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
 
-	printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
-		assigned_dev->host_segnr,
-		assigned_dev->host_busnr,
-		PCI_SLOT(assigned_dev->host_devfn),
-		PCI_FUNC(assigned_dev->host_devfn));
+	dev_info(&pdev->dev, "kvm deassign device\n");
 
 	return 0;
 }

From c1b378c34e3f2856c0ce060a8dad0877c6d6bfb9 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Thu, 26 Sep 2013 16:49:27 +0100
Subject: [PATCH 0514/1185] KVM: ARM: Fix calculation of virtual CPU ID

KVM does not have a notion of multiple clusters for CPUs, just a linear
array of CPUs. When using a system with cores in more than one cluster, the
current method for calculating the virtual MPIDR will leak the (physical)
cluster information into the virtual MPIDR. One effect of this is that
Linux under KVM fails to boot multiple CPUs that aren't in the 0th cluster.

This patch does away with exposing the real MPIDR fields in favour of simply
using the virtual CPU number (but preserving the U bit, as before).

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 1158fca401e09665c440a9fe4fd4f131ee85c13b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/coproc_a15.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index cf93472b9dd6..bbd4b888dbf3 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -27,14 +27,11 @@
 static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
 {
 	/*
-	 * Compute guest MPIDR:
-	 * (Even if we present only one VCPU to the guest on an SMP
-	 * host we don't set the U bit in the MPIDR, or vice versa, as
-	 * revealing the underlying hardware properties is likely to
-	 * be the best choice).
+	 * Compute guest MPIDR. No need to mess around with different clusters
+	 * but we read the 'U' bit from the underlying hardware directly.
 	 */
-	vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK)
-		| (vcpu->vcpu_id & MPIDR_LEVEL_MASK);
+	vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK)
+					| vcpu->vcpu_id;
 }
 
 #include "coproc.h"

From e82866a69bd6e3ed1a1405ba19f1d5e66e5129a2 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Thu, 26 Sep 2013 16:49:26 +0100
Subject: [PATCH 0515/1185] KVM: ARM: fix the size of TTBCR_{T0SZ,T1SZ} masks

The T{0,1}SZ fields of TTBCR are 3 bits wide when using the long descriptor
format. Likewise, the T0SZ field of the HTCR is 3 bits wide. KVM currently
defines TTBCR_T{0,1}SZ as 3, not 7.

The T0SZ mask is used to calculate the value for the HTCR, both to pick out
TTBCR.T0SZ and mask off the equivalent field in the HTCR during
read-modify-write. The incorrect mask size causes the (UNKNOWN) reset value
of HTCR.T0SZ to leak into the calculated HTCR value. Linux will hang when
initializing KVM if HTCR's reset value has bit 2 set (sometimes the case on
A7/TC2).

Fixing T0SZ allows A7 cores to boot and T1SZ is also fixed for completeness.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 5e497046f005528464f9600a4ee04f49df713596)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_arm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 64e96960de29..d556f03bca17 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -95,12 +95,12 @@
 #define TTBCR_IRGN1	(3 << 24)
 #define TTBCR_EPD1	(1 << 23)
 #define TTBCR_A1	(1 << 22)
-#define TTBCR_T1SZ	(3 << 16)
+#define TTBCR_T1SZ	(7 << 16)
 #define TTBCR_SH0	(3 << 12)
 #define TTBCR_ORGN0	(3 << 10)
 #define TTBCR_IRGN0	(3 << 8)
 #define TTBCR_EPD0	(1 << 7)
-#define TTBCR_T0SZ	3
+#define TTBCR_T0SZ	(7 << 0)
 #define HTCR_MASK	(TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
 
 /* Hyp System Trap Register */

From 5e1ddf60b6830d405ebb026e3d9570e9236a6600 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Thu, 26 Sep 2013 16:49:28 +0100
Subject: [PATCH 0516/1185] KVM: ARM: Add support for Cortex-A7

This patch adds support for running Cortex-A7 guests on Cortex-A7 hosts.

As Cortex-A7 is architecturally compatible with A15, this patch is largely just
generalising existing code. Areas where 'implementation defined' behaviour
is identical for A7 and A15 are moved to allow them to be used by both cores.

The check to ensure that coprocessor register tables are sorted correctly is
also moved into 'common' code to avoid each new cpu doing its own check
(and possibly forgetting to do so!)

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit e8c2d99f8277d68d28a9f99d16289712bc2aee7f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_asm.h  |   2 +-
 arch/arm/include/uapi/asm/kvm.h |   3 +-
 arch/arm/kvm/Makefile           |   2 +-
 arch/arm/kvm/coproc.c           | 114 ++++++++++++++++++++++++++++++++
 arch/arm/kvm/coproc_a15.c       | 114 +-------------------------------
 arch/arm/kvm/coproc_a7.c        |  54 +++++++++++++++
 arch/arm/kvm/guest.c            |   4 +-
 arch/arm/kvm/reset.c            |  15 +++--
 8 files changed, 184 insertions(+), 124 deletions(-)
 create mode 100644 arch/arm/kvm/coproc_a7.c

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index a2f43ddcc300..661da11f76f4 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -39,7 +39,7 @@
 #define c6_IFAR		17	/* Instruction Fault Address Register */
 #define c7_PAR		18	/* Physical Address Register */
 #define c7_PAR_high	19	/* PAR top 32 bits */
-#define c9_L2CTLR	20	/* Cortex A15 L2 Control Register */
+#define c9_L2CTLR	20	/* Cortex A15/A7 L2 Control Register */
 #define c10_PRRR	21	/* Primary Region Remap Register */
 #define c10_NMRR	22	/* Normal Memory Remap Register */
 #define c12_VBAR	23	/* Vector Base Address Register */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index c1ee007523d7..c498b60c0505 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -63,7 +63,8 @@ struct kvm_regs {
 
 /* Supported Processor Types */
 #define KVM_ARM_TARGET_CORTEX_A15	0
-#define KVM_ARM_NUM_TARGETS		1
+#define KVM_ARM_TARGET_CORTEX_A7	1
+#define KVM_ARM_NUM_TARGETS		2
 
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 #define KVM_ARM_DEVICE_TYPE_SHIFT	0
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index d99bee4950e5..789bca9e64a7 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -19,6 +19,6 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
+obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
 obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index db9cf692d4dd..a629f2c1d0f9 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -71,6 +71,92 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 1;
 }
 
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+	/*
+	 * Compute guest MPIDR. No need to mess around with different clusters
+	 * but we read the 'U' bit from the underlying hardware directly.
+	 */
+	vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK)
+					| vcpu->vcpu_id;
+}
+
+/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */
+static bool access_actlr(struct kvm_vcpu *vcpu,
+			 const struct coproc_params *p,
+			 const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+
+	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
+	return true;
+}
+
+/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */
+static bool access_cbar(struct kvm_vcpu *vcpu,
+			const struct coproc_params *p,
+			const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return write_to_read_only(vcpu, p);
+	return read_zero(vcpu, p);
+}
+
+/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */
+static bool access_l2ctlr(struct kvm_vcpu *vcpu,
+			  const struct coproc_params *p,
+			  const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+
+	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
+	return true;
+}
+
+static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+	u32 l2ctlr, ncores;
+
+	asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
+	l2ctlr &= ~(3 << 24);
+	ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
+	l2ctlr |= (ncores & 3) << 24;
+
+	vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+	u32 actlr;
+
+	/* ACTLR contains SMP bit: make sure you create all cpus first! */
+	asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
+	/* Make the SMP bit consistent with the guest configuration */
+	if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
+		actlr |= 1U << 6;
+	else
+		actlr &= ~(1U << 6);
+
+	vcpu->arch.cp15[c1_ACTLR] = actlr;
+}
+
+/*
+ * TRM entries: A7:4.3.50, A15:4.3.49
+ * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored).
+ */
+static bool access_l2ectlr(struct kvm_vcpu *vcpu,
+			   const struct coproc_params *p,
+			   const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+
+	*vcpu_reg(vcpu, p->Rt1) = 0;
+	return true;
+}
+
 /* See note at ARM ARM B1.14.4 */
 static bool access_dcsw(struct kvm_vcpu *vcpu,
 			const struct coproc_params *p,
@@ -153,10 +239,22 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
  *            registers preceding 32-bit ones.
  */
 static const struct coproc_reg cp15_regs[] = {
+	/* MPIDR: we use VMPIDR for guest access. */
+	{ CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
+			NULL, reset_mpidr, c0_MPIDR },
+
 	/* CSSELR: swapped by interrupt.S. */
 	{ CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32,
 			NULL, reset_unknown, c0_CSSELR },
 
+	/* ACTLR: trapped by HCR.TAC bit. */
+	{ CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
+			access_actlr, reset_actlr, c1_ACTLR },
+
+	/* CPACR: swapped by interrupt.S. */
+	{ CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
+			NULL, reset_val, c1_CPACR, 0x00000000 },
+
 	/* TTBR0/TTBR1: swapped by interrupt.S. */
 	{ CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
 	{ CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
@@ -194,6 +292,13 @@ static const struct coproc_reg cp15_regs[] = {
 	{ CRn( 7), CRm( 6), Op1( 0), Op2( 2), is32, access_dcsw},
 	{ CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw},
 	{ CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw},
+	/*
+	 * L2CTLR access (guest wants to know #CPUs).
+	 */
+	{ CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
+			access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
+	{ CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
+
 	/*
 	 * Dummy performance monitor implementation.
 	 */
@@ -234,6 +339,9 @@ static const struct coproc_reg cp15_regs[] = {
 	/* CNTKCTL: swapped by interrupt.S. */
 	{ CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
 			NULL, reset_val, c14_CNTKCTL, 0x00000000 },
+
+	/* The Configuration Base Address Register. */
+	{ CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
 };
 
 /* Target specific emulation tables */
@@ -241,6 +349,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];
 
 void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)
 {
+	unsigned int i;
+
+	for (i = 1; i < table->num; i++)
+		BUG_ON(cmp_reg(&table->table[i-1],
+			       &table->table[i]) >= 0);
+
 	target_tables[table->target] = table;
 }
 
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index bbd4b888dbf3..bb0cac1410cc 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -17,98 +17,12 @@
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 #include <linux/kvm_host.h>
-#include <asm/cputype.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_host.h>
-#include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
 #include <linux/init.h>
 
-static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
-	/*
-	 * Compute guest MPIDR. No need to mess around with different clusters
-	 * but we read the 'U' bit from the underlying hardware directly.
-	 */
-	vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK)
-					| vcpu->vcpu_id;
-}
-
 #include "coproc.h"
 
-/* A15 TRM 4.3.28: RO WI */
-static bool access_actlr(struct kvm_vcpu *vcpu,
-			 const struct coproc_params *p,
-			 const struct coproc_reg *r)
-{
-	if (p->is_write)
-		return ignore_write(vcpu, p);
-
-	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
-	return true;
-}
-
-/* A15 TRM 4.3.60: R/O. */
-static bool access_cbar(struct kvm_vcpu *vcpu,
-			const struct coproc_params *p,
-			const struct coproc_reg *r)
-{
-	if (p->is_write)
-		return write_to_read_only(vcpu, p);
-	return read_zero(vcpu, p);
-}
-
-/* A15 TRM 4.3.48: R/O WI. */
-static bool access_l2ctlr(struct kvm_vcpu *vcpu,
-			  const struct coproc_params *p,
-			  const struct coproc_reg *r)
-{
-	if (p->is_write)
-		return ignore_write(vcpu, p);
-
-	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
-	return true;
-}
-
-static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
-	u32 l2ctlr, ncores;
-
-	asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
-	l2ctlr &= ~(3 << 24);
-	ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
-	l2ctlr |= (ncores & 3) << 24;
-
-	vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
-}
-
-static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
-	u32 actlr;
-
-	/* ACTLR contains SMP bit: make sure you create all cpus first! */
-	asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
-	/* Make the SMP bit consistent with the guest configuration */
-	if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
-		actlr |= 1U << 6;
-	else
-		actlr &= ~(1U << 6);
-
-	vcpu->arch.cp15[c1_ACTLR] = actlr;
-}
-
-/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */
-static bool access_l2ectlr(struct kvm_vcpu *vcpu,
-			   const struct coproc_params *p,
-			   const struct coproc_reg *r)
-{
-	if (p->is_write)
-		return ignore_write(vcpu, p);
-
-	*vcpu_reg(vcpu, p->Rt1) = 0;
-	return true;
-}
-
 /*
  * A15-specific CP15 registers.
  * CRn denotes the primary register number, but is copied to the CRm in the
@@ -118,29 +32,9 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu,
  *            registers preceding 32-bit ones.
  */
 static const struct coproc_reg a15_regs[] = {
-	/* MPIDR: we use VMPIDR for guest access. */
-	{ CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
-			NULL, reset_mpidr, c0_MPIDR },
-
 	/* SCTLR: swapped by interrupt.S. */
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
 			NULL, reset_val, c1_SCTLR, 0x00C50078 },
-	/* ACTLR: trapped by HCR.TAC bit. */
-	{ CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
-			access_actlr, reset_actlr, c1_ACTLR },
-	/* CPACR: swapped by interrupt.S. */
-	{ CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
-			NULL, reset_val, c1_CPACR, 0x00000000 },
-
-	/*
-	 * L2CTLR access (guest wants to know #CPUs).
-	 */
-	{ CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
-			access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
-	{ CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
-
-	/* The Configuration Base Address Register. */
-	{ CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
 };
 
 static struct kvm_coproc_target_table a15_target_table = {
@@ -151,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = {
 
 static int __init coproc_a15_init(void)
 {
-	unsigned int i;
-
-	for (i = 1; i < ARRAY_SIZE(a15_regs); i++)
-		BUG_ON(cmp_reg(&a15_regs[i-1],
-			       &a15_regs[i]) >= 0);
-
 	kvm_register_target_coproc_table(&a15_target_table);
 	return 0;
 }
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
new file mode 100644
index 000000000000..1df767331588
--- /dev/null
+++ b/arch/arm/kvm/coproc_a7.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Copyright (C) 2013 - ARM Ltd
+ *
+ * Authors: Rusty Russell <rusty@rustcorp.au>
+ *          Christoffer Dall <c.dall@virtualopensystems.com>
+ *          Jonathan Austin <jonathan.austin@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/kvm_host.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
+#include <linux/init.h>
+
+#include "coproc.h"
+
+/*
+ * Cortex-A7 specific CP15 registers.
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ *            registers preceding 32-bit ones.
+ */
+static const struct coproc_reg a7_regs[] = {
+	/* SCTLR: swapped by interrupt.S. */
+	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
+			NULL, reset_val, c1_SCTLR, 0x00C50878 },
+};
+
+static struct kvm_coproc_target_table a7_target_table = {
+	.target = KVM_ARM_TARGET_CORTEX_A7,
+	.table = a7_regs,
+	.num = ARRAY_SIZE(a7_regs),
+};
+
+static int __init coproc_a7_init(void)
+{
+	kvm_register_target_coproc_table(&a7_target_table);
+	return 0;
+}
+late_initcall(coproc_a7_init);
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index ec98209fda71..20f8d97904af 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -190,6 +190,8 @@ int __attribute_const__ kvm_target_cpu(void)
 		return -EINVAL;
 
 	switch (part_number) {
+	case ARM_CPU_PART_CORTEX_A7:
+		return KVM_ARM_TARGET_CORTEX_A7;
 	case ARM_CPU_PART_CORTEX_A15:
 		return KVM_ARM_TARGET_CORTEX_A15;
 	default:
@@ -202,7 +204,7 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 {
 	unsigned int i;
 
-	/* We can only do a cortex A15 for now. */
+	/* We can only cope with guest==host and only on A15/A7 (for now). */
 	if (init->target != kvm_target_cpu())
 		return -EINVAL;
 
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index c02ba4af599f..d153e64d1255 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -30,16 +30,16 @@
 #include <kvm/arm_arch_timer.h>
 
 /******************************************************************************
- * Cortex-A15 Reset Values
+ * Cortex-A15 and Cortex-A7 Reset Values
  */
 
-static const int a15_max_cpu_idx = 3;
+static const int cortexa_max_cpu_idx = 3;
 
-static struct kvm_regs a15_regs_reset = {
+static struct kvm_regs cortexa_regs_reset = {
 	.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
 };
 
-static const struct kvm_irq_level a15_vtimer_irq = {
+static const struct kvm_irq_level cortexa_vtimer_irq = {
 	{ .irq = 27 },
 	.level = 1,
 };
@@ -62,12 +62,13 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	const struct kvm_irq_level *cpu_vtimer_irq;
 
 	switch (vcpu->arch.target) {
+	case KVM_ARM_TARGET_CORTEX_A7:
 	case KVM_ARM_TARGET_CORTEX_A15:
-		if (vcpu->vcpu_id > a15_max_cpu_idx)
+		if (vcpu->vcpu_id > cortexa_max_cpu_idx)
 			return -EINVAL;
-		reset_regs = &a15_regs_reset;
+		reset_regs = &cortexa_regs_reset;
 		vcpu->arch.midr = read_cpuid_id();
-		cpu_vtimer_irq = &a15_vtimer_irq;
+		cpu_vtimer_irq = &cortexa_vtimer_irq;
 		break;
 	default:
 		return -ENODEV;

From c7a9a5f3f02ff35d7ee820e98c1b9d46ab425808 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Wed, 2 Oct 2013 14:22:28 -0700
Subject: [PATCH 0517/1185] KVM: Move gfn_to_index to x86 specific code

The gfn_to_index function relies on huge page defines which either may
not make sense on systems that don't support huge pages or are defined
in an inconvenient way for other architectures.  Since this is
x86-specific, move the function to arch/x86/include/asm/kvm_host.h.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 6d9d41e57440e32a3400f37aa05ef7a1a09ced64)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/x86/include/asm/kvm_host.h | 7 +++++++
 include/linux/kvm_host.h        | 7 -------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3741c653767c..53db582487c9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,6 +79,13 @@
 #define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
 #define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
+static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+{
+	/* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+	return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
+		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+}
+
 #define SELECTOR_TI_MASK (1 << 2)
 #define SELECTOR_RPL_MASK 0x03
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 97e39fc02020..b5267c4f5392 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -827,13 +827,6 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
 	return gfn_to_memslot(kvm, gfn)->id;
 }
 
-static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
-{
-	/* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
-	return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
-		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
-}
-
 static inline gfn_t
 hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
 {

From 4b0745341e63f3878fcc1240d44d1ddcab068ce1 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Wed, 2 Oct 2013 14:22:29 -0700
Subject: [PATCH 0518/1185] KVM: ARM: Get rid of KVM_HPAGE defines

The KVM_HPAGE_DEFINES are a little artificial on ARM, since the huge
page size is statically defined at compile time and there is only a
single huge page size.

Now that the main kvm code relying on these defines has been moved to
the x86 specific part of the world, we can get rid of these.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit dc6f6763dfeaf2dfec906bb78875dcea162accd9)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 76f3c1978442..8a6f6db14ee4 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -38,11 +38,6 @@
 
 #define KVM_VCPU_MAX_FEATURES 1
 
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x)	0
-#define KVM_NR_PAGE_SIZES	1
-#define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
-
 #include <kvm/arm_vgic.h>
 
 struct kvm_vcpu;

From 1feff9299a9b7b1f137166f062772cd2044c7cbb Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Wed, 2 Oct 2013 14:22:30 -0700
Subject: [PATCH 0519/1185] KVM: arm64: Get rid of KVM_HPAGE defines

Now that the main kvm code relying on these defines has been moved to
the x86 specific part of the world, we can get rid of these.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit ef0cfe71c2b1710cd4ae747537e36c56f9a26ccf)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 4cc8c7078f39..5d85a02d1231 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -36,11 +36,6 @@
 
 #define KVM_VCPU_MAX_FEATURES 2
 
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x)	0
-#define KVM_NR_PAGE_SIZES	1
-#define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
-
 struct kvm_vcpu;
 int kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);

From 0a2e6af7befb4c0336f2c789d172bbb5438aec72 Mon Sep 17 00:00:00 2001
From: chai wen <chaiw.fnst@cn.fujitsu.com>
Date: Mon, 14 Oct 2013 22:22:33 +0800
Subject: [PATCH 0520/1185] KVM: Drop FOLL_GET in GUP when doing async page
 fault

Page pinning is not mandatory in kvm async page fault processing since
after async page fault event is delivered to a guest it accesses page once
again and does its own GUP.  Drop the FOLL_GET flag in GUP in async_pf
code, and do some simplifying in check/clear processing.

Suggested-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Gu zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: chai wen <chaiw.fnst@cn.fujitsu.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit f2e106692d5189303997ad7b96de8d8123aa5613)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/x86/kvm/x86.c         |  4 ++--
 include/linux/kvm_host.h   |  2 +-
 include/trace/events/kvm.h | 10 ++++------
 virt/kvm/async_pf.c        | 17 +++++------------
 4 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 96765c116c26..e4985fc604fe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7118,7 +7118,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 	int r;
 
 	if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
-	      is_error_page(work->page))
+	      work->wakeup_all)
 		return;
 
 	r = kvm_mmu_reload(vcpu);
@@ -7228,7 +7228,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 	struct x86_exception fault;
 
 	trace_kvm_async_pf_ready(work->arch.token, work->gva);
-	if (is_error_page(work->page))
+	if (work->wakeup_all)
 		work->arch.token = ~0; /* broadcast wakeup */
 	else
 		kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b5267c4f5392..384fb0a74924 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -175,7 +175,7 @@ struct kvm_async_pf {
 	gva_t gva;
 	unsigned long addr;
 	struct kvm_arch_async_pf arch;
-	struct page *page;
+	bool   wakeup_all;
 };
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 7005d1109ec9..131a0bda7aec 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -296,23 +296,21 @@ DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready,
 
 TRACE_EVENT(
 	kvm_async_pf_completed,
-	TP_PROTO(unsigned long address, struct page *page, u64 gva),
-	TP_ARGS(address, page, gva),
+	TP_PROTO(unsigned long address, u64 gva),
+	TP_ARGS(address, gva),
 
 	TP_STRUCT__entry(
 		__field(unsigned long, address)
-		__field(pfn_t, pfn)
 		__field(u64, gva)
 		),
 
 	TP_fast_assign(
 		__entry->address = address;
-		__entry->pfn = page ? page_to_pfn(page) : 0;
 		__entry->gva = gva;
 		),
 
-	TP_printk("gva %#llx address %#lx pfn %#llx",  __entry->gva,
-		  __entry->address, __entry->pfn)
+	TP_printk("gva %#llx address %#lx",  __entry->gva,
+		  __entry->address)
 );
 
 #endif
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index b197950ac4d5..8631d9c14320 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -56,7 +56,6 @@ void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
 
 static void async_pf_execute(struct work_struct *work)
 {
-	struct page *page = NULL;
 	struct kvm_async_pf *apf =
 		container_of(work, struct kvm_async_pf, work);
 	struct mm_struct *mm = apf->mm;
@@ -68,13 +67,12 @@ static void async_pf_execute(struct work_struct *work)
 
 	use_mm(mm);
 	down_read(&mm->mmap_sem);
-	get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
+	get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
 	up_read(&mm->mmap_sem);
 	unuse_mm(mm);
 
 	spin_lock(&vcpu->async_pf.lock);
 	list_add_tail(&apf->link, &vcpu->async_pf.done);
-	apf->page = page;
 	spin_unlock(&vcpu->async_pf.lock);
 
 	/*
@@ -82,7 +80,7 @@ static void async_pf_execute(struct work_struct *work)
 	 * this point
 	 */
 
-	trace_kvm_async_pf_completed(addr, page, gva);
+	trace_kvm_async_pf_completed(addr, gva);
 
 	if (waitqueue_active(&vcpu->wq))
 		wake_up_interruptible(&vcpu->wq);
@@ -112,8 +110,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 			list_entry(vcpu->async_pf.done.next,
 				   typeof(*work), link);
 		list_del(&work->link);
-		if (!is_error_page(work->page))
-			kvm_release_page_clean(work->page);
 		kmem_cache_free(async_pf_cache, work);
 	}
 	spin_unlock(&vcpu->async_pf.lock);
@@ -133,14 +129,11 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 		list_del(&work->link);
 		spin_unlock(&vcpu->async_pf.lock);
 
-		if (work->page)
-			kvm_arch_async_page_ready(vcpu, work);
+		kvm_arch_async_page_ready(vcpu, work);
 		kvm_arch_async_page_present(vcpu, work);
 
 		list_del(&work->queue);
 		vcpu->async_pf.queued--;
-		if (!is_error_page(work->page))
-			kvm_release_page_clean(work->page);
 		kmem_cache_free(async_pf_cache, work);
 	}
 }
@@ -163,7 +156,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 	if (!work)
 		return 0;
 
-	work->page = NULL;
+	work->wakeup_all = false;
 	work->vcpu = vcpu;
 	work->gva = gva;
 	work->addr = gfn_to_hva(vcpu->kvm, gfn);
@@ -203,7 +196,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
 	if (!work)
 		return -ENOMEM;
 
-	work->page = KVM_ERR_PTR_BAD_PAGE;
+	work->wakeup_all = true;
 	INIT_LIST_HEAD(&work->queue); /* for list_del to work */
 
 	spin_lock(&vcpu->async_pf.lock);

From 841372680792857d33c4c49d21be8a3cbde0490b Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 22:18:00 +0530
Subject: [PATCH 0521/1185] kvm: Add struct kvm arg to memslot APIs

We will use that in the later patch to find the kvm ops handler

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 5587027ce9d59a57aecaa190be1c8e560aaff45d)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c                 |  5 +++--
 arch/ia64/kvm/kvm-ia64.c           |  5 +++--
 arch/mips/kvm/kvm_mips.c           |  5 +++--
 arch/powerpc/include/asm/kvm_ppc.h |  6 ++++--
 arch/powerpc/kvm/booke.c           |  4 ++--
 arch/powerpc/kvm/powerpc.c         |  9 +++++----
 arch/s390/kvm/kvm-s390.c           |  5 +++--
 arch/x86/kvm/x86.c                 |  5 +++--
 include/linux/kvm_host.h           |  5 +++--
 virt/kvm/kvm_main.c                | 12 ++++++------
 10 files changed, 35 insertions(+), 26 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index cc5adb9349ef..e312e4a53f8d 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -152,12 +152,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index bdfd8789b376..985bf80c622e 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1550,12 +1550,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c
index a7b044536de4..73b34827826c 100644
--- a/arch/mips/kvm/kvm_mips.c
+++ b/arch/mips/kvm/kvm_mips.c
@@ -198,12 +198,13 @@ kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 	return -ENOIOCTLCMD;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index a5287fe03d77..e2dd05c81bc6 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -143,9 +143,11 @@ extern struct kvmppc_linear_info *kvm_alloc_hpt(void);
 extern void kvm_release_hpt(struct kvmppc_linear_info *li);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
 extern void kvmppc_core_destroy_vm(struct kvm *kvm);
-extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+extern void kvmppc_core_free_memslot(struct kvm *kvm,
+				     struct kvm_memory_slot *free,
 				     struct kvm_memory_slot *dont);
-extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+extern int kvmppc_core_create_memslot(struct kvm *kvm,
+				      struct kvm_memory_slot *slot,
 				      unsigned long npages);
 extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1a1b51189773..0a91f47e264b 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1592,12 +1592,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 	return -ENOTSUPP;
 }
 
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			      struct kvm_memory_slot *dont)
 {
 }
 
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			       unsigned long npages)
 {
 	return 0;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index ae63ae4a1a5f..750835a4ef70 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -409,15 +409,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
-	kvmppc_core_free_memslot(free, dont);
+	kvmppc_core_free_memslot(kvm, free, dont);
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
-	return kvmppc_core_create_memslot(slot, npages);
+	return kvmppc_core_create_memslot(kvm, slot, npages);
 }
 
 void kvm_arch_memslots_updated(struct kvm *kvm)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e515b2d4a947..54612d0e79dd 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -971,12 +971,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e4985fc604fe..68b139fe0dbd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6897,7 +6897,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 	int i;
@@ -6918,7 +6918,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 	}
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	int i;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 384fb0a74924..4de0a8fedf3f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -493,9 +493,10 @@ int kvm_set_memory_region(struct kvm *kvm,
 			  struct kvm_userspace_memory_region *mem);
 int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem);
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont);
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages);
 void kvm_arch_memslots_updated(struct kvm *kvm);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c5bc5aef11f5..c777a6e582f0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -541,13 +541,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 /*
  * Free any memory in @free but not in @dont.
  */
-static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
+static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
 				  struct kvm_memory_slot *dont)
 {
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		kvm_destroy_dirty_bitmap(free);
 
-	kvm_arch_free_memslot(free, dont);
+	kvm_arch_free_memslot(kvm, free, dont);
 
 	free->npages = 0;
 }
@@ -558,7 +558,7 @@ void kvm_free_physmem(struct kvm *kvm)
 	struct kvm_memory_slot *memslot;
 
 	kvm_for_each_memslot(memslot, slots)
-		kvm_free_physmem_slot(memslot, NULL);
+		kvm_free_physmem_slot(kvm, memslot, NULL);
 
 	kfree(kvm->memslots);
 }
@@ -822,7 +822,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if (change == KVM_MR_CREATE) {
 		new.userspace_addr = mem->userspace_addr;
 
-		if (kvm_arch_create_memslot(&new, npages))
+		if (kvm_arch_create_memslot(kvm, &new, npages))
 			goto out_free;
 	}
 
@@ -898,7 +898,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
 	kvm_arch_commit_memory_region(kvm, mem, &old, change);
 
-	kvm_free_physmem_slot(&old, &new);
+	kvm_free_physmem_slot(kvm, &old, &new);
 	kfree(old_memslots);
 
 	return 0;
@@ -906,7 +906,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 out_slots:
 	kfree(slots);
 out_free:
-	kvm_free_physmem_slot(&new, &old);
+	kvm_free_physmem_slot(kvm, &new, &old);
 out:
 	return r;
 }

From 46a037fd36656b8283c282319876dd8e9c4f1087 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Oct 2013 18:38:13 +0100
Subject: [PATCH 0522/1185] ARM: KVM: Yield CPU when vcpu executes a WFE

On an (even slightly) oversubscribed system, spinlocks are quickly
becoming a bottleneck, as some vcpus are spinning, waiting for a
lock to be released, while the vcpu holding the lock may not be
running at all.

This creates contention, and the observed slowdown is 40x for
hackbench. No, this isn't a typo.

The solution is to trap blocking WFEs and tell KVM that we're
now spinning. This ensures that other vcpus will get a scheduling
boost, allowing the lock to be released more quickly. Also, using
CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT slightly improves the performance
when the VM is severely overcommitted.

Quick test to estimate the performance: hackbench 1 process 1000

2xA15 host (baseline):	1.843s

2xA15 guest w/o patch:	2.083s
4xA15 guest w/o patch:	80.212s
8xA15 guest w/o patch:	Could not be bothered to find out

2xA15 guest w/ patch:	2.102s
4xA15 guest w/ patch:	3.205s
8xA15 guest w/ patch:	6.887s

So we go from a 40x degradation to 1.5x in the 2x overcommit case,
which is vaguely more acceptable.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 58d5ec8f8ee318b26b29207874fbaee626973952)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_arm.h | 4 +++-
 arch/arm/kvm/Kconfig           | 1 +
 arch/arm/kvm/handle_exit.c     | 6 +++++-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index d556f03bca17..fe395b7b1ce2 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -67,7 +67,7 @@
  */
 #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
 			HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
-			HCR_SWIO | HCR_TIDCP)
+			HCR_TWE | HCR_SWIO | HCR_TIDCP)
 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
 
 /* System Control Register (SCTLR) bits */
@@ -208,6 +208,8 @@
 #define HSR_EC_DABT	(0x24)
 #define HSR_EC_DABT_HYP	(0x25)
 
+#define HSR_WFI_IS_WFE		(1U << 0)
+
 #define HSR_HVC_IMM_MASK	((1UL << 16) - 1)
 
 #define HSR_DABT_S1PTW		(1U << 7)
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index ebf5015508b5..466bd299b1a8 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -20,6 +20,7 @@ config KVM
 	bool "Kernel-based Virtual Machine (KVM) support"
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
+	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select KVM_MMIO
 	select KVM_ARM_HOST
 	depends on ARM_VIRT_EXT && ARM_LPAE
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index df4c82d47ad7..c4c496f7619c 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -84,7 +84,11 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
 static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	trace_kvm_wfi(*vcpu_pc(vcpu));
-	kvm_vcpu_block(vcpu);
+	if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE)
+		kvm_vcpu_on_spin(vcpu);
+	else
+		kvm_vcpu_block(vcpu);
+
 	return 1;
 }
 

From 1391c263d202d6ce8b74f361cd3607c69ae26e77 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 15 Oct 2013 18:10:42 -0700
Subject: [PATCH 0523/1185] KVM: ARM: Update comments for kvm_handle_wfi

Update comments to reflect what is really going on and add the TWE bit
to the comments in kvm_arm.h.

Also rename the function to kvm_handle_wfx, as is done on arm64, for
consistency and uber-correctness.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 86ed81aa2e1ce05a4e7f0819f0dfc34e8d8fb910)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_arm.h |  1 +
 arch/arm/kvm/handle_exit.c     | 14 ++++++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index fe395b7b1ce2..1d3153c7eb41 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -57,6 +57,7 @@
  * TSC:		Trap SMC
  * TSW:		Trap cache operations by set/way
  * TWI:		Trap WFI
+ * TWE:		Trap WFE
  * TIDCP:	Trap L2CTLR/L2ECTLR
  * BSU_IS:	Upgrade barriers to the inner shareable domain
  * FB:		Force broadcast of all maintainance operations
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index c4c496f7619c..a92079011a83 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -73,15 +73,17 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
 }
 
 /**
- * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * kvm_handle_wfx - handle a WFI or WFE instructions trapped in guests
  * @vcpu:	the vcpu pointer
  * @run:	the kvm_run structure pointer
  *
- * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
- * halt execution of world-switches and schedule other host processes until
- * there is an incoming IRQ or FIQ to the VM.
+ * WFE: Yield the CPU and come back to this vcpu when the scheduler
+ * decides to.
+ * WFI: Simply call kvm_vcpu_block(), which will halt execution of
+ * world-switches and schedule other host processes until there is an
+ * incoming IRQ or FIQ to the VM.
  */
-static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	trace_kvm_wfi(*vcpu_pc(vcpu));
 	if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE)
@@ -93,7 +95,7 @@ static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
 }
 
 static exit_handle_fn arm_exit_handlers[] = {
-	[HSR_EC_WFI]		= kvm_handle_wfi,
+	[HSR_EC_WFI]		= kvm_handle_wfx,
 	[HSR_EC_CP15_32]	= kvm_handle_cp15_32,
 	[HSR_EC_CP15_64]	= kvm_handle_cp15_64,
 	[HSR_EC_CP14_MR]	= kvm_handle_cp14_access,

From cd709bb9fb35e687107761d5feed331d4c2c48e5 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 1 Nov 2012 17:14:45 +0100
Subject: [PATCH 0524/1185] KVM: ARM: Support hugetlbfs backed huge pages

Support huge pages in KVM/ARM and KVM/ARM64.  The pud_huge checking on
the unmap path may feel a bit silly as the pud_huge check is always
defined to false, but the compiler should be smart about this.

Note: This deals only with VMAs marked as huge which are allocated by
users through hugetlbfs only.  Transparent huge pages can only be
detected by looking at the underlying pages (or the page tables
themselves) and this patch so far simply maps these on a page-by-page
level in the Stage-2 page tables.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit ad361f093c1e31d0b43946210a32ab4ff5c49850)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h         |  17 ++-
 arch/arm/include/asm/pgtable-3level.h  |   2 +
 arch/arm/kvm/mmu.c                     | 169 +++++++++++++++++++------
 arch/arm64/include/asm/kvm_mmu.h       |  12 +-
 arch/arm64/include/asm/pgtable-hwdef.h |   2 +
 5 files changed, 158 insertions(+), 44 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 9b28c41f4ba9..77de4a41cc50 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -62,6 +62,12 @@ phys_addr_t kvm_get_idmap_vector(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
+static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd)
+{
+	*pmd = new_pmd;
+	flush_pmd_entry(pmd);
+}
+
 static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
 {
 	*pte = new_pte;
@@ -103,9 +109,15 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
 	pte_val(*pte) |= L_PTE_S2_RDWR;
 }
 
+static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+{
+	pmd_val(*pmd) |= L_PMD_S2_RDWR;
+}
+
 struct kvm;
 
-static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
+					      unsigned long size)
 {
 	/*
 	 * If we are going to insert an instruction page and the icache is
@@ -120,8 +132,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
 	 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
 	 */
 	if (icache_is_pipt()) {
-		unsigned long hva = gfn_to_hva(kvm, gfn);
-		__cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
+		__cpuc_coherent_user_range(hva, hva + size);
 	} else if (!icache_is_vivt_asid_tagged()) {
 		/* any kind of VIPT cache */
 		__flush_icache_all();
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 86b8fe398b95..ad52938bb264 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -113,6 +113,8 @@
 #define L_PTE_S2_RDONLY		 (_AT(pteval_t, 1) << 6)   /* HAP[1]   */
 #define L_PTE_S2_RDWR		 (_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
 
+#define L_PMD_S2_RDWR		 (_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
+
 /*
  * Hyp-mode PL2 PTE definitions for LPAE.
  */
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index b0de86b56c13..745d8b1630cc 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -19,6 +19,7 @@
 #include <linux/mman.h>
 #include <linux/kvm_host.h>
 #include <linux/io.h>
+#include <linux/hugetlb.h>
 #include <trace/events/kvm.h>
 #include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
@@ -41,6 +42,8 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
+#define kvm_pmd_huge(_x)	(pmd_huge(_x))
+
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
 	/*
@@ -93,19 +96,29 @@ static bool page_empty(void *ptr)
 
 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
-	pmd_t *pmd_table = pmd_offset(pud, 0);
-	pud_clear(pud);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	pmd_free(NULL, pmd_table);
+	if (pud_huge(*pud)) {
+		pud_clear(pud);
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
+	} else {
+		pmd_t *pmd_table = pmd_offset(pud, 0);
+		pud_clear(pud);
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
+		pmd_free(NULL, pmd_table);
+	}
 	put_page(virt_to_page(pud));
 }
 
 static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 {
-	pte_t *pte_table = pte_offset_kernel(pmd, 0);
-	pmd_clear(pmd);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	pte_free_kernel(NULL, pte_table);
+	if (kvm_pmd_huge(*pmd)) {
+		pmd_clear(pmd);
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
+	} else {
+		pte_t *pte_table = pte_offset_kernel(pmd, 0);
+		pmd_clear(pmd);
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
+		pte_free_kernel(NULL, pte_table);
+	}
 	put_page(virt_to_page(pmd));
 }
 
@@ -136,18 +149,32 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 			continue;
 		}
 
+		if (pud_huge(*pud)) {
+			/*
+			 * If we are dealing with a huge pud, just clear it and
+			 * move on.
+			 */
+			clear_pud_entry(kvm, pud, addr);
+			addr = pud_addr_end(addr, end);
+			continue;
+		}
+
 		pmd = pmd_offset(pud, addr);
 		if (pmd_none(*pmd)) {
 			addr = pmd_addr_end(addr, end);
 			continue;
 		}
 
-		pte = pte_offset_kernel(pmd, addr);
-		clear_pte_entry(kvm, pte, addr);
-		next = addr + PAGE_SIZE;
+		if (!kvm_pmd_huge(*pmd)) {
+			pte = pte_offset_kernel(pmd, addr);
+			clear_pte_entry(kvm, pte, addr);
+			next = addr + PAGE_SIZE;
+		}
 
-		/* If we emptied the pte, walk back up the ladder */
-		if (page_empty(pte)) {
+		/*
+		 * If the pmd entry is to be cleared, walk back up the ladder
+		 */
+		if (kvm_pmd_huge(*pmd) || page_empty(pte)) {
 			clear_pmd_entry(kvm, pmd, addr);
 			next = pmd_addr_end(addr, end);
 			if (page_empty(pmd) && !page_empty(pud)) {
@@ -420,29 +447,71 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 	kvm->arch.pgd = NULL;
 }
 
-
-static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
-			  phys_addr_t addr, const pte_t *new_pte, bool iomap)
+static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+			     phys_addr_t addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte, old_pte;
 
-	/* Create 2nd stage page table mapping - Level 1 */
 	pgd = kvm->arch.pgd + pgd_index(addr);
 	pud = pud_offset(pgd, addr);
 	if (pud_none(*pud)) {
 		if (!cache)
-			return 0; /* ignore calls from kvm_set_spte_hva */
+			return NULL;
 		pmd = mmu_memory_cache_alloc(cache);
 		pud_populate(NULL, pud, pmd);
 		get_page(virt_to_page(pud));
 	}
 
-	pmd = pmd_offset(pud, addr);
+	return pmd_offset(pud, addr);
+}
 
-	/* Create 2nd stage page table mapping - Level 2 */
+static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
+			       *cache, phys_addr_t addr, const pmd_t *new_pmd)
+{
+	pmd_t *pmd, old_pmd;
+
+	pmd = stage2_get_pmd(kvm, cache, addr);
+	VM_BUG_ON(!pmd);
+
+	/*
+	 * Mapping in huge pages should only happen through a fault.  If a
+	 * page is merged into a transparent huge page, the individual
+	 * subpages of that huge page should be unmapped through MMU
+	 * notifiers before we get here.
+	 *
+	 * Merging of CompoundPages is not supported; they should become
+	 * splitting first, unmapped, merged, and mapped back in on-demand.
+	 */
+	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
+
+	old_pmd = *pmd;
+	kvm_set_pmd(pmd, *new_pmd);
+	if (pmd_present(old_pmd))
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
+	else
+		get_page(virt_to_page(pmd));
+	return 0;
+}
+
+static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+			  phys_addr_t addr, const pte_t *new_pte, bool iomap)
+{
+	pmd_t *pmd;
+	pte_t *pte, old_pte;
+
+	/* Create stage-2 page table mapping - Level 1 */
+	pmd = stage2_get_pmd(kvm, cache, addr);
+	if (!pmd) {
+		/*
+		 * Ignore calls from kvm_set_spte_hva for unallocated
+		 * address ranges.
+		 */
+		return 0;
+	}
+
+	/* Create stage-2 page mappings - Level 2 */
 	if (pmd_none(*pmd)) {
 		if (!cache)
 			return 0; /* ignore calls from kvm_set_spte_hva */
@@ -508,15 +577,18 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 }
 
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-			  gfn_t gfn, struct kvm_memory_slot *memslot,
+			  struct kvm_memory_slot *memslot,
 			  unsigned long fault_status)
 {
-	pte_t new_pte;
-	pfn_t pfn;
 	int ret;
-	bool write_fault, writable;
+	bool write_fault, writable, hugetlb = false;
 	unsigned long mmu_seq;
+	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
+	unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
+	struct kvm *kvm = vcpu->kvm;
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
+	struct vm_area_struct *vma;
+	pfn_t pfn;
 
 	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -524,6 +596,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
+	/* Let's check if we will get back a huge page backed by hugetlbfs */
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma_intersection(current->mm, hva, hva + 1);
+	if (is_vm_hugetlb_page(vma)) {
+		hugetlb = true;
+		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
+	}
+	up_read(&current->mm->mmap_sem);
+
 	/* We need minimum second+third level pages */
 	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
 	if (ret)
@@ -541,26 +622,38 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 */
 	smp_rmb();
 
-	pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
+	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
 	if (is_error_pfn(pfn))
 		return -EFAULT;
 
-	new_pte = pfn_pte(pfn, PAGE_S2);
-	coherent_icache_guest_page(vcpu->kvm, gfn);
-
-	spin_lock(&vcpu->kvm->mmu_lock);
-	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+	spin_lock(&kvm->mmu_lock);
+	if (mmu_notifier_retry(kvm, mmu_seq))
 		goto out_unlock;
-	if (writable) {
-		kvm_set_s2pte_writable(&new_pte);
-		kvm_set_pfn_dirty(pfn);
+
+	if (hugetlb) {
+		pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);
+		new_pmd = pmd_mkhuge(new_pmd);
+		if (writable) {
+			kvm_set_s2pmd_writable(&new_pmd);
+			kvm_set_pfn_dirty(pfn);
+		}
+		coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE);
+		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
+	} else {
+		pte_t new_pte = pfn_pte(pfn, PAGE_S2);
+		if (writable) {
+			kvm_set_s2pte_writable(&new_pte);
+			kvm_set_pfn_dirty(pfn);
+		}
+		coherent_icache_guest_page(kvm, hva, PAGE_SIZE);
+		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
 	}
-	stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
+
 
 out_unlock:
-	spin_unlock(&vcpu->kvm->mmu_lock);
+	spin_unlock(&kvm->mmu_lock);
 	kvm_release_pfn_clean(pfn);
-	return 0;
+	return ret;
 }
 
 /**
@@ -629,7 +722,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 	memslot = gfn_to_memslot(vcpu->kvm, gfn);
 
-	ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
+	ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
 	if (ret == 0)
 		ret = 1;
 out_unlock:
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index efe609c6a3c9..680f74e67497 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -91,6 +91,7 @@ int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
 #define	kvm_set_pte(ptep, pte)		set_pte(ptep, pte)
+#define	kvm_set_pmd(pmdp, pmd)		set_pmd(pmdp, pmd)
 
 static inline bool kvm_is_write_fault(unsigned long esr)
 {
@@ -116,13 +117,18 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
 	pte_val(*pte) |= PTE_S2_RDWR;
 }
 
+static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+{
+	pmd_val(*pmd) |= PMD_S2_RDWR;
+}
+
 struct kvm;
 
-static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
+					      unsigned long size)
 {
 	if (!icache_is_aliasing()) {		/* PIPT */
-		unsigned long hva = gfn_to_hva(kvm, gfn);
-		flush_icache_range(hva, hva + PAGE_SIZE);
+		flush_icache_range(hva, hva + size);
 	} else if (!icache_is_aivivt()) {	/* non ASID-tagged VIVT */
 		/* any kind of VIPT cache */
 		__flush_icache_all();
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 2e9d83673ef6..b1d2e26c3c88 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -85,6 +85,8 @@
 #define PTE_S2_RDONLY		(_AT(pteval_t, 1) << 6)   /* HAP[2:1] */
 #define PTE_S2_RDWR		(_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
 
+#define PMD_S2_RDWR		(_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
+
 /*
  * Memory Attribute override for Stage-2 (MemAttr[3:0])
  */

From ad8b3ca795b8534c292e8916ffdfc36b4bd09940 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Wed, 2 Oct 2013 15:32:01 -0700
Subject: [PATCH 0525/1185] KVM: ARM: Transparent huge page (THP) support

Support transparent huge pages in KVM/ARM and KVM/ARM64.  The
transparent_hugepage_adjust is not very pretty, but this is also how
it's solved on x86 and seems to be simply an artifact of how THPs
behave.  This should eventually be shared across architectures if
possible, but that can always be changed down the road.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 9b5fdb9781f74fb15827e465bfb5aa63211953c8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 58 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 745d8b1630cc..371958370de4 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -42,7 +42,7 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
-#define kvm_pmd_huge(_x)	(pmd_huge(_x))
+#define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
 
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
@@ -576,12 +576,53 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 	return ret;
 }
 
+static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
+{
+	pfn_t pfn = *pfnp;
+	gfn_t gfn = *ipap >> PAGE_SHIFT;
+
+	if (PageTransCompound(pfn_to_page(pfn))) {
+		unsigned long mask;
+		/*
+		 * The address we faulted on is backed by a transparent huge
+		 * page.  However, because we map the compound huge page and
+		 * not the individual tail page, we need to transfer the
+		 * refcount to the head page.  We have to be careful that the
+		 * THP doesn't start to split while we are adjusting the
+		 * refcounts.
+		 *
+		 * We are sure this doesn't happen, because mmu_notifier_retry
+		 * was successful and we are holding the mmu_lock, so if this
+		 * THP is trying to split, it will be blocked in the mmu
+		 * notifier before touching any of the pages, specifically
+		 * before being able to call __split_huge_page_refcount().
+		 *
+		 * We can therefore safely transfer the refcount from PG_tail
+		 * to PG_head and switch the pfn from a tail page to the head
+		 * page accordingly.
+		 */
+		mask = PTRS_PER_PMD - 1;
+		VM_BUG_ON((gfn & mask) != (pfn & mask));
+		if (pfn & mask) {
+			*ipap &= PMD_MASK;
+			kvm_release_pfn_clean(pfn);
+			pfn &= ~mask;
+			kvm_get_pfn(pfn);
+			*pfnp = pfn;
+		}
+
+		return true;
+	}
+
+	return false;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot,
 			  unsigned long fault_status)
 {
 	int ret;
-	bool write_fault, writable, hugetlb = false;
+	bool write_fault, writable, hugetlb = false, force_pte = false;
 	unsigned long mmu_seq;
 	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
 	unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
@@ -602,6 +643,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (is_vm_hugetlb_page(vma)) {
 		hugetlb = true;
 		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
+	} else {
+		/*
+		 * Pages belonging to VMAs not aligned to the PMD mapping
+		 * granularity cannot be mapped using block descriptors even
+		 * if the pages belong to a THP for the process, because the
+		 * stage-2 block descriptor will cover more than a single THP
+		 * and we loose atomicity for unmapping, updates, and splits
+		 * of the THP or other pages in the stage-2 block range.
+		 */
+		if (vma->vm_start & ~PMD_MASK)
+			force_pte = true;
 	}
 	up_read(&current->mm->mmap_sem);
 
@@ -629,6 +681,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	spin_lock(&kvm->mmu_lock);
 	if (mmu_notifier_retry(kvm, mmu_seq))
 		goto out_unlock;
+	if (!hugetlb && !force_pte)
+		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
 	if (hugetlb) {
 		pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);

From c0acda6fd7c83ca3cc8c5892247ece8c9651c9f3 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 18 Oct 2013 18:19:04 +0100
Subject: [PATCH 0526/1185] ARM: KVM: Fix MPIDR computing to support virtual
 clusters

In order to be able to support more than 4 A7 or A15 CPUs,
we need to fix the MPIDR computing to reflect the fact that
both A15 and A7 can only exist in clusters of at most 4 CPUs.

Fix the MPIDR computing to allow virtual clusters to be exposed
to the guest.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 2d1d841bd44e24b58a3d3cc4fa793670aaa38fbf)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/coproc.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index a629f2c1d0f9..631e6bd0e05f 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -74,11 +74,13 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
 static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
 {
 	/*
-	 * Compute guest MPIDR. No need to mess around with different clusters
-	 * but we read the 'U' bit from the underlying hardware directly.
+	 * Compute guest MPIDR. We build a virtual cluster out of the
+	 * vcpu_id, but we read the 'U' bit from the underlying
+	 * hardware directly.
 	 */
-	vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK)
-					| vcpu->vcpu_id;
+	vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) |
+				     ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) |
+				     (vcpu->vcpu_id & 3));
 }
 
 /* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */

From 8c02aa50010946b98e5077e5e05b5d99e6286ab1 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 18 Oct 2013 18:19:05 +0100
Subject: [PATCH 0527/1185] ARM: KVM: fix L2CTLR to be per-cluster

The L2CTLR register contains the number of CPUs in this cluster.

Make sure the register content is actually relevant to the vcpu
that is being configured by computing the number of cores that are
part of its cluster.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 9cbb6d969cb6561de45d917b8bb9281cb374bb35)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/coproc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 631e6bd0e05f..78c0885d6501 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -124,6 +124,10 @@ static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
 	asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
 	l2ctlr &= ~(3 << 24);
 	ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
+	/* How many cores in the current cluster and the next ones */
+	ncores -= (vcpu->vcpu_id & ~3);
+	/* Cap it to the maximum number of cores in a single cluster */
+	ncores = min(ncores, 3U);
 	l2ctlr |= (ncores & 3) << 24;
 
 	vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;

From c93a79267ff11590dd7835c6622bd9f29ef43e73 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 18 Oct 2013 18:19:06 +0100
Subject: [PATCH 0528/1185] ARM: KVM: drop limitation to 4 CPU VMs

Now that the KVM/arm code knows about affinity, remove the hard
limit of 4 vcpus per VM.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 7999b4d18211bcfb40e3574cf75e94518e9fa2c6)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/reset.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index d153e64d1255..f558c073c023 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -33,8 +33,6 @@
  * Cortex-A15 and Cortex-A7 Reset Values
  */
 
-static const int cortexa_max_cpu_idx = 3;
-
 static struct kvm_regs cortexa_regs_reset = {
 	.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
 };
@@ -64,8 +62,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	switch (vcpu->arch.target) {
 	case KVM_ARM_TARGET_CORTEX_A7:
 	case KVM_ARM_TARGET_CORTEX_A15:
-		if (vcpu->vcpu_id > cortexa_max_cpu_idx)
-			return -EINVAL;
 		reset_regs = &cortexa_regs_reset;
 		vcpu->arch.midr = read_cpuid_id();
 		cpu_vtimer_irq = &cortexa_vtimer_irq;

From 3d6b7ab3028ceacadd0a29b4757a788e24987d10 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 18 Oct 2013 18:19:03 +0100
Subject: [PATCH 0529/1185] arm/arm64: KVM: PSCI: use MPIDR to identify a
 target CPU

The KVM PSCI code blindly assumes that vcpu_id and MPIDR are
the same thing. This is true when vcpus are organized as a flat
topology, but is wrong when trying to emulate any other topology
(such as A15 clusters).

Change the KVM PSCI CPU_ON code to look at the MPIDR instead
of the vcpu_id to pick a target CPU.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 79c648806f9034abf54332b78043bb242189d953)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_emulate.h   |  5 +++++
 arch/arm/kvm/psci.c                  | 17 +++++++++++++----
 arch/arm64/include/asm/kvm_emulate.h |  5 +++++
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index a464e8d7b6c5..708e4d8a647f 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -157,4 +157,9 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
 }
 
+static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.cp15[c0_MPIDR];
+}
+
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 86a693a02ba3..311263124acf 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -18,6 +18,7 @@
 #include <linux/kvm_host.h>
 #include <linux/wait.h>
 
+#include <asm/cputype.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_psci.h>
 
@@ -34,22 +35,30 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 {
 	struct kvm *kvm = source_vcpu->kvm;
-	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu *vcpu = NULL, *tmp;
 	wait_queue_head_t *wq;
 	unsigned long cpu_id;
+	unsigned long mpidr;
 	phys_addr_t target_pc;
+	int i;
 
 	cpu_id = *vcpu_reg(source_vcpu, 1);
 	if (vcpu_mode_is_32bit(source_vcpu))
 		cpu_id &= ~((u32) 0);
 
-	if (cpu_id >= atomic_read(&kvm->online_vcpus))
+	kvm_for_each_vcpu(i, tmp, kvm) {
+		mpidr = kvm_vcpu_get_mpidr(tmp);
+		if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) {
+			vcpu = tmp;
+			break;
+		}
+	}
+
+	if (!vcpu)
 		return KVM_PSCI_RET_INVAL;
 
 	target_pc = *vcpu_reg(source_vcpu, 2);
 
-	vcpu = kvm_get_vcpu(kvm, cpu_id);
-
 	wq = kvm_arch_vcpu_wq(vcpu);
 	if (!waitqueue_active(wq))
 		return KVM_PSCI_RET_INVAL;
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index eec073875218..6df93cdc652b 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -177,4 +177,9 @@ static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE;
 }
 
+static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
+{
+	return vcpu_sys_reg(vcpu, MPIDR_EL1);
+}
+
 #endif /* __ARM64_KVM_EMULATE_H__ */

From b695de83412eb8ff7c77b215fa24bca5b1871618 Mon Sep 17 00:00:00 2001
From: Yang Zhang <yang.z.zhang@Intel.com>
Date: Thu, 24 Oct 2013 09:56:39 +0800
Subject: [PATCH 0530/1185] KVM: Mapping IOMMU pages after updating memslot

In kvm_iommu_map_pages(), we need to know the page size by calling
kvm_host_page_size(), which checks whether the target slot is valid
before returning the right page size.
Currently, we map the iommu pages when creating a new slot, but we
call kvm_iommu_map_pages() while preparing the new slot. At that time,
the new slot is not yet visible to the domain (it is still being
prepared), so we cannot get the right page size from
kvm_host_page_size(), and this breaks the IOMMU super page logic.
The solution is to map the iommu pages after we insert the new slot
into domain.

Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Tested-by: Patrick Lu <patrick.lu@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e0230e1327fb862c9b6cde24ae62d55f9db62c9b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c777a6e582f0..340c97cf55b6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -873,21 +873,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			goto out_free;
 	}
 
-	/*
-	 * IOMMU mapping:  New slots need to be mapped.  Old slots need to be
-	 * un-mapped and re-mapped if their base changes.  Since base change
-	 * unmapping is handled above with slot deletion, mapping alone is
-	 * needed here.  Anything else the iommu might care about for existing
-	 * slots (size changes, userspace addr changes and read-only flag
-	 * changes) is disallowed above, so any other attribute changes getting
-	 * here can be skipped.
-	 */
-	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
-		r = kvm_iommu_map_pages(kvm, &new);
-		if (r)
-			goto out_slots;
-	}
-
 	/* actual memory is freed via old in kvm_free_physmem_slot below */
 	if (change == KVM_MR_DELETE) {
 		new.dirty_bitmap = NULL;
@@ -901,6 +886,20 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	kvm_free_physmem_slot(kvm, &old, &new);
 	kfree(old_memslots);
 
+	/*
+	 * IOMMU mapping:  New slots need to be mapped.  Old slots need to be
+	 * un-mapped and re-mapped if their base changes.  Since base change
+	 * unmapping is handled above with slot deletion, mapping alone is
+	 * needed here.  Anything else the iommu might care about for existing
+	 * slots (size changes, userspace addr changes and read-only flag
+	 * changes) is disallowed above, so any other attribute changes getting
+	 * here can be skipped.
+	 */
+	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+		r = kvm_iommu_map_pages(kvm, &new);
+		return r;
+	}
+
 	return 0;
 
 out_slots:

From c0cdef185a6b5c8f3c5e6b923e95774c36764011 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 2 Aug 2013 11:41:13 +0100
Subject: [PATCH 0531/1185] arm64: KVM: Yield CPU when vcpu executes a WFE

On an (even slightly) oversubscribed system, spinlocks are quickly
becoming a bottleneck, as some vcpus are spinning, waiting for a
lock to be released, while the vcpu holding the lock may not be
running at all.

The solution is to trap blocking WFEs and tell KVM that we're
now spinning. This ensures that other vcpus will get a scheduling
boost, allowing the lock to be released more quickly. Also, using
CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT slightly improves the performance
when the VM is severely overcommitted.

Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit d241aac798eb042e605f78c31a4122e583b2cd13)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_arm.h |  8 ++++++--
 arch/arm64/kvm/Kconfig           |  1 +
 arch/arm64/kvm/handle_exit.c     | 18 +++++++++++++-----
 3 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index a5f28e2720c7..c98ef4771c73 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -63,6 +63,7 @@
  * TAC:		Trap ACTLR
  * TSC:		Trap SMC
  * TSW:		Trap cache operations by set/way
+ * TWE:		Trap WFE
  * TWI:		Trap WFI
  * TIDCP:	Trap L2CTLR/L2ECTLR
  * BSU_IS:	Upgrade barriers to the inner shareable domain
@@ -72,8 +73,9 @@
  * FMO:		Override CPSR.F and enable signaling with VF
  * SWIO:	Turn set/way invalidates into set/way clean+invalidate
  */
-#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
-			 HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
+#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
+			 HCR_BSU_IS | HCR_FB | HCR_TAC | \
+			 HCR_AMO | HCR_IMO | HCR_FMO | \
 			 HCR_SWIO | HCR_TIDCP | HCR_RW)
 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
 
@@ -242,4 +244,6 @@
 
 #define ESR_EL2_EC_xABT_xFSR_EXTABT	0x10
 
+#define ESR_EL2_EC_WFI_ISS_WFE	(1 << 0)
+
 #endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 21e90820bd23..4480ab339a00 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
 	select MMU_NOTIFIER
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
+	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select KVM_MMIO
 	select KVM_ARM_HOST
 	select KVM_ARM_VGIC
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 9beaca033437..8da56067c304 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -47,21 +47,29 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 }
 
 /**
- * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event
+ *		    instruction executed by a guest
+ *
  * @vcpu:	the vcpu pointer
  *
- * Simply call kvm_vcpu_block(), which will halt execution of
+ * WFE: Yield the CPU and come back to this vcpu when the scheduler
+ * decides to.
+ * WFI: Simply call kvm_vcpu_block(), which will halt execution of
  * world-switches and schedule other host processes until there is an
  * incoming IRQ or FIQ to the VM.
  */
-static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	kvm_vcpu_block(vcpu);
+	if (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EC_WFI_ISS_WFE)
+		kvm_vcpu_on_spin(vcpu);
+	else
+		kvm_vcpu_block(vcpu);
+
 	return 1;
 }
 
 static exit_handle_fn arm_exit_handlers[] = {
-	[ESR_EL2_EC_WFI]	= kvm_handle_wfi,
+	[ESR_EL2_EC_WFI]	= kvm_handle_wfx,
 	[ESR_EL2_EC_CP15_32]	= kvm_handle_cp15_32,
 	[ESR_EL2_EC_CP15_64]	= kvm_handle_cp15_64,
 	[ESR_EL2_EC_CP14_MR]	= kvm_handle_cp14_access,

From e845f9d367f2ac8197a142fd29d1baa8fcc4dd75 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 30 Oct 2013 12:12:13 +0100
Subject: [PATCH 0532/1185] KVM: use a more sensible error number when debugfs
 directory creation fails

I don't know if this was due to cut and paste, or somebody was really
using a D20 to pick the error code for kvm_init_debugfs as suggested by
Linus (EFAULT is 14, so the possibility cannot be entirely ruled out).

In any case, this patch fixes it.

Reported-by: Tim Gardner <tim.gardner@canonical.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0c8eb04a6241da28deb108181213b791c378123b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 340c97cf55b6..d6b7d797cb16 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3010,7 +3010,7 @@ static const struct file_operations *stat_fops[] = {
 
 static int kvm_init_debug(void)
 {
-	int r = -EFAULT;
+	int r = -EEXIST;
 	struct kvm_stats_debugfs_item *p;
 
 	kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);

From b4fe3057a0c543ed42aa2a69a6ab9a5312571c34 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Sun, 22 Sep 2013 16:44:50 +0200
Subject: [PATCH 0533/1185] kvm: Add KVM_GET_EMULATED_CPUID

Add a kvm ioctl which states which system functionality kvm emulates.
The format used is that of CPUID and we return the corresponding CPUID
bits set for which we do emulate functionality.

Make sure ->padding is being passed on clean from userspace so that we
can use it for something in the future, after the ioctl gets cast in
stone.

s/kvm_dev_ioctl_get_supported_cpuid/kvm_dev_ioctl_get_cpuid/ while at
it.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 9c15bb1d0a8411f9bb3395d21d5309bde7da0c1c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/api.txt | 77 +++++++++++++++++++++++++++++--
 arch/x86/include/uapi/asm/kvm.h   |  6 +--
 arch/x86/kvm/cpuid.c              | 57 ++++++++++++++++++++---
 arch/x86/kvm/cpuid.h              |  5 +-
 arch/x86/kvm/x86.c                |  9 ++--
 include/uapi/linux/kvm.h          |  2 +
 6 files changed, 139 insertions(+), 17 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 9bfadeb8be31..d196ebe8956e 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1122,9 +1122,9 @@ struct kvm_cpuid2 {
 	struct kvm_cpuid_entry2 entries[0];
 };
 
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
-#define KVM_CPUID_FLAG_STATEFUL_FUNC    2
-#define KVM_CPUID_FLAG_STATE_READ_NEXT  4
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX		BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC		BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT		BIT(2)
 
 struct kvm_cpuid_entry2 {
 	__u32 function;
@@ -2661,6 +2661,77 @@ and usually define the validity of a groups of registers. (e.g. one bit
 };
 
 
+4.81 KVM_GET_EMULATED_CPUID
+
+Capability: KVM_CAP_EXT_EMUL_CPUID
+Architectures: x86
+Type: system ioctl
+Parameters: struct kvm_cpuid2 (in/out)
+Returns: 0 on success, -1 on error
+
+struct kvm_cpuid2 {
+	__u32 nent;
+	__u32 flags;
+	struct kvm_cpuid_entry2 entries[0];
+};
+
+The member 'flags' is used for passing flags from userspace.
+
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX		BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC		BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT		BIT(2)
+
+struct kvm_cpuid_entry2 {
+	__u32 function;
+	__u32 index;
+	__u32 flags;
+	__u32 eax;
+	__u32 ebx;
+	__u32 ecx;
+	__u32 edx;
+	__u32 padding[3];
+};
+
+This ioctl returns x86 cpuid features which are emulated by
+kvm. Userspace can use the information returned by this ioctl to query
+which features are emulated by kvm instead of being present natively.
+
+Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2
+structure with the 'nent' field indicating the number of entries in
+the variable-size array 'entries'. If the number of entries is too low
+to describe the cpu capabilities, an error (E2BIG) is returned. If the
+number is too high, the 'nent' field is adjusted and an error (ENOMEM)
+is returned. If the number is just right, the 'nent' field is adjusted
+to the number of valid entries in the 'entries' array, which is then
+filled.
+
+The entries returned are the set CPUID bits of the respective features
+which kvm emulates, as returned by the CPUID instruction, with unknown
+or unsupported feature bits cleared.
+
+Features like x2apic, for example, may not be present in the host cpu
+but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be
+emulated efficiently and are thus not included here.
+
+The fields in each entry are defined as follows:
+
+  function: the eax value used to obtain the entry
+  index: the ecx value used to obtain the entry (for entries that are
+         affected by ecx)
+  flags: an OR of zero or more of the following:
+        KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
+           if the index field is valid
+        KVM_CPUID_FLAG_STATEFUL_FUNC:
+           if cpuid for this function returns different values for successive
+           invocations; there will be several entries with the same function,
+           all with this flag set
+        KVM_CPUID_FLAG_STATE_READ_NEXT:
+           for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
+           the first entry to be read by a cpu
+   eax, ebx, ecx, edx: the values returned by the cpuid instruction for
+         this function/index combination
+
+
 6. Capabilities that can be enabled
 -----------------------------------
 
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5d9a3033b3d7..d3a87780c70b 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -211,9 +211,9 @@ struct kvm_cpuid_entry2 {
 	__u32 padding[3];
 };
 
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
-#define KVM_CPUID_FLAG_STATEFUL_FUNC    2
-#define KVM_CPUID_FLAG_STATE_READ_NEXT  4
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX		BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC		BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT		BIT(2)
 
 /* for KVM_SET_CPUID2 */
 struct kvm_cpuid2 {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index a20ecb5b6cbf..89d288237b9c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -187,8 +187,14 @@ static bool supported_xcr0_bit(unsigned bit)
 
 #define F(x) bit(X86_FEATURE_##x)
 
-static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
-			 u32 index, int *nent, int maxnent)
+static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
+				   u32 func, u32 index, int *nent, int maxnent)
+{
+	return 0;
+}
+
+static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+				 u32 index, int *nent, int maxnent)
 {
 	int r;
 	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
@@ -480,6 +486,15 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	return r;
 }
 
+static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func,
+			u32 idx, int *nent, int maxnent, unsigned int type)
+{
+	if (type == KVM_GET_EMULATED_CPUID)
+		return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent);
+
+	return __do_cpuid_ent(entry, func, idx, nent, maxnent);
+}
+
 #undef F
 
 struct kvm_cpuid_param {
@@ -494,8 +509,34 @@ static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
 	return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
 }
 
-int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
-				      struct kvm_cpuid_entry2 __user *entries)
+static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
+				 __u32 num_entries, unsigned int ioctl_type)
+{
+	int i;
+
+	if (ioctl_type != KVM_GET_EMULATED_CPUID)
+		return false;
+
+	/*
+	 * We want to make sure that ->padding is being passed clean from
+	 * userspace in case we want to use it for something in the future.
+	 *
+	 * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we
+	 * have to give ourselves satisfied only with the emulated side. /me
+	 * sheds a tear.
+	 */
+	for (i = 0; i < num_entries; i++) {
+		if (entries[i].padding[0] ||
+		    entries[i].padding[1] ||
+		    entries[i].padding[2])
+			return true;
+	}
+	return false;
+}
+
+int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
+			    struct kvm_cpuid_entry2 __user *entries,
+			    unsigned int type)
 {
 	struct kvm_cpuid_entry2 *cpuid_entries;
 	int limit, nent = 0, r = -E2BIG, i;
@@ -512,6 +553,10 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 		goto out;
 	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
 		cpuid->nent = KVM_MAX_CPUID_ENTRIES;
+
+	if (sanity_check_entries(entries, cpuid->nent, type))
+		return -EINVAL;
+
 	r = -ENOMEM;
 	cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
 	if (!cpuid_entries)
@@ -525,7 +570,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 			continue;
 
 		r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
-				&nent, cpuid->nent);
+				&nent, cpuid->nent, type);
 
 		if (r)
 			goto out_free;
@@ -536,7 +581,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 		limit = cpuid_entries[nent - 1].eax;
 		for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
 			r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
-				     &nent, cpuid->nent);
+				     &nent, cpuid->nent, type);
 
 		if (r)
 			goto out_free;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index b7fd07984888..f1e4895174b2 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -6,8 +6,9 @@
 void kvm_update_cpuid(struct kvm_vcpu *vcpu);
 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 					      u32 function, u32 index);
-int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
-				      struct kvm_cpuid_entry2 __user *entries);
+int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
+			    struct kvm_cpuid_entry2 __user *entries,
+			    unsigned int type);
 int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 			     struct kvm_cpuid *cpuid,
 			     struct kvm_cpuid_entry __user *entries);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 68b139fe0dbd..5276618579d3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2510,6 +2510,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
 	case KVM_CAP_SET_TSS_ADDR:
 	case KVM_CAP_EXT_CPUID:
+	case KVM_CAP_EXT_EMUL_CPUID:
 	case KVM_CAP_CLOCKSOURCE:
 	case KVM_CAP_PIT:
 	case KVM_CAP_NOP_IO_DELAY:
@@ -2619,15 +2620,17 @@ long kvm_arch_dev_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
-	case KVM_GET_SUPPORTED_CPUID: {
+	case KVM_GET_SUPPORTED_CPUID:
+	case KVM_GET_EMULATED_CPUID: {
 		struct kvm_cpuid2 __user *cpuid_arg = argp;
 		struct kvm_cpuid2 cpuid;
 
 		r = -EFAULT;
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
 			goto out;
-		r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
-						      cpuid_arg->entries);
+
+		r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
+					    ioctl);
 		if (r)
 			goto out;
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cba62019348d..3d365d191145 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -541,6 +541,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_TRACE_ENABLE          __KVM_DEPRECATED_MAIN_W_0x06
 #define KVM_TRACE_PAUSE           __KVM_DEPRECATED_MAIN_0x07
 #define KVM_TRACE_DISABLE         __KVM_DEPRECATED_MAIN_0x08
+#define KVM_GET_EMULATED_CPUID	  _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
 
 /*
  * Extension capability list.
@@ -667,6 +668,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_RTAS 91
 #define KVM_CAP_IRQ_XICS 92
 #define KVM_CAP_ARM_EL1_32BIT 93
+#define KVM_CAP_EXT_EMUL_CPUID 95
 
 #ifdef KVM_CAP_IRQ_ROUTING
 

From a7dc5f55357702cc04773cd9e92a853767209d7d Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 30 Oct 2013 11:02:17 -0600
Subject: [PATCH 0534/1185] kvm: Add VFIO device

So far we've succeeded at making KVM and VFIO mostly unaware of each
other, but areas are cropping up where a connection beyond eventfds
and irqfds needs to be made.  This patch introduces a KVM-VFIO device
that is meant to be a gateway for such interaction.  The user creates
the device and can add and remove VFIO groups to it via file
descriptors.  When a group is added, KVM verifies the group is valid
and gets a reference to it via the VFIO external user interface.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ec53500fae421e07c5d035918ca454a429732ef4)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/devices/vfio.txt |  22 +++
 arch/x86/kvm/Kconfig                       |   1 +
 arch/x86/kvm/Makefile                      |   2 +-
 include/linux/kvm_host.h                   |   1 +
 include/uapi/linux/kvm.h                   |   4 +
 virt/kvm/Kconfig                           |   3 +
 virt/kvm/kvm_main.c                        |   5 +
 virt/kvm/vfio.c                            | 220 +++++++++++++++++++++
 8 files changed, 257 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/virtual/kvm/devices/vfio.txt
 create mode 100644 virt/kvm/vfio.c

diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt
new file mode 100644
index 000000000000..ef51740c67ca
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/vfio.txt
@@ -0,0 +1,22 @@
+VFIO virtual device
+===================
+
+Device types supported:
+  KVM_DEV_TYPE_VFIO
+
+Only one VFIO instance may be created per VM.  The created device
+tracks VFIO groups in use by the VM and features of those groups
+important to the correctness and acceleration of the VM.  As groups
+are enabled and disabled for use by the VM, KVM should be updated
+about their presence.  When registered with KVM, a reference to the
+VFIO-group is held by KVM.
+
+Groups:
+  KVM_DEV_VFIO_GROUP
+
+KVM_DEV_VFIO_GROUP attributes:
+  KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking
+  KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking
+
+For each, kvm_device_attr.addr points to an int32_t file descriptor
+for the VFIO group.
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a47a3e54b964..b89c5db2b832 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -38,6 +38,7 @@ config KVM
 	select PERF_EVENTS
 	select HAVE_KVM_MSI
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
+	select KVM_VFIO
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index bf4fb04d0112..25d22b2d6509 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -9,7 +9,7 @@ KVM := ../../../virt/kvm
 
 kvm-y			+= $(KVM)/kvm_main.o $(KVM)/ioapic.o \
 				$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
-				$(KVM)/eventfd.o $(KVM)/irqchip.o
+				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
 kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(KVM)/assigned-dev.o $(KVM)/iommu.o
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4de0a8fedf3f..214fc2d84366 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1045,6 +1045,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp);
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
+extern struct kvm_device_ops kvm_vfio_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 3d365d191145..d5b8501cebfd 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -844,6 +844,10 @@ struct kvm_device_attr {
 #define KVM_DEV_TYPE_FSL_MPIC_20	1
 #define KVM_DEV_TYPE_FSL_MPIC_42	2
 #define KVM_DEV_TYPE_XICS		3
+#define KVM_DEV_TYPE_VFIO		4
+#define  KVM_DEV_VFIO_GROUP			1
+#define   KVM_DEV_VFIO_GROUP_ADD			1
+#define   KVM_DEV_VFIO_GROUP_DEL			2
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 779262f59e25..fbe1a48bd629 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -27,3 +27,6 @@ config HAVE_KVM_MSI
 
 config HAVE_KVM_CPU_RELAX_INTERCEPT
        bool
+
+config KVM_VFIO
+       bool
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d6b7d797cb16..652d682b1d55 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2270,6 +2270,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	case KVM_DEV_TYPE_XICS:
 		ops = &kvm_xics_ops;
 		break;
+#endif
+#ifdef CONFIG_KVM_VFIO
+	case KVM_DEV_TYPE_VFIO:
+		ops = &kvm_vfio_ops;
+		break;
 #endif
 	default:
 		return -ENODEV;
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
new file mode 100644
index 000000000000..597c258245ea
--- /dev/null
+++ b/virt/kvm/vfio.c
@@ -0,0 +1,220 @@
+/*
+ * VFIO-KVM bridge pseudo device
+ *
+ * Copyright (C) 2013 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/kvm_host.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+
+struct kvm_vfio_group {
+	struct list_head node;
+	struct vfio_group *vfio_group;
+};
+
+struct kvm_vfio {
+	struct list_head group_list;
+	struct mutex lock;
+};
+
+static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
+{
+	struct vfio_group *vfio_group;
+	struct vfio_group *(*fn)(struct file *);
+
+	fn = symbol_get(vfio_group_get_external_user);
+	if (!fn)
+		return ERR_PTR(-EINVAL);
+
+	vfio_group = fn(filep);
+
+	symbol_put(vfio_group_get_external_user);
+
+	return vfio_group;
+}
+
+static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
+{
+	void (*fn)(struct vfio_group *);
+
+	fn = symbol_get(vfio_group_put_external_user);
+	if (!fn)
+		return;
+
+	fn(vfio_group);
+
+	symbol_put(vfio_group_put_external_user);
+}
+
+static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
+{
+	struct kvm_vfio *kv = dev->private;
+	struct vfio_group *vfio_group;
+	struct kvm_vfio_group *kvg;
+	void __user *argp = (void __user *)arg;
+	struct fd f;
+	int32_t fd;
+	int ret;
+
+	switch (attr) {
+	case KVM_DEV_VFIO_GROUP_ADD:
+		if (get_user(fd, (int32_t __user *)argp))
+			return -EFAULT;
+
+		f = fdget(fd);
+		if (!f.file)
+			return -EBADF;
+
+		vfio_group = kvm_vfio_group_get_external_user(f.file);
+		fdput(f);
+
+		if (IS_ERR(vfio_group))
+			return PTR_ERR(vfio_group);
+
+		mutex_lock(&kv->lock);
+
+		list_for_each_entry(kvg, &kv->group_list, node) {
+			if (kvg->vfio_group == vfio_group) {
+				mutex_unlock(&kv->lock);
+				kvm_vfio_group_put_external_user(vfio_group);
+				return -EEXIST;
+			}
+		}
+
+		kvg = kzalloc(sizeof(*kvg), GFP_KERNEL);
+		if (!kvg) {
+			mutex_unlock(&kv->lock);
+			kvm_vfio_group_put_external_user(vfio_group);
+			return -ENOMEM;
+		}
+
+		list_add_tail(&kvg->node, &kv->group_list);
+		kvg->vfio_group = vfio_group;
+
+		mutex_unlock(&kv->lock);
+
+		return 0;
+
+	case KVM_DEV_VFIO_GROUP_DEL:
+		if (get_user(fd, (int32_t __user *)argp))
+			return -EFAULT;
+
+		f = fdget(fd);
+		if (!f.file)
+			return -EBADF;
+
+		vfio_group = kvm_vfio_group_get_external_user(f.file);
+		fdput(f);
+
+		if (IS_ERR(vfio_group))
+			return PTR_ERR(vfio_group);
+
+		ret = -ENOENT;
+
+		mutex_lock(&kv->lock);
+
+		list_for_each_entry(kvg, &kv->group_list, node) {
+			if (kvg->vfio_group != vfio_group)
+				continue;
+
+			list_del(&kvg->node);
+			kvm_vfio_group_put_external_user(kvg->vfio_group);
+			kfree(kvg);
+			ret = 0;
+			break;
+		}
+
+		mutex_unlock(&kv->lock);
+
+		kvm_vfio_group_put_external_user(vfio_group);
+
+		return ret;
+	}
+
+	return -ENXIO;
+}
+
+static int kvm_vfio_set_attr(struct kvm_device *dev,
+			     struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_VFIO_GROUP:
+		return kvm_vfio_set_group(dev, attr->attr, attr->addr);
+	}
+
+	return -ENXIO;
+}
+
+static int kvm_vfio_has_attr(struct kvm_device *dev,
+			     struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_VFIO_GROUP:
+		switch (attr->attr) {
+		case KVM_DEV_VFIO_GROUP_ADD:
+		case KVM_DEV_VFIO_GROUP_DEL:
+			return 0;
+		}
+
+		break;
+	}
+
+	return -ENXIO;
+}
+
+static void kvm_vfio_destroy(struct kvm_device *dev)
+{
+	struct kvm_vfio *kv = dev->private;
+	struct kvm_vfio_group *kvg, *tmp;
+
+	list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
+		kvm_vfio_group_put_external_user(kvg->vfio_group);
+		list_del(&kvg->node);
+		kfree(kvg);
+	}
+
+	kfree(kv);
+	kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
+}
+
+static int kvm_vfio_create(struct kvm_device *dev, u32 type)
+{
+	struct kvm_device *tmp;
+	struct kvm_vfio *kv;
+
+	/* Only one VFIO "device" per VM */
+	list_for_each_entry(tmp, &dev->kvm->devices, vm_node)
+		if (tmp->ops == &kvm_vfio_ops)
+			return -EBUSY;
+
+	kv = kzalloc(sizeof(*kv), GFP_KERNEL);
+	if (!kv)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&kv->group_list);
+	mutex_init(&kv->lock);
+
+	dev->private = kv;
+
+	return 0;
+}
+
+struct kvm_device_ops kvm_vfio_ops = {
+	.name = "kvm-vfio",
+	.create = kvm_vfio_create,
+	.destroy = kvm_vfio_destroy,
+	.set_attr = kvm_vfio_set_attr,
+	.has_attr = kvm_vfio_has_attr,
+};

From 7f17a13bdd7902c3e4d55c273820e3099c1e1d33 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 30 Oct 2013 21:43:01 +0200
Subject: [PATCH 0535/1185] kvm_host: typo fix

fix up typo in comment.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 81e87e26796782e014fd1f2bb9cd8fb6ce4021a8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 214fc2d84366..6d24a2671d29 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -775,7 +775,7 @@ static inline void kvm_guest_enter(void)
 
 	/* KVM does not hold any references to rcu protected data when it
 	 * switches CPU into a guest mode. In fact switching to a guest mode
-	 * is very similar to exiting to userspase from rcu point of view. In
+	 * is very similar to exiting to userspace from rcu point of view. In
 	 * addition CPU may stay in a guest mode for quite a long time (up to
 	 * one time slice). Lets treat guest mode as quiescent state, just like
 	 * we do with user-mode execution.

From d49c7a4473eebfda645f469c7a2e77846c8008e2 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 5 Nov 2013 16:04:18 +0200
Subject: [PATCH 0536/1185] KVM: remove vm mmap method

It was used in conjunction with KVM_SET_MEMORY_REGION ioctl which was
removed by b74a07beed0 in 2010, QEMU stopped using it in 2008, so
it is time to remove the code finally.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 80f5b5e700fa9c58480eafce0d47367bafb70006)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 32 --------------------------------
 1 file changed, 32 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 652d682b1d55..dd9ce144cf99 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2524,44 +2524,12 @@ static long kvm_vm_compat_ioctl(struct file *filp,
 }
 #endif
 
-static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	struct page *page[1];
-	unsigned long addr;
-	int npages;
-	gfn_t gfn = vmf->pgoff;
-	struct kvm *kvm = vma->vm_file->private_data;
-
-	addr = gfn_to_hva(kvm, gfn);
-	if (kvm_is_error_hva(addr))
-		return VM_FAULT_SIGBUS;
-
-	npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
-				NULL);
-	if (unlikely(npages != 1))
-		return VM_FAULT_SIGBUS;
-
-	vmf->page = page[0];
-	return 0;
-}
-
-static const struct vm_operations_struct kvm_vm_vm_ops = {
-	.fault = kvm_vm_fault,
-};
-
-static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	vma->vm_ops = &kvm_vm_vm_ops;
-	return 0;
-}
-
 static struct file_operations kvm_vm_fops = {
 	.release        = kvm_vm_release,
 	.unlocked_ioctl = kvm_vm_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl   = kvm_vm_compat_ioctl,
 #endif
-	.mmap           = kvm_vm_mmap,
 	.llseek		= noop_llseek,
 };
 

From 60979ebbbbe9d4c56e87cffcaa92933d96637d7b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <Marc.Zyngier@arm.com>
Date: Tue, 5 Nov 2013 18:29:45 +0000
Subject: [PATCH 0537/1185] arm64: KVM: initialize HYP mode following the
 kernel endianness

Force SCTLR_EL2.EE to 1 if the kernel is compiled as BE.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 18ea3dbc9e5c8a53a361b17c4a5676ea6f4bcb72)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp-init.S | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index ba84e6705e20..2b0244d65c16 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -74,7 +74,10 @@ __do_hyp_init:
 	msr	mair_el2, x4
 	isb
 
-	mov	x4, #SCTLR_EL2_FLAGS
+	mrs	x4, sctlr_el2
+	and	x4, x4, #SCTLR_EL2_EE	// preserve endianness of EL2
+	ldr	x5, =SCTLR_EL2_FLAGS
+	orr	x4, x4, x5
 	msr	sctlr_el2, x4
 	isb
 

From 128a021aa83b73d07153f8fee5dd22e6933f62e2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <Marc.Zyngier@arm.com>
Date: Tue, 5 Nov 2013 18:29:46 +0000
Subject: [PATCH 0538/1185] arm64: KVM: vgic: byteswap GICv2 access on world
 switch if BE

Ensure that accesses to the GICH_* registers are byteswapped
when the kernel is compiled as big-endian.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit c5b2c0f5203b3bc678a8967daedf7114029975ae)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp.S | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 1ac0bbbdddb2..3b47c36e10ff 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -403,6 +403,14 @@ __kvm_hyp_code_start:
 	ldr	w9, [x2, #GICH_ELRSR0]
 	ldr	w10, [x2, #GICH_ELRSR1]
 	ldr	w11, [x2, #GICH_APR]
+CPU_BE(	rev	w4,  w4  )
+CPU_BE(	rev	w5,  w5  )
+CPU_BE(	rev	w6,  w6  )
+CPU_BE(	rev	w7,  w7  )
+CPU_BE(	rev	w8,  w8  )
+CPU_BE(	rev	w9,  w9  )
+CPU_BE(	rev	w10, w10 )
+CPU_BE(	rev	w11, w11 )
 
 	str	w4, [x3, #VGIC_CPU_HCR]
 	str	w5, [x3, #VGIC_CPU_VMCR]
@@ -421,6 +429,7 @@ __kvm_hyp_code_start:
 	ldr	w4, [x3, #VGIC_CPU_NR_LR]
 	add	x3, x3, #VGIC_CPU_LR
 1:	ldr	w5, [x2], #4
+CPU_BE(	rev	w5, w5 )
 	str	w5, [x3], #4
 	sub	w4, w4, #1
 	cbnz	w4, 1b
@@ -446,6 +455,9 @@ __kvm_hyp_code_start:
 	ldr	w4, [x3, #VGIC_CPU_HCR]
 	ldr	w5, [x3, #VGIC_CPU_VMCR]
 	ldr	w6, [x3, #VGIC_CPU_APR]
+CPU_BE(	rev	w4, w4 )
+CPU_BE(	rev	w5, w5 )
+CPU_BE(	rev	w6, w6 )
 
 	str	w4, [x2, #GICH_HCR]
 	str	w5, [x2, #GICH_VMCR]
@@ -456,6 +468,7 @@ __kvm_hyp_code_start:
 	ldr	w4, [x3, #VGIC_CPU_NR_LR]
 	add	x3, x3, #VGIC_CPU_LR
 1:	ldr	w5, [x3], #4
+CPU_BE(	rev	w5, w5 )
 	str	w5, [x2], #4
 	sub	w4, w4, #1
 	cbnz	w4, 1b

From dd8858820ec48b90c5ab5c6631f50a613deb55bc Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 12 Feb 2013 12:40:22 +0000
Subject: [PATCH 0539/1185] arm/arm64: KVM: MMIO support for BE guest

Do the necessary byteswap when host and guest have different
views of the universe. Actually, the only case we need to take
care of is when the guest is BE. All the other cases are naturally
handled.

Also be careful about endianness when the data is being memcopy-ed
from/to the run buffer.

Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 6d89d2d9b5bac9dbe40ee106ceda9307b6265234)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_emulate.h   | 41 +++++++++++++
 arch/arm/kvm/mmio.c                  | 86 ++++++++++++++++++++++++----
 arch/arm64/include/asm/kvm_emulate.h | 48 ++++++++++++++++
 3 files changed, 164 insertions(+), 11 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 708e4d8a647f..b79cd8d3d6ee 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -162,4 +162,45 @@ static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
 	return vcpu->arch.cp15[c0_MPIDR];
 }
 
+static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
+{
+	return !!(*vcpu_cpsr(vcpu) & PSR_E_BIT);
+}
+
+static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
+						    unsigned long data,
+						    unsigned int len)
+{
+	if (kvm_vcpu_is_be(vcpu)) {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return be16_to_cpu(data & 0xffff);
+		default:
+			return be32_to_cpu(data);
+		}
+	}
+
+	return data;		/* Leave LE untouched */
+}
+
+static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
+						    unsigned long data,
+						    unsigned int len)
+{
+	if (kvm_vcpu_is_be(vcpu)) {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return cpu_to_be16(data & 0xffff);
+		default:
+			return cpu_to_be32(data);
+		}
+	}
+
+	return data;		/* Leave LE untouched */
+}
+
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 0c25d9487d53..4cb5a93182e9 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -23,6 +23,68 @@
 
 #include "trace.h"
 
+static void mmio_write_buf(char *buf, unsigned int len, unsigned long data)
+{
+	void *datap = NULL;
+	union {
+		u8	byte;
+		u16	hword;
+		u32	word;
+		u64	dword;
+	} tmp;
+
+	switch (len) {
+	case 1:
+		tmp.byte	= data;
+		datap		= &tmp.byte;
+		break;
+	case 2:
+		tmp.hword	= data;
+		datap		= &tmp.hword;
+		break;
+	case 4:
+		tmp.word	= data;
+		datap		= &tmp.word;
+		break;
+	case 8:
+		tmp.dword	= data;
+		datap		= &tmp.dword;
+		break;
+	}
+
+	memcpy(buf, datap, len);
+}
+
+static unsigned long mmio_read_buf(char *buf, unsigned int len)
+{
+	unsigned long data = 0;
+	union {
+		u16	hword;
+		u32	word;
+		u64	dword;
+	} tmp;
+
+	switch (len) {
+	case 1:
+		data = buf[0];
+		break;
+	case 2:
+		memcpy(&tmp.hword, buf, len);
+		data = tmp.hword;
+		break;
+	case 4:
+		memcpy(&tmp.word, buf, len);
+		data = tmp.word;
+		break;
+	case 8:
+		memcpy(&tmp.dword, buf, len);
+		data = tmp.dword;
+		break;
+	}
+
+	return data;
+}
+
 /**
  * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
  * @vcpu: The VCPU pointer
@@ -33,28 +95,27 @@
  */
 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	unsigned long *dest;
+	unsigned long data;
 	unsigned int len;
 	int mask;
 
 	if (!run->mmio.is_write) {
-		dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt);
-		*dest = 0;
-
 		len = run->mmio.len;
 		if (len > sizeof(unsigned long))
 			return -EINVAL;
 
-		memcpy(dest, run->mmio.data, len);
-
-		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
-				*((u64 *)run->mmio.data));
+		data = mmio_read_buf(run->mmio.data, len);
 
 		if (vcpu->arch.mmio_decode.sign_extend &&
 		    len < sizeof(unsigned long)) {
 			mask = 1U << ((len * 8) - 1);
-			*dest = (*dest ^ mask) - mask;
+			data = (data ^ mask) - mask;
 		}
+
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
+			       data);
+		data = vcpu_data_host_to_guest(vcpu, data, len);
+		*vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data;
 	}
 
 	return 0;
@@ -105,6 +166,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		 phys_addr_t fault_ipa)
 {
 	struct kvm_exit_mmio mmio;
+	unsigned long data;
 	unsigned long rt;
 	int ret;
 
@@ -125,13 +187,15 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	}
 
 	rt = vcpu->arch.mmio_decode.rt;
+	data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);
+
 	trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
 					 KVM_TRACE_MMIO_READ_UNSATISFIED,
 			mmio.len, fault_ipa,
-			(mmio.is_write) ? *vcpu_reg(vcpu, rt) : 0);
+			(mmio.is_write) ? data : 0);
 
 	if (mmio.is_write)
-		memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len);
+		mmio_write_buf(mmio.data, mmio.len, data);
 
 	if (vgic_handle_mmio(vcpu, run, &mmio))
 		return 1;
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 6df93cdc652b..291f87cf457f 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -182,4 +182,52 @@ static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
 	return vcpu_sys_reg(vcpu, MPIDR_EL1);
 }
 
+static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
+{
+	if (vcpu_mode_is_32bit(vcpu))
+		return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT);
+
+	return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
+}
+
+static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
+						    unsigned long data,
+						    unsigned int len)
+{
+	if (kvm_vcpu_is_be(vcpu)) {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return be16_to_cpu(data & 0xffff);
+		case 4:
+			return be32_to_cpu(data & 0xffffffff);
+		default:
+			return be64_to_cpu(data);
+		}
+	}
+
+	return data;		/* Leave LE untouched */
+}
+
+static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
+						    unsigned long data,
+						    unsigned int len)
+{
+	if (kvm_vcpu_is_be(vcpu)) {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return cpu_to_be16(data & 0xffff);
+		case 4:
+			return cpu_to_be32(data & 0xffffffff);
+		default:
+			return cpu_to_be64(data);
+		}
+	}
+
+	return data;		/* Leave LE untouched */
+}
+
 #endif /* __ARM64_KVM_EMULATE_H__ */

From 53e38964402dd81c8528f3d2c6fd119c4091a390 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 5 Nov 2013 14:12:15 +0000
Subject: [PATCH 0540/1185] arm/arm64: KVM: PSCI: propagate caller endianness
 to the incoming vcpu

When booting a vcpu using PSCI, make sure we start it with the
endianness of the caller. Otherwise, secondaries can be pretty
unhappy to execute a BE kernel in LE mode...

This conforms to PSCI spec Rev B, 5.13.3.

Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit ce94fe93d566bf381c6ecbd45010d36c5f04d692)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_emulate.h   | 5 +++++
 arch/arm/kvm/psci.c                  | 4 ++++
 arch/arm64/include/asm/kvm_emulate.h | 8 ++++++++
 3 files changed, 17 insertions(+)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index b79cd8d3d6ee..0fa90c962ac8 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -162,6 +162,11 @@ static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
 	return vcpu->arch.cp15[c0_MPIDR];
 }
 
+static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
+{
+	*vcpu_cpsr(vcpu) |= PSR_E_BIT;
+}
+
 static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
 {
 	return !!(*vcpu_cpsr(vcpu) & PSR_E_BIT);
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 311263124acf..0881bf169fbc 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -71,6 +71,10 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 		vcpu_set_thumb(vcpu);
 	}
 
+	/* Propagate caller endianness */
+	if (kvm_vcpu_is_be(source_vcpu))
+		kvm_vcpu_set_be(vcpu);
+
 	*vcpu_pc(vcpu) = target_pc;
 	vcpu->arch.pause = false;
 	smp_mb();		/* Make sure the above is visible */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 291f87cf457f..dd8ecfc3f995 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -182,6 +182,14 @@ static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
 	return vcpu_sys_reg(vcpu, MPIDR_EL1);
 }
 
+static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
+{
+	if (vcpu_mode_is_32bit(vcpu))
+		*vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT;
+	else
+		vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25);
+}
+
 static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
 {
 	if (vcpu_mode_is_32bit(vcpu))

From 52031ff6011942b7e29788b9ce5066cb45fe3e4e Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 15 Nov 2013 13:14:12 -0800
Subject: [PATCH 0541/1185] arm/arm64: KVM: Fix hyp mappings of vmalloc regions

Using virt_to_phys on percpu mappings is horribly wrong as it may be
backed by vmalloc.  Introduce kvm_kaddr_to_phys which translates both
types of valid kernel addresses to the corresponding physical address.

At the same time resolves a typing issue where we were storing the
physical address as a 32 bit unsigned long (on arm), truncating the
physical address for addresses above the 4GB limit.  This caused
breakage on Keystone.

Cc: <stable@vger.kernel.org>	[3.10+]
Reported-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 40c2729bab48e2832b17c1fa8af9db60e776131b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 371958370de4..580906989db1 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -334,6 +334,17 @@ static int __create_hyp_mappings(pgd_t *pgdp,
 	return err;
 }
 
+static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
+{
+	if (!is_vmalloc_addr(kaddr)) {
+		BUG_ON(!virt_addr_valid(kaddr));
+		return __pa(kaddr);
+	} else {
+		return page_to_phys(vmalloc_to_page(kaddr)) +
+		       offset_in_page(kaddr);
+	}
+}
+
 /**
  * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
  * @from:	The virtual kernel start address of the range
@@ -345,16 +356,27 @@ static int __create_hyp_mappings(pgd_t *pgdp,
  */
 int create_hyp_mappings(void *from, void *to)
 {
-	unsigned long phys_addr = virt_to_phys(from);
+	phys_addr_t phys_addr;
+	unsigned long virt_addr;
 	unsigned long start = KERN_TO_HYP((unsigned long)from);
 	unsigned long end = KERN_TO_HYP((unsigned long)to);
 
-	/* Check for a valid kernel memory mapping */
-	if (!virt_addr_valid(from) || !virt_addr_valid(to - 1))
-		return -EINVAL;
+	start = start & PAGE_MASK;
+	end = PAGE_ALIGN(end);
 
-	return __create_hyp_mappings(hyp_pgd, start, end,
-				     __phys_to_pfn(phys_addr), PAGE_HYP);
+	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
+		int err;
+
+		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
+		err = __create_hyp_mappings(hyp_pgd, virt_addr,
+					    virt_addr + PAGE_SIZE,
+					    __phys_to_pfn(phys_addr),
+					    PAGE_HYP);
+		if (err)
+			return err;
+	}
+
+	return 0;
 }
 
 /**

From 4ae68e1ee672530df9236b07ff267f06e107bc9a Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 18 Nov 2013 10:35:55 +0100
Subject: [PATCH 0542/1185] KVM: kvm_clear_guest_page(): fix empty_zero_page
 usage

Using the address of 'empty_zero_page' as source address in order to
clear a page is wrong. On some architectures empty_zero_page is only the
pointer to the struct page of the empty_zero_page.  Therefore the clear
page operation would copy the contents of a couple of struct pages instead
of clearing a page.  For kvm only arm/arm64 are affected by this bug.

To fix this use the ZERO_PAGE macro instead which will return the struct
page address of the empty_zero_page on all architectures.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 8a3caa6d74597c2a083f7c87f866891a0b12540b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dd9ce144cf99..e2b9a0670639 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1613,8 +1613,9 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
 
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
 {
-	return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page,
-				    offset, len);
+	const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
+
+	return kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
 

From 4479f3b0060731a9d61b30570db282b5587c885f Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Tue, 19 Nov 2013 14:59:12 -0500
Subject: [PATCH 0543/1185] arm/arm64: kvm: Use virt_to_idmap instead of
 virt_to_phys for idmap mappings

KVM initialisation fails on architectures implementing virt_to_idmap()
because virt_to_phys() on such architectures won't fetch you the correct
idmap page.

So update the KVM ARM code to use the virt_to_idmap() to fix the issue.
Since the KVM code is shared between arm and arm64, we create
kvm_virt_to_phys() and handle the redirection in respective headers.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 4fda342cc7f577599c53fd27b99c953c7b1da18a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h   | 1 +
 arch/arm/kvm/mmu.c               | 8 ++++----
 arch/arm64/include/asm/kvm_mmu.h | 1 +
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 77de4a41cc50..2d122adcdb22 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -140,6 +140,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
 }
 
 #define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
+#define kvm_virt_to_phys(x)		virt_to_idmap((unsigned long)(x))
 
 #endif	/* !__ASSEMBLY__ */
 
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 580906989db1..659db0ed1370 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -916,9 +916,9 @@ int kvm_mmu_init(void)
 {
 	int err;
 
-	hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start);
-	hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end);
-	hyp_idmap_vector = virt_to_phys(__kvm_hyp_init);
+	hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
+	hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
+	hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
 
 	if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
 		/*
@@ -945,7 +945,7 @@ int kvm_mmu_init(void)
 		 */
 		kvm_flush_dcache_to_poc(init_bounce_page, len);
 
-		phys_base = virt_to_phys(init_bounce_page);
+		phys_base = kvm_virt_to_phys(init_bounce_page);
 		hyp_idmap_vector += phys_base - hyp_idmap_start;
 		hyp_idmap_start = phys_base;
 		hyp_idmap_end = phys_base + len;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 680f74e67497..7f1f9408ff66 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -136,6 +136,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
 }
 
 #define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
+#define kvm_virt_to_phys(x)		__virt_to_phys((unsigned long)(x))
 
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */

From bc071b4cc769735a12fc9fa17cb54dd42f3fb0e6 Mon Sep 17 00:00:00 2001
From: Andy Honig <ahonig@google.com>
Date: Mon, 18 Nov 2013 16:09:22 -0800
Subject: [PATCH 0544/1185] KVM: Improve create VCPU parameter (CVE-2013-4587)

In multiple functions the vcpu_id is used as an offset into a bitfield.  A
malicious user could specify a vcpu_id greater than 255 in order to set or
clear bits in kernel memory.  This could be used to elevate privileges in the
kernel.  This patch verifies that the vcpu_id provided is less than 255.
The api documentation already specifies that the vcpu_id must be less than
max_vcpus, but this is currently not checked.

Reported-by: Andrew Honig <ahonig@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Andrew Honig <ahonig@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 338c7dbadd2671189cec7faf64c84d01071b3f96)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e2b9a0670639..2dd59b957164 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1894,6 +1894,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	int r;
 	struct kvm_vcpu *vcpu, *v;
 
+	if (id >= KVM_MAX_VCPUS)
+		return -EINVAL;
+
 	vcpu = kvm_arch_vcpu_create(kvm, id);
 	if (IS_ERR(vcpu))
 		return PTR_ERR(vcpu);

From 580ab4613364270a998a44850d49c4d8c5df5144 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
Date: Fri, 13 Dec 2013 15:07:21 +0900
Subject: [PATCH 0545/1185] KVM: Use cond_resched() directly and remove useless
 kvm_resched()

Since the commit 15ad7146 ("KVM: Use the scheduler preemption notifiers
to make kvm preemptible"), the remaining stuff in this function is a
simple cond_resched() call with an extra need_resched() check which was
there to avoid dropping VCPUs unnecessarily.  Now it is meaningless.

Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c08ac06ab3f3cdb8d34376c3a8a5e46a31a62c8f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/ia64/kvm/kvm-ia64.c     | 2 +-
 arch/powerpc/kvm/book3s_hv.c | 2 +-
 arch/x86/kvm/x86.c           | 2 +-
 include/linux/kvm_host.h     | 1 -
 virt/kvm/kvm_main.c          | 8 --------
 5 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 985bf80c622e..53f44bee9ebb 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -702,7 +702,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 out:
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	if (r > 0) {
-		kvm_resched(vcpu);
+		cond_resched();
 		idx = srcu_read_lock(&vcpu->kvm->srcu);
 		goto again;
 	}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 550f5928b394..717e5b525f3b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1253,7 +1253,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	kvm_guest_exit();
 
 	preempt_enable();
-	kvm_resched(vcpu);
+	cond_resched();
 
 	spin_lock(&vc->lock);
 	now = get_tb();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5276618579d3..805c8e92cf66 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5947,7 +5947,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 		}
 		if (need_resched()) {
 			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
-			kvm_resched(vcpu);
+			cond_resched();
 			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 		}
 	}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6d24a2671d29..9c0f8545df73 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -569,7 +569,6 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
-void kvm_resched(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2dd59b957164..cb9a865c8e01 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1706,14 +1706,6 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
 #endif /* !CONFIG_S390 */
 
-void kvm_resched(struct kvm_vcpu *vcpu)
-{
-	if (!need_resched())
-		return;
-	cond_resched();
-}
-EXPORT_SYMBOL_GPL(kvm_resched);
-
 bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 {
 	struct pid *pid;

From 8215ed10b4803d159b3f10784140a79eafd94382 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Mon, 5 Aug 2013 15:04:46 +0100
Subject: [PATCH 0546/1185] arm: kvm: implement CPU PM notifier

Upon CPU shutdown and consequent warm-reboot, the hypervisor CPU state
must be re-initialized. This patch implements a CPU PM notifier that
upon warm-boot calls a KVM hook to reinitialize properly the hypervisor
state so that the CPU can be safely resumed.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
(cherry picked from commit 1fcf7ce0c60213994269fb59569ec161eb6e08d6)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e312e4a53f8d..b04013608e7e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -17,6 +17,7 @@
  */
 
 #include <linux/cpu.h>
+#include <linux/cpu_pm.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kvm_host.h>
@@ -853,6 +854,33 @@ static struct notifier_block hyp_init_cpu_nb = {
 	.notifier_call = hyp_init_cpu_notify,
 };
 
+#ifdef CONFIG_CPU_PM
+static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
+				    unsigned long cmd,
+				    void *v)
+{
+	if (cmd == CPU_PM_EXIT) {
+		cpu_init_hyp_mode(NULL);
+		return NOTIFY_OK;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block hyp_init_cpu_pm_nb = {
+	.notifier_call = hyp_init_cpu_pm_notifier,
+};
+
+static void __init hyp_cpu_pm_init(void)
+{
+	cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
+}
+#else
+static inline void hyp_cpu_pm_init(void)
+{
+}
+#endif
+
 /**
  * Inits Hyp-mode on all online CPUs
  */
@@ -1013,6 +1041,8 @@ int kvm_arch_init(void *opaque)
 		goto out_err;
 	}
 
+	hyp_cpu_pm_init();
+
 	kvm_coproc_table_init();
 	return 0;
 out_err:

From 90715c6a824fcc239e7da97c4078b85c7944df7c Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Mon, 9 Dec 2013 00:22:53 +0900
Subject: [PATCH 0547/1185] treewide: Fix typos in printk

Correct spelling typo in various part of kernel

[ cdall: Picks KVM/arm64 specific part not already merged into LSK ]

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
(cherry picked from commit 77d84ff87e9d38072abcca665ca22cb1da41cb86)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/handle_exit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 8da56067c304..42a0f1bddfe7 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -90,7 +90,7 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 
 	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
 	    !arm_exit_handlers[hsr_ec]) {
-		kvm_err("Unkown exception class: hsr: %#08x\n",
+		kvm_err("Unknown exception class: hsr: %#08x\n",
 			(unsigned int)kvm_vcpu_get_hsr(vcpu));
 		BUG();
 	}

From 7e2c9ce0199412b559bbf527956c004a05a64619 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 19 Nov 2013 17:43:19 -0800
Subject: [PATCH 0548/1185] arm: KVM: Don't return PSCI_INVAL if waitqueue is
 inactive

The current KVM implementation of PSCI returns INVALID_PARAMETERS if the
waitqueue for the corresponding CPU is not active.  This does not seem
correct, since KVM should not care what the specific thread is doing,
for example, user space may not have called KVM_RUN on this VCPU yet or
the thread may be busy looping to user space because it received a
signal; this is really up to the user space implementation.  Instead we
should check specifically that the CPU is marked as being turned off,
regardless of the VCPU thread state, and if it is, we shall
simply clear the pause flag on the CPU and wake up the thread if it
happens to be blocked for us.

Further, the implementation seems to be racy when executing multiple
VCPU threads.  There really isn't a reasonable user space programming
scheme to ensure all secondary CPUs have reached kvm_vcpu_first_run_init
before turning on the boot CPU.

Therefore, set the pause flag on the vcpu at VCPU init time (which can
reasonably be expected to be completed for all CPUs by user space before
running any VCPUs) and clear both this flag and the feature (in case the
feature can somehow get set again in the future) and ping the waitqueue
on turning on a VCPU using PSCI.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 478a8237f656d86d25b3e4e4bf3c48f590156294)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c  | 30 +++++++++++++++++++-----------
 arch/arm/kvm/psci.c | 11 ++++++-----
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index b04013608e7e..bb43b8cc1231 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -479,15 +479,6 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 			return ret;
 	}
 
-	/*
-	 * Handle the "start in power-off" case by calling into the
-	 * PSCI code.
-	 */
-	if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) {
-		*vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF;
-		kvm_psci_call(vcpu);
-	}
-
 	return 0;
 }
 
@@ -701,6 +692,24 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 	return -EINVAL;
 }
 
+static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
+					 struct kvm_vcpu_init *init)
+{
+	int ret;
+
+	ret = kvm_vcpu_set_target(vcpu, init);
+	if (ret)
+		return ret;
+
+	/*
+	 * Handle the "start in power-off" case by marking the VCPU as paused.
+	 */
+	if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+		vcpu->arch.pause = true;
+
+	return 0;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -714,8 +723,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (copy_from_user(&init, argp, sizeof(init)))
 			return -EFAULT;
 
-		return kvm_vcpu_set_target(vcpu, &init);
-
+		return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
 	}
 	case KVM_SET_ONE_REG:
 	case KVM_GET_ONE_REG: {
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 0881bf169fbc..448f60e8d23c 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -54,15 +54,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 		}
 	}
 
-	if (!vcpu)
+	/*
+	 * Make sure the caller requested a valid CPU and that the CPU is
+	 * turned off.
+	 */
+	if (!vcpu || !vcpu->arch.pause)
 		return KVM_PSCI_RET_INVAL;
 
 	target_pc = *vcpu_reg(source_vcpu, 2);
 
-	wq = kvm_arch_vcpu_wq(vcpu);
-	if (!waitqueue_active(wq))
-		return KVM_PSCI_RET_INVAL;
-
 	kvm_reset_vcpu(vcpu);
 
 	/* Gracefully handle Thumb2 entry point */
@@ -79,6 +79,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	vcpu->arch.pause = false;
 	smp_mb();		/* Make sure the above is visible */
 
+	wq = kvm_arch_vcpu_wq(vcpu);
 	wake_up_interruptible(wq);
 
 	return KVM_PSCI_RET_SUCCESS;

From b5a94dd48dc470624dcb7d581432a439d7a289c0 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sat, 16 Nov 2013 10:51:25 -0800
Subject: [PATCH 0549/1185] arm/arm64: KVM: arch_timer: Initialize cntvoff at
 kvm_init

Initialize the cntvoff at kvm_init_vm time, not before running the VCPUs
at the first time because that will overwrite any potentially restored
values from user space.

Cc: Andre Przywara <andre.przywara@linaro.org>
Acked-by: Marc Zynger <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit a1a64387adeeba7a34ce06f2774e81f496ee803b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c  | 2 ++
 virt/kvm/arm/vgic.c | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index bb43b8cc1231..71ad3a8706d9 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -138,6 +138,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (ret)
 		goto out_free_stage2_pgd;
 
+	kvm_timer_init(kvm);
+
 	/* Mark the initial VMID generation invalid */
 	kvm->arch.vmid_gen = 0;
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 685fc72fc751..81e9481184a7 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1409,7 +1409,6 @@ int kvm_vgic_init(struct kvm *kvm)
 	for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
 		vgic_set_target_reg(kvm, 0, i);
 
-	kvm_timer_init(kvm);
 	kvm->arch.vgic.ready = true;
 out:
 	mutex_unlock(&kvm->lock);

From 764ee339777196e8cf9fa090f5041b02efc93630 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@linaro.org>
Date: Fri, 13 Dec 2013 14:23:26 +0100
Subject: [PATCH 0550/1185] ARM/KVM: save and restore generic timer registers

For migration to work we need to save (and later restore) the state of
each core's virtual generic timer.
Since this is per VCPU, we can use the [gs]et_one_reg ioctl and export
the three needed registers (control, counter, compare value).
Though they live in cp15 space, we don't use the existing list, since
they need special accessor functions and the arch timer is optional.

Acked-by: Marc Zynger <marc.zyngier@arm.com>
Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 39735a3a390431bcf60f9174b7d64f787fd6afa9)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h   |  3 +
 arch/arm/include/uapi/asm/kvm.h   | 20 +++++++
 arch/arm/kvm/guest.c              | 92 ++++++++++++++++++++++++++++++-
 arch/arm64/include/uapi/asm/kvm.h | 18 ++++++
 virt/kvm/arm/arch_timer.c         | 34 ++++++++++++
 5 files changed, 166 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 8a6f6db14ee4..098f7dd6d564 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -225,4 +225,7 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
+int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index c498b60c0505..835b8678de03 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -119,6 +119,26 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_32_CRN_MASK		0x0000000000007800
 #define KVM_REG_ARM_32_CRN_SHIFT	11
 
+#define ARM_CP15_REG_SHIFT_MASK(x,n) \
+	(((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
+
+#define __ARM_CP15_REG(op1,crn,crm,op2) \
+	(KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \
+	ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \
+	ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \
+	ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \
+	ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2))
+
+#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32)
+
+#define __ARM_CP15_REG64(op1,crm) \
+	(__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
+#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
+
+#define KVM_REG_ARM_TIMER_CTL		ARM_CP15_REG32(0, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT		ARM_CP15_REG64(1, 14) 
+#define KVM_REG_ARM_TIMER_CVAL		ARM_CP15_REG64(3, 14) 
+
 /* Normal registers are mapped as coprocessor 16. */
 #define KVM_REG_ARM_CORE		(0x0010 << KVM_REG_ARM_COPROC_SHIFT)
 #define KVM_REG_ARM_CORE_REG(name)	(offsetof(struct kvm_regs, name) / 4)
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 20f8d97904af..2786eae10c0d 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -109,6 +109,83 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return -EINVAL;
 }
 
+#ifndef CONFIG_KVM_ARM_TIMER
+
+#define NUM_TIMER_REGS 0
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	return 0;
+}
+
+static bool is_timer_reg(u64 index)
+{
+	return false;
+}
+
+int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
+{
+	return 0;
+}
+
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
+{
+	return 0;
+}
+
+#else
+
+#define NUM_TIMER_REGS 3
+
+static bool is_timer_reg(u64 index)
+{
+	switch (index) {
+	case KVM_REG_ARM_TIMER_CTL:
+	case KVM_REG_ARM_TIMER_CNT:
+	case KVM_REG_ARM_TIMER_CVAL:
+		return true;
+	}
+	return false;
+}
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+		return -EFAULT;
+
+	return 0;
+}
+
+#endif
+
+static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+	int ret;
+
+	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+	if (ret != 0)
+		return ret;
+
+	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
+}
+
+static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+
+	val = kvm_arm_timer_get_reg(vcpu, reg->id);
+	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+}
+
 static unsigned long num_core_regs(void)
 {
 	return sizeof(struct kvm_regs) / sizeof(u32);
@@ -121,7 +198,8 @@ static unsigned long num_core_regs(void)
  */
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
 {
-	return num_core_regs() + kvm_arm_num_coproc_regs(vcpu);
+	return num_core_regs() + kvm_arm_num_coproc_regs(vcpu)
+		+ NUM_TIMER_REGS;
 }
 
 /**
@@ -133,6 +211,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 {
 	unsigned int i;
 	const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE;
+	int ret;
 
 	for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) {
 		if (put_user(core_reg | i, uindices))
@@ -140,6 +219,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 		uindices++;
 	}
 
+	ret = copy_timer_indices(vcpu, uindices);
+	if (ret)
+		return ret;
+	uindices += NUM_TIMER_REGS;
+
 	return kvm_arm_copy_coproc_indices(vcpu, uindices);
 }
 
@@ -153,6 +237,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return get_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return get_timer_reg(vcpu, reg);
+
 	return kvm_arm_coproc_get_reg(vcpu, reg);
 }
 
@@ -166,6 +253,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return set_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return set_timer_reg(vcpu, reg);
+
 	return kvm_arm_coproc_set_reg(vcpu, reg);
 }
 
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 5031f4263937..7c25ca8b02b3 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -129,6 +129,24 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM64_SYSREG_OP2_MASK	0x0000000000000007
 #define KVM_REG_ARM64_SYSREG_OP2_SHIFT	0
 
+#define ARM64_SYS_REG_SHIFT_MASK(x,n) \
+	(((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \
+	KVM_REG_ARM64_SYSREG_ ## n ## _MASK)
+
+#define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \
+	(KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \
+	ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+	ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+	ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+	ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+	ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
+
+#define KVM_REG_ARM_TIMER_CTL		ARM64_SYS_REG(3, 3, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT		ARM64_SYS_REG(3, 3, 14, 3, 2)
+#define KVM_REG_ARM_TIMER_CVAL		ARM64_SYS_REG(3, 3, 14, 0, 2)
+
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
 #define KVM_ARM_IRQ_TYPE_MASK		0xff
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index c2e1ef4604e8..5081e809821f 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -182,6 +182,40 @@ static void kvm_timer_init_interrupt(void *info)
 	enable_percpu_irq(host_vtimer_irq, 0);
 }
 
+int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	switch (regid) {
+	case KVM_REG_ARM_TIMER_CTL:
+		timer->cntv_ctl = value;
+		break;
+	case KVM_REG_ARM_TIMER_CNT:
+		vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value;
+		break;
+	case KVM_REG_ARM_TIMER_CVAL:
+		timer->cntv_cval = value;
+		break;
+	default:
+		return -1;
+	}
+	return 0;
+}
+
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	switch (regid) {
+	case KVM_REG_ARM_TIMER_CTL:
+		return timer->cntv_ctl;
+	case KVM_REG_ARM_TIMER_CNT:
+		return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+	case KVM_REG_ARM_TIMER_CVAL:
+		return timer->cntv_cval;
+	}
+	return (u64)-1;
+}
 
 static int kvm_timer_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *cpu)

From d90651fa17fcc2112dcfe05832a6c1169d26dee3 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 23 Sep 2013 14:55:55 -0700
Subject: [PATCH 0551/1185] ARM: KVM: Allow creating the VGIC after VCPUs

Rework the VGIC initialization slightly to allow initialization of the
vgic cpu-specific state even if the irqchip (the VGIC) hasn't been
created by user space yet.  This is safe, because the vgic data
structures are already allocated when the CPU is allocated if VGIC
support is compiled into the kernel.  Further, the init process does not
depend on any other information and the sacrifice is a slight
performance degradation for creating VMs in the no-VGIC case.

The reason is that the new device control API doesn't mandate creating
the VGIC before creating the VCPU and it is unreasonable to require user
space to create the VGIC before creating the VCPUs.

At the same time move the irqchip_in_kernel check out of
kvm_vcpu_first_run_init and into the init function to make the per-vcpu
and global init functions symmetric and add comments on the exported
functions making it a bit easier to understand the init flow by only
looking at vgic.c.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit e1ba0207a1b3714bb3f000e506285ae5123cdfa7)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c  |  7 ++++---
 virt/kvm/arm/vgic.c | 22 +++++++++++++++++++---
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 71ad3a8706d9..ea8da1f4fe49 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -465,6 +465,8 @@ static void update_vttbr(struct kvm *kvm)
 
 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 {
+	int ret;
+
 	if (likely(vcpu->arch.has_run_once))
 		return 0;
 
@@ -474,9 +476,8 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 	 * Initialize the VGIC before running a vcpu the first time on
 	 * this VM.
 	 */
-	if (irqchip_in_kernel(vcpu->kvm) &&
-	    unlikely(!vgic_initialized(vcpu->kvm))) {
-		int ret = kvm_vgic_init(vcpu->kvm);
+	if (unlikely(!vgic_initialized(vcpu->kvm))) {
+		ret = kvm_vgic_init(vcpu->kvm);
 		if (ret)
 			return ret;
 	}
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 81e9481184a7..5e9df47778fb 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1243,15 +1243,19 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+/**
+ * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
+ * @vcpu: pointer to the vcpu struct
+ *
+ * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
+ * this vcpu and enable the VGIC for this VCPU
+ */
 int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	int i;
 
-	if (!irqchip_in_kernel(vcpu->kvm))
-		return 0;
-
 	if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
 		return -EBUSY;
 
@@ -1383,10 +1387,22 @@ int kvm_vgic_hyp_init(void)
 	return ret;
 }
 
+/**
+ * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
+ * @kvm: pointer to the kvm struct
+ *
+ * Map the virtual CPU interface into the VM before running any VCPUs.  We
+ * can't do this at creation time, because user space must first set the
+ * virtual CPU interface address in the guest physical address space.  Also
+ * initialize the ITARGETSRn regs to 0 on the emulated distributor.
+ */
 int kvm_vgic_init(struct kvm *kvm)
 {
 	int ret = 0, i;
 
+	if (!irqchip_in_kernel(kvm))
+		return 0;
+
 	mutex_lock(&kvm->lock);
 
 	if (vgic_initialized(kvm))

From 1032acb686aaabafb124e12ae15a6adc9b3da557 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 25 Oct 2013 17:29:18 +0100
Subject: [PATCH 0552/1185] KVM: arm-vgic: Support KVM_CREATE_DEVICE for VGIC

Support creating the ARM VGIC device through the KVM_CREATE_DEVICE
ioctl, which can later be leveraged to use the
KVM_{GET/SET}_DEVICE_ATTR ioctls.  These are useful both for setting
addresses through a more generic API than the ARM-specific one and for
save/restore of VGIC state.

Adds KVM_CAP_DEVICE_CTRL to ARM capabilities.

Note that we change the check for creating a VGIC from bailing out if
any VCPUs were created, to bailing out if any VCPUs were ever run.  This
is an important distinction that shouldn't break anything, but allows
creating the VGIC after the VCPUs have been created.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 7330672befe6269e575f79b924a7068b26c144b4)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 .../virtual/kvm/devices/arm-vgic.txt          | 10 +++
 arch/arm/kvm/arm.c                            |  1 +
 include/linux/kvm_host.h                      |  1 +
 include/uapi/linux/kvm.h                      |  1 +
 virt/kvm/arm/vgic.c                           | 63 ++++++++++++++++++-
 virt/kvm/kvm_main.c                           |  5 ++
 6 files changed, 79 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/virtual/kvm/devices/arm-vgic.txt

diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
new file mode 100644
index 000000000000..38f27f709a99
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -0,0 +1,10 @@
+ARM Virtual Generic Interrupt Controller (VGIC)
+===============================================
+
+Device types supported:
+  KVM_DEV_TYPE_ARM_VGIC_V2     ARM Generic Interrupt Controller v2.0
+
+Only one VGIC instance may be instantiated through either this API or the
+legacy KVM_CREATE_IRQCHIP api.  The created VGIC will act as the VM interrupt
+controller, requiring emulated user-space devices to inject interrupts to the
+VGIC instead of directly to CPUs.
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index ea8da1f4fe49..fcb68bb96176 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -191,6 +191,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IRQCHIP:
 		r = vgic_present;
 		break;
+	case KVM_CAP_DEVICE_CTRL:
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_SYNC_MMU:
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9c0f8545df73..1dfc17255ee0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1045,6 +1045,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp);
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_vfio_ops;
+extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d5b8501cebfd..56347ab54cf7 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -848,6 +848,7 @@ struct kvm_device_attr {
 #define  KVM_DEV_VFIO_GROUP			1
 #define   KVM_DEV_VFIO_GROUP_ADD			1
 #define   KVM_DEV_VFIO_GROUP_DEL			2
+#define KVM_DEV_TYPE_ARM_VGIC_V2	5
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 5e9df47778fb..b15d6c17a090 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1433,20 +1433,45 @@ int kvm_vgic_init(struct kvm *kvm)
 
 int kvm_vgic_create(struct kvm *kvm)
 {
-	int ret = 0;
+	int i, vcpu_lock_idx = -1, ret = 0;
+	struct kvm_vcpu *vcpu;
 
 	mutex_lock(&kvm->lock);
 
-	if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
+	if (kvm->arch.vgic.vctrl_base) {
 		ret = -EEXIST;
 		goto out;
 	}
 
+	/*
+	 * Any time a vcpu is run, vcpu_load is called which tries to grab the
+	 * vcpu->mutex.  By grabbing the vcpu->mutex of all VCPUs we ensure
+	 * that no other VCPUs are run while we create the vgic.
+	 */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!mutex_trylock(&vcpu->mutex))
+			goto out_unlock;
+		vcpu_lock_idx = i;
+	}
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (vcpu->arch.has_run_once) {
+			ret = -EBUSY;
+			goto out_unlock;
+		}
+	}
+
 	spin_lock_init(&kvm->arch.vgic.lock);
 	kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
 
+out_unlock:
+	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
+		vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
+		mutex_unlock(&vcpu->mutex);
+	}
+
 out:
 	mutex_unlock(&kvm->lock);
 	return ret;
@@ -1510,3 +1535,37 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
 	mutex_unlock(&kvm->lock);
 	return r;
 }
+
+static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	return -ENXIO;
+}
+
+static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	return -ENXIO;
+}
+
+static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	return -ENXIO;
+}
+
+static void vgic_destroy(struct kvm_device *dev)
+{
+	kfree(dev);
+}
+
+static int vgic_create(struct kvm_device *dev, u32 type)
+{
+	return kvm_vgic_create(dev->kvm);
+}
+
+struct kvm_device_ops kvm_arm_vgic_v2_ops = {
+	.name = "kvm-arm-vgic",
+	.create = vgic_create,
+	.destroy = vgic_destroy,
+	.set_attr = vgic_set_attr,
+	.get_attr = vgic_get_attr,
+	.has_attr = vgic_has_attr,
+};
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cb9a865c8e01..e9a43b6455be 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2271,6 +2271,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	case KVM_DEV_TYPE_VFIO:
 		ops = &kvm_vfio_ops;
 		break;
+#endif
+#ifdef CONFIG_KVM_ARM_VGIC
+	case KVM_DEV_TYPE_ARM_VGIC_V2:
+		ops = &kvm_arm_vgic_v2_ops;
+		break;
 #endif
 	default:
 		return -ENODEV;

From c4ad31ff7d94f6504c757ddcf742f0597c494080 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 23 Sep 2013 14:55:56 -0700
Subject: [PATCH 0553/1185] KVM: arm-vgic: Set base addr through device API

Support setting the distributor and cpu interface base addresses in the
VM physical address space through the KVM_{SET,GET}_DEVICE_ATTR API
in addition to the ARM specific API.

This has the added benefit of being able to share more code in user
space and do things in a uniform manner.

Also deprecate the older API at the same time, but backwards
compatibility will be maintained.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit ce01e4e8874d410738f4b4733b26642d6611a331)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/api.txt             |  8 +-
 .../virtual/kvm/devices/arm-vgic.txt          | 11 +++
 arch/arm/include/uapi/asm/kvm.h               |  2 +
 arch/arm/kvm/arm.c                            |  2 +-
 include/kvm/arm_vgic.h                        |  2 +-
 virt/kvm/arm/vgic.c                           | 87 ++++++++++++++++---
 6 files changed, 96 insertions(+), 16 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index d196ebe8956e..3c75a17555a8 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2324,7 +2324,7 @@ This ioctl returns the guest registers that are supported for the
 KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
 
 
-4.80 KVM_ARM_SET_DEVICE_ADDR
+4.85 KVM_ARM_SET_DEVICE_ADDR (deprecated)
 
 Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
 Architectures: arm, arm64
@@ -2362,7 +2362,11 @@ must be called after calling KVM_CREATE_IRQCHIP, but before calling
 KVM_RUN on any of the VCPUs.  Calling this ioctl twice for any of the
 base addresses will return -EEXIST.
 
-4.82 KVM_PPC_RTAS_DEFINE_TOKEN
+Note, this IOCTL is deprecated and the more flexible SET/GET_DEVICE_ATTR API
+should be used instead.
+
+
+4.86 KVM_PPC_RTAS_DEFINE_TOKEN
 
 Capability: KVM_CAP_PPC_RTAS
 Architectures: ppc
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index 38f27f709a99..c9febb2a0c3e 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -8,3 +8,14 @@ Only one VGIC instance may be instantiated through either this API or the
 legacy KVM_CREATE_IRQCHIP api.  The created VGIC will act as the VM interrupt
 controller, requiring emulated user-space devices to inject interrupts to the
 VGIC instead of directly to CPUs.
+
+Groups:
+  KVM_DEV_ARM_VGIC_GRP_ADDR
+  Attributes:
+    KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
+      Base address in the guest physical address space of the GIC distributor
+      register mappings.
+
+    KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit)
+      Base address in the guest physical address space of the GIC virtual cpu
+      interface register mappings.
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 835b8678de03..76a742769e2b 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -163,6 +163,8 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_VFP_FPINST		0x1009
 #define KVM_REG_ARM_VFP_FPINST2		0x100A
 
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR	0
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index fcb68bb96176..e71f6e15d3cd 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -785,7 +785,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 	case KVM_ARM_DEVICE_VGIC_V2:
 		if (!vgic_present)
 			return -ENXIO;
-		return kvm_vgic_set_addr(kvm, type, dev_addr->addr);
+		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
 	default:
 		return -ENODEV;
 	}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 7e2d15837b02..be85127bfed3 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -144,7 +144,7 @@ struct kvm_run;
 struct kvm_exit_mmio;
 
 #ifdef CONFIG_KVM_ARM_VGIC
-int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
+int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_hyp_init(void);
 int kvm_vgic_init(struct kvm *kvm);
 int kvm_vgic_create(struct kvm *kvm);
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index b15d6c17a090..45db48de4282 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1495,6 +1495,12 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
 {
 	int ret;
 
+	if (addr & ~KVM_PHYS_MASK)
+		return -E2BIG;
+
+	if (addr & (SZ_4K - 1))
+		return -EINVAL;
+
 	if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
 		return -EEXIST;
 	if (addr + size < addr)
@@ -1507,26 +1513,41 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
 	return ret;
 }
 
-int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
+/**
+ * kvm_vgic_addr - set or get vgic VM base addresses
+ * @kvm:   pointer to the vm struct
+ * @type:  the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX
+ * @addr:  pointer to address value
+ * @write: if true set the address in the VM address space, if false read the
+ *          address
+ *
+ * Set or get the vgic base addresses for the distributor and the virtual CPU
+ * interface in the VM physical address space.  These addresses are properties
+ * of the emulated core/SoC and therefore user space initially knows this
+ * information.
+ */
+int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
 {
 	int r = 0;
 	struct vgic_dist *vgic = &kvm->arch.vgic;
 
-	if (addr & ~KVM_PHYS_MASK)
-		return -E2BIG;
-
-	if (addr & (SZ_4K - 1))
-		return -EINVAL;
-
 	mutex_lock(&kvm->lock);
 	switch (type) {
 	case KVM_VGIC_V2_ADDR_TYPE_DIST:
-		r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
-				       addr, KVM_VGIC_V2_DIST_SIZE);
+		if (write) {
+			r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
+					       *addr, KVM_VGIC_V2_DIST_SIZE);
+		} else {
+			*addr = vgic->vgic_dist_base;
+		}
 		break;
 	case KVM_VGIC_V2_ADDR_TYPE_CPU:
-		r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
-				       addr, KVM_VGIC_V2_CPU_SIZE);
+		if (write) {
+			r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
+					       *addr, KVM_VGIC_V2_CPU_SIZE);
+		} else {
+			*addr = vgic->vgic_cpu_base;
+		}
 		break;
 	default:
 		r = -ENODEV;
@@ -1538,16 +1559,58 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
 
 static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
+	int r;
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
+		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+		u64 addr;
+		unsigned long type = (unsigned long)attr->attr;
+
+		if (copy_from_user(&addr, uaddr, sizeof(addr)))
+			return -EFAULT;
+
+		r = kvm_vgic_addr(dev->kvm, type, &addr, true);
+		return (r == -ENODEV) ? -ENXIO : r;
+	}
+	}
+
 	return -ENXIO;
 }
 
 static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
-	return -ENXIO;
+	int r = -ENXIO;
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
+		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+		u64 addr;
+		unsigned long type = (unsigned long)attr->attr;
+
+		r = kvm_vgic_addr(dev->kvm, type, &addr, false);
+		if (r)
+			return (r == -ENODEV) ? -ENXIO : r;
+
+		if (copy_to_user(uaddr, &addr, sizeof(addr)))
+			return -EFAULT;
+	}
+	}
+
+	return r;
 }
 
 static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR:
+		switch (attr->attr) {
+		case KVM_VGIC_V2_ADDR_TYPE_DIST:
+		case KVM_VGIC_V2_ADDR_TYPE_CPU:
+			return 0;
+		}
+		break;
+	}
 	return -ENXIO;
 }
 

From 10b316b72589c7ba6f74c7263a289ffbaa1bf80b Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 23 Sep 2013 14:55:56 -0700
Subject: [PATCH 0554/1185] irqchip: arm-gic: Define additional MMIO offsets
 and masks

Define CPU interface offsets for the GICC_ABPR, GICC_APR, and GICC_IIDR
registers.  Define distributor registers for the GICD_SPENDSGIR and the
GICD_CPENDSGIR.  KVM/ARM needs to know about these definitions to fully
support save/restore of the VGIC.

Also define some masks and shifts for the various GICH_VMCR fields.

Cc: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 0307e1770fdeff2732cf7a35d0f7f49db67c6621)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/irqchip/arm-gic.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 3e203eb23cc7..4483adb61c88 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -17,6 +17,9 @@
 #define GIC_CPU_EOI			0x10
 #define GIC_CPU_RUNNINGPRI		0x14
 #define GIC_CPU_HIGHPRI			0x18
+#define GIC_CPU_ALIAS_BINPOINT		0x1c
+#define GIC_CPU_ACTIVEPRIO		0xd0
+#define GIC_CPU_IDENT			0xfc
 
 #define GIC_DIST_CTRL			0x000
 #define GIC_DIST_CTR			0x004
@@ -54,6 +57,15 @@
 #define GICH_LR_ACTIVE_BIT		(1 << 29)
 #define GICH_LR_EOI			(1 << 19)
 
+#define GICH_VMCR_CTRL_SHIFT		0
+#define GICH_VMCR_CTRL_MASK		(0x21f << GICH_VMCR_CTRL_SHIFT)
+#define GICH_VMCR_PRIMASK_SHIFT		27
+#define GICH_VMCR_PRIMASK_MASK		(0x1f << GICH_VMCR_PRIMASK_SHIFT)
+#define GICH_VMCR_BINPOINT_SHIFT	21
+#define GICH_VMCR_BINPOINT_MASK		(0x7 << GICH_VMCR_BINPOINT_SHIFT)
+#define GICH_VMCR_ALIAS_BINPOINT_SHIFT	18
+#define GICH_VMCR_ALIAS_BINPOINT_MASK	(0x7 << GICH_VMCR_ALIAS_BINPOINT_SHIFT)
+
 #define GICH_MISR_EOI			(1 << 0)
 #define GICH_MISR_U			(1 << 1)
 

From 405003b808d4ff3de73eebc18dfb6f1e708626c4 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 23 Sep 2013 14:55:56 -0700
Subject: [PATCH 0555/1185] KVM: arm-vgic: Make vgic mmio functions more
 generic

Rename the vgic_ranges array to vgic_dist_ranges to be more specific and
to prepare for handling CPU interface register access as well (for
save/restore of VGIC state).

Pass the offset from the distributor or CPU interface MMIO base to the
find_matching_range function instead of the physical address of the
access in the VM memory map.  This allows other callers that are unaware
of the VM specifics, but have generic VGIC knowledge, to reuse it.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 1006e8cb22e861260688917ca4cfe6cde8ad69eb)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 45db48de4282..e2596f618281 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -602,7 +602,7 @@ struct mmio_range {
 			    phys_addr_t offset);
 };
 
-static const struct mmio_range vgic_ranges[] = {
+static const struct mmio_range vgic_dist_ranges[] = {
 	{
 		.base		= GIC_DIST_CTRL,
 		.len		= 12,
@@ -669,14 +669,13 @@ static const struct mmio_range vgic_ranges[] = {
 static const
 struct mmio_range *find_matching_range(const struct mmio_range *ranges,
 				       struct kvm_exit_mmio *mmio,
-				       phys_addr_t base)
+				       phys_addr_t offset)
 {
 	const struct mmio_range *r = ranges;
-	phys_addr_t addr = mmio->phys_addr - base;
 
 	while (r->len) {
-		if (addr >= r->base &&
-		    (addr + mmio->len) <= (r->base + r->len))
+		if (offset >= r->base &&
+		    (offset + mmio->len) <= (r->base + r->len))
 			return r;
 		r++;
 	}
@@ -713,7 +712,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		return true;
 	}
 
-	range = find_matching_range(vgic_ranges, mmio, base);
+	offset = mmio->phys_addr - base;
+	range = find_matching_range(vgic_dist_ranges, mmio, offset);
 	if (unlikely(!range || !range->handle_mmio)) {
 		pr_warn("Unhandled access %d %08llx %d\n",
 			mmio->is_write, mmio->phys_addr, mmio->len);

From f05c65c6daf2d2c33c31ddc3b48df2693f8e9594 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Wed, 11 Dec 2013 20:29:11 -0800
Subject: [PATCH 0556/1185] arm/arm64: kvm: Set vcpu->cpu to -1 on vcpu_put

The arch-generic KVM code expects the cpu field of a vcpu to be -1 if
the vcpu is no longer assigned to a cpu.  This is used for the optimized
make_all_cpus_request path and will be used by the vgic code to check
that no vcpus are running.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit e9b152cb957cb194437f37e79f0f3c9d34fe53d6)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e71f6e15d3cd..169c718ddd90 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -343,6 +343,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	/*
+	 * The arch-generic KVM code expects the cpu field of a vcpu to be -1
+	 * if the vcpu is no longer assigned to a cpu.  This is used for the
+	 * optimized make_all_cpus_request path.
+	 */
+	vcpu->cpu = -1;
+
 	kvm_arm_set_running_vcpu(NULL);
 }
 

From 0b3a540dcc4087698c95c6d1bda4071424283345 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 25 Oct 2013 21:17:31 +0100
Subject: [PATCH 0557/1185] KVM: arm-vgic: Add vgic reg access from dev attr

Add infrastructure to handle distributor and cpu interface register
accesses through the KVM_{GET/SET}_DEVICE_ATTR interface by adding the
KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS groups
and defining the semantics of the attr field to be the MMIO offset as
specified in the GICv2 specs.

Missing register accesses or other changes in individual register access
functions to support save/restore of the VGIC state are added in
subsequent patches.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit c07a0191ef2de1f9510f12d1f88e3b0b5cd8d66f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 .../virtual/kvm/devices/arm-vgic.txt          |  52 +++++
 arch/arm/include/uapi/asm/kvm.h               |   6 +
 virt/kvm/arm/vgic.c                           | 178 ++++++++++++++++++
 3 files changed, 236 insertions(+)

diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index c9febb2a0c3e..7f4e91b1316b 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -19,3 +19,55 @@ Groups:
     KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit)
       Base address in the guest physical address space of the GIC virtual cpu
       interface register mappings.
+
+  KVM_DEV_ARM_VGIC_GRP_DIST_REGS
+  Attributes:
+    The attr field of kvm_device_attr encodes two values:
+    bits:     | 63   ....  40 | 39 ..  32  |  31   ....    0 |
+    values:   |    reserved   |   cpu id   |      offset     |
+
+    All distributor regs are (rw, 32-bit)
+
+    The offset is relative to the "Distributor base address" as defined in the
+    GICv2 specs.  Getting or setting such a register has the same effect as
+    reading or writing the register on the actual hardware from the cpu
+    specified with cpu id field.  Note that most distributor fields are not
+    banked, but return the same value regardless of the cpu id used to access
+    the register.
+  Limitations:
+    - Priorities are not implemented, and registers are RAZ/WI
+  Errors:
+    -ENODEV: Getting or setting this register is not yet supported
+    -EBUSY: One or more VCPUs are running
+
+  KVM_DEV_ARM_VGIC_GRP_CPU_REGS
+  Attributes:
+    The attr field of kvm_device_attr encodes two values:
+    bits:     | 63   ....  40 | 39 ..  32  |  31   ....    0 |
+    values:   |    reserved   |   cpu id   |      offset     |
+
+    All CPU interface regs are (rw, 32-bit)
+
+    The offset specifies the offset from the "CPU interface base address" as
+    defined in the GICv2 specs.  Getting or setting such a register has the
+    same effect as reading or writing the register on the actual hardware.
+
+    The Active Priorities Registers APRn are implementation defined, so we set a
+    fixed format for our implementation that fits with the model of a "GICv2
+    implementation without the security extensions" which we present to the
+    guest.  This interface always exposes four register APR[0-3] describing the
+    maximum possible 128 preemption levels.  The semantics of the register
+    indicate if any interrupts in a given preemption level are in the active
+    state by setting the corresponding bit.
+
+    Thus, preemption level X has one or more active interrupts if and only if:
+
+      APRn[X mod 32] == 0b1,  where n = X / 32
+
+    Bits for undefined preemption levels are RAZ/WI.
+
+  Limitations:
+    - Priorities are not implemented, and registers are RAZ/WI
+  Errors:
+    -ENODEV: Getting or setting this register is not yet supported
+    -EBUSY: One or more VCPUs are running
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 76a742769e2b..ef0c8785ba16 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -165,6 +165,12 @@ struct kvm_arch_memory_slot {
 
 /* Device Control API: ARM VGIC */
 #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS	1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
+#define   KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
+#define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
+#define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index e2596f618281..88599b585362 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -589,6 +589,20 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
 	return false;
 }
 
+static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
+				  struct kvm_exit_mmio *mmio,
+				  phys_addr_t offset)
+{
+	return false;
+}
+
+static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
+				struct kvm_exit_mmio *mmio,
+				phys_addr_t offset)
+{
+	return false;
+}
+
 /*
  * I would have liked to use the kvm_bus_io_*() API instead, but it
  * cannot cope with banked registers (only the VM pointer is passed
@@ -663,6 +677,16 @@ static const struct mmio_range vgic_dist_ranges[] = {
 		.len		= 4,
 		.handle_mmio	= handle_mmio_sgi_reg,
 	},
+	{
+		.base		= GIC_DIST_SGI_PENDING_CLEAR,
+		.len		= VGIC_NR_SGIS,
+		.handle_mmio	= handle_mmio_sgi_clear,
+	},
+	{
+		.base		= GIC_DIST_SGI_PENDING_SET,
+		.len		= VGIC_NR_SGIS,
+		.handle_mmio	= handle_mmio_sgi_set,
+	},
 	{}
 };
 
@@ -1557,6 +1581,114 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
 	return r;
 }
 
+static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
+				 struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	return true;
+}
+
+static const struct mmio_range vgic_cpu_ranges[] = {
+	{
+		.base		= GIC_CPU_CTRL,
+		.len		= 12,
+		.handle_mmio	= handle_cpu_mmio_misc,
+	},
+	{
+		.base		= GIC_CPU_ALIAS_BINPOINT,
+		.len		= 4,
+		.handle_mmio	= handle_cpu_mmio_misc,
+	},
+	{
+		.base		= GIC_CPU_ACTIVEPRIO,
+		.len		= 16,
+		.handle_mmio	= handle_cpu_mmio_misc,
+	},
+	{
+		.base		= GIC_CPU_IDENT,
+		.len		= 4,
+		.handle_mmio	= handle_cpu_mmio_misc,
+	},
+};
+
+static int vgic_attr_regs_access(struct kvm_device *dev,
+				 struct kvm_device_attr *attr,
+				 u32 *reg, bool is_write)
+{
+	const struct mmio_range *r = NULL, *ranges;
+	phys_addr_t offset;
+	int ret, cpuid, c;
+	struct kvm_vcpu *vcpu, *tmp_vcpu;
+	struct vgic_dist *vgic;
+	struct kvm_exit_mmio mmio;
+
+	offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+	cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
+		KVM_DEV_ARM_VGIC_CPUID_SHIFT;
+
+	mutex_lock(&dev->kvm->lock);
+
+	if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	vcpu = kvm_get_vcpu(dev->kvm, cpuid);
+	vgic = &dev->kvm->arch.vgic;
+
+	mmio.len = 4;
+	mmio.is_write = is_write;
+	if (is_write)
+		mmio_data_write(&mmio, ~0, *reg);
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+		mmio.phys_addr = vgic->vgic_dist_base + offset;
+		ranges = vgic_dist_ranges;
+		break;
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+		mmio.phys_addr = vgic->vgic_cpu_base + offset;
+		ranges = vgic_cpu_ranges;
+		break;
+	default:
+		BUG();
+	}
+	r = find_matching_range(ranges, &mmio, offset);
+
+	if (unlikely(!r || !r->handle_mmio)) {
+		ret = -ENXIO;
+		goto out;
+	}
+
+
+	spin_lock(&vgic->lock);
+
+	/*
+	 * Ensure that no other VCPU is running by checking the vcpu->cpu
+	 * field.  If no other VPCUs are running we can safely access the VGIC
+	 * state, because even if another VPU is run after this point, that
+	 * VCPU will not touch the vgic state, because it will block on
+	 * getting the vgic->lock in kvm_vgic_sync_hwstate().
+	 */
+	kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
+		if (unlikely(tmp_vcpu->cpu != -1)) {
+			ret = -EBUSY;
+			goto out_vgic_unlock;
+		}
+	}
+
+	offset -= r->base;
+	r->handle_mmio(vcpu, &mmio, offset);
+
+	if (!is_write)
+		*reg = mmio_data_read(&mmio, ~0);
+
+	ret = 0;
+out_vgic_unlock:
+	spin_unlock(&vgic->lock);
+out:
+	mutex_unlock(&dev->kvm->lock);
+	return ret;
+}
+
 static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
 	int r;
@@ -1573,6 +1705,18 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		r = kvm_vgic_addr(dev->kvm, type, &addr, true);
 		return (r == -ENODEV) ? -ENXIO : r;
 	}
+
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
+		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+		u32 reg;
+
+		if (get_user(reg, uaddr))
+			return -EFAULT;
+
+		return vgic_attr_regs_access(dev, attr, &reg, true);
+	}
+
 	}
 
 	return -ENXIO;
@@ -1594,14 +1738,42 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 
 		if (copy_to_user(uaddr, &addr, sizeof(addr)))
 			return -EFAULT;
+		break;
 	}
+
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
+		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+		u32 reg = 0;
+
+		r = vgic_attr_regs_access(dev, attr, &reg, false);
+		if (r)
+			return r;
+		r = put_user(reg, uaddr);
+		break;
+	}
+
 	}
 
 	return r;
 }
 
+static int vgic_has_attr_regs(const struct mmio_range *ranges,
+			      phys_addr_t offset)
+{
+	struct kvm_exit_mmio dev_attr_mmio;
+
+	dev_attr_mmio.len = 4;
+	if (find_matching_range(ranges, &dev_attr_mmio, offset))
+		return 0;
+	else
+		return -ENXIO;
+}
+
 static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
+	phys_addr_t offset;
+
 	switch (attr->group) {
 	case KVM_DEV_ARM_VGIC_GRP_ADDR:
 		switch (attr->attr) {
@@ -1610,6 +1782,12 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 			return 0;
 		}
 		break;
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+		offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+		return vgic_has_attr_regs(vgic_dist_ranges, offset);
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+		offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+		return vgic_has_attr_regs(vgic_cpu_ranges, offset);
 	}
 	return -ENXIO;
 }

From 944b8a6f9f400ded36fcdc61acd801fbd7413985 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 15 Nov 2013 20:51:31 -0800
Subject: [PATCH 0558/1185] KVM: arm-vgic: Support unqueueing of LRs to the
 dist

To properly access the VGIC state from user space it is very impractical
to have to loop through all the LRs in all register access functions.
Instead, support moving all pending state from LRs to the distributor,
but leave active state LRs alone.

Note that to accurately present the active and pending state to VCPUs
reading these distributor registers from a live VM, we would have to
stop all VCPUs other than the calling VCPU and ask each CPU to unqueue
their LR state onto the distributor and add fields to track active state
on the distributor side as well.  We don't have any users of such
functionality yet and there are other inaccuracies of the GIC emulation,
so don't provide accurate synchronized access to this state just yet.
However, when the time comes, having this function should help.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit cbd333a4bfd0d93bba36d46a0e4e7979228873a6)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 88 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 83 insertions(+), 5 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 88599b585362..d08ba28e729a 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -589,6 +589,80 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
 	return false;
 }
 
+#define LR_CPUID(lr)	\
+	(((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
+#define LR_IRQID(lr)	\
+	((lr) & GICH_LR_VIRTUALID)
+
+static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu)
+{
+	clear_bit(lr_nr, vgic_cpu->lr_used);
+	vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE;
+	vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+}
+
+/**
+ * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
+ * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
+ *
+ * Move any pending IRQs that have already been assigned to LRs back to the
+ * emulated distributor state so that the complete emulated state can be read
+ * from the main emulation structures without investigating the LRs.
+ *
+ * Note that IRQs in the active state in the LRs get their pending state moved
+ * to the distributor but the active state stays in the LRs, because we don't
+ * track the active state on the distributor side.
+ */
+static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	int vcpu_id = vcpu->vcpu_id;
+	int i, irq, source_cpu;
+	u32 *lr;
+
+	for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
+		lr = &vgic_cpu->vgic_lr[i];
+		irq = LR_IRQID(*lr);
+		source_cpu = LR_CPUID(*lr);
+
+		/*
+		 * There are three options for the state bits:
+		 *
+		 * 01: pending
+		 * 10: active
+		 * 11: pending and active
+		 *
+		 * If the LR holds only an active interrupt (not pending) then
+		 * just leave it alone.
+		 */
+		if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT)
+			continue;
+
+		/*
+		 * Reestablish the pending state on the distributor and the
+		 * CPU interface.  It may have already been pending, but that
+		 * is fine, then we are only setting a few bits that were
+		 * already set.
+		 */
+		vgic_dist_irq_set(vcpu, irq);
+		if (irq < VGIC_NR_SGIS)
+			dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu;
+		*lr &= ~GICH_LR_PENDING_BIT;
+
+		/*
+		 * If there's no state left on the LR (it could still be
+		 * active), then the LR does not hold any useful info and can
+		 * be marked as free for other use.
+		 */
+		if (!(*lr & GICH_LR_STATE))
+			vgic_retire_lr(i, irq, vgic_cpu);
+
+		/* Finally update the VGIC state. */
+		vgic_update_state(vcpu->kvm);
+	}
+}
+
 static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
 				  struct kvm_exit_mmio *mmio,
 				  phys_addr_t offset)
@@ -848,8 +922,6 @@ static void vgic_update_state(struct kvm *kvm)
 	}
 }
 
-#define LR_CPUID(lr)	\
-	(((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
 #define MK_LR_PEND(src, irq)	\
 	(GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq))
 
@@ -871,9 +943,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 		int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
 
 		if (!vgic_irq_is_enabled(vcpu, irq)) {
-			vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
-			clear_bit(lr, vgic_cpu->lr_used);
-			vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE;
+			vgic_retire_lr(lr, irq, vgic_cpu);
 			if (vgic_irq_is_active(vcpu, irq))
 				vgic_irq_clear_active(vcpu, irq);
 		}
@@ -1675,6 +1745,14 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
 		}
 	}
 
+	/*
+	 * Move all pending IRQs from the LRs on all VCPUs so the pending
+	 * state can be properly represented in the register state accessible
+	 * through this API.
+	 */
+	kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
+		vgic_unqueue_irqs(tmp_vcpu);
+
 	offset -= r->base;
 	r->handle_mmio(vcpu, &mmio, offset);
 

From 8a280d12e7988c920e8805de50c0a7e129032d1b Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 25 Oct 2013 21:22:31 +0100
Subject: [PATCH 0559/1185] KVM: arm-vgic: Add GICD_SPENDSGIR and
 GICD_CPENDSGIR handlers

Handle MMIO accesses to the two registers which should support both the
case where the VMs want to read/write either of these registers and the
case where user space reads/writes these registers to do save/restore of
the VGIC state.

Note that the added complexity compared to simple set/clear enable
registers stems from the bookkeeping of source cpu ids.  It may be
possible to change the underlying data structure to simplify the
complexity, but since this is not in the critical path at all, this will
do.

Also note that reading this register from a live guest will not be
accurate compared to on hardware, because some state may be living on
the CPU LRs and the only way to give a consistent read would be to force
stop all the VCPUs and request them to unqueue the LR state onto the
distributor.  Until we have an actual user of live reading this
register, we can live with the difference.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 90a5355ee7639e92c0492ec592cba5c31bd80687)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 70 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 66 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index d08ba28e729a..e59aaa4c64e5 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -663,18 +663,80 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 	}
 }
 
-static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
-				  struct kvm_exit_mmio *mmio,
-				  phys_addr_t offset)
+/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */
+static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
+					struct kvm_exit_mmio *mmio,
+					phys_addr_t offset)
 {
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	int sgi;
+	int min_sgi = (offset & ~0x3) * 4;
+	int max_sgi = min_sgi + 3;
+	int vcpu_id = vcpu->vcpu_id;
+	u32 reg = 0;
+
+	/* Copy source SGIs from distributor side */
+	for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
+		int shift = 8 * (sgi - min_sgi);
+		reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift;
+	}
+
+	mmio_data_write(mmio, ~0, reg);
 	return false;
 }
 
+static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
+					 struct kvm_exit_mmio *mmio,
+					 phys_addr_t offset, bool set)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	int sgi;
+	int min_sgi = (offset & ~0x3) * 4;
+	int max_sgi = min_sgi + 3;
+	int vcpu_id = vcpu->vcpu_id;
+	u32 reg;
+	bool updated = false;
+
+	reg = mmio_data_read(mmio, ~0);
+
+	/* Clear pending SGIs on the distributor */
+	for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
+		u8 mask = reg >> (8 * (sgi - min_sgi));
+		if (set) {
+			if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask)
+				updated = true;
+			dist->irq_sgi_sources[vcpu_id][sgi] |= mask;
+		} else {
+			if (dist->irq_sgi_sources[vcpu_id][sgi] & mask)
+				updated = true;
+			dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask;
+		}
+	}
+
+	if (updated)
+		vgic_update_state(vcpu->kvm);
+
+	return updated;
+}
+
 static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
 				struct kvm_exit_mmio *mmio,
 				phys_addr_t offset)
 {
-	return false;
+	if (!mmio->is_write)
+		return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
+	else
+		return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true);
+}
+
+static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
+				  struct kvm_exit_mmio *mmio,
+				  phys_addr_t offset)
+{
+	if (!mmio->is_write)
+		return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
+	else
+		return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
 }
 
 /*

From a0a11ba68172ffeb62b0a7be8dc4162a0fc2ad6d Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 23 Sep 2013 14:55:57 -0700
Subject: [PATCH 0560/1185] KVM: arm-vgic: Support CPU interface reg access

Implement support for the CPU interface register access driven by MMIO
address offsets from the CPU interface base address.  Useful for user
space to support save/restore of the VGIC state.

This commit adds support only for the same logic as the current VGIC
support, and no more.  For example, the active priority registers are
handled as RAZ/WI, just like setting priorities on the emulated
distributor.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit fa20f5aea56f271f83e91b9cde00f043a5a14990)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 81 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 73 insertions(+), 8 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index e59aaa4c64e5..be456ce264d0 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -71,6 +71,10 @@
 #define VGIC_ADDR_UNDEF		(-1)
 #define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
 
+#define PRODUCT_ID_KVM		0x4b	/* ASCII code K */
+#define IMPLEMENTER_ARM		0x43b
+#define GICC_ARCH_VERSION_V2	0x2
+
 /* Physical address of vgic virtual cpu interface */
 static phys_addr_t vgic_vcpu_base;
 
@@ -312,7 +316,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
 	u32 word_offset = offset & 3;
 
 	switch (offset & ~3) {
-	case 0:			/* CTLR */
+	case 0:			/* GICD_CTLR */
 		reg = vcpu->kvm->arch.vgic.enabled;
 		vgic_reg_access(mmio, &reg, word_offset,
 				ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
@@ -323,15 +327,15 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
 		}
 		break;
 
-	case 4:			/* TYPER */
+	case 4:			/* GICD_TYPER */
 		reg  = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
 		reg |= (VGIC_NR_IRQS >> 5) - 1;
 		vgic_reg_access(mmio, &reg, word_offset,
 				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
 		break;
 
-	case 8:			/* IIDR */
-		reg = 0x4B00043B;
+	case 8:			/* GICD_IIDR */
+		reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
 		vgic_reg_access(mmio, &reg, word_offset,
 				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
 		break;
@@ -1716,9 +1720,70 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
 static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
 				 struct kvm_exit_mmio *mmio, phys_addr_t offset)
 {
-	return true;
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	u32 reg, mask = 0, shift = 0;
+	bool updated = false;
+
+	switch (offset & ~0x3) {
+	case GIC_CPU_CTRL:
+		mask = GICH_VMCR_CTRL_MASK;
+		shift = GICH_VMCR_CTRL_SHIFT;
+		break;
+	case GIC_CPU_PRIMASK:
+		mask = GICH_VMCR_PRIMASK_MASK;
+		shift = GICH_VMCR_PRIMASK_SHIFT;
+		break;
+	case GIC_CPU_BINPOINT:
+		mask = GICH_VMCR_BINPOINT_MASK;
+		shift = GICH_VMCR_BINPOINT_SHIFT;
+		break;
+	case GIC_CPU_ALIAS_BINPOINT:
+		mask = GICH_VMCR_ALIAS_BINPOINT_MASK;
+		shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+		break;
+	}
+
+	if (!mmio->is_write) {
+		reg = (vgic_cpu->vgic_vmcr & mask) >> shift;
+		mmio_data_write(mmio, ~0, reg);
+	} else {
+		reg = mmio_data_read(mmio, ~0);
+		reg = (reg << shift) & mask;
+		if (reg != (vgic_cpu->vgic_vmcr & mask))
+			updated = true;
+		vgic_cpu->vgic_vmcr &= ~mask;
+		vgic_cpu->vgic_vmcr |= reg;
+	}
+	return updated;
 }
 
+static bool handle_mmio_abpr(struct kvm_vcpu *vcpu,
+			     struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT);
+}
+
+static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
+				  struct kvm_exit_mmio *mmio,
+				  phys_addr_t offset)
+{
+	u32 reg;
+
+	if (mmio->is_write)
+		return false;
+
+	/* GICC_IIDR */
+	reg = (PRODUCT_ID_KVM << 20) |
+	      (GICC_ARCH_VERSION_V2 << 16) |
+	      (IMPLEMENTER_ARM << 0);
+	mmio_data_write(mmio, ~0, reg);
+	return false;
+}
+
+/*
+ * CPU Interface Register accesses - these are not accessed by the VM, but by
+ * user space for saving and restoring VGIC state.
+ */
 static const struct mmio_range vgic_cpu_ranges[] = {
 	{
 		.base		= GIC_CPU_CTRL,
@@ -1728,17 +1793,17 @@ static const struct mmio_range vgic_cpu_ranges[] = {
 	{
 		.base		= GIC_CPU_ALIAS_BINPOINT,
 		.len		= 4,
-		.handle_mmio	= handle_cpu_mmio_misc,
+		.handle_mmio	= handle_mmio_abpr,
 	},
 	{
 		.base		= GIC_CPU_ACTIVEPRIO,
 		.len		= 16,
-		.handle_mmio	= handle_cpu_mmio_misc,
+		.handle_mmio	= handle_mmio_raz_wi,
 	},
 	{
 		.base		= GIC_CPU_IDENT,
 		.len		= 4,
-		.handle_mmio	= handle_cpu_mmio_misc,
+		.handle_mmio	= handle_cpu_mmio_ident,
 	},
 };
 

From c23fe6933f79809bae75b5e8c16c36d1ffc79e41 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Thu, 12 Dec 2013 16:12:22 +0000
Subject: [PATCH 0561/1185] arm64: KVM: Add Kconfig option for max VCPUs
 per-Guest

Current max VCPUs per-Guest is set to 4 which is preventing
us from creating a Guest (or VM) with 8 VCPUs on Host (e.g.
X-Gene Storm SOC) with 8 Host CPUs.

The correct value of max VCPUs per-Guest should be the same as
the max CPUs supported by GICv2, which is 8, but increasing the
value of max VCPUs per-Guest can make things slower; hence
we add a Kconfig option to let KVM users select an appropriate
max VCPUs per-Guest.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit da7814700a0c408bead58ce4714b7625ffbaade1)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h |  7 ++++++-
 arch/arm64/kvm/Kconfig            | 11 +++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5d85a02d1231..0a1d69751562 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -26,7 +26,12 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 
-#define KVM_MAX_VCPUS 4
+#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
+#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
+#else
+#define KVM_MAX_VCPUS 0
+#endif
+
 #define KVM_USER_MEM_SLOTS 32
 #define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 4480ab339a00..8ba85e9ea388 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -36,6 +36,17 @@ config KVM_ARM_HOST
 	---help---
 	  Provides host support for ARM processors.
 
+config KVM_ARM_MAX_VCPUS
+	int "Number maximum supported virtual CPUs per VM"
+	depends on KVM_ARM_HOST
+	default 4
+	help
+	  Static number of max supported virtual CPUs per VM.
+
+	  If you choose a high number, the vcpu structures will be quite
+	  large, so only choose a reasonable number that you expect to
+	  actually use.
+
 config KVM_ARM_VGIC
 	bool
 	depends on KVM_ARM_HOST && OF

From af4604a0f9c909b60d043cebb20110804639460e Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Thu, 14 Nov 2013 15:20:08 +0000
Subject: [PATCH 0562/1185] arm64: KVM: Support X-Gene guest VCPU on APM X-Gene
 host

This patch allows us to have X-Gene guest VCPU when using KVM arm64
on APM X-Gene host.

We add KVM_ARM_TARGET_XGENE_POTENZA for X-Gene Potenza compatible
guest VCPU and we return KVM_ARM_TARGET_XGENE_POTENZA in kvm_target_cpu()
when running on X-Gene host with Potenza core.

[maz: sanitized the commit log]

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit e28100bd8ed9e37b7cd4578140a1e7f95bd40835)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/uapi/asm/kvm.h    |  3 ++-
 arch/arm64/kvm/guest.c               | 32 +++++++++++++++++-----------
 arch/arm64/kvm/sys_regs_generic_v8.c |  3 +++
 3 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 7c25ca8b02b3..495ab6f84a61 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -55,8 +55,9 @@ struct kvm_regs {
 #define KVM_ARM_TARGET_AEM_V8		0
 #define KVM_ARM_TARGET_FOUNDATION_V8	1
 #define KVM_ARM_TARGET_CORTEX_A57	2
+#define KVM_ARM_TARGET_XGENE_POTENZA	3
 
-#define KVM_ARM_NUM_TARGETS		3
+#define KVM_ARM_NUM_TARGETS		4
 
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 #define KVM_ARM_DEVICE_TYPE_SHIFT	0
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 3f0731e53274..08745578d54d 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -207,20 +207,26 @@ int __attribute_const__ kvm_target_cpu(void)
 	unsigned long implementor = read_cpuid_implementor();
 	unsigned long part_number = read_cpuid_part_number();
 
-	if (implementor != ARM_CPU_IMP_ARM)
-		return -EINVAL;
+	switch (implementor) {
+	case ARM_CPU_IMP_ARM:
+		switch (part_number) {
+		case ARM_CPU_PART_AEM_V8:
+			return KVM_ARM_TARGET_AEM_V8;
+		case ARM_CPU_PART_FOUNDATION:
+			return KVM_ARM_TARGET_FOUNDATION_V8;
+		case ARM_CPU_PART_CORTEX_A57:
+			return KVM_ARM_TARGET_CORTEX_A57;
+		};
+		break;
+	case ARM_CPU_IMP_APM:
+		switch (part_number) {
+		case APM_CPU_PART_POTENZA:
+			return KVM_ARM_TARGET_XGENE_POTENZA;
+		};
+		break;
+	};
 
-	switch (part_number) {
-	case ARM_CPU_PART_AEM_V8:
-		return KVM_ARM_TARGET_AEM_V8;
-	case ARM_CPU_PART_FOUNDATION:
-		return KVM_ARM_TARGET_FOUNDATION_V8;
-	case ARM_CPU_PART_CORTEX_A57:
-		/* Currently handled by the generic backend */
-		return KVM_ARM_TARGET_CORTEX_A57;
-	default:
-		return -EINVAL;
-	}
+	return -EINVAL;
 }
 
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 4268ab9356b1..8fe6f76b0edc 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -90,6 +90,9 @@ static int __init sys_reg_genericv8_init(void)
 					  &genericv8_target_table);
 	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
 					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
+					  &genericv8_target_table);
+
 	return 0;
 }
 late_initcall(sys_reg_genericv8_init);

From dfbd506266a97ad252336ea96e41791fdd0c0828 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Thu, 12 Dec 2013 16:12:23 +0000
Subject: [PATCH 0563/1185] arm64: KVM: Force undefined exception for Guest SMC
 instructions

The SMC-based PSCI emulation for Guest is going to be very different
from the in-kernel HVC-based PSCI emulation hence for now just inject
undefined exception when Guest executes SMC instruction.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit e5cf9dcdbfd26cd4e1991db08755da900454efeb)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/handle_exit.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 42a0f1bddfe7..7bc41eab4c64 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -39,9 +39,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	if (kvm_psci_call(vcpu))
-		return 1;
-
 	kvm_inject_undefined(vcpu);
 	return 1;
 }

From 2aad15258f0795432073b14b616a634e452432f0 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sun, 29 Dec 2013 12:12:29 -0800
Subject: [PATCH 0564/1185] kvm: make local functions static

Running 'make namespacecheck' found lots of functions that
should be declared static, since only used in one file.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
(cherry picked from commit 7940876e1330671708186ac3386aa521ffb5c182)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h | 16 ----------------
 virt/kvm/ioapic.c        |  2 +-
 virt/kvm/ioapic.h        |  1 -
 virt/kvm/kvm_main.c      | 35 ++++++++++++++++++-----------------
 4 files changed, 19 insertions(+), 35 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1dfc17255ee0..95625d5cb3f0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -449,8 +449,6 @@ void kvm_exit(void);
 
 void kvm_get_kvm(struct kvm *kvm);
 void kvm_put_kvm(struct kvm *kvm);
-void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new,
-		     u64 last_generation);
 
 static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
 {
@@ -523,7 +521,6 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
-void kvm_set_page_dirty(struct page *page);
 void kvm_set_page_accessed(struct page *page);
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
@@ -535,7 +532,6 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
 
-void kvm_release_pfn_dirty(pfn_t pfn);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
 void kvm_set_pfn_accessed(pfn_t pfn);
@@ -562,8 +558,6 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
 unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
-void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
-			     gfn_t gfn);
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
@@ -590,8 +584,6 @@ int kvm_get_dirty_log(struct kvm *kvm,
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 				struct kvm_dirty_log *log);
 
-int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
-				   struct kvm_userspace_memory_region *mem);
 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 			bool line_status);
 long kvm_arch_vm_ioctl(struct file *filp,
@@ -639,8 +631,6 @@ void kvm_arch_check_processor_compat(void *rtn);
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
 
-void kvm_free_physmem(struct kvm *kvm);
-
 void *kvm_kvzalloc(unsigned long size);
 void kvm_kvfree(const void *addr);
 
@@ -1067,12 +1057,6 @@ static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
 static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
 {
 }
-
-static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
-{
-	return true;
-}
-
 #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
 #endif
 
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 2d682977ce82..ce9ed99ad7dc 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -520,7 +520,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 	return 0;
 }
 
-void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
+static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
 {
 	int i;
 
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 615d8c995c3c..90d43e95dcf8 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -91,7 +91,6 @@ void kvm_ioapic_destroy(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 		       int level, bool line_status);
 void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
-void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq, unsigned long *dest_map);
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e9a43b6455be..2162bd5d17d7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -95,6 +95,12 @@ static int hardware_enable_all(void);
 static void hardware_disable_all(void);
 
 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+static void update_memslots(struct kvm_memslots *slots,
+			    struct kvm_memory_slot *new, u64 last_generation);
+
+static void kvm_release_pfn_dirty(pfn_t pfn);
+static void mark_page_dirty_in_slot(struct kvm *kvm,
+				    struct kvm_memory_slot *memslot, gfn_t gfn);
 
 bool kvm_rebooting;
 EXPORT_SYMBOL_GPL(kvm_rebooting);
@@ -552,7 +558,7 @@ static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
 	free->npages = 0;
 }
 
-void kvm_free_physmem(struct kvm *kvm)
+static void kvm_free_physmem(struct kvm *kvm)
 {
 	struct kvm_memslots *slots = kvm->memslots;
 	struct kvm_memory_slot *memslot;
@@ -674,8 +680,9 @@ static void sort_memslots(struct kvm_memslots *slots)
 		slots->id_to_index[slots->memslots[i].id] = i;
 }
 
-void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new,
-		     u64 last_generation)
+static void update_memslots(struct kvm_memslots *slots,
+			    struct kvm_memory_slot *new,
+			    u64 last_generation)
 {
 	if (new) {
 		int id = new->id;
@@ -923,8 +930,8 @@ int kvm_set_memory_region(struct kvm *kvm,
 }
 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
 
-int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
-				   struct kvm_userspace_memory_region *mem)
+static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
+					  struct kvm_userspace_memory_region *mem)
 {
 	if (mem->slot >= KVM_USER_MEM_SLOTS)
 		return -EINVAL;
@@ -1045,7 +1052,7 @@ static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
 }
 
 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
-				 gfn_t gfn)
+					gfn_t gfn)
 {
 	return gfn_to_hva_many(slot, gfn, NULL);
 }
@@ -1385,18 +1392,11 @@ void kvm_release_page_dirty(struct page *page)
 }
 EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
 
-void kvm_release_pfn_dirty(pfn_t pfn)
+static void kvm_release_pfn_dirty(pfn_t pfn)
 {
 	kvm_set_pfn_dirty(pfn);
 	kvm_release_pfn_clean(pfn);
 }
-EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
-
-void kvm_set_page_dirty(struct page *page)
-{
-	kvm_set_pfn_dirty(page_to_pfn(page));
-}
-EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
 
 void kvm_set_pfn_dirty(pfn_t pfn)
 {
@@ -1638,8 +1638,9 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest);
 
-void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
-			     gfn_t gfn)
+static void mark_page_dirty_in_slot(struct kvm *kvm,
+				    struct kvm_memory_slot *memslot,
+				    gfn_t gfn)
 {
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
@@ -1753,7 +1754,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
  *  locking does not harm. It may result in trying to yield to  same VCPU, fail
  *  and continue with next VCPU and so on.
  */
-bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
+static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 {
 	bool eligible;
 

From c9dad332f97620c9221a724e20d2bc4db51bb7a0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 13 Dec 2013 16:56:06 +0000
Subject: [PATCH 0565/1185] arm/arm64: KVM: relax the requirements of VMA
 alignment for THP

The THP code in KVM/ARM is a bit restrictive in not allowing a THP
to be used if the VMA is not 2MB aligned. Actually, it is not so much
the VMA that matters, but the associated memslot:

A process can perfectly mmap a region with no particular alignment
restriction, and then pass a 2MB aligned address to KVM. In this
case, KVM will only use this 2MB aligned region, and will ignore
the range between vma->vm_start and memslot->userspace_addr.

It can also choose to place this memslot at whatever alignment it
wants in the IPA space. In the end, what matters is the relative
alignment of the user space and IPA mappings with respect to a
2M page. They absolutely must be the same if you want to use THP.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 136d737fd20102f1be9b02356590fd55e3a40d0e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 659db0ed1370..7789857d1470 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -667,14 +667,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
 	} else {
 		/*
-		 * Pages belonging to VMAs not aligned to the PMD mapping
-		 * granularity cannot be mapped using block descriptors even
-		 * if the pages belong to a THP for the process, because the
-		 * stage-2 block descriptor will cover more than a single THP
-		 * and we loose atomicity for unmapping, updates, and splits
-		 * of the THP or other pages in the stage-2 block range.
+		 * Pages belonging to memslots that don't have the same
+		 * alignment for userspace and IPA cannot be mapped using
+		 * block descriptors even if the pages belong to a THP for
+		 * the process, because the stage-2 block descriptor will
+		 * cover more than a single THP and we loose atomicity for
+		 * unmapping, updates, and splits of the THP or other pages
+		 * in the stage-2 block range.
 		 */
-		if (vma->vm_start & ~PMD_MASK)
+		if ((memslot->userspace_addr & ~PMD_MASK) !=
+		    ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
 			force_pte = true;
 	}
 	up_read(&current->mm->mmap_sem);

From 3b254a9515ba76de3746f4c28041845ba27b3745 Mon Sep 17 00:00:00 2001
From: Sachin Kamat <sachin.kamat@linaro.org>
Date: Tue, 7 Jan 2014 13:45:15 +0530
Subject: [PATCH 0566/1185] KVM: ARM: Remove duplicate include

trace.h was included twice. Remove duplicate inclusion.

Signed-off-by: Sachin Kamat <sachin.kamat@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 61466710de078c697106fa5b70ec7afc9feab520)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/handle_exit.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index a92079011a83..0de91fc6de0f 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -26,8 +26,6 @@
 
 #include "trace.h"
 
-#include "trace.h"
-
 typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
 
 static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)

From 328c366d43fda9d85c758c455b5f522747fa3d7e Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Thu, 9 Jan 2014 18:43:16 -0600
Subject: [PATCH 0567/1185] kvm: Provide kvm_vcpu_eligible_for_directed_yield()
 stub

Commit 7940876e1330671708186ac3386aa521ffb5c182 ("kvm: make local
functions static") broke KVM PPC builds due to removing (rather than
moving) the stub version of kvm_vcpu_eligible_for_directed_yield().

This patch reintroduces it.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Alexander Graf <agraf@suse.de>
[Move the #ifdef inside the function. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 4a55dd7273c95b4a19fbcf0ae1bbd1cfd09dfc36)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2162bd5d17d7..2d57b23683a4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1731,7 +1731,6 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
 
-#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 /*
  * Helper that checks whether a VCPU is eligible for directed yield.
  * Most eligible candidate to yield is decided by following heuristics:
@@ -1756,6 +1755,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
  */
 static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 {
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 	bool eligible;
 
 	eligible = !vcpu->spin_loop.in_spin_loop ||
@@ -1766,8 +1766,10 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 		kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
 
 	return eligible;
-}
+#else
+	return true;
 #endif
+}
 
 void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 {

From 031cbab9d6d2a6beca9e19472a40f5e22ecfbe07 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 29 Jan 2014 16:16:39 +0300
Subject: [PATCH 0568/1185] KVM: return an error code in
 kvm_vm_ioctl_register_coalesced_mmio()

If kvm_io_bus_register_dev() fails then it returns success but it should
return an error code.

I also did a little cleanup like removing an impossible NULL test.

Cc: stable@vger.kernel.org
Fixes: 2b3c246a682c ('KVM: Make coalesced mmio use a device per zone')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit aac5c4226e7136c331ed384c25d5560204da10a0)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/coalesced_mmio.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 88b2fe3ddf42..00d86427af0f 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -154,17 +154,13 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
 	list_add_tail(&dev->list, &kvm->coalesced_zones);
 	mutex_unlock(&kvm->slots_lock);
 
-	return ret;
+	return 0;
 
 out_free_dev:
 	mutex_unlock(&kvm->slots_lock);
-
 	kfree(dev);
 
-	if (dev == NULL)
-		return -ENXIO;
-
-	return 0;
+	return ret;
 }
 
 int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,

From e9aa8f627c7c504c3880acdfa57849bd3d75dc83 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Thu, 6 Jun 2013 15:32:37 +0200
Subject: [PATCH 0569/1185] KVM: async_pf: Provide additional direct page
 notification

By setting a Kconfig option, the architecture can control when
guest notifications will be presented by the apf backend.
There is the default batch mechanism, working as before, where the vcpu
thread should pull in this information.
Opposite to this, there is now the direct mechanism, that will push the
information to the guest.
This way s390 can use an already existing architecture interface.

Still, the vcpu thread should call check_completion to clean up leftovers.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
(cherry picked from commit e0ead41a6dac09f86675ce07a66e4b253a9b7bd5)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/x86/kvm/mmu.c       |  2 +-
 include/linux/kvm_host.h |  2 +-
 virt/kvm/Kconfig         |  4 ++++
 virt/kvm/async_pf.c      | 20 ++++++++++++++++++--
 4 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3c1877bbfe6a..f47d2e11108e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3232,7 +3232,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 	arch.direct_map = vcpu->arch.mmu.direct_map;
 	arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
 
-	return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
+	return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch);
 }
 
 static bool can_do_async_pf(struct kvm_vcpu *vcpu)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 95625d5cb3f0..f6801d10e04c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -180,7 +180,7 @@ struct kvm_async_pf {
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
 void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 		       struct kvm_arch_async_pf *arch);
 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index fbe1a48bd629..13f2d19793e3 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -22,6 +22,10 @@ config KVM_MMIO
 config KVM_ASYNC_PF
        bool
 
+# Toggle to switch between direct notification and batch job
+config KVM_ASYNC_PF_SYNC
+       bool
+
 config HAVE_KVM_MSI
        bool
 
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 8631d9c14320..00980ab02c45 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -28,6 +28,21 @@
 #include "async_pf.h"
 #include <trace/events/kvm.h>
 
+static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
+					       struct kvm_async_pf *work)
+{
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+	kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
+						struct kvm_async_pf *work)
+{
+#ifndef CONFIG_KVM_ASYNC_PF_SYNC
+	kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+
 static struct kmem_cache *async_pf_cache;
 
 int kvm_async_pf_init(void)
@@ -69,6 +84,7 @@ static void async_pf_execute(struct work_struct *work)
 	down_read(&mm->mmap_sem);
 	get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
 	up_read(&mm->mmap_sem);
+	kvm_async_page_present_sync(vcpu, apf);
 	unuse_mm(mm);
 
 	spin_lock(&vcpu->async_pf.lock);
@@ -138,7 +154,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 	}
 }
 
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 		       struct kvm_arch_async_pf *arch)
 {
 	struct kvm_async_pf *work;
@@ -159,7 +175,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 	work->wakeup_all = false;
 	work->vcpu = vcpu;
 	work->gva = gva;
-	work->addr = gfn_to_hva(vcpu->kvm, gfn);
+	work->addr = hva;
 	work->arch = *arch;
 	work->mm = current->mm;
 	atomic_inc(&work->mm->mm_count);

From 65d762a03f29316956725f348546794de0160fcc Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Tue, 3 Sep 2013 12:31:16 +0200
Subject: [PATCH 0570/1185] KVM: async_pf: Allow to wait for outstanding work

On s390 we are not able to cancel work. Instead we will flush the work and wait for
completion.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
(cherry picked from commit 9f2ceda49c6b8827c795731c204f6c2587886e2c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/async_pf.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 00980ab02c45..889aad022014 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -113,11 +113,16 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 			list_entry(vcpu->async_pf.queue.next,
 				   typeof(*work), queue);
 		list_del(&work->queue);
+
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+		flush_work(&work->work);
+#else
 		if (cancel_work_sync(&work->work)) {
 			mmdrop(work->mm);
 			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
 			kmem_cache_free(async_pf_cache, work);
 		}
+#endif
 	}
 
 	spin_lock(&vcpu->async_pf.lock);

From c76f5b54ec88ab03043cdd2ab2c148daa0bccd7c Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Fri, 31 Jan 2014 14:32:46 +0100
Subject: [PATCH 0571/1185] KVM: async_pf: Add missing call for async page
 present

Commit "KVM: async_pf: Provide additional direct page notification"
missed the call from kvm_check_async_pf_completion to the newly introduced function.

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 1179ba539541347d5427cde8bcfdaa5ead14f3aa)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/async_pf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 889aad022014..10df100c4514 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -151,7 +151,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 		spin_unlock(&vcpu->async_pf.lock);
 
 		kvm_arch_async_page_ready(vcpu, work);
-		kvm_arch_async_page_present(vcpu, work);
+		kvm_async_page_present_async(vcpu, work);
 
 		list_del(&work->queue);
 		vcpu->async_pf.queued--;

From 8cf654c3d7b883a4ffc64e6301b4af5a9f3b6b93 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 5 Feb 2014 10:24:12 +0000
Subject: [PATCH 0572/1185] arm64: fix typo: s/SERRROR/SERROR/

Somehow SERROR has acquired an additional 'R' in a couple of headers.
This patch removes them before they spread further. As neither instance
is in use yet, no other sites need to be fixed up.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit bfb67a5606376bb32cb6f93dc05cda2e8c2038a5)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_arm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index c98ef4771c73..0eb398655378 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -231,7 +231,7 @@
 #define ESR_EL2_EC_SP_ALIGN	(0x26)
 #define ESR_EL2_EC_FP_EXC32	(0x28)
 #define ESR_EL2_EC_FP_EXC64	(0x2C)
-#define ESR_EL2_EC_SERRROR	(0x2F)
+#define ESR_EL2_EC_SERROR	(0x2F)
 #define ESR_EL2_EC_BREAKPT	(0x30)
 #define ESR_EL2_EC_BREAKPT_HYP	(0x31)
 #define ESR_EL2_EC_SOFTSTP	(0x32)

From 4635cac7cb81e457a59eb1cbea32d828f6c25a05 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Sat, 8 Feb 2014 08:51:57 +0100
Subject: [PATCH 0573/1185] asmlinkage, kvm: Make kvm_rebooting visible

kvm_rebooting is referenced from assembler code, thus
needs to be visible.

Cc: Gleb Natapov <gleb@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/1391845930-28580-1-git-send-email-ak@linux.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
(cherry picked from commit 52480137d82062bb8d0fb778cb9934667958e367)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2d57b23683a4..88660cbe70a4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -102,7 +102,7 @@ static void kvm_release_pfn_dirty(pfn_t pfn);
 static void mark_page_dirty_in_slot(struct kvm *kvm,
 				    struct kvm_memory_slot *memslot, gfn_t gfn);
 
-bool kvm_rebooting;
+__visible bool kvm_rebooting;
 EXPORT_SYMBOL_GPL(kvm_rebooting);
 
 static bool largepages_enabled = true;

From 312c49473d6543ad3888172fc63992e5a326c622 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sun, 2 Feb 2014 13:41:02 -0800
Subject: [PATCH 0574/1185] arm64: KVM: Add VGIC device control for arm64

This fixes the build breakage introduced by
c07a0191ef2de1f9510f12d1f88e3b0b5cd8d66f and adds support for the device
control API and save/restore of the VGIC state for ARMv8.

The defines were simply missing from the arm64 header files and
uaccess.h must be implicitly imported from somewhere else on arm.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 2a2f3e269c75edf916de5967079069aeb6a601cb)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/uapi/asm/kvm.h | 9 +++++++++
 virt/kvm/arm/vgic.c               | 1 +
 2 files changed, 10 insertions(+)

diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 495ab6f84a61..eaf54a30bedc 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -148,6 +148,15 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_TIMER_CNT		ARM64_SYS_REG(3, 3, 14, 3, 2)
 #define KVM_REG_ARM_TIMER_CVAL		ARM64_SYS_REG(3, 3, 14, 0, 2)
 
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR	0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS	1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
+#define   KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
+#define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
+#define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
 #define KVM_ARM_IRQ_TYPE_MASK		0xff
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index be456ce264d0..8ca405cd7c1a 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -24,6 +24,7 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/uaccess.h>
 
 #include <linux/irqchip/arm-gic.h>
 

From 2aaea8b6ef1ba1dd5de6646abf86a4bb2a3a0642 Mon Sep 17 00:00:00 2001
From: Michael Mueller <mimu@linux.vnet.ibm.com>
Date: Wed, 26 Feb 2014 16:14:18 +0100
Subject: [PATCH 0575/1185] KVM: add kvm_arch_vcpu_runnable() test to
 kvm_vcpu_on_spin() loop

Use the arch specific function kvm_arch_vcpu_runnable() to add a further
criterion to identify a suitable vcpu to yield to during undirected yield
processing.

Signed-off-by: Michael Mueller <mimu@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 98f4a14676127397c54cab7d6119537ed4d113a2)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 88660cbe70a4..9f5fab0a4fda 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1800,7 +1800,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 				continue;
 			if (vcpu == me)
 				continue;
-			if (waitqueue_active(&vcpu->wq))
+			if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
 				continue;
 			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
 				continue;

From 24e14ecb92fafddb1f3c9cce1b4aed3596f86859 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 26 Feb 2014 18:47:36 +0000
Subject: [PATCH 0576/1185] arm/arm64: KVM: detect CPU reset on CPU_PM_EXIT

Commit 1fcf7ce0c602 (arm: kvm: implement CPU PM notifier) added
support for CPU power-management, using a cpu_notifier to re-init
KVM on a CPU that entered CPU idle.

The code assumed that a CPU entering idle would actually be powered
off, losing its state entirely, and would then need to be
reinitialized. It turns out that this is not always the case, and
some HW performs CPU PM without actually killing the core. In this
case, we try to reinitialize KVM while it is still live. It ends up
badly, as reported by Andre Przywara (using a Calxeda Midway):

[    3.663897] Kernel panic - not syncing: unexpected prefetch abort in Hyp mode at: 0x685760
[    3.663897] unexpected data abort in Hyp mode at: 0xc067d150
[    3.663897] unexpected HVC/SVC trap in Hyp mode at: 0xc0901dd0

The trick here is to detect if we've been through a full re-init or
not by looking at HVBAR (VBAR_EL2 on arm64). This involves
implementing the backend for __hyp_get_vectors in the main KVM HYP
code (rather small), and checking the return value against the
default one when the CPU notifier is called on CPU_PM_EXIT.

Reported-by: Andre Przywara <osp@andrep.de>
Tested-by: Andre Przywara <osp@andrep.de>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Rob Herring <rob.herring@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b20c9f29c5c25921c6ad18b50d4b61e6d181c3cc)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c        |  3 ++-
 arch/arm/kvm/interrupts.S | 11 ++++++++++-
 arch/arm64/kvm/hyp.S      | 27 +++++++++++++++++++++++++--
 3 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 169c718ddd90..9804406ff37e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -878,7 +878,8 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
 				    unsigned long cmd,
 				    void *v)
 {
-	if (cmd == CPU_PM_EXIT) {
+	if (cmd == CPU_PM_EXIT &&
+	    __hyp_get_vectors() == hyp_default_vectors) {
 		cpu_init_hyp_mode(NULL);
 		return NOTIFY_OK;
 	}
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index ddc15539bad2..0d68d4073068 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -220,6 +220,10 @@ after_vfp_restore:
  * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
  * passed in r0 and r1.
  *
+ * A function pointer with a value of 0xffffffff has a special meaning,
+ * and is used to implement __hyp_get_vectors in the same way as in
+ * arch/arm/kernel/hyp_stub.S.
+ *
  * The calling convention follows the standard AAPCS:
  *   r0 - r3: caller save
  *   r12:     caller save
@@ -363,6 +367,11 @@ hyp_hvc:
 host_switch_to_hyp:
 	pop	{r0, r1, r2}
 
+	/* Check for __hyp_get_vectors */
+	cmp	r0, #-1
+	mrceq	p15, 4, r0, c12, c0, 0	@ get HVBAR
+	beq	1f
+
 	push	{lr}
 	mrs	lr, SPSR
 	push	{lr}
@@ -378,7 +387,7 @@ THUMB(	orr	lr, #1)
 	pop	{lr}
 	msr	SPSR_csxf, lr
 	pop	{lr}
-	eret
+1:	eret
 
 guest_trap:
 	load_vcpu			@ Load VCPU pointer to r0
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 3b47c36e10ff..2c56012cb2d2 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -694,6 +694,24 @@ __hyp_panic_str:
 
 	.align	2
 
+/*
+ * u64 kvm_call_hyp(void *hypfn, ...);
+ *
+ * This is not really a variadic function in the classic C-way and care must
+ * be taken when calling this to ensure parameters are passed in registers
+ * only, since the stack will change between the caller and the callee.
+ *
+ * Call the function with the first argument containing a pointer to the
+ * function you wish to call in Hyp mode, and subsequent arguments will be
+ * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the
+ * function pointer can be passed).  The function being called must be mapped
+ * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
+ * passed in r0 and r1.
+ *
+ * A function pointer with a value of 0 has a special meaning, and is
+ * used to implement __hyp_get_vectors in the same way as in
+ * arch/arm64/kernel/hyp_stub.S.
+ */
 ENTRY(kvm_call_hyp)
 	hvc	#0
 	ret
@@ -737,7 +755,12 @@ el1_sync:					// Guest trapped into EL2
 	pop	x2, x3
 	pop	x0, x1
 
-	push	lr, xzr
+	/* Check for __hyp_get_vectors */
+	cbnz	x0, 1f
+	mrs	x0, vbar_el2
+	b	2f
+
+1:	push	lr, xzr
 
 	/*
 	 * Compute the function address in EL2, and shuffle the parameters.
@@ -750,7 +773,7 @@ el1_sync:					// Guest trapped into EL2
 	blr	lr
 
 	pop	lr, xzr
-	eret
+2:	eret
 
 el1_trap:
 	/*

From fca920166b7c07b3932b2a8a24916e5002a57900 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 Jan 2014 19:13:10 +0000
Subject: [PATCH 0577/1185] arm64: KVM: force cache clean on page fault when
 caches are off

In order for the guest with caches off to observe the data
contained in a given page, we need to make sure that page is
committed to memory, and not just hanging in the cache (as
guest accesses are completely bypassing the cache until it
decides to enable it).

For this purpose, hook into the coherent_icache_guest_page
function and flush the region if the guest SCTLR_EL1
register doesn't show the MMU and caches as being enabled.
The function also gets renamed to coherent_cache_guest_page.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 2d58b733c87689d3d5144e4ac94ea861cc729145)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h   |  4 ++--
 arch/arm/kvm/mmu.c               |  4 ++--
 arch/arm64/include/asm/kvm_mmu.h | 16 ++++++++++++----
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 2d122adcdb22..6d0f3d3023b7 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -116,8 +116,8 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 
 struct kvm;
 
-static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
-					      unsigned long size)
+static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
+					     unsigned long size)
 {
 	/*
 	 * If we are going to insert an instruction page and the icache is
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 7789857d1470..fc71a8df0e13 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -715,7 +715,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pmd_writable(&new_pmd);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE);
+		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, PAGE_S2);
@@ -723,7 +723,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_icache_guest_page(kvm, hva, PAGE_SIZE);
+		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
 	}
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 7f1f9408ff66..6eaf69b5e42c 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -106,7 +106,6 @@ static inline bool kvm_is_write_fault(unsigned long esr)
 	return true;
 }
 
-static inline void kvm_clean_dcache_area(void *addr, size_t size) {}
 static inline void kvm_clean_pgd(pgd_t *pgd) {}
 static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
 static inline void kvm_clean_pte(pte_t *pte) {}
@@ -124,9 +123,19 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 
 struct kvm;
 
-static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
-					      unsigned long size)
+#define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
+
+static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 {
+	return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
+}
+
+static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
+					     unsigned long size)
+{
+	if (!vcpu_has_cache_enabled(vcpu))
+		kvm_flush_dcache_to_poc((void *)hva, size);
+
 	if (!icache_is_aliasing()) {		/* PIPT */
 		flush_icache_range(hva, hva + size);
 	} else if (!icache_is_aivivt()) {	/* non ASID-tagged VIVT */
@@ -135,7 +144,6 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
 	}
 }
 
-#define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
 #define kvm_virt_to_phys(x)		__virt_to_phys((unsigned long)(x))
 
 #endif /* __ASSEMBLY__ */

From 96d03922c8cb37a48451b02f95e80cd749266bbe Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 21 Jan 2014 10:55:17 +0000
Subject: [PATCH 0578/1185] arm64: KVM: allows discrimination of AArch32 sysreg
 access

The current handling of AArch32 trapping is slightly less than
perfect, as it is not possible (from a handler point of view)
to distinguish it from an AArch64 access, nor to tell a 32bit
from a 64bit access either.

Fix this by introducing two additional flags:
- is_aarch32: true if the access was made in AArch32 mode
- is_32bit: true if is_aarch32 == true and a MCR/MRC instruction
  was used to perform the access (as opposed to MCRR/MRRC).

This allows a handler to cover all the possible conditions in which
a system register gets trapped.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 2072d29c46b73e39b3c6c56c6027af77086f45fd)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 6 ++++++
 arch/arm64/kvm/sys_regs.h | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 02e9d09e1d80..bf03e0fadf1f 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -437,6 +437,8 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
 	int Rt2 = (hsr >> 10) & 0xf;
 
+	params.is_aarch32 = true;
+	params.is_32bit = false;
 	params.CRm = (hsr >> 1) & 0xf;
 	params.Rt = (hsr >> 5) & 0xf;
 	params.is_write = ((hsr & 1) == 0);
@@ -480,6 +482,8 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	struct sys_reg_params params;
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
 
+	params.is_aarch32 = true;
+	params.is_32bit = true;
 	params.CRm = (hsr >> 1) & 0xf;
 	params.Rt  = (hsr >> 5) & 0xf;
 	params.is_write = ((hsr & 1) == 0);
@@ -549,6 +553,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	struct sys_reg_params params;
 	unsigned long esr = kvm_vcpu_get_hsr(vcpu);
 
+	params.is_aarch32 = false;
+	params.is_32bit = false;
 	params.Op0 = (esr >> 20) & 3;
 	params.Op1 = (esr >> 14) & 0x7;
 	params.CRn = (esr >> 10) & 0xf;
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index d50d3722998e..d411e251412c 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -30,6 +30,8 @@ struct sys_reg_params {
 	u8	Op2;
 	u8	Rt;
 	bool	is_write;
+	bool	is_aarch32;
+	bool	is_32bit;	/* Only valid if is_aarch32 is true */
 };
 
 struct sys_reg_desc {

From 8d8d8cd59448d8f2f9cba73d6cd5558ebe7d5a43 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 Jan 2014 18:00:55 +0000
Subject: [PATCH 0579/1185] arm64: KVM: trap VM system registers until MMU and
 caches are ON

In order to be able to detect the point where the guest enables
its MMU and caches, trap all the VM related system registers.

Once we see the guest enabling both the MMU and the caches, we
can go back to a saner mode of operation, which is to leave these
registers in complete control of the guest.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 4d44923b17bff283c002ed961373848284aaff1b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_arm.h |  3 +-
 arch/arm64/include/asm/kvm_asm.h |  3 +-
 arch/arm64/kvm/sys_regs.c        | 90 +++++++++++++++++++++++++++-----
 3 files changed, 82 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 0eb398655378..00fbaa75dc7b 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -62,6 +62,7 @@
  * RW:		64bit by default, can be overriden for 32bit VMs
  * TAC:		Trap ACTLR
  * TSC:		Trap SMC
+ * TVM:		Trap VM ops (until M+C set in SCTLR_EL1)
  * TSW:		Trap cache operations by set/way
  * TWE:		Trap WFE
  * TWI:		Trap WFI
@@ -74,7 +75,7 @@
  * SWIO:	Turn set/way invalidates into set/way clean+invalidate
  */
 #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
-			 HCR_BSU_IS | HCR_FB | HCR_TAC | \
+			 HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
 			 HCR_AMO | HCR_IMO | HCR_FMO | \
 			 HCR_SWIO | HCR_TIDCP | HCR_RW)
 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index b25763bc0ec4..9fcd54b1e16d 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -79,7 +79,8 @@
 #define c13_TID_URW	(TPIDR_EL0 * 2)	/* Thread ID, User R/W */
 #define c13_TID_URO	(TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
 #define c13_TID_PRIV	(TPIDR_EL1 * 2)	/* Thread ID, Privileged */
-#define c10_AMAIR	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
+#define c10_AMAIR0	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
+#define c10_AMAIR1	(c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
 #define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
 #define NR_CP15_REGS	(NR_SYS_REGS * 2)
 
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index bf03e0fadf1f..2097e5ecba42 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -120,6 +120,46 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
 	return true;
 }
 
+/*
+ * Generic accessor for VM registers. Only called as long as HCR_TVM
+ * is set.
+ */
+static bool access_vm_reg(struct kvm_vcpu *vcpu,
+			  const struct sys_reg_params *p,
+			  const struct sys_reg_desc *r)
+{
+	unsigned long val;
+
+	BUG_ON(!p->is_write);
+
+	val = *vcpu_reg(vcpu, p->Rt);
+	if (!p->is_aarch32) {
+		vcpu_sys_reg(vcpu, r->reg) = val;
+	} else {
+		vcpu_cp15(vcpu, r->reg) = val & 0xffffffffUL;
+		if (!p->is_32bit)
+			vcpu_cp15(vcpu, r->reg + 1) = val >> 32;
+	}
+	return true;
+}
+
+/*
+ * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set.  If the
+ * guest enables the MMU, we stop trapping the VM sys_regs and leave
+ * it in complete control of the caches.
+ */
+static bool access_sctlr(struct kvm_vcpu *vcpu,
+			 const struct sys_reg_params *p,
+			 const struct sys_reg_desc *r)
+{
+	access_vm_reg(vcpu, p, r);
+
+	if (vcpu_has_cache_enabled(vcpu))	/* MMU+Caches enabled? */
+		vcpu->arch.hcr_el2 &= ~HCR_TVM;
+
+	return true;
+}
+
 /*
  * We could trap ID_DFR0 and tell the guest we don't support performance
  * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
@@ -185,32 +225,32 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	  NULL, reset_mpidr, MPIDR_EL1 },
 	/* SCTLR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
-	  NULL, reset_val, SCTLR_EL1, 0x00C50078 },
+	  access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 },
 	/* CPACR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
 	  NULL, reset_val, CPACR_EL1, 0 },
 	/* TTBR0_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000),
-	  NULL, reset_unknown, TTBR0_EL1 },
+	  access_vm_reg, reset_unknown, TTBR0_EL1 },
 	/* TTBR1_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001),
-	  NULL, reset_unknown, TTBR1_EL1 },
+	  access_vm_reg, reset_unknown, TTBR1_EL1 },
 	/* TCR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010),
-	  NULL, reset_val, TCR_EL1, 0 },
+	  access_vm_reg, reset_val, TCR_EL1, 0 },
 
 	/* AFSR0_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000),
-	  NULL, reset_unknown, AFSR0_EL1 },
+	  access_vm_reg, reset_unknown, AFSR0_EL1 },
 	/* AFSR1_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001),
-	  NULL, reset_unknown, AFSR1_EL1 },
+	  access_vm_reg, reset_unknown, AFSR1_EL1 },
 	/* ESR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000),
-	  NULL, reset_unknown, ESR_EL1 },
+	  access_vm_reg, reset_unknown, ESR_EL1 },
 	/* FAR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
-	  NULL, reset_unknown, FAR_EL1 },
+	  access_vm_reg, reset_unknown, FAR_EL1 },
 	/* PAR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000),
 	  NULL, reset_unknown, PAR_EL1 },
@@ -224,17 +264,17 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
 	/* MAIR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
-	  NULL, reset_unknown, MAIR_EL1 },
+	  access_vm_reg, reset_unknown, MAIR_EL1 },
 	/* AMAIR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000),
-	  NULL, reset_amair_el1, AMAIR_EL1 },
+	  access_vm_reg, reset_amair_el1, AMAIR_EL1 },
 
 	/* VBAR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000),
 	  NULL, reset_val, VBAR_EL1, 0 },
 	/* CONTEXTIDR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001),
-	  NULL, reset_val, CONTEXTIDR_EL1, 0 },
+	  access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
 	/* TPIDR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100),
 	  NULL, reset_unknown, TPIDR_EL1 },
@@ -305,14 +345,32 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	  NULL, reset_val, FPEXC32_EL2, 0x70 },
 };
 
-/* Trapped cp15 registers */
+/*
+ * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
+ * depending on the way they are accessed (as a 32bit or a 64bit
+ * register).
+ */
 static const struct sys_reg_desc cp15_regs[] = {
+	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR },
+	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+	{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
+	{ Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
+	{ Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR },
+	{ Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR },
+	{ Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR },
+	{ Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR },
+	{ Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR },
+	{ Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR },
+	{ Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR },
+
 	/*
 	 * DC{C,I,CI}SW operations:
 	 */
 	{ Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw },
 	{ Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
 	{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
+
 	{ Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake },
 	{ Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake },
 	{ Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake },
@@ -326,6 +384,14 @@ static const struct sys_reg_desc cp15_regs[] = {
 	{ Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake },
 	{ Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake },
 	{ Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake },
+
+	{ Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
+	{ Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
+	{ Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 },
+	{ Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
+	{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
+
+	{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
 };
 
 /* Target specific emulation tables */

From 086ffbacac938e5d6208d286ffdf01867753a071 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 18 Feb 2014 14:29:03 +0000
Subject: [PATCH 0580/1185] ARM: KVM: introduce kvm_p*d_addr_end

The use of p*d_addr_end with stage-2 translation is slightly dodgy,
as the IPA is 40bits, while all the p*d_addr_end helpers are
taking an unsigned long (arm64 is fine with that as unsigned long
is 64bit).

The fix is to introduce 64bit clean versions of the same helpers,
and use them in the stage-2 page table code.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit a3c8bd31af260a17d626514f636849ee1cd1f63e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h   | 13 +++++++++++++
 arch/arm/kvm/mmu.c               | 10 +++++-----
 arch/arm64/include/asm/kvm_mmu.h |  4 ++++
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 6d0f3d3023b7..891afe78311a 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -114,6 +114,19 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 	pmd_val(*pmd) |= L_PMD_S2_RDWR;
 }
 
+/* Open coded p*d_addr_end that can deal with 64bit addresses */
+#define kvm_pgd_addr_end(addr, end)					\
+({	u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;		\
+	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
+})
+
+#define kvm_pud_addr_end(addr,end)		(end)
+
+#define kvm_pmd_addr_end(addr, end)					\
+({	u64 __boundary = ((addr) + PMD_SIZE) & PMD_MASK;		\
+	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
+})
+
 struct kvm;
 
 static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index fc71a8df0e13..c1c08b240f35 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -145,7 +145,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 		pgd = pgdp + pgd_index(addr);
 		pud = pud_offset(pgd, addr);
 		if (pud_none(*pud)) {
-			addr = pud_addr_end(addr, end);
+			addr = kvm_pud_addr_end(addr, end);
 			continue;
 		}
 
@@ -155,13 +155,13 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 			 * move on.
 			 */
 			clear_pud_entry(kvm, pud, addr);
-			addr = pud_addr_end(addr, end);
+			addr = kvm_pud_addr_end(addr, end);
 			continue;
 		}
 
 		pmd = pmd_offset(pud, addr);
 		if (pmd_none(*pmd)) {
-			addr = pmd_addr_end(addr, end);
+			addr = kvm_pmd_addr_end(addr, end);
 			continue;
 		}
 
@@ -176,10 +176,10 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 		 */
 		if (kvm_pmd_huge(*pmd) || page_empty(pte)) {
 			clear_pmd_entry(kvm, pmd, addr);
-			next = pmd_addr_end(addr, end);
+			next = kvm_pmd_addr_end(addr, end);
 			if (page_empty(pmd) && !page_empty(pud)) {
 				clear_pud_entry(kvm, pud, addr);
-				next = pud_addr_end(addr, end);
+				next = kvm_pud_addr_end(addr, end);
 			}
 		}
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6eaf69b5e42c..00c0cc8b8045 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -121,6 +121,10 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 	pmd_val(*pmd) |= PMD_S2_RDWR;
 }
 
+#define kvm_pgd_addr_end(addr, end)	pgd_addr_end(addr, end)
+#define kvm_pud_addr_end(addr, end)	pud_addr_end(addr, end)
+#define kvm_pmd_addr_end(addr, end)	pmd_addr_end(addr, end)
+
 struct kvm;
 
 #define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))

From 5ac10a803ba46a03fccfbd078b60fa0cd83422c0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 15 Jan 2014 12:50:23 +0000
Subject: [PATCH 0581/1185] arm64: KVM: flush VM pages before letting the guest
 enable caches

When the guest runs with caches disabled (like in an early boot
sequence, for example), all the writes are directly going to RAM,
bypassing the caches altogether.

Once the MMU and caches are enabled, whatever sits in the cache
becomes suddenly visible, which isn't what the guest expects.

A way to avoid this potential disaster is to invalidate the cache
when the MMU is being turned on. For this, we hook into the SCTLR_EL1
trapping code, and scan the stage-2 page tables, invalidating the
pages/sections that have already been mapped in.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 9d218a1fcf4c6b759d442ef702842fae92e1ea61)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h   |  2 +
 arch/arm/kvm/mmu.c               | 93 ++++++++++++++++++++++++++++++++
 arch/arm64/include/asm/kvm_mmu.h |  2 +
 arch/arm64/kvm/sys_regs.c        |  5 +-
 4 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 891afe78311a..eb85b81eea6f 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -155,6 +155,8 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
 #define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
 #define kvm_virt_to_phys(x)		virt_to_idmap((unsigned long)(x))
 
+void stage2_flush_vm(struct kvm *kvm);
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index c1c08b240f35..d7e998c6a08f 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -187,6 +187,99 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 	}
 }
 
+static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
+			      phys_addr_t addr, phys_addr_t end)
+{
+	pte_t *pte;
+
+	pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte)) {
+			hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+			kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
+			      phys_addr_t addr, phys_addr_t end)
+{
+	pmd_t *pmd;
+	phys_addr_t next;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = kvm_pmd_addr_end(addr, end);
+		if (!pmd_none(*pmd)) {
+			if (kvm_pmd_huge(*pmd)) {
+				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+				kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
+			} else {
+				stage2_flush_ptes(kvm, pmd, addr, next);
+			}
+		}
+	} while (pmd++, addr = next, addr != end);
+}
+
+static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
+			      phys_addr_t addr, phys_addr_t end)
+{
+	pud_t *pud;
+	phys_addr_t next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = kvm_pud_addr_end(addr, end);
+		if (!pud_none(*pud)) {
+			if (pud_huge(*pud)) {
+				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+				kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
+			} else {
+				stage2_flush_pmds(kvm, pud, addr, next);
+			}
+		}
+	} while (pud++, addr = next, addr != end);
+}
+
+static void stage2_flush_memslot(struct kvm *kvm,
+				 struct kvm_memory_slot *memslot)
+{
+	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
+	phys_addr_t next;
+	pgd_t *pgd;
+
+	pgd = kvm->arch.pgd + pgd_index(addr);
+	do {
+		next = kvm_pgd_addr_end(addr, end);
+		stage2_flush_puds(kvm, pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
+/**
+ * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the stage 2 page tables and invalidate any cache lines
+ * backing memory already mapped to the VM.
+ */
+void stage2_flush_vm(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	spin_lock(&kvm->mmu_lock);
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots)
+		stage2_flush_memslot(kvm, memslot);
+
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+}
+
 /**
  * free_boot_hyp_pgd - free HYP boot page tables
  *
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 00c0cc8b8045..7d29847a893b 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -150,5 +150,7 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
 
 #define kvm_virt_to_phys(x)		__virt_to_phys((unsigned long)(x))
 
+void stage2_flush_vm(struct kvm *kvm);
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 2097e5ecba42..03244582bc55 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -27,6 +27,7 @@
 #include <asm/kvm_host.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
 #include <trace/events/kvm.h>
@@ -154,8 +155,10 @@ static bool access_sctlr(struct kvm_vcpu *vcpu,
 {
 	access_vm_reg(vcpu, p, r);
 
-	if (vcpu_has_cache_enabled(vcpu))	/* MMU+Caches enabled? */
+	if (vcpu_has_cache_enabled(vcpu)) {	/* MMU+Caches enabled? */
 		vcpu->arch.hcr_el2 &= ~HCR_TVM;
+		stage2_flush_vm(vcpu->kvm);
+	}
 
 	return true;
 }

From ebdea56c8b2acd4fd7a421d1d670e6bf10f37e60 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 Jan 2014 19:13:10 +0000
Subject: [PATCH 0582/1185] ARM: KVM: force cache clean on page fault when
 caches are off

In order for a guest with caches disabled to observe the data
contained in a given page, we need to make sure that page is
committed to memory, and not just hanging in the cache (as guest
accesses are completely bypassing the cache until it decides to
enable it).

For this purpose, hook into the coherent_cache_guest_page
function and flush the region if the guest SCTLR
register doesn't show the MMU and caches as being enabled.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 159793001d7d85af17855630c94f0a176848e16b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index eb85b81eea6f..5c7aa3c1519f 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -129,9 +129,19 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 
 struct kvm;
 
+#define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
+
+static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
+}
+
 static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
 					     unsigned long size)
 {
+	if (!vcpu_has_cache_enabled(vcpu))
+		kvm_flush_dcache_to_poc((void *)hva, size);
+	
 	/*
 	 * If we are going to insert an instruction page and the icache is
 	 * either VIPT or PIPT, there is a potential problem where the host
@@ -152,7 +162,6 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
 	}
 }
 
-#define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
 #define kvm_virt_to_phys(x)		virt_to_idmap((unsigned long)(x))
 
 void stage2_flush_vm(struct kvm *kvm);

From d773d11dd453f43793082c5206799c818a4a4b55 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 21 Jan 2014 18:56:26 +0000
Subject: [PATCH 0583/1185] ARM: KVM: fix handling of trapped 64bit coprocessor
 accesses

Commit 240e99cbd00a (ARM: KVM: Fix 64-bit coprocessor handling)
changed the way we match the 64bit coprocessor access from
user space, but didn't update the trap handler for the same
set of registers.

The effect is that a trapped 64bit access is never matched, leading
to a fault being injected into the guest. This went unnoticed as we
didn't really trap any 64bit register so far.

Placing the CRm field of the access into the CRn field of the matching
structure fixes the problem. Also update the debug feature to emit the
expected string in case of failing match.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 46c214dd595381c880794413facadfa07fba5c95)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/coproc.c | 4 ++--
 arch/arm/kvm/coproc.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 78c0885d6501..126c90d18387 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -443,7 +443,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	struct coproc_params params;
 
-	params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
+	params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
 	params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf;
 	params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0);
 	params.is_64bit = true;
@@ -451,7 +451,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf;
 	params.Op2 = 0;
 	params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
-	params.CRn = 0;
+	params.CRm = 0;
 
 	return emulate_cp15(vcpu, &params);
 }
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 0461d5c8d3de..c5ad7ff40c96 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -58,8 +58,8 @@ static inline void print_cp_instr(const struct coproc_params *p)
 {
 	/* Look, we even formatted it for you to paste into the table! */
 	if (p->is_64bit) {
-		kvm_pr_unimpl(" { CRm(%2lu), Op1(%2lu), is64, func_%s },\n",
-			      p->CRm, p->Op1, p->is_write ? "write" : "read");
+		kvm_pr_unimpl(" { CRm64(%2lu), Op1(%2lu), is64, func_%s },\n",
+			      p->CRn, p->Op1, p->is_write ? "write" : "read");
 	} else {
 		kvm_pr_unimpl(" { CRn(%2lu), CRm(%2lu), Op1(%2lu), Op2(%2lu), is32,"
 			      " func_%s },\n",

From ba3c65fc739a98f9eae4e8997d8a6e66d130b0cf Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 21 Jan 2014 18:56:26 +0000
Subject: [PATCH 0584/1185] ARM: KVM: fix ordering of 64bit coprocessor
 accesses

Commit 240e99cbd00a (ARM: KVM: Fix 64-bit coprocessor handling)
added an ordering dependency for the 64bit registers.

The order described is: CRn, CRm, Op1, Op2, 64bit-first.

Unfortunately, the implementation is: CRn, 64bit-first, CRm...

Move the 64bit test to be last in order to match the documentation.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 547f781378a22b65c2ab468f235c23001b5924da)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/coproc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index c5ad7ff40c96..8dda870e84f9 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -135,13 +135,13 @@ static inline int cmp_reg(const struct coproc_reg *i1,
 		return -1;
 	if (i1->CRn != i2->CRn)
 		return i1->CRn - i2->CRn;
-	if (i1->is_64 != i2->is_64)
-		return i2->is_64 - i1->is_64;
 	if (i1->CRm != i2->CRm)
 		return i1->CRm - i2->CRm;
 	if (i1->Op1 != i2->Op1)
 		return i1->Op1 - i2->Op1;
-	return i1->Op2 - i2->Op2;
+	if (i1->Op2 != i2->Op2)
+		return i1->Op2 - i2->Op2;
+	return i2->is_64 - i1->is_64;
 }
 
 
From ff860e2b7761196f6379888acd6668ecddb703df Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 22 Jan 2014 09:43:38 +0000
Subject: [PATCH 0585/1185] ARM: KVM: introduce per-vcpu HYP Configuration
 Register

So far, KVM/ARM used a fixed HCR configuration per guest, except for
the VI/VF/VA bits to control the interrupt in absence of VGIC.

With the upcoming need to dynamically reconfigure trapping, it becomes
necessary to allow the HCR to be changed on a per-vcpu basis.

The fix here is to mimic what KVM/arm64 already does: a per vcpu HCR
field, initialized at setup time.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit ac30a11e8e92a03dbe236b285c5cbae0bf563141)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_arm.h  | 1 -
 arch/arm/include/asm/kvm_host.h | 9 ++++++---
 arch/arm/kernel/asm-offsets.c   | 1 +
 arch/arm/kvm/guest.c            | 1 +
 arch/arm/kvm/interrupts_head.S  | 9 +++------
 5 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 1d3153c7eb41..a843e74a384c 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -69,7 +69,6 @@
 #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
 			HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
 			HCR_TWE | HCR_SWIO | HCR_TIDCP)
-#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
 
 /* System Control Register (SCTLR) bits */
 #define SCTLR_TE	(1 << 30)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 098f7dd6d564..09af14999c9b 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -101,6 +101,12 @@ struct kvm_vcpu_arch {
 	/* The CPU type we expose to the VM */
 	u32 midr;
 
+	/* HYP trapping configuration */
+	u32 hcr;
+
+	/* Interrupt related fields */
+	u32 irq_lines;		/* IRQ and FIQ levels */
+
 	/* Exception Information */
 	struct kvm_vcpu_fault_info fault;
 
@@ -128,9 +134,6 @@ struct kvm_vcpu_arch {
 	/* IO related fields */
 	struct kvm_decode mmio_decode;
 
-	/* Interrupt related fields */
-	u32 irq_lines;		/* IRQ and FIQ levels */
-
 	/* Cache some mmu pages needed inside spinlock regions */
 	struct kvm_mmu_memory_cache mmu_page_cache;
 
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index ee68cce6b48e..aa2acc1dd986 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -168,6 +168,7 @@ int main(void)
   DEFINE(VCPU_FIQ_REGS,		offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
   DEFINE(VCPU_PC,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc));
   DEFINE(VCPU_CPSR,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr));
+  DEFINE(VCPU_HCR,		offsetof(struct kvm_vcpu, arch.hcr));
   DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
   DEFINE(VCPU_HSR,		offsetof(struct kvm_vcpu, arch.fault.hsr));
   DEFINE(VCPU_HxFAR,		offsetof(struct kvm_vcpu, arch.fault.hxfar));
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 2786eae10c0d..b23a59c1c522 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,6 +38,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.hcr = HCR_GUEST_MASK;
 	return 0;
 }
 
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 6f18695a09cb..a37270d7d4d6 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -597,17 +597,14 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 /* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */
 .macro configure_hyp_role operation
-	mrc	p15, 4, r2, c1, c1, 0	@ HCR
-	bic	r2, r2, #HCR_VIRT_EXCP_MASK
-	ldr	r3, =HCR_GUEST_MASK
 	.if \operation == vmentry
-	orr	r2, r2, r3
+	ldr	r2, [vcpu, #VCPU_HCR]
 	ldr	r3, [vcpu, #VCPU_IRQ_LINES]
 	orr	r2, r2, r3
 	.else
-	bic	r2, r2, r3
+	mov	r2, #0
 	.endif
-	mcr	p15, 4, r2, c1, c1, 0
+	mcr	p15, 4, r2, c1, c1, 0	@ HCR
 .endm
 
 .macro load_vcpu

From 6076945ed741ff33724410d0bb9dfa535b475e79 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 22 Jan 2014 10:20:09 +0000
Subject: [PATCH 0586/1185] ARM: KVM: add world-switch for AMAIR{0,1}

HCR.TVM traps (among other things) accesses to AMAIR0 and AMAIR1.
In order to minimise the amount of surprise a guest could generate by
trying to access these registers with caches off, add them to the
list of registers we switch/handle.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit af20814ee927ed888288d98917a766b4179c4fe0)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_asm.h |  4 +++-
 arch/arm/kvm/coproc.c          |  6 ++++++
 arch/arm/kvm/interrupts_head.S | 12 ++++++++++--
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 661da11f76f4..53b3c4a50d5c 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -48,7 +48,9 @@
 #define c13_TID_URO	26	/* Thread ID, User R/O */
 #define c13_TID_PRIV	27	/* Thread ID, Privileged */
 #define c14_CNTKCTL	28	/* Timer Control Register (PL1) */
-#define NR_CP15_REGS	29	/* Number of regs (incl. invalid) */
+#define c10_AMAIR0	29	/* Auxilary Memory Attribute Indirection Reg0 */
+#define c10_AMAIR1	30	/* Auxilary Memory Attribute Indirection Reg1 */
+#define NR_CP15_REGS	31	/* Number of regs (incl. invalid) */
 
 #define ARM_EXCEPTION_RESET	  0
 #define ARM_EXCEPTION_UNDEFINED   1
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 126c90d18387..a5a54a48d51b 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -328,6 +328,12 @@ static const struct coproc_reg cp15_regs[] = {
 	{ CRn(10), CRm( 2), Op1( 0), Op2( 1), is32,
 			NULL, reset_unknown, c10_NMRR},
 
+	/* AMAIR0/AMAIR1: swapped by interrupt.S. */
+	{ CRn(10), CRm( 3), Op1( 0), Op2( 0), is32,
+			access_vm_reg, reset_unknown, c10_AMAIR0},
+	{ CRn(10), CRm( 3), Op1( 0), Op2( 1), is32,
+			access_vm_reg, reset_unknown, c10_AMAIR1},
+
 	/* VBAR: swapped by interrupt.S. */
 	{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
 			NULL, reset_val, c12_VBAR, 0x00000000 },
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index a37270d7d4d6..76af93025574 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -303,13 +303,17 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 	mrc	p15, 0, r2, c14, c1, 0	@ CNTKCTL
 	mrrc	p15, 0, r4, r5, c7	@ PAR
+	mrc	p15, 0, r6, c10, c3, 0	@ AMAIR0
+	mrc	p15, 0, r7, c10, c3, 1	@ AMAIR1
 
 	.if \store_to_vcpu == 0
-	push	{r2,r4-r5}
+	push	{r2,r4-r7}
 	.else
 	str	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
 	add	r12, vcpu, #CP15_OFFSET(c7_PAR)
 	strd	r4, r5, [r12]
+	str	r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
+	str	r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
 	.endif
 .endm
 
@@ -322,15 +326,19 @@ vcpu	.req	r0		@ vcpu pointer always in r0
  */
 .macro write_cp15_state read_from_vcpu
 	.if \read_from_vcpu == 0
-	pop	{r2,r4-r5}
+	pop	{r2,r4-r7}
 	.else
 	ldr	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
 	add	r12, vcpu, #CP15_OFFSET(c7_PAR)
 	ldrd	r4, r5, [r12]
+	ldr	r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
+	ldr	r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
 	.endif
 
 	mcr	p15, 0, r2, c14, c1, 0	@ CNTKCTL
 	mcrr	p15, 0, r4, r5, c7	@ PAR
+	mcr	p15, 0, r6, c10, c3, 0	@ AMAIR0
+	mcr	p15, 0, r7, c10, c3, 1	@ AMAIR1
 
 	.if \read_from_vcpu == 0
 	pop	{r2-r12}

From 11178dce0398cd8419c639c8ab81357cb4f2bbf0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 Jan 2014 18:00:55 +0000
Subject: [PATCH 0587/1185] ARM: KVM: trap VM system registers until MMU and
 caches are ON

In order to be able to detect the point where the guest enables
its MMU and caches, trap all the VM related system registers.

Once we see the guest enabling both the MMU and the caches, we
can go back to a saner mode of operation, which is to leave these
registers in complete control of the guest.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 8034699a42d68043b495c7e0cfafccd920707ec8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_arm.h |  3 +-
 arch/arm/kvm/coproc.c          | 74 ++++++++++++++++++++++++++--------
 arch/arm/kvm/coproc.h          |  4 ++
 arch/arm/kvm/coproc_a15.c      |  2 +-
 arch/arm/kvm/coproc_a7.c       |  2 +-
 5 files changed, 66 insertions(+), 19 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index a843e74a384c..816db0bf2dd8 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -55,6 +55,7 @@
  * The bits we set in HCR:
  * TAC:		Trap ACTLR
  * TSC:		Trap SMC
+ * TVM:		Trap VM ops (until MMU and caches are on)
  * TSW:		Trap cache operations by set/way
  * TWI:		Trap WFI
  * TWE:		Trap WFE
@@ -68,7 +69,7 @@
  */
 #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
 			HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
-			HCR_TWE | HCR_SWIO | HCR_TIDCP)
+			HCR_TVM | HCR_TWE | HCR_SWIO | HCR_TIDCP)
 
 /* System Control Register (SCTLR) bits */
 #define SCTLR_TE	(1 << 30)
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index a5a54a48d51b..c58a35116f63 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -23,6 +23,7 @@
 #include <asm/kvm_host.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
 #include <trace/events/kvm.h>
@@ -204,6 +205,44 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
 	return true;
 }
 
+/*
+ * Generic accessor for VM registers. Only called as long as HCR_TVM
+ * is set.
+ */
+static bool access_vm_reg(struct kvm_vcpu *vcpu,
+			  const struct coproc_params *p,
+			  const struct coproc_reg *r)
+{
+	BUG_ON(!p->is_write);
+
+	vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1);
+	if (p->is_64bit)
+		vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2);
+
+	return true;
+}
+
+/*
+ * SCTLR accessor. Only called as long as HCR_TVM is set.  If the
+ * guest enables the MMU, we stop trapping the VM sys_regs and leave
+ * it in complete control of the caches.
+ *
+ * Used by the cpu-specific code.
+ */
+bool access_sctlr(struct kvm_vcpu *vcpu,
+		  const struct coproc_params *p,
+		  const struct coproc_reg *r)
+{
+	access_vm_reg(vcpu, p, r);
+
+	if (vcpu_has_cache_enabled(vcpu)) {	/* MMU+Caches enabled? */
+		vcpu->arch.hcr &= ~HCR_TVM;
+		stage2_flush_vm(vcpu->kvm);
+	}
+
+	return true;
+}
+
 /*
  * We could trap ID_DFR0 and tell the guest we don't support performance
  * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
@@ -261,33 +300,36 @@ static const struct coproc_reg cp15_regs[] = {
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
 			NULL, reset_val, c1_CPACR, 0x00000000 },
 
-	/* TTBR0/TTBR1: swapped by interrupt.S. */
-	{ CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
-	{ CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
-
-	/* TTBCR: swapped by interrupt.S. */
+	/* TTBR0/TTBR1/TTBCR: swapped by interrupt.S. */
+	{ CRm64( 2), Op1( 0), is64, access_vm_reg, reset_unknown64, c2_TTBR0 },
+	{ CRn(2), CRm( 0), Op1( 0), Op2( 0), is32,
+			access_vm_reg, reset_unknown, c2_TTBR0 },
+	{ CRn(2), CRm( 0), Op1( 0), Op2( 1), is32,
+			access_vm_reg, reset_unknown, c2_TTBR1 },
 	{ CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32,
-			NULL, reset_val, c2_TTBCR, 0x00000000 },
+			access_vm_reg, reset_val, c2_TTBCR, 0x00000000 },
+	{ CRm64( 2), Op1( 1), is64, access_vm_reg, reset_unknown64, c2_TTBR1 },
+
 
 	/* DACR: swapped by interrupt.S. */
 	{ CRn( 3), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c3_DACR },
+			access_vm_reg, reset_unknown, c3_DACR },
 
 	/* DFSR/IFSR/ADFSR/AIFSR: swapped by interrupt.S. */
 	{ CRn( 5), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c5_DFSR },
+			access_vm_reg, reset_unknown, c5_DFSR },
 	{ CRn( 5), CRm( 0), Op1( 0), Op2( 1), is32,
-			NULL, reset_unknown, c5_IFSR },
+			access_vm_reg, reset_unknown, c5_IFSR },
 	{ CRn( 5), CRm( 1), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c5_ADFSR },
+			access_vm_reg, reset_unknown, c5_ADFSR },
 	{ CRn( 5), CRm( 1), Op1( 0), Op2( 1), is32,
-			NULL, reset_unknown, c5_AIFSR },
+			access_vm_reg, reset_unknown, c5_AIFSR },
 
 	/* DFAR/IFAR: swapped by interrupt.S. */
 	{ CRn( 6), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c6_DFAR },
+			access_vm_reg, reset_unknown, c6_DFAR },
 	{ CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32,
-			NULL, reset_unknown, c6_IFAR },
+			access_vm_reg, reset_unknown, c6_IFAR },
 
 	/* PAR swapped by interrupt.S */
 	{ CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
@@ -324,9 +366,9 @@ static const struct coproc_reg cp15_regs[] = {
 
 	/* PRRR/NMRR (aka MAIR0/MAIR1): swapped by interrupt.S. */
 	{ CRn(10), CRm( 2), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c10_PRRR},
+			access_vm_reg, reset_unknown, c10_PRRR},
 	{ CRn(10), CRm( 2), Op1( 0), Op2( 1), is32,
-			NULL, reset_unknown, c10_NMRR},
+			access_vm_reg, reset_unknown, c10_NMRR},
 
 	/* AMAIR0/AMAIR1: swapped by interrupt.S. */
 	{ CRn(10), CRm( 3), Op1( 0), Op2( 0), is32,
@@ -340,7 +382,7 @@ static const struct coproc_reg cp15_regs[] = {
 
 	/* CONTEXTIDR/TPIDRURW/TPIDRURO/TPIDRPRW: swapped by interrupt.S. */
 	{ CRn(13), CRm( 0), Op1( 0), Op2( 1), is32,
-			NULL, reset_val, c13_CID, 0x00000000 },
+			access_vm_reg, reset_val, c13_CID, 0x00000000 },
 	{ CRn(13), CRm( 0), Op1( 0), Op2( 2), is32,
 			NULL, reset_unknown, c13_TID_URW },
 	{ CRn(13), CRm( 0), Op1( 0), Op2( 3), is32,
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 8dda870e84f9..1a44bbe39643 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -153,4 +153,8 @@ static inline int cmp_reg(const struct coproc_reg *i1,
 #define is64		.is_64 = true
 #define is32		.is_64 = false
 
+bool access_sctlr(struct kvm_vcpu *vcpu,
+		  const struct coproc_params *p,
+		  const struct coproc_reg *r);
+
 #endif /* __ARM_KVM_COPROC_LOCAL_H__ */
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index bb0cac1410cc..e6f4ae48bda9 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -34,7 +34,7 @@
 static const struct coproc_reg a15_regs[] = {
 	/* SCTLR: swapped by interrupt.S. */
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_val, c1_SCTLR, 0x00C50078 },
+			access_sctlr, reset_val, c1_SCTLR, 0x00C50078 },
 };
 
 static struct kvm_coproc_target_table a15_target_table = {
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
index 1df767331588..17fc7cd479d3 100644
--- a/arch/arm/kvm/coproc_a7.c
+++ b/arch/arm/kvm/coproc_a7.c
@@ -37,7 +37,7 @@
 static const struct coproc_reg a7_regs[] = {
 	/* SCTLR: swapped by interrupt.S. */
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_val, c1_SCTLR, 0x00C50878 },
+			access_sctlr, reset_val, c1_SCTLR, 0x00C50878 },
 };
 
 static struct kvm_coproc_target_table a7_target_table = {

From 0ddf276ba9c11c5bd993c7c87226dde6fc5ffa51 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 30 Jan 2014 17:38:33 +0000
Subject: [PATCH 0588/1185] ARM: KVM: fix warning in mmu.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Compiling with THP enabled leads to the following warning:

arch/arm/kvm/mmu.c: In function ‘unmap_range’:
arch/arm/kvm/mmu.c:177:39: warning: ‘pte’ may be used uninitialized in this function [-Wmaybe-uninitialized]
   if (kvm_pmd_huge(*pmd) || page_empty(pte)) {
                                        ^
Code inspection reveals that these two cases are mutually exclusive,
so GCC is a bit overzealous here. Silence it anyway by initializing
pte to NULL and testing it later on.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 56041bf920d2937b7cadcb30cb206f0372eee814)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index d7e998c6a08f..80bb1e6c2c29 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -144,6 +144,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 	while (addr < end) {
 		pgd = pgdp + pgd_index(addr);
 		pud = pud_offset(pgd, addr);
+		pte = NULL;
 		if (pud_none(*pud)) {
 			addr = kvm_pud_addr_end(addr, end);
 			continue;
@@ -174,7 +175,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 		/*
 		 * If the pmd entry is to be cleared, walk back up the ladder
 		 */
-		if (kvm_pmd_huge(*pmd) || page_empty(pte)) {
+		if (kvm_pmd_huge(*pmd) || (pte && page_empty(pte))) {
 			clear_pmd_entry(kvm, pmd, addr);
 			next = kvm_pmd_addr_end(addr, end);
 			if (page_empty(pmd) && !page_empty(pud)) {

From 500d9e60bb1f98d24a2780542a68d6a1ffc32315 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 6 Mar 2014 03:30:46 +0000
Subject: [PATCH 0589/1185] ARM: KVM: fix non-VGIC compilation

Add a stub for kvm_vgic_addr when compiling without
CONFIG_KVM_ARM_VGIC. The usefulness of this configuration is extremely
doubtful, but let's fix it anyway (until we decide that we'll always
support a VGIC).

Reported-by: Michele Paolino <m.paolino@virtualopensystems.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 6cbde8253a8143ada18ec0d1711230747a7c1934)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index be85127bfed3..f27000f55a83 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -171,6 +171,11 @@ static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 add
 	return 0;
 }
 
+static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
+{
+	return -ENXIO;
+}
+
 static inline int kvm_vgic_init(struct kvm *kvm)
 {
 	return 0;

From 1fd8c219ef68ef56a4a0b0effa66c38e409697eb Mon Sep 17 00:00:00 2001
From: Radha Mohan Chintakuntla <rchintakuntla@cavium.com>
Date: Fri, 7 Mar 2014 08:49:25 +0000
Subject: [PATCH 0590/1185] arm64: Add boot time configuration of Intermediate
 Physical Address size

ARMv8 supports a range of physical address bit sizes. The PARange bits
from ID_AA64MMFR0_EL1 register are read during boot-time and the
intermediate physical address size bits are written in the translation
control registers (TCR_EL1 and VTCR_EL2).

There is no change in the VA bits and levels of translation.

Signed-off-by: Radha Mohan Chintakuntla <rchintakuntla@cavium.com>
Reviewed-by: Will Deacon <Will.deacon@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 87366d8cf7b3f6dc34633938aa8766e5a390ce33)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_arm.h       | 15 ++++++---------
 arch/arm64/include/asm/pgtable-hwdef.h |  5 ++---
 arch/arm64/kvm/hyp-init.S              |  6 ++++++
 arch/arm64/mm/proc.S                   |  8 +++++++-
 4 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 00fbaa75dc7b..3d6903006a8a 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -107,7 +107,6 @@
 
 /* VTCR_EL2 Registers bits */
 #define VTCR_EL2_PS_MASK	(7 << 16)
-#define VTCR_EL2_PS_40B		(2 << 16)
 #define VTCR_EL2_TG0_MASK	(1 << 14)
 #define VTCR_EL2_TG0_4K		(0 << 14)
 #define VTCR_EL2_TG0_64K	(1 << 14)
@@ -130,10 +129,9 @@
  * 64kB pages (TG0 = 1)
  * 2 level page tables (SL = 1)
  */
-#define VTCR_EL2_FLAGS		(VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \
-				 VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
-				 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
-				 VTCR_EL2_T0SZ_40B)
+#define VTCR_EL2_FLAGS		(VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
+				 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
+				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
 #define VTTBR_X		(38 - VTCR_EL2_T0SZ_40B)
 #else
 /*
@@ -143,10 +141,9 @@
  * 4kB pages (TG0 = 0)
  * 3 level page tables (SL = 1)
  */
-#define VTCR_EL2_FLAGS		(VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \
-				 VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
-				 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
-				 VTCR_EL2_T0SZ_40B)
+#define VTCR_EL2_FLAGS		(VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
+				 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
+				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
 #define VTTBR_X		(37 - VTCR_EL2_T0SZ_40B)
 #endif
 
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index b1d2e26c3c88..f7af66b54cb2 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -100,9 +100,9 @@
 #define PTE_HYP			PTE_USER
 
 /*
- * 40-bit physical address supported.
+ * Highest possible physical address supported.
  */
-#define PHYS_MASK_SHIFT		(40)
+#define PHYS_MASK_SHIFT		(48)
 #define PHYS_MASK		((UL(1) << PHYS_MASK_SHIFT) - 1)
 
 /*
@@ -122,7 +122,6 @@
 #define TCR_SHARED		((UL(3) << 12) | (UL(3) << 28))
 #define TCR_TG0_64K		(UL(1) << 14)
 #define TCR_TG1_64K		(UL(1) << 30)
-#define TCR_IPS_40BIT		(UL(2) << 32)
 #define TCR_ASID16		(UL(1) << 36)
 #define TCR_TBI0		(UL(1) << 37)
 
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 2b0244d65c16..d968796f4b2d 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -68,6 +68,12 @@ __do_hyp_init:
 	msr	tcr_el2, x4
 
 	ldr	x4, =VTCR_EL2_FLAGS
+	/*
+	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
+	 * VTCR_EL2.
+	 */
+	mrs	x5, ID_AA64MMFR0_EL1
+	bfi	x4, x5, #16, #3
 	msr	vtcr_el2, x4
 
 	mrs	x4, mair_el1
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 8e0158f198d7..55a40f6dbf78 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -140,8 +140,14 @@ ENTRY(__cpu_setup)
 	 * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for
 	 * both user and kernel.
 	 */
-	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | TCR_IPS_40BIT | \
+	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | \
 		      TCR_ASID16 | TCR_TBI0 | (1 << 31)
+	/*
+	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
+	 * TCR_EL1.
+	 */
+	mrs	x9, ID_AA64MMFR0_EL1
+	bfi	x10, x9, #32, #3
 #ifdef CONFIG_ARM64_64K_PAGES
 	orr	x10, x10, TCR_TG0_64K
 	orr	x10, x10, TCR_TG1_64K

From 7fe3c71dbc4c635cc5e4b527b748749fc771eb57 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Mon, 7 Apr 2014 01:36:08 +0800
Subject: [PATCH 0591/1185] arm, kvm: fix double lock on cpu_add_remove_lock

Commit 8146875de7d4 (arm, kvm: Fix CPU hotplug callback registration)
holds the lock before calling the two functions:

	kvm_vgic_hyp_init()
	kvm_timer_hyp_init()

and both of these functions call register_cpu_notifier() to register
a cpu notifier, which causes a double lock on cpu_add_remove_lock.

Considering that both functions are only called from
kvm_arch_init() with cpu_add_remove_lock held, simply use
__register_cpu_notifier() to fix the problem.

Fixes: 8146875de7d4 (arm, kvm: Fix CPU hotplug callback registration)
Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 553f809e23f00976caea7a1ebdabaa58a6383e7d)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/arch_timer.c | 2 +-
 virt/kvm/arm/vgic.c       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 5081e809821f..22fa819a9b6a 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -277,7 +277,7 @@ int kvm_timer_hyp_init(void)
 
 	host_vtimer_irq = ppi;
 
-	err = register_cpu_notifier(&kvm_timer_cpu_nb);
+	err = __register_cpu_notifier(&kvm_timer_cpu_nb);
 	if (err) {
 		kvm_err("Cannot register timer CPU notifier\n");
 		goto out_free;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 8ca405cd7c1a..47b29834a6b6 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1496,7 +1496,7 @@ int kvm_vgic_hyp_init(void)
 		goto out;
 	}
 
-	ret = register_cpu_notifier(&vgic_cpu_nb);
+	ret = __register_cpu_notifier(&vgic_cpu_nb);
 	if (ret) {
 		kvm_err("Cannot register vgic CPU notifier\n");
 		goto out_free_irq;

From 9b937709747526fc1fcac118d605705f78424d3c Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:09:21 +0100
Subject: [PATCH 0592/1185] KVM: add kvm_is_error_gpa() helper

It's quite common (in the s390 guest access code) to test if a guest
physical address points to a valid guest memory area or not.
So add a simple helper function in common code, since this might be
of interest for other architectures as well.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
(cherry picked from commit dfeec843fb237d73947e818f961e8d6f0df22b01)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f6801d10e04c..eef946f92c0c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -840,6 +840,13 @@ static inline hpa_t pfn_to_hpa(pfn_t pfn)
 	return (hpa_t)pfn << PAGE_SHIFT;
 }
 
+static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
+{
+	unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+
+	return kvm_is_error_hva(hva);
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);

From d80e22fea4940bd2ad63648a941620b6fccd59a3 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 25 Apr 2014 11:46:04 +0100
Subject: [PATCH 0593/1185] ARM: KVM: disable KVM in Kconfig on big-endian
 systems

KVM currently crashes and burns on big-endian hosts, so don't allow it
to be selected until we've got that fixed.

Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 4e4468fac4381b92eb333d94256e7fb8350f3de3)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 466bd299b1a8..4be5bb150bdd 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -23,7 +23,7 @@ config KVM
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select KVM_MMIO
 	select KVM_ARM_HOST
-	depends on ARM_VIRT_EXT && ARM_LPAE
+	depends on ARM_VIRT_EXT && ARM_LPAE && !CPU_BIG_ENDIAN
 	---help---
 	  Support hosting virtualized guest machines. You will also
 	  need to select one or more of the processor modules below.

From 16b80f7131f5d763118811815439c82e4ac45e50 Mon Sep 17 00:00:00 2001
From: Mark Salter <msalter@redhat.com>
Date: Fri, 28 Mar 2014 14:25:19 +0000
Subject: [PATCH 0594/1185] arm: KVM: fix possible misalignment of PGDs and
 bounce page

The kvm/mmu code shared by arm and arm64 uses kmalloc()/kzalloc() to
allocate a bounce page (if hypervisor init code crosses a page boundary)
and hypervisor PGDs. The problem is that kmalloc() does not guarantee
the proper alignment. In the case of the bounce page, the page sized
buffer allocated may also cross a page boundary negating the purpose
and leading to a hang during kvm initialization. Likewise the PGDs
allocated may not meet the minimum alignment requirements of the
underlying MMU. This patch uses __get_free_page() to guarantee the
worst case alignment needs of the bounce page and PGDs on both arm
and arm64.

Cc: <stable@vger.kernel.org> # 3.10+
Signed-off-by: Mark Salter <msalter@redhat.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 5d4e08c45a6cf8f1ab3c7fa375007635ac569165)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 80bb1e6c2c29..16f804938b8f 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -42,6 +42,8 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
+#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
+
 #define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
 
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
@@ -293,14 +295,14 @@ void free_boot_hyp_pgd(void)
 	if (boot_hyp_pgd) {
 		unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
 		unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
-		kfree(boot_hyp_pgd);
+		free_pages((unsigned long)boot_hyp_pgd, pgd_order);
 		boot_hyp_pgd = NULL;
 	}
 
 	if (hyp_pgd)
 		unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
 
-	kfree(init_bounce_page);
+	free_page((unsigned long)init_bounce_page);
 	init_bounce_page = NULL;
 
 	mutex_unlock(&kvm_hyp_pgd_mutex);
@@ -330,7 +332,7 @@ void free_hyp_pgds(void)
 		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
 			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 
-		kfree(hyp_pgd);
+		free_pages((unsigned long)hyp_pgd, pgd_order);
 		hyp_pgd = NULL;
 	}
 
@@ -1024,7 +1026,7 @@ int kvm_mmu_init(void)
 		size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start;
 		phys_addr_t phys_base;
 
-		init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		init_bounce_page = (void *)__get_free_page(GFP_KERNEL);
 		if (!init_bounce_page) {
 			kvm_err("Couldn't allocate HYP init bounce page\n");
 			err = -ENOMEM;
@@ -1050,8 +1052,9 @@ int kvm_mmu_init(void)
 			 (unsigned long)phys_base);
 	}
 
-	hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
-	boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+
 	if (!hyp_pgd || !boot_hyp_pgd) {
 		kvm_err("Hyp mode PGD not allocated\n");
 		err = -ENOMEM;

From 738a77ae39541495f4d5396ed093bccdebc589f1 Mon Sep 17 00:00:00 2001
From: Haibin Wang <wanghaibin.wang@huawei.com>
Date: Thu, 10 Apr 2014 13:14:32 +0100
Subject: [PATCH 0595/1185] KVM: ARM: vgic: Fix sgi dispatch problem

When dispatching an SGI with mode == 0, the vcpu should send the SGI
only to the CPUs in the target_cpus list. So a "break" must be added
to the case 0 branch to stop it falling through into case 1.

Cc: <stable@vger.kernel.org> # 3.10+
Signed-off-by: Haibin Wang <wanghaibin.wang@huawei.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 91021a6c8ffdc55804dab5acdfc7de4f278b9ac3)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 47b29834a6b6..7e8b44efb739 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -916,6 +916,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 	case 0:
 		if (!target_cpus)
 			return;
+		break;
 
 	case 1:
 		target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;

From 6e349e33cec7724f9ae0520a1539ab7cf1d24893 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 21 Apr 2014 15:26:01 +0200
Subject: [PATCH 0596/1185] KVM: async_pf: mm->mm_users can not pin apf->mm

get_user_pages(mm) is simply wrong if mm->mm_users == 0 and exit_mmap/etc
was already called (or is in progress), mm->mm_count can only pin mm->pgd
and mm_struct itself.

Change kvm_setup_async_pf/async_pf_execute to inc/dec mm->mm_users.

kvm_create_vm/kvm_destroy_vm play with ->mm_count too but this case looks
fine at first glance, it seems that this ->mm is only used to verify that
current->mm == kvm->mm.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 41c22f626254b9dc0376928cae009e73d1b6a49a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/async_pf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 10df100c4514..06e6401d6ef4 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -101,7 +101,7 @@ static void async_pf_execute(struct work_struct *work)
 	if (waitqueue_active(&vcpu->wq))
 		wake_up_interruptible(&vcpu->wq);
 
-	mmdrop(mm);
+	mmput(mm);
 	kvm_put_kvm(vcpu->kvm);
 }
 
@@ -118,7 +118,7 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 		flush_work(&work->work);
 #else
 		if (cancel_work_sync(&work->work)) {
-			mmdrop(work->mm);
+			mmput(work->mm);
 			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
 			kmem_cache_free(async_pf_cache, work);
 		}
@@ -183,7 +183,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 	work->addr = hva;
 	work->arch = *arch;
 	work->mm = current->mm;
-	atomic_inc(&work->mm->mm_count);
+	atomic_inc(&work->mm->mm_users);
 	kvm_get_kvm(work->vcpu->kvm);
 
 	/* this can't really happen otherwise gfn_to_pfn_async
@@ -201,7 +201,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 	return 1;
 retry_sync:
 	kvm_put_kvm(work->vcpu->kvm);
-	mmdrop(work->mm);
+	mmput(work->mm);
 	kmem_cache_free(async_pf_cache, work);
 	return 0;
 }

From bc38de2312a2a078f2d5ad596888a131d811b143 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Fri, 11 Apr 2014 00:07:18 +0200
Subject: [PATCH 0597/1185] KVM: arm/arm64: vgic: fix GICD_ICFGR register
 accesses

Since KVM internally represents the ICFGR registers by stuffing two
of them into one word, the offset for accessing the internal
representation and the one for the MMIO based access are different.
So keep the original offset around, but adjust the internal array
offset by one bit.

Reported-by: Haibin Wang <wanghaibin.wang@huawei.com>
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit f2ae85b2ab3776b9e4e42e5b6fa090f40d396794)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 7e8b44efb739..f9af48c9eb37 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -548,11 +548,10 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
 	u32 val;
 	u32 *reg;
 
-	offset >>= 1;
 	reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
-				  vcpu->vcpu_id, offset);
+				  vcpu->vcpu_id, offset >> 1);
 
-	if (offset & 2)
+	if (offset & 4)
 		val = *reg >> 16;
 	else
 		val = *reg & 0xffff;
@@ -561,13 +560,13 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
 	vgic_reg_access(mmio, &val, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
 	if (mmio->is_write) {
-		if (offset < 4) {
+		if (offset < 8) {
 			*reg = ~0U; /* Force PPIs/SGIs to 1 */
 			return false;
 		}
 
 		val = vgic_cfg_compress(val);
-		if (offset & 2) {
+		if (offset & 4) {
 			*reg &= 0xffff;
 			*reg |= val << 16;
 		} else {

From 2a1992b43b4e9f453e77e62740181ef665426c93 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 21 Apr 2014 15:25:58 +0200
Subject: [PATCH 0598/1185] KVM: async_pf: kill the unnecessary use_mm/unuse_mm
 async_pf_execute()

async_pf_execute() has no reasons to adopt apf->mm, gup(current, mm)
should work just fine even if current has another or NULL ->mm.

Recently kvm_async_page_present_sync() was added inside the "use_mm"
section, but it seems that it doesn't need current->mm either.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d72d946d0b649b79709b99b9d5cb7269fff8afaa)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/async_pf.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 06e6401d6ef4..cda703e512d3 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,12 +80,10 @@ static void async_pf_execute(struct work_struct *work)
 
 	might_sleep();
 
-	use_mm(mm);
 	down_read(&mm->mmap_sem);
 	get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
 	up_read(&mm->mmap_sem);
 	kvm_async_page_present_sync(vcpu, apf);
-	unuse_mm(mm);
 
 	spin_lock(&vcpu->async_pf.lock);
 	list_add_tail(&apf->link, &vcpu->async_pf.done);

From 4c158ade8e722cd59ba1a6d8ca46be8427c8ac90 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 28 Apr 2014 17:03:00 +0200
Subject: [PATCH 0599/1185] KVM: async_pf: change async_pf_execute() to use
 get_user_pages(tsk => NULL)

async_pf_execute() passes tsk == current to gup(). This doesn't hurt,
but it is unnecessary and misleading: "tsk" is only used to account
the number of faults, and current is a random workqueue thread.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Suggested-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e9545b9f8aeb63e05818e4b3250057260bc072aa)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/async_pf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index cda703e512d3..d6a3d0993d88 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -81,7 +81,7 @@ static void async_pf_execute(struct work_struct *work)
 	might_sleep();
 
 	down_read(&mm->mmap_sem);
-	get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
+	get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
 	up_read(&mm->mmap_sem);
 	kvm_async_page_present_sync(vcpu, apf);
 

From de055bd5eb6f2daf69c81f6f62241f0b977a2ae4 Mon Sep 17 00:00:00 2001
From: Haibin Wang <wanghaibin.wang@huawei.com>
Date: Tue, 29 Apr 2014 14:49:17 +0800
Subject: [PATCH 0600/1185] KVM: ARM: vgic: Fix the overlap check action about
 setting the GICD & GICC base address.

Currently the check below in vgic_ioaddr_overlap() will always succeed,
because the vgic dist base and vgic cpu base are still kept UNDEF
after initialization. The following code will therefore always return:

	if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
                return 0;

So the corresponding base address needs to be set before invoking
vgic_ioaddr_overlap().

Signed-off-by: Haibin Wang <wanghaibin.wang@huawei.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 30c2117085bc4e05d091cee6eba79f069b41a9cd)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index f9af48c9eb37..56ff9bebb577 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1667,10 +1667,11 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
 	if (addr + size < addr)
 		return -EINVAL;
 
+	*ioaddr = addr;
 	ret = vgic_ioaddr_overlap(kvm);
 	if (ret)
-		return ret;
-	*ioaddr = addr;
+		*ioaddr = VGIC_ADDR_UNDEF;
+
 	return ret;
 }
 

From 7d46aca1cf97c1ff323d799fd82ffd49bc294bd1 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:14 +0530
Subject: [PATCH 0601/1185] KVM: Add capability to advertise PSCI v0.2 support

User space (i.e. QEMU or KVMTOOL) should be able to check whether KVM
ARM/ARM64 supports in-kernel PSCI v0.2 emulation. For this purpose, we
define KVM_CAP_ARM_PSCI_0_2 in KVM user space interface header.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 717abd208dff75b343243aa5ed688f62190dda5e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/uapi/linux/kvm.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 56347ab54cf7..cb7ebcc7a9db 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -669,6 +669,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_IRQ_XICS 92
 #define KVM_CAP_ARM_EL1_32BIT 93
 #define KVM_CAP_EXT_EMUL_CPUID 95
+#define KVM_CAP_ARM_PSCI_0_2 102
 
 #ifdef KVM_CAP_IRQ_ROUTING
 

From 041d3f918f8daf3715fc87f1516c40466f94e6dd Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:15 +0530
Subject: [PATCH 0602/1185] ARM/ARM64: KVM: Add common header for PSCI related
 defines

We need a common place to share PSCI related defines among ARM kernel,
ARM64 kernel, KVM ARM/ARM64 PSCI emulation, and user space.

We introduce uapi/linux/psci.h for this purpose. This newly added
header will be first used by KVM ARM/ARM64 in-kernel PSCI emulation
and user space (i.e. QEMU or KVMTOOL).

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Ashwin Chaugule <ashwin.chaugule@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit e546eea74ec66698e29c583639cf6e2a11e46490)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/uapi/linux/Kbuild |  1 +
 include/uapi/linux/psci.h | 90 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)
 create mode 100644 include/uapi/linux/psci.h

diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index bdc6e87ff3eb..405887bec8b3 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -311,6 +311,7 @@ header-y += ppp-ioctl.h
 header-y += ppp_defs.h
 header-y += pps.h
 header-y += prctl.h
+header-y += psci.h
 header-y += ptp_clock.h
 header-y += ptrace.h
 header-y += qnx4_fs.h
diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h
new file mode 100644
index 000000000000..310d83e0a91b
--- /dev/null
+++ b/include/uapi/linux/psci.h
@@ -0,0 +1,90 @@
+/*
+ * ARM Power State and Coordination Interface (PSCI) header
+ *
+ * This header holds common PSCI defines and macros shared
+ * by: ARM kernel, ARM64 kernel, KVM ARM/ARM64 and user space.
+ *
+ * Copyright (C) 2014 Linaro Ltd.
+ * Author: Anup Patel <anup.patel@linaro.org>
+ */
+
+#ifndef _UAPI_LINUX_PSCI_H
+#define _UAPI_LINUX_PSCI_H
+
+/*
+ * PSCI v0.1 interface
+ *
+ * The PSCI v0.1 function numbers are implementation defined.
+ *
+ * Only PSCI return values such as: SUCCESS, NOT_SUPPORTED,
+ * INVALID_PARAMS, and DENIED defined below are applicable
+ * to PSCI v0.1.
+ */
+
+/* PSCI v0.2 interface */
+#define PSCI_0_2_FN_BASE			0x84000000
+#define PSCI_0_2_FN(n)				(PSCI_0_2_FN_BASE + (n))
+#define PSCI_0_2_64BIT				0x40000000
+#define PSCI_0_2_FN64_BASE			\
+					(PSCI_0_2_FN_BASE + PSCI_0_2_64BIT)
+#define PSCI_0_2_FN64(n)			(PSCI_0_2_FN64_BASE + (n))
+
+#define PSCI_0_2_FN_PSCI_VERSION		PSCI_0_2_FN(0)
+#define PSCI_0_2_FN_CPU_SUSPEND			PSCI_0_2_FN(1)
+#define PSCI_0_2_FN_CPU_OFF			PSCI_0_2_FN(2)
+#define PSCI_0_2_FN_CPU_ON			PSCI_0_2_FN(3)
+#define PSCI_0_2_FN_AFFINITY_INFO		PSCI_0_2_FN(4)
+#define PSCI_0_2_FN_MIGRATE			PSCI_0_2_FN(5)
+#define PSCI_0_2_FN_MIGRATE_INFO_TYPE		PSCI_0_2_FN(6)
+#define PSCI_0_2_FN_MIGRATE_INFO_UP_CPU		PSCI_0_2_FN(7)
+#define PSCI_0_2_FN_SYSTEM_OFF			PSCI_0_2_FN(8)
+#define PSCI_0_2_FN_SYSTEM_RESET		PSCI_0_2_FN(9)
+
+#define PSCI_0_2_FN64_CPU_SUSPEND		PSCI_0_2_FN64(1)
+#define PSCI_0_2_FN64_CPU_ON			PSCI_0_2_FN64(3)
+#define PSCI_0_2_FN64_AFFINITY_INFO		PSCI_0_2_FN64(4)
+#define PSCI_0_2_FN64_MIGRATE			PSCI_0_2_FN64(5)
+#define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU	PSCI_0_2_FN64(7)
+
+/* PSCI v0.2 power state encoding for CPU_SUSPEND function */
+#define PSCI_0_2_POWER_STATE_ID_MASK		0xffff
+#define PSCI_0_2_POWER_STATE_ID_SHIFT		0
+#define PSCI_0_2_POWER_STATE_TYPE_SHIFT		16
+#define PSCI_0_2_POWER_STATE_TYPE_MASK		\
+				(0x1 << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
+#define PSCI_0_2_POWER_STATE_AFFL_SHIFT		24
+#define PSCI_0_2_POWER_STATE_AFFL_MASK		\
+				(0x3 << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
+
+/* PSCI v0.2 affinity level state returned by AFFINITY_INFO */
+#define PSCI_0_2_AFFINITY_LEVEL_ON		0
+#define PSCI_0_2_AFFINITY_LEVEL_OFF		1
+#define PSCI_0_2_AFFINITY_LEVEL_ON_PENDING	2
+
+/* PSCI v0.2 multicore support in Trusted OS returned by MIGRATE_INFO_TYPE */
+#define PSCI_0_2_TOS_UP_MIGRATE			0
+#define PSCI_0_2_TOS_UP_NO_MIGRATE		1
+#define PSCI_0_2_TOS_MP				2
+
+/* PSCI version decoding (independent of PSCI version) */
+#define PSCI_VERSION_MAJOR_SHIFT		16
+#define PSCI_VERSION_MINOR_MASK			\
+		((1U << PSCI_VERSION_MAJOR_SHIFT) - 1)
+#define PSCI_VERSION_MAJOR_MASK			~PSCI_VERSION_MINOR_MASK
+#define PSCI_VERSION_MAJOR(ver)			\
+		(((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT)
+#define PSCI_VERSION_MINOR(ver)			\
+		((ver) & PSCI_VERSION_MINOR_MASK)
+
+/* PSCI return values (inclusive of all PSCI versions) */
+#define PSCI_RET_SUCCESS			0
+#define PSCI_RET_NOT_SUPPORTED			-1
+#define PSCI_RET_INVALID_PARAMS			-2
+#define PSCI_RET_DENIED				-3
+#define PSCI_RET_ALREADY_ON			-4
+#define PSCI_RET_ON_PENDING			-5
+#define PSCI_RET_INTERNAL_FAILURE		-6
+#define PSCI_RET_NOT_PRESENT			-7
+#define PSCI_RET_DISABLED			-8
+
+#endif /* _UAPI_LINUX_PSCI_H */

From 4ccf6abe18fc14863159b2c51a4958214b4c13dc Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:16 +0530
Subject: [PATCH 0603/1185] ARM/ARM64: KVM: Add base for PSCI v0.2 emulation

Currently, the in-kernel PSCI emulation provides PSCI v0.1 interface to
VCPUs. This patch extends current in-kernel PSCI emulation to provide
PSCI v0.2 interface to VCPUs.

By default, ARM/ARM64 KVM will always provide PSCI v0.1 interface for
keeping the ABI backward-compatible.

To select PSCI v0.2 interface for VCPUs, the user space (i.e. QEMU or
KVMTOOL) will have to set KVM_ARM_VCPU_PSCI_0_2 feature when doing VCPU
init using KVM_ARM_VCPU_INIT ioctl.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 7d0f84aae9e231930985eaff63ac91b61aaa15d6)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h   |   2 +-
 arch/arm/include/asm/kvm_psci.h   |   4 ++
 arch/arm/include/uapi/asm/kvm.h   |  10 +--
 arch/arm/kvm/psci.c               | 105 +++++++++++++++++++++++-------
 arch/arm64/include/asm/kvm_host.h |   2 +-
 arch/arm64/include/asm/kvm_psci.h |   4 ++
 arch/arm64/include/uapi/asm/kvm.h |  10 +--
 7 files changed, 105 insertions(+), 32 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 09af14999c9b..193ceaf01bfd 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -36,7 +36,7 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HAVE_ONE_REG
 
-#define KVM_VCPU_MAX_FEATURES 1
+#define KVM_VCPU_MAX_FEATURES 2
 
 #include <kvm/arm_vgic.h>
 
diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h
index 9a83d98bf170..4c0e3e1d1597 100644
--- a/arch/arm/include/asm/kvm_psci.h
+++ b/arch/arm/include/asm/kvm_psci.h
@@ -18,6 +18,10 @@
 #ifndef __ARM_KVM_PSCI_H__
 #define __ARM_KVM_PSCI_H__
 
+#define KVM_ARM_PSCI_0_1	1
+#define KVM_ARM_PSCI_0_2	2
+
+int kvm_psci_version(struct kvm_vcpu *vcpu);
 bool kvm_psci_call(struct kvm_vcpu *vcpu);
 
 #endif /* __ARM_KVM_PSCI_H__ */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index ef0c8785ba16..e6ebdd3471e5 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -20,6 +20,7 @@
 #define __ARM_KVM_H__
 
 #include <linux/types.h>
+#include <linux/psci.h>
 #include <asm/ptrace.h>
 
 #define __KVM_HAVE_GUEST_DEBUG
@@ -83,6 +84,7 @@ struct kvm_regs {
 #define KVM_VGIC_V2_CPU_SIZE		0x2000
 
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_PSCI_0_2		1 /* CPU uses PSCI v0.2 */
 
 struct kvm_vcpu_init {
 	__u32 target;
@@ -201,9 +203,9 @@ struct kvm_arch_memory_slot {
 #define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
 #define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
 
-#define KVM_PSCI_RET_SUCCESS		0
-#define KVM_PSCI_RET_NI			((unsigned long)-1)
-#define KVM_PSCI_RET_INVAL		((unsigned long)-2)
-#define KVM_PSCI_RET_DENIED		((unsigned long)-3)
+#define KVM_PSCI_RET_SUCCESS		PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI			PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL		PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED		PSCI_RET_DENIED
 
 #endif /* __ARM_KVM_H__ */
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 448f60e8d23c..8c42596cdbdf 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -59,7 +59,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	 * turned off.
 	 */
 	if (!vcpu || !vcpu->arch.pause)
-		return KVM_PSCI_RET_INVAL;
+		return PSCI_RET_INVALID_PARAMS;
 
 	target_pc = *vcpu_reg(source_vcpu, 2);
 
@@ -82,7 +82,82 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	wq = kvm_arch_vcpu_wq(vcpu);
 	wake_up_interruptible(wq);
 
-	return KVM_PSCI_RET_SUCCESS;
+	return PSCI_RET_SUCCESS;
+}
+
+int kvm_psci_version(struct kvm_vcpu *vcpu)
+{
+	if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
+		return KVM_ARM_PSCI_0_2;
+
+	return KVM_ARM_PSCI_0_1;
+}
+
+static bool kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
+{
+	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
+	unsigned long val;
+
+	switch (psci_fn) {
+	case PSCI_0_2_FN_PSCI_VERSION:
+		/*
+		 * Bits[31:16] = Major Version = 0
+		 * Bits[15:0] = Minor Version = 2
+		 */
+		val = 2;
+		break;
+	case PSCI_0_2_FN_CPU_OFF:
+		kvm_psci_vcpu_off(vcpu);
+		val = PSCI_RET_SUCCESS;
+		break;
+	case PSCI_0_2_FN_CPU_ON:
+	case PSCI_0_2_FN64_CPU_ON:
+		val = kvm_psci_vcpu_on(vcpu);
+		break;
+	case PSCI_0_2_FN_CPU_SUSPEND:
+	case PSCI_0_2_FN_AFFINITY_INFO:
+	case PSCI_0_2_FN_MIGRATE:
+	case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+	case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
+	case PSCI_0_2_FN_SYSTEM_OFF:
+	case PSCI_0_2_FN_SYSTEM_RESET:
+	case PSCI_0_2_FN64_CPU_SUSPEND:
+	case PSCI_0_2_FN64_AFFINITY_INFO:
+	case PSCI_0_2_FN64_MIGRATE:
+	case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
+	default:
+		return false;
+	}
+
+	*vcpu_reg(vcpu, 0) = val;
+	return true;
+}
+
+static bool kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
+{
+	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
+	unsigned long val;
+
+	switch (psci_fn) {
+	case KVM_PSCI_FN_CPU_OFF:
+		kvm_psci_vcpu_off(vcpu);
+		val = PSCI_RET_SUCCESS;
+		break;
+	case KVM_PSCI_FN_CPU_ON:
+		val = kvm_psci_vcpu_on(vcpu);
+		break;
+	case KVM_PSCI_FN_CPU_SUSPEND:
+	case KVM_PSCI_FN_MIGRATE:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
+	default:
+		return false;
+	}
+
+	*vcpu_reg(vcpu, 0) = val;
+	return true;
 }
 
 /**
@@ -97,26 +172,12 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
  */
 bool kvm_psci_call(struct kvm_vcpu *vcpu)
 {
-	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
-	unsigned long val;
-
-	switch (psci_fn) {
-	case KVM_PSCI_FN_CPU_OFF:
-		kvm_psci_vcpu_off(vcpu);
-		val = KVM_PSCI_RET_SUCCESS;
-		break;
-	case KVM_PSCI_FN_CPU_ON:
-		val = kvm_psci_vcpu_on(vcpu);
-		break;
-	case KVM_PSCI_FN_CPU_SUSPEND:
-	case KVM_PSCI_FN_MIGRATE:
-		val = KVM_PSCI_RET_NI;
-		break;
-
+	switch (kvm_psci_version(vcpu)) {
+	case KVM_ARM_PSCI_0_2:
+		return kvm_psci_0_2_call(vcpu);
+	case KVM_ARM_PSCI_0_1:
+		return kvm_psci_0_1_call(vcpu);
 	default:
 		return false;
-	}
-
-	*vcpu_reg(vcpu, 0) = val;
-	return true;
+	};
 }
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0a1d69751562..92242ce06309 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -39,7 +39,7 @@
 #include <kvm/arm_vgic.h>
 #include <kvm/arm_arch_timer.h>
 
-#define KVM_VCPU_MAX_FEATURES 2
+#define KVM_VCPU_MAX_FEATURES 3
 
 struct kvm_vcpu;
 int kvm_target_cpu(void);
diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
index e301a4816355..e25c658a757b 100644
--- a/arch/arm64/include/asm/kvm_psci.h
+++ b/arch/arm64/include/asm/kvm_psci.h
@@ -18,6 +18,10 @@
 #ifndef __ARM64_KVM_PSCI_H__
 #define __ARM64_KVM_PSCI_H__
 
+#define KVM_ARM_PSCI_0_1	1
+#define KVM_ARM_PSCI_0_2	2
+
+int kvm_psci_version(struct kvm_vcpu *vcpu);
 bool kvm_psci_call(struct kvm_vcpu *vcpu);
 
 #endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index eaf54a30bedc..e6471daf3fb5 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -31,6 +31,7 @@
 #define KVM_NR_SPSR	5
 
 #ifndef __ASSEMBLY__
+#include <linux/psci.h>
 #include <asm/types.h>
 #include <asm/ptrace.h>
 
@@ -77,6 +78,7 @@ struct kvm_regs {
 
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
+#define KVM_ARM_VCPU_PSCI_0_2		2 /* CPU uses PSCI v0.2 */
 
 struct kvm_vcpu_init {
 	__u32 target;
@@ -186,10 +188,10 @@ struct kvm_arch_memory_slot {
 #define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
 #define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
 
-#define KVM_PSCI_RET_SUCCESS		0
-#define KVM_PSCI_RET_NI			((unsigned long)-1)
-#define KVM_PSCI_RET_INVAL		((unsigned long)-2)
-#define KVM_PSCI_RET_DENIED		((unsigned long)-3)
+#define KVM_PSCI_RET_SUCCESS		PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI			PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL		PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED		PSCI_RET_DENIED
 
 #endif
 

From c6debeb5f9cf9a1c1f8867d991563302797e20c3 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:18 +0530
Subject: [PATCH 0604/1185] ARM/ARM64: KVM: Make kvm_psci_call() return
 convention more flexible

Currently, the kvm_psci_call() returns 'true' or 'false' based on whether
the PSCI function call was handled successfully or not. This does not help
us emulate system-level PSCI functions where the actual emulation work will
be done by user space (QEMU or KVMTOOL). Examples of such system-level PSCI
functions are: PSCI v0.2 SYSTEM_OFF and SYSTEM_RESET.

This patch updates kvm_psci_call() to return three types of values:
1) > 0 (success)
2) = 0 (success but exit to user space)
3) < 0 (errors)

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit e8e7fcc5e2710b31ef842ee799db99c07986c364)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_psci.h   |  2 +-
 arch/arm/kvm/handle_exit.c        | 10 +++++++---
 arch/arm/kvm/psci.c               | 28 ++++++++++++++++------------
 arch/arm64/include/asm/kvm_psci.h |  2 +-
 arch/arm64/kvm/handle_exit.c      | 12 ++++++++----
 5 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h
index 4c0e3e1d1597..6bda945d31fa 100644
--- a/arch/arm/include/asm/kvm_psci.h
+++ b/arch/arm/include/asm/kvm_psci.h
@@ -22,6 +22,6 @@
 #define KVM_ARM_PSCI_0_2	2
 
 int kvm_psci_version(struct kvm_vcpu *vcpu);
-bool kvm_psci_call(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
 
 #endif /* __ARM_KVM_PSCI_H__ */
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 0de91fc6de0f..4c979d466cc1 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -38,14 +38,18 @@ static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
+	int ret;
+
 	trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
 		      kvm_vcpu_hvc_get_imm(vcpu));
 
-	if (kvm_psci_call(vcpu))
+	ret = kvm_psci_call(vcpu);
+	if (ret < 0) {
+		kvm_inject_undefined(vcpu);
 		return 1;
+	}
 
-	kvm_inject_undefined(vcpu);
-	return 1;
+	return ret;
 }
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 8c42596cdbdf..14e6fa6c8e35 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -93,7 +93,7 @@ int kvm_psci_version(struct kvm_vcpu *vcpu)
 	return KVM_ARM_PSCI_0_1;
 }
 
-static bool kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
+static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 {
 	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
 	unsigned long val;
@@ -128,14 +128,14 @@ static bool kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 		val = PSCI_RET_NOT_SUPPORTED;
 		break;
 	default:
-		return false;
+		return -EINVAL;
 	}
 
 	*vcpu_reg(vcpu, 0) = val;
-	return true;
+	return 1;
 }
 
-static bool kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
+static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
 {
 	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
 	unsigned long val;
@@ -153,11 +153,11 @@ static bool kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
 		val = PSCI_RET_NOT_SUPPORTED;
 		break;
 	default:
-		return false;
+		return -EINVAL;
 	}
 
 	*vcpu_reg(vcpu, 0) = val;
-	return true;
+	return 1;
 }
 
 /**
@@ -165,12 +165,16 @@ static bool kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
  * @vcpu: Pointer to the VCPU struct
  *
  * Handle PSCI calls from guests through traps from HVC instructions.
- * The calling convention is similar to SMC calls to the secure world where
- * the function number is placed in r0 and this function returns true if the
- * function number specified in r0 is withing the PSCI range, and false
- * otherwise.
+ * The calling convention is similar to SMC calls to the secure world
+ * where the function number is placed in r0.
+ *
+ * This function returns: > 0 (success), 0 (success but exit to user
+ * space), and < 0 (errors)
+ *
+ * Errors:
+ * -EINVAL: Unrecognized PSCI function
  */
-bool kvm_psci_call(struct kvm_vcpu *vcpu)
+int kvm_psci_call(struct kvm_vcpu *vcpu)
 {
 	switch (kvm_psci_version(vcpu)) {
 	case KVM_ARM_PSCI_0_2:
@@ -178,6 +182,6 @@ bool kvm_psci_call(struct kvm_vcpu *vcpu)
 	case KVM_ARM_PSCI_0_1:
 		return kvm_psci_0_1_call(vcpu);
 	default:
-		return false;
+		return -EINVAL;
 	};
 }
diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
index e25c658a757b..bc39e557c56c 100644
--- a/arch/arm64/include/asm/kvm_psci.h
+++ b/arch/arm64/include/asm/kvm_psci.h
@@ -22,6 +22,6 @@
 #define KVM_ARM_PSCI_0_2	2
 
 int kvm_psci_version(struct kvm_vcpu *vcpu);
-bool kvm_psci_call(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
 
 #endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 7bc41eab4c64..182415e1a952 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -30,11 +30,15 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
 
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	if (kvm_psci_call(vcpu))
-		return 1;
+	int ret;
 
-	kvm_inject_undefined(vcpu);
-	return 1;
+	ret = kvm_psci_call(vcpu);
+	if (ret < 0) {
+		kvm_inject_undefined(vcpu);
+		return 1;
+	}
+
+	return ret;
 }
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)

From 7b0aa85882b902333c01f07bef88da07e5c53ebf Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:19 +0530
Subject: [PATCH 0605/1185] KVM: Add KVM_EXIT_SYSTEM_EVENT to user space API
 header

Currently, we don't have an exit reason to notify user space about
a system-level event (e.g. system reset or shutdown) triggered
by the VCPU. This patch adds exit reason KVM_EXIT_SYSTEM_EVENT for
this purpose. We can also inform user space about the 'type' and
architecture specific 'flags' of a system-level event using the
kvm_run structure.

This newly added KVM_EXIT_SYSTEM_EVENT will be used by KVM ARM/ARM64
in-kernel PSCI v0.2 support to reset/shutdown VMs.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 8ad6b634928a25971dc42dce101808b1491f87ec)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/api.txt | 15 +++++++++++++++
 include/uapi/linux/kvm.h          |  8 ++++++++
 2 files changed, 23 insertions(+)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 3c75a17555a8..48e7888291f5 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2636,6 +2636,21 @@ It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an
 external interrupt has just been delivered into the guest. User space
 should put the acknowledged interrupt vector into the 'epr' field.
 
+		/* KVM_EXIT_SYSTEM_EVENT */
+		struct {
+#define KVM_SYSTEM_EVENT_SHUTDOWN       1
+#define KVM_SYSTEM_EVENT_RESET          2
+			__u32 type;
+			__u64 flags;
+		} system_event;
+
+If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
+a system-level event using some architecture specific mechanism (hypercall
+or some special instruction). In case of ARM/ARM64, this is triggered using
+HVC instruction based PSCI call from the vcpu. The 'type' field describes
+the system-level event type. The 'flags' field describes architecture
+specific flags for the system-level event.
+
 		/* Fix the size of the union. */
 		char padding[256];
 	};
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cb7ebcc7a9db..0a70874e4a63 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -171,6 +171,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_WATCHDOG         21
 #define KVM_EXIT_S390_TSCH        22
 #define KVM_EXIT_EPR              23
+#define KVM_EXIT_SYSTEM_EVENT     24
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -301,6 +302,13 @@ struct kvm_run {
 		struct {
 			__u32 epr;
 		} epr;
+		/* KVM_EXIT_SYSTEM_EVENT */
+		struct {
+#define KVM_SYSTEM_EVENT_SHUTDOWN       1
+#define KVM_SYSTEM_EVENT_RESET          2
+			__u32 type;
+			__u64 flags;
+		} system_event;
 		/* Fix the size of the union. */
 		char padding[256];
 	};

From 2834b56a4051c66feb99c5546723fd6afa434d0e Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:20 +0530
Subject: [PATCH 0606/1185] ARM/ARM64: KVM: Emulate PSCI v0.2 SYSTEM_OFF and
 SYSTEM_RESET

The PSCI v0.2 SYSTEM_OFF and SYSTEM_RESET functions are system-level
functions hence cannot be fully emulated by in-kernel PSCI emulation code.

To tackle this, we forward PSCI v0.2 SYSTEM_OFF and SYSTEM_RESET function
calls from vcpu to user space (i.e. QEMU or KVMTOOL) via kvm_run structure
using KVM_EXIT_SYSTEM_EVENT exit reasons.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 4b1238269ed340d59ef829fd9c30a39cfb2923a8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/psci.c | 46 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 14e6fa6c8e35..59362131b79f 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -85,6 +85,23 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	return PSCI_RET_SUCCESS;
 }
 
+static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
+{
+	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
+	vcpu->run->system_event.type = type;
+	vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
+static void kvm_psci_system_off(struct kvm_vcpu *vcpu)
+{
+	kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN);
+}
+
+static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
+{
+	kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
+}
+
 int kvm_psci_version(struct kvm_vcpu *vcpu)
 {
 	if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
@@ -95,6 +112,7 @@ int kvm_psci_version(struct kvm_vcpu *vcpu)
 
 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 {
+	int ret = 1;
 	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
 	unsigned long val;
 
@@ -114,13 +132,35 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 	case PSCI_0_2_FN64_CPU_ON:
 		val = kvm_psci_vcpu_on(vcpu);
 		break;
+	case PSCI_0_2_FN_SYSTEM_OFF:
+		kvm_psci_system_off(vcpu);
+		/*
+		 * We should'nt be going back to guest VCPU after
+		 * receiving SYSTEM_OFF request.
+		 *
+		 * If user space accidently/deliberately resumes
+		 * guest VCPU after SYSTEM_OFF request then guest
+		 * VCPU should see internal failure from PSCI return
+		 * value. To achieve this, we preload r0 (or x0) with
+		 * PSCI return value INTERNAL_FAILURE.
+		 */
+		val = PSCI_RET_INTERNAL_FAILURE;
+		ret = 0;
+		break;
+	case PSCI_0_2_FN_SYSTEM_RESET:
+		kvm_psci_system_reset(vcpu);
+		/*
+		 * Same reason as SYSTEM_OFF for preloading r0 (or x0)
+		 * with PSCI return value INTERNAL_FAILURE.
+		 */
+		val = PSCI_RET_INTERNAL_FAILURE;
+		ret = 0;
+		break;
 	case PSCI_0_2_FN_CPU_SUSPEND:
 	case PSCI_0_2_FN_AFFINITY_INFO:
 	case PSCI_0_2_FN_MIGRATE:
 	case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
 	case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
-	case PSCI_0_2_FN_SYSTEM_OFF:
-	case PSCI_0_2_FN_SYSTEM_RESET:
 	case PSCI_0_2_FN64_CPU_SUSPEND:
 	case PSCI_0_2_FN64_AFFINITY_INFO:
 	case PSCI_0_2_FN64_MIGRATE:
@@ -132,7 +172,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 	}
 
 	*vcpu_reg(vcpu, 0) = val;
-	return 1;
+	return ret;
 }
 
 static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)

From 4c7726b875f0ae0aa10b553d0ddf1662ca1c8536 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:21 +0530
Subject: [PATCH 0607/1185] ARM/ARM64: KVM: Emulate PSCI v0.2 AFFINITY_INFO

This patch adds emulation of PSCI v0.2 AFFINITY_INFO function call
for KVM ARM/ARM64. This is a VCPU-level function call which will be
used to determine current state of given affinity level.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit e6bc13c8a70eabc6a39098ccedf6129c734e3db3)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/psci.c | 52 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 50 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 59362131b79f..3b6a0cf25c7d 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -27,6 +27,16 @@
  * as described in ARM document number ARM DEN 0022A.
  */
 
+#define AFFINITY_MASK(level)	~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
+
+static unsigned long psci_affinity_mask(unsigned long affinity_level)
+{
+	if (affinity_level <= 3)
+		return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level);
+
+	return 0;
+}
+
 static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.pause = true;
@@ -85,6 +95,42 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	return PSCI_RET_SUCCESS;
 }
 
+static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
+{
+	int i;
+	unsigned long mpidr;
+	unsigned long target_affinity;
+	unsigned long target_affinity_mask;
+	unsigned long lowest_affinity_level;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_vcpu *tmp;
+
+	target_affinity = *vcpu_reg(vcpu, 1);
+	lowest_affinity_level = *vcpu_reg(vcpu, 2);
+
+	/* Determine target affinity mask */
+	target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
+	if (!target_affinity_mask)
+		return PSCI_RET_INVALID_PARAMS;
+
+	/* Ignore other bits of target affinity */
+	target_affinity &= target_affinity_mask;
+
+	/*
+	 * If one or more VCPU matching target affinity are running
+	 * then ON else OFF
+	 */
+	kvm_for_each_vcpu(i, tmp, kvm) {
+		mpidr = kvm_vcpu_get_mpidr(tmp);
+		if (((mpidr & target_affinity_mask) == target_affinity) &&
+		    !tmp->arch.pause) {
+			return PSCI_0_2_AFFINITY_LEVEL_ON;
+		}
+	}
+
+	return PSCI_0_2_AFFINITY_LEVEL_OFF;
+}
+
 static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
 {
 	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
@@ -132,6 +178,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 	case PSCI_0_2_FN64_CPU_ON:
 		val = kvm_psci_vcpu_on(vcpu);
 		break;
+	case PSCI_0_2_FN_AFFINITY_INFO:
+	case PSCI_0_2_FN64_AFFINITY_INFO:
+		val = kvm_psci_vcpu_affinity_info(vcpu);
+		break;
 	case PSCI_0_2_FN_SYSTEM_OFF:
 		kvm_psci_system_off(vcpu);
 		/*
@@ -157,12 +207,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 		ret = 0;
 		break;
 	case PSCI_0_2_FN_CPU_SUSPEND:
-	case PSCI_0_2_FN_AFFINITY_INFO:
 	case PSCI_0_2_FN_MIGRATE:
 	case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
 	case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
 	case PSCI_0_2_FN64_CPU_SUSPEND:
-	case PSCI_0_2_FN64_AFFINITY_INFO:
 	case PSCI_0_2_FN64_MIGRATE:
 	case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
 		val = PSCI_RET_NOT_SUPPORTED;

From f11d09d9c9ce2dcca77dd41fcb16c3811cc513d5 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:22 +0530
Subject: [PATCH 0608/1185] ARM/ARM64: KVM: Emulate PSCI v0.2 MIGRATE_INFO_TYPE
 and related functions

This patch adds emulation of PSCI v0.2 MIGRATE, MIGRATE_INFO_TYPE, and
MIGRATE_INFO_UP_CPU function calls for KVM ARM/ARM64.

KVM ARM/ARM64 being a hypervisor (and not a Trusted OS), we cannot provide
these functions, hence we emulate them in the following way:
1. MIGRATE - Returns "Not Supported"
2. MIGRATE_INFO_TYPE - Returns 2, i.e. Trusted OS is not present
3. MIGRATE_INFO_UP_CPU - Returns "Not Supported"

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit bab0b43012a8ad64877fa46134370a7f5c6ce861)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/psci.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 3b6a0cf25c7d..cce901a510fa 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -182,6 +182,22 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 	case PSCI_0_2_FN64_AFFINITY_INFO:
 		val = kvm_psci_vcpu_affinity_info(vcpu);
 		break;
+	case PSCI_0_2_FN_MIGRATE:
+	case PSCI_0_2_FN64_MIGRATE:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
+	case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+		/*
+		 * Trusted OS is MP hence does not require migration
+	         * or
+		 * Trusted OS is not present
+		 */
+		val = PSCI_0_2_TOS_MP;
+		break;
+	case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
+	case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
 	case PSCI_0_2_FN_SYSTEM_OFF:
 		kvm_psci_system_off(vcpu);
 		/*
@@ -207,12 +223,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 		ret = 0;
 		break;
 	case PSCI_0_2_FN_CPU_SUSPEND:
-	case PSCI_0_2_FN_MIGRATE:
-	case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
-	case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
 	case PSCI_0_2_FN64_CPU_SUSPEND:
-	case PSCI_0_2_FN64_MIGRATE:
-	case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
 		val = PSCI_RET_NOT_SUPPORTED;
 		break;
 	default:

From 69590f71e0ef474e0d44514484308c2eb826306d Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:23 +0530
Subject: [PATCH 0609/1185] ARM/ARM64: KVM: Fix CPU_ON emulation for PSCI v0.2

As per PSCI v0.2, the source CPU provides the physical address of
"entry point" and "context id" for starting a target CPU. Also, if
the target CPU is already running then we should return ALREADY_ON.

Current emulation of CPU_ON function does not consider physical
address of "context id" and returns INVALID_PARAMETERS if target
CPU is already running.

This patch updates kvm_psci_vcpu_on() such that it works for both
PSCI v0.1 and PSCI v0.2.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit aa8aeefe5e567637bbec7d7a3031cc057e3af303)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/psci.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index cce901a510fa..1067579c7336 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -48,6 +48,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	struct kvm_vcpu *vcpu = NULL, *tmp;
 	wait_queue_head_t *wq;
 	unsigned long cpu_id;
+	unsigned long context_id;
 	unsigned long mpidr;
 	phys_addr_t target_pc;
 	int i;
@@ -68,10 +69,17 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	 * Make sure the caller requested a valid CPU and that the CPU is
 	 * turned off.
 	 */
-	if (!vcpu || !vcpu->arch.pause)
+	if (!vcpu)
 		return PSCI_RET_INVALID_PARAMS;
+	if (!vcpu->arch.pause) {
+		if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
+			return PSCI_RET_ALREADY_ON;
+		else
+			return PSCI_RET_INVALID_PARAMS;
+	}
 
 	target_pc = *vcpu_reg(source_vcpu, 2);
+	context_id = *vcpu_reg(source_vcpu, 3);
 
 	kvm_reset_vcpu(vcpu);
 
@@ -86,6 +94,11 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 		kvm_vcpu_set_be(vcpu);
 
 	*vcpu_pc(vcpu) = target_pc;
+	/*
+	 * NOTE: We always update r0 (or x0) because for PSCI v0.1
+	 * the general puspose registers are undefined upon CPU_ON.
+	 */
+	*vcpu_reg(vcpu, 0) = context_id;
 	vcpu->arch.pause = false;
 	smp_mb();		/* Make sure the above is visible */
 

From 6824eda31449e4585085a49739d60e640b1c7583 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:24 +0530
Subject: [PATCH 0610/1185] ARM/ARM64: KVM: Emulate PSCI v0.2 CPU_SUSPEND

This patch adds emulation of PSCI v0.2 CPU_SUSPEND function call for
KVM ARM/ARM64. This is a CPU-level function call which can suspend
current CPU or current CPU cluster. We don't have VCPU clusters in
KVM so we only suspend the current VCPU.

The CPU_SUSPEND emulation is not tested much because currently there
is no CPUIDLE driver in the Linux kernel that uses PSCI CPU_SUSPEND. The
PSCI CPU_SUSPEND implementation in the ARM64 kernel was tested using a
simple CPUIDLE driver which is not published due to unstable DT-bindings
for PSCI.
(For more info, http://lwn.net/Articles/574950/)

For simplicity, we implement CPU_SUSPEND emulation similar to WFI
(Wait-for-interrupt) emulation and we also treat power-down request
to be same as stand-by request. This is consistent with section
5.4.1 and section 5.4.2 of PSCI v0.2 specification.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit b376d02b53b87f8684f91f13ba4ee43331850fcd)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/psci.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 1067579c7336..09cf37737ee2 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -37,6 +37,26 @@ static unsigned long psci_affinity_mask(unsigned long affinity_level)
 	return 0;
 }
 
+static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * NOTE: For simplicity, we make VCPU suspend emulation to be
+	 * same-as WFI (Wait-for-interrupt) emulation.
+	 *
+	 * This means for KVM the wakeup events are interrupts and
+	 * this is consistent with intended use of StateID as described
+	 * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A).
+	 *
+	 * Further, we also treat power-down request to be same as
+	 * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2
+	 * specification (ARM DEN 0022A). This means all suspend states
+	 * for KVM will preserve the register state.
+	 */
+	kvm_vcpu_block(vcpu);
+
+	return PSCI_RET_SUCCESS;
+}
+
 static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.pause = true;
@@ -183,6 +203,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 		 */
 		val = 2;
 		break;
+	case PSCI_0_2_FN_CPU_SUSPEND:
+	case PSCI_0_2_FN64_CPU_SUSPEND:
+		val = kvm_psci_vcpu_suspend(vcpu);
+		break;
 	case PSCI_0_2_FN_CPU_OFF:
 		kvm_psci_vcpu_off(vcpu);
 		val = PSCI_RET_SUCCESS;
@@ -235,10 +259,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 		val = PSCI_RET_INTERNAL_FAILURE;
 		ret = 0;
 		break;
-	case PSCI_0_2_FN_CPU_SUSPEND:
-	case PSCI_0_2_FN64_CPU_SUSPEND:
-		val = PSCI_RET_NOT_SUPPORTED;
-		break;
 	default:
 		return -EINVAL;
 	}

From f4aa5773a7ac1dad4bc6fc726b178547289e6045 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:25 +0530
Subject: [PATCH 0611/1185] ARM/ARM64: KVM: Advertise KVM_CAP_ARM_PSCI_0_2 to
 user space

We have PSCI v0.2 emulation available in KVM ARM/ARM64
hence advertise this to user space (i.e. QEMU or KVMTOOL)
via KVM_CHECK_EXTENSION ioctl.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 4447a208f7fc2e2dff8c6a8df2a1fd6dd72fb3e2)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9804406ff37e..354dc42b6395 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -197,6 +197,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_ARM_PSCI:
+	case KVM_CAP_ARM_PSCI_0_2:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:

From 5ab6e0ee8a2375651052456289f6d397f78f737e Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 16 Jan 2014 13:44:20 +0100
Subject: [PATCH 0612/1185] kvm/irqchip: Speed up KVM_SET_GSI_ROUTING

When starting lots of dataplane devices the bootup takes very long on
Christian's s390 with irqfd patches. With larger setups he is even
able to trigger some timeouts in some components.  Turns out that the
KVM_SET_GSI_ROUTING ioctl takes very long (strace claims up to 0.1 sec)
when having multiple CPUs.  This is caused by the synchronize_rcu and
the HZ=100 of s390.  By changing the code to use a private srcu we can
speed things up.  This patch reduces the boot time till mounting root
from 8 to 2 seconds on my s390 guest with 100 disks.

Uses of hlist_for_each_entry_rcu, hlist_add_head_rcu, hlist_del_init_rcu
are fine because they do not have lockdep checks (hlist_for_each_entry_rcu
uses rcu_dereference_raw rather than rcu_dereference, and write-sides
do not do rcu lockdep at all).

Note that we're hardly relying on the "sleepable" part of srcu.  We just
want SRCU's faster detection of grace periods.

Testing was done by Andrew Theurer using netperf tests STREAM, MAERTS
and RR.  The difference between results "before" and "after" the patch
has mean -0.2% and standard deviation 0.6%.  Using a paired t-test on the
data points says that there is a 2.5% probability that the patch is the
cause of the performance difference (rather than a random fluctuation).

(Restricting the t-test to RR, which is the most likely to be affected,
changes the numbers to respectively -0.3% mean, 0.7% stdev, and 8%
probability that the numbers actually say something about the patch.
The probability increases mostly because there are fewer data points).

Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com> # s390
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 719d93cd5f5c5c8775b7a38192069e8e1d1ac46e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/eventfd.c       | 25 +++++++++++++++----------
 virt/kvm/irq_comm.c      | 17 +++++++++--------
 virt/kvm/irqchip.c       | 31 ++++++++++++++++---------------
 virt/kvm/kvm_main.c      | 16 ++++++++++------
 5 files changed, 51 insertions(+), 39 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index eef946f92c0c..a3c83491a791 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -346,6 +346,7 @@ struct kvm {
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots *memslots;
 	struct srcu_struct srcu;
+	struct srcu_struct irq_srcu;
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
 	u32 bsp_vcpu_id;
 #endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 64ee720b75c7..b51c19ffd8fd 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -31,6 +31,7 @@
 #include <linux/list.h>
 #include <linux/eventfd.h>
 #include <linux/kernel.h>
+#include <linux/srcu.h>
 #include <linux/slab.h>
 
 #include "iodev.h"
@@ -118,19 +119,22 @@ static void
 irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 {
 	struct _irqfd_resampler *resampler;
+	struct kvm *kvm;
 	struct _irqfd *irqfd;
+	int idx;
 
 	resampler = container_of(kian, struct _irqfd_resampler, notifier);
+	kvm = resampler->kvm;
 
-	kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+	kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
 		    resampler->notifier.gsi, 0, false);
 
-	rcu_read_lock();
+	idx = srcu_read_lock(&kvm->irq_srcu);
 
 	list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
 		eventfd_signal(irqfd->resamplefd, 1);
 
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
 static void
@@ -142,7 +146,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
 	mutex_lock(&kvm->irqfds.resampler_lock);
 
 	list_del_rcu(&irqfd->resampler_link);
-	synchronize_rcu();
+	synchronize_srcu(&kvm->irq_srcu);
 
 	if (list_empty(&resampler->list)) {
 		list_del(&resampler->link);
@@ -221,17 +225,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 	unsigned long flags = (unsigned long)key;
 	struct kvm_kernel_irq_routing_entry *irq;
 	struct kvm *kvm = irqfd->kvm;
+	int idx;
 
 	if (flags & POLLIN) {
-		rcu_read_lock();
-		irq = rcu_dereference(irqfd->irq_entry);
+		idx = srcu_read_lock(&kvm->irq_srcu);
+		irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu);
 		/* An event has been signaled, inject an interrupt */
 		if (irq)
 			kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
 					false);
 		else
 			schedule_work(&irqfd->inject);
-		rcu_read_unlock();
+		srcu_read_unlock(&kvm->irq_srcu, idx);
 	}
 
 	if (flags & POLLHUP) {
@@ -363,7 +368,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 		}
 
 		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
-		synchronize_rcu();
+		synchronize_srcu(&kvm->irq_srcu);
 
 		mutex_unlock(&kvm->irqfds.resampler_lock);
 	}
@@ -465,7 +470,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
 			 * another thread calls kvm_irq_routing_update before
 			 * we flush workqueue below (we synchronize with
 			 * kvm_irq_routing_update using irqfds.lock).
-			 * It is paired with synchronize_rcu done by caller
+			 * It is paired with synchronize_srcu done by caller
 			 * of that function.
 			 */
 			rcu_assign_pointer(irqfd->irq_entry, NULL);
@@ -524,7 +529,7 @@ kvm_irqfd_release(struct kvm *kvm)
 
 /*
  * Change irq_routing and irqfd.
- * Caller must invoke synchronize_rcu afterwards.
+ * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
  */
 void kvm_irq_routing_update(struct kvm *kvm,
 			    struct kvm_irq_routing_table *irq_rt)
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index e2e6b4473a96..ced4a542a031 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -163,6 +163,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	struct kvm_kernel_irq_routing_entry *e;
 	int ret = -EINVAL;
 	struct kvm_irq_routing_table *irq_rt;
+	int idx;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
@@ -174,8 +175,8 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	 * Since there's no easy way to do this, we only support injecting MSI
 	 * which is limited to 1:1 GSI mapping.
 	 */
-	rcu_read_lock();
-	irq_rt = rcu_dereference(kvm->irq_routing);
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 	if (irq < irq_rt->nr_rt_entries)
 		hlist_for_each_entry(e, &irq_rt->map[irq], link) {
 			if (likely(e->type == KVM_IRQ_ROUTING_MSI))
@@ -184,7 +185,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 				ret = -EWOULDBLOCK;
 			break;
 		}
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 	return ret;
 }
 
@@ -253,22 +254,22 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 	mutex_lock(&kvm->irq_lock);
 	hlist_del_rcu(&kimn->link);
 	mutex_unlock(&kvm->irq_lock);
-	synchronize_rcu();
+	synchronize_srcu(&kvm->irq_srcu);
 }
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 			     bool mask)
 {
 	struct kvm_irq_mask_notifier *kimn;
-	int gsi;
+	int idx, gsi;
 
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
 			if (kimn->irq == gsi)
 				kimn->func(kimn, mask);
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
 int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 20dc9e4a8f6c..b43c275775cd 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -26,6 +26,7 @@
 
 #include <linux/kvm_host.h>
 #include <linux/slab.h>
+#include <linux/srcu.h>
 #include <linux/export.h>
 #include <trace/events/kvm.h>
 #include "irq.h"
@@ -33,19 +34,19 @@
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
-	int gsi;
+	int gsi, idx;
 
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
 			if (kian->gsi == gsi) {
-				rcu_read_unlock();
+				srcu_read_unlock(&kvm->irq_srcu, idx);
 				return true;
 			}
 
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	return false;
 }
@@ -54,18 +55,18 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
-	int gsi;
+	int gsi, idx;
 
 	trace_kvm_ack_irq(irqchip, pin);
 
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
 			if (kian->gsi == gsi)
 				kian->irq_acked(kian);
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
@@ -85,7 +86,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 	mutex_lock(&kvm->irq_lock);
 	hlist_del_init_rcu(&kian->link);
 	mutex_unlock(&kvm->irq_lock);
-	synchronize_rcu();
+	synchronize_srcu(&kvm->irq_srcu);
 #ifdef __KVM_HAVE_IOAPIC
 	kvm_vcpu_request_scan_ioapic(kvm);
 #endif
@@ -115,7 +116,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 		bool line_status)
 {
 	struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
-	int ret = -1, i = 0;
+	int ret = -1, i = 0, idx;
 	struct kvm_irq_routing_table *irq_rt;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
@@ -124,12 +125,12 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 	 * IOAPIC.  So set the bit in both. The guest will ignore
 	 * writes to the unused one.
 	 */
-	rcu_read_lock();
-	irq_rt = rcu_dereference(kvm->irq_routing);
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 	if (irq < irq_rt->nr_rt_entries)
 		hlist_for_each_entry(e, &irq_rt->map[irq], link)
 			irq_set[i++] = *e;
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	while(i--) {
 		int r;
@@ -226,7 +227,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
 	kvm_irq_routing_update(kvm, new);
 	mutex_unlock(&kvm->irq_lock);
 
-	synchronize_rcu();
+	synchronize_srcu_expedited(&kvm->irq_srcu);
 
 	new = old;
 	r = 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9f5fab0a4fda..17b107fa7a2b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -456,11 +456,11 @@ static struct kvm *kvm_create_vm(unsigned long type)
 
 	r = kvm_arch_init_vm(kvm, type);
 	if (r)
-		goto out_err_nodisable;
+		goto out_err_no_disable;
 
 	r = hardware_enable_all();
 	if (r)
-		goto out_err_nodisable;
+		goto out_err_no_disable;
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
@@ -472,10 +472,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	r = -ENOMEM;
 	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 	if (!kvm->memslots)
-		goto out_err_nosrcu;
+		goto out_err_no_srcu;
 	kvm_init_memslots_id(kvm);
 	if (init_srcu_struct(&kvm->srcu))
-		goto out_err_nosrcu;
+		goto out_err_no_srcu;
+	if (init_srcu_struct(&kvm->irq_srcu))
+		goto out_err_no_irq_srcu;
 	for (i = 0; i < KVM_NR_BUSES; i++) {
 		kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
 					GFP_KERNEL);
@@ -504,10 +506,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	return kvm;
 
 out_err:
+	cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
 	cleanup_srcu_struct(&kvm->srcu);
-out_err_nosrcu:
+out_err_no_srcu:
 	hardware_disable_all();
-out_err_nodisable:
+out_err_no_disable:
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kfree(kvm->buses[i]);
 	kfree(kvm->memslots);

From ccc916d9aac9d95baab922ab859f0ef37bb22558 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 2 May 2014 16:24:10 +0100
Subject: [PATCH 0613/1185] arm64: barriers: make use of barrier options with
 explicit barriers

When calling our low-level barrier macros directly, we can often suffice
with more relaxed behaviour than the default "all accesses, full system"
option.

This patch updates the users of dsb() to specify the option which they
actually require.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 98f7685ee69f871ba991089cb9685f0da07517ea)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 03244582bc55..c59a1bdab5eb 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -71,13 +71,13 @@ static u32 get_ccsidr(u32 csselr)
 static void do_dc_cisw(u32 val)
 {
 	asm volatile("dc cisw, %x0" : : "r" (val));
-	dsb();
+	dsb(ish);
 }
 
 static void do_dc_csw(u32 val)
 {
 	asm volatile("dc csw, %x0" : : "r" (val));
-	dsb();
+	dsb(ish);
 }
 
 /* See note at ARM ARM B1.14.4 */

From 17acc52b78027a728c419a0d7b4d69d30b94294a Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 2 May 2014 16:24:14 +0100
Subject: [PATCH 0614/1185] arm64: kvm: use inner-shareable barriers for
 inner-shareable maintenance

In order to ensure completion of inner-shareable maintenance instructions
(cache and TLB) on AArch64, we can use the -ish suffix to the dsb
instruction.

This patch relaxes our dsb sy instructions to dsb ish where possible.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit ee9e101c11478680d579bd20bb38a4d3e2514fe3)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp.S | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 2c56012cb2d2..b0d1512acf08 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -630,9 +630,15 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
 	 * whole of Stage-1. Weep...
 	 */
 	tlbi	ipas2e1is, x1
-	dsb	sy
+	/*
+	 * We have to ensure completion of the invalidation at Stage-2,
+	 * since a table walk on another CPU could refill a TLB with a
+	 * complete (S1 + S2) walk based on the old Stage-2 mapping if
+	 * the Stage-1 invalidation happened first.
+	 */
+	dsb	ish
 	tlbi	vmalle1is
-	dsb	sy
+	dsb	ish
 	isb
 
 	msr	vttbr_el2, xzr
@@ -643,7 +649,7 @@ ENTRY(__kvm_flush_vm_context)
 	dsb	ishst
 	tlbi	alle1is
 	ic	ialluis
-	dsb	sy
+	dsb	ish
 	ret
 ENDPROC(__kvm_flush_vm_context)
 

From 502f5822c7ac7b11040d4abb7ca1d09d56de251c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 20 May 2014 18:06:03 +0100
Subject: [PATCH 0615/1185] arm64: KVM: Enable minimalistic support for
 Cortex-A53

In order to allow KVM to run on Cortex-A53 implementations, wire the
minimal support required.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 1252b3313642c3d0dff5b951b625468bf0dcd059)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/cputype.h     | 1 +
 arch/arm64/include/uapi/asm/kvm.h    | 3 ++-
 arch/arm64/kvm/guest.c               | 2 ++
 arch/arm64/kvm/sys_regs_generic_v8.c | 2 ++
 4 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 5fe138e0b828..343f7f737970 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -41,6 +41,7 @@
 
 #define ARM_CPU_PART_AEM_V8	0xD0F0
 #define ARM_CPU_PART_FOUNDATION	0xD000
+#define ARM_CPU_PART_CORTEX_A53	0xD030
 #define ARM_CPU_PART_CORTEX_A57	0xD070
 
 #define APM_CPU_PART_POTENZA	0x0000
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index e6471daf3fb5..e633ff8cdec8 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -57,8 +57,9 @@ struct kvm_regs {
 #define KVM_ARM_TARGET_FOUNDATION_V8	1
 #define KVM_ARM_TARGET_CORTEX_A57	2
 #define KVM_ARM_TARGET_XGENE_POTENZA	3
+#define KVM_ARM_TARGET_CORTEX_A53	4
 
-#define KVM_ARM_NUM_TARGETS		4
+#define KVM_ARM_NUM_TARGETS		5
 
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 #define KVM_ARM_DEVICE_TYPE_SHIFT	0
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 08745578d54d..60b5c31f3c10 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -214,6 +214,8 @@ int __attribute_const__ kvm_target_cpu(void)
 			return KVM_ARM_TARGET_AEM_V8;
 		case ARM_CPU_PART_FOUNDATION:
 			return KVM_ARM_TARGET_FOUNDATION_V8;
+		case ARM_CPU_PART_CORTEX_A53:
+			return KVM_ARM_TARGET_CORTEX_A53;
 		case ARM_CPU_PART_CORTEX_A57:
 			return KVM_ARM_TARGET_CORTEX_A57;
 		};
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 8fe6f76b0edc..475fd2929310 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -88,6 +88,8 @@ static int __init sys_reg_genericv8_init(void)
 					  &genericv8_target_table);
 	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8,
 					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53,
+					  &genericv8_target_table);
 	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
 					  &genericv8_target_table);
 	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,

From c3bd5cf97635e0d04a2b0a7025d4a7512a6e6345 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 3 Jun 2014 13:44:17 +0200
Subject: [PATCH 0616/1185] KVM: add missing cleanup_srcu_struct

Reported-by: hrg <hrgstephen@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 820b3fcdeb80d30410f4427d2cbf9161c35fdeef)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 17b107fa7a2b..3db56912caed 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -607,6 +607,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	kvm_arch_destroy_vm(kvm);
 	kvm_destroy_devices(kvm);
 	kvm_free_physmem(kvm);
+	cleanup_srcu_struct(&kvm->irq_srcu);
 	cleanup_srcu_struct(&kvm->srcu);
 	kvm_arch_free_vm(kvm);
 	hardware_disable_all();

From 892f5c6841880062cbe947d08131c2c55613ae94 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 12 May 2014 16:05:13 +0200
Subject: [PATCH 0617/1185] KVM: prepare for KVM_(S|G)ET_MP_STATE on other
 architectures

Highlight the aspects of the ioctls that are actually specific to x86
and ia64. As defined restrictions (irqchip) and mp states may not apply
to other architectures, these parts are flagged to belong to x86 and ia64.

In preparation for the use of KVM_(S|G)ET_MP_STATE by s390.
Fix a spelling error (KVM_SET_MP_STATE vs. KVM_SET_MPSTATE) on the way.

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
(cherry picked from commit 0b4820d6d8b6448bc9f7fac1bb1a801a53b425e1)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/api.txt | 21 ++++++++++++---------
 include/uapi/linux/kvm.h          |  3 ++-
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 48e7888291f5..8d135672b69a 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -969,18 +969,20 @@ uniprocessor guests).
 
 Possible values are:
 
- - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running
+ - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86, ia64]
  - KVM_MP_STATE_UNINITIALIZED:   the vcpu is an application processor (AP)
-                                 which has not yet received an INIT signal
+                                 which has not yet received an INIT signal [x86,
+                                 ia64]
  - KVM_MP_STATE_INIT_RECEIVED:   the vcpu has received an INIT signal, and is
-                                 now ready for a SIPI
+                                 now ready for a SIPI [x86, ia64]
  - KVM_MP_STATE_HALTED:          the vcpu has executed a HLT instruction and
-                                 is waiting for an interrupt
+                                 is waiting for an interrupt [x86, ia64]
  - KVM_MP_STATE_SIPI_RECEIVED:   the vcpu has just received a SIPI (vector
-                                 accessible via KVM_GET_VCPU_EVENTS)
+                                 accessible via KVM_GET_VCPU_EVENTS) [x86, ia64]
 
-This ioctl is only useful after KVM_CREATE_IRQCHIP.  Without an in-kernel
-irqchip, the multiprocessing state must be maintained by userspace.
+On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+in-kernel irqchip, the multiprocessing state must be maintained by userspace on
+these architectures.
 
 
 4.39 KVM_SET_MP_STATE
@@ -994,8 +996,9 @@ Returns: 0 on success; -1 on error
 Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
 arguments.
 
-This ioctl is only useful after KVM_CREATE_IRQCHIP.  Without an in-kernel
-irqchip, the multiprocessing state must be maintained by userspace.
+On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+in-kernel irqchip, the multiprocessing state must be maintained by userspace on
+these architectures.
 
 
 4.40 KVM_SET_IDENTITY_MAP_ADDR
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0a70874e4a63..521e4c0a08ac 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -399,8 +399,9 @@ struct kvm_vapic_addr {
 	__u64 vapic_addr;
 };
 
-/* for KVM_SET_MPSTATE */
+/* for KVM_SET_MP_STATE */
 
+/* not all states are valid on all architectures */
 #define KVM_MP_STATE_RUNNABLE          0
 #define KVM_MP_STATE_UNINITIALIZED     1
 #define KVM_MP_STATE_INIT_RECEIVED     2

From bdcfc46a6f2760588d3e59e2ff7abf235a2a513a Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 9 May 2014 23:31:31 +0200
Subject: [PATCH 0618/1185] arm/arm64: KVM: Fix and refactor unmap_range

unmap_range() was utterly broken, to quote Marc, and broke in all sorts
of situations.  It was also quite complicated to follow and didn't
follow the usual scheme of having a separate iterating function for each
level of page tables.

Address this by refactoring the code and introducing a pgd_clear()
function.

Reviewed-by: Jungseok Lee <jays.lee@samsung.com>
Reviewed-by: Mario Smarduch <m.smarduch@samsung.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 4f853a714bf16338ff5261128e6c7ae2569e9505)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h   |  12 +++
 arch/arm/kvm/mmu.c               | 165 ++++++++++++++++---------------
 arch/arm64/include/asm/kvm_mmu.h |  15 +++
 3 files changed, 115 insertions(+), 77 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 5c7aa3c1519f..5cc0b0f5f72f 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -127,6 +127,18 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
 })
 
+static inline bool kvm_page_empty(void *ptr)
+{
+	struct page *ptr_page = virt_to_page(ptr);
+	return page_count(ptr_page) == 1;
+}
+
+
+#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
+#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#define kvm_pud_table_empty(pudp) (0)
+
+
 struct kvm;
 
 #define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 16f804938b8f..23360610aeac 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -90,104 +90,115 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 	return p;
 }
 
-static bool page_empty(void *ptr)
+static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
 {
-	struct page *ptr_page = virt_to_page(ptr);
-	return page_count(ptr_page) == 1;
+	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);
+	pgd_clear(pgd);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	pud_free(NULL, pud_table);
+	put_page(virt_to_page(pgd));
 }
 
 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
-	if (pud_huge(*pud)) {
-		pud_clear(pud);
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	} else {
-		pmd_t *pmd_table = pmd_offset(pud, 0);
-		pud_clear(pud);
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-		pmd_free(NULL, pmd_table);
-	}
+	pmd_t *pmd_table = pmd_offset(pud, 0);
+	VM_BUG_ON(pud_huge(*pud));
+	pud_clear(pud);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	pmd_free(NULL, pmd_table);
 	put_page(virt_to_page(pud));
 }
 
 static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 {
-	if (kvm_pmd_huge(*pmd)) {
-		pmd_clear(pmd);
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	} else {
-		pte_t *pte_table = pte_offset_kernel(pmd, 0);
-		pmd_clear(pmd);
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-		pte_free_kernel(NULL, pte_table);
-	}
+	pte_t *pte_table = pte_offset_kernel(pmd, 0);
+	VM_BUG_ON(kvm_pmd_huge(*pmd));
+	pmd_clear(pmd);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	pte_free_kernel(NULL, pte_table);
 	put_page(virt_to_page(pmd));
 }
 
-static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
+static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
+		       phys_addr_t addr, phys_addr_t end)
 {
-	if (pte_present(*pte)) {
-		kvm_set_pte(pte, __pte(0));
-		put_page(virt_to_page(pte));
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	}
+	phys_addr_t start_addr = addr;
+	pte_t *pte, *start_pte;
+
+	start_pte = pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte)) {
+			kvm_set_pte(pte, __pte(0));
+			put_page(virt_to_page(pte));
+			kvm_tlb_flush_vmid_ipa(kvm, addr);
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+
+	if (kvm_pte_table_empty(start_pte))
+		clear_pmd_entry(kvm, pmd, start_addr);
 }
 
-static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
-			unsigned long long start, u64 size)
+static void unmap_pmds(struct kvm *kvm, pud_t *pud,
+		       phys_addr_t addr, phys_addr_t end)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	unsigned long long addr = start, end = start + size;
-	u64 next;
+	phys_addr_t next, start_addr = addr;
+	pmd_t *pmd, *start_pmd;
 
-	while (addr < end) {
-		pgd = pgdp + pgd_index(addr);
-		pud = pud_offset(pgd, addr);
-		pte = NULL;
-		if (pud_none(*pud)) {
-			addr = kvm_pud_addr_end(addr, end);
-			continue;
-		}
-
-		if (pud_huge(*pud)) {
-			/*
-			 * If we are dealing with a huge pud, just clear it and
-			 * move on.
-			 */
-			clear_pud_entry(kvm, pud, addr);
-			addr = kvm_pud_addr_end(addr, end);
-			continue;
-		}
-
-		pmd = pmd_offset(pud, addr);
-		if (pmd_none(*pmd)) {
-			addr = kvm_pmd_addr_end(addr, end);
-			continue;
-		}
-
-		if (!kvm_pmd_huge(*pmd)) {
-			pte = pte_offset_kernel(pmd, addr);
-			clear_pte_entry(kvm, pte, addr);
-			next = addr + PAGE_SIZE;
-		}
-
-		/*
-		 * If the pmd entry is to be cleared, walk back up the ladder
-		 */
-		if (kvm_pmd_huge(*pmd) || (pte && page_empty(pte))) {
-			clear_pmd_entry(kvm, pmd, addr);
-			next = kvm_pmd_addr_end(addr, end);
-			if (page_empty(pmd) && !page_empty(pud)) {
-				clear_pud_entry(kvm, pud, addr);
-				next = kvm_pud_addr_end(addr, end);
+	start_pmd = pmd = pmd_offset(pud, addr);
+	do {
+		next = kvm_pmd_addr_end(addr, end);
+		if (!pmd_none(*pmd)) {
+			if (kvm_pmd_huge(*pmd)) {
+				pmd_clear(pmd);
+				kvm_tlb_flush_vmid_ipa(kvm, addr);
+				put_page(virt_to_page(pmd));
+			} else {
+				unmap_ptes(kvm, pmd, addr, next);
 			}
 		}
+	} while (pmd++, addr = next, addr != end);
 
-		addr = next;
-	}
+	if (kvm_pmd_table_empty(start_pmd))
+		clear_pud_entry(kvm, pud, start_addr);
+}
+
+static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
+		       phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t next, start_addr = addr;
+	pud_t *pud, *start_pud;
+
+	start_pud = pud = pud_offset(pgd, addr);
+	do {
+		next = kvm_pud_addr_end(addr, end);
+		if (!pud_none(*pud)) {
+			if (pud_huge(*pud)) {
+				pud_clear(pud);
+				kvm_tlb_flush_vmid_ipa(kvm, addr);
+				put_page(virt_to_page(pud));
+			} else {
+				unmap_pmds(kvm, pud, addr, next);
+			}
+		}
+	} while (pud++, addr = next, addr != end);
+
+	if (kvm_pud_table_empty(start_pud))
+		clear_pgd_entry(kvm, pgd, start_addr);
+}
+
+
+static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
+			phys_addr_t start, u64 size)
+{
+	pgd_t *pgd;
+	phys_addr_t addr = start, end = start + size;
+	phys_addr_t next;
+
+	pgd = pgdp + pgd_index(addr);
+	do {
+		next = kvm_pgd_addr_end(addr, end);
+		unmap_puds(kvm, pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
 }
 
 static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 7d29847a893b..8e138c7c53ac 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -125,6 +125,21 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 #define kvm_pud_addr_end(addr, end)	pud_addr_end(addr, end)
 #define kvm_pmd_addr_end(addr, end)	pmd_addr_end(addr, end)
 
+static inline bool kvm_page_empty(void *ptr)
+{
+	struct page *ptr_page = virt_to_page(ptr);
+	return page_count(ptr_page) == 1;
+}
+
+#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
+#ifndef CONFIG_ARM64_64K_PAGES
+#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#else
+#define kvm_pmd_table_empty(pmdp) (0)
+#endif
+#define kvm_pud_table_empty(pudp) (0)
+
+
 struct kvm;
 
 #define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))

From 332075bc83a401bc1a4deb1caeea1d45bd340866 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@linaro.org>
Date: Fri, 6 Jun 2014 11:10:23 +0200
Subject: [PATCH 0619/1185] ARM: KVM: Unmap IPA on memslot delete/move

Currently, when a KVM region is deleted or moved after a
KVM_SET_USER_MEMORY_REGION ioctl, the corresponding
intermediate physical memory is not unmapped.

This patch corrects this and unmaps the region's IPA range
in kvm_arch_commit_memory_region using unmap_stage2_range.

Signed-off-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit df6ce24f2ee485c4f9a5cb610063a5eb60da8267)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 37 -------------------------------------
 arch/arm/kvm/mmu.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 37 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 354dc42b6395..e2c2bfd4da95 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -155,16 +155,6 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
-			   struct kvm_memory_slot *dont)
-{
-}
-
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
-			    unsigned long npages)
-{
-	return 0;
-}
 
 /**
  * kvm_arch_destroy_vm - destroy the VM data structure
@@ -225,33 +215,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
-int kvm_arch_prepare_memory_region(struct kvm *kvm,
-				   struct kvm_memory_slot *memslot,
-				   struct kvm_userspace_memory_region *mem,
-				   enum kvm_mr_change change)
-{
-	return 0;
-}
-
-void kvm_arch_commit_memory_region(struct kvm *kvm,
-				   struct kvm_userspace_memory_region *mem,
-				   const struct kvm_memory_slot *old,
-				   enum kvm_mr_change change)
-{
-}
-
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
-				   struct kvm_memory_slot *slot)
-{
-}
 
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 23360610aeac..b2a708be1407 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1111,3 +1111,49 @@ int kvm_mmu_init(void)
 	free_hyp_pgds();
 	return err;
 }
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+				   struct kvm_userspace_memory_region *mem,
+				   const struct kvm_memory_slot *old,
+				   enum kvm_mr_change change)
+{
+	gpa_t gpa = old->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = old->npages << PAGE_SHIFT;
+	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
+		spin_lock(&kvm->mmu_lock);
+		unmap_stage2_range(kvm, gpa, size);
+		spin_unlock(&kvm->mmu_lock);
+	}
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				   struct kvm_memory_slot *memslot,
+				   struct kvm_userspace_memory_region *mem,
+				   enum kvm_mr_change change)
+{
+	return 0;
+}
+
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+			   struct kvm_memory_slot *dont)
+{
+}
+
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
+{
+	return 0;
+}
+
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
+{
+}

From c2ca19e2dc3f71d003a4ab691462d1df6974d00f Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@linaro.org>
Date: Thu, 26 Jun 2014 01:45:51 +0100
Subject: [PATCH 0620/1185] ARM: KVM: user_mem_abort: support stage 2 MMIO page
 mapping

A userspace process can map device MMIO memory via VFIO or /dev/mem,
e.g., for platform device passthrough support in QEMU.

During early development, we found the PAGE_S2 memory type being used
for MMIO mappings.  This patch corrects that by using the more strongly
ordered memory type for device MMIO mappings: PAGE_S2_DEVICE.

Signed-off-by: Kim Phillips <kim.phillips@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit b88657674d39fc2127d62d0de9ca142e166443c8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index b2a708be1407..16e7994bf347 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -759,6 +759,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
 	struct vm_area_struct *vma;
 	pfn_t pfn;
+	pgprot_t mem_type = PAGE_S2;
 
 	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -809,6 +810,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (is_error_pfn(pfn))
 		return -EFAULT;
 
+	if (kvm_is_mmio_pfn(pfn))
+		mem_type = PAGE_S2_DEVICE;
+
 	spin_lock(&kvm->mmu_lock);
 	if (mmu_notifier_retry(kvm, mmu_seq))
 		goto out_unlock;
@@ -816,7 +820,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
 	if (hugetlb) {
-		pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);
+		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
 		new_pmd = pmd_mkhuge(new_pmd);
 		if (writable) {
 			kvm_set_s2pmd_writable(&new_pmd);
@@ -825,13 +829,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
-		pte_t new_pte = pfn_pte(pfn, PAGE_S2);
+		pte_t new_pte = pfn_pte(pfn, mem_type);
 		if (writable) {
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
 		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
-		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
+		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
+				     mem_type == PAGE_S2_DEVICE);
 	}
 
 
From 896f51b90443e91d7358914df4a24d9c152c2b48 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Tue, 1 Jul 2014 16:53:13 +0100
Subject: [PATCH 0621/1185] arm64: KVM: export demux regids as KVM_REG_ARM64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I suspect this is a -ECUTPASTE fault from the initial implementation. If
we don't declare the register ID to be KVM_REG_ARM64 the KVM_GET_ONE_REG
implementation kvm_arm_get_reg() returns -EINVAL and hilarity ensues.

The kvm/api.txt document describes all arm64 registers as starting with
0x60xx... (i.e KVM_REG_ARM64).

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit efd48ceacea78e4d4656aa0a6bf4c5b92ed22130)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c59a1bdab5eb..34f25a590bd7 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -962,7 +962,7 @@ static unsigned int num_demux_regs(void)
 
 static int write_demux_regids(u64 __user *uindices)
 {
-	u64 val = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
+	u64 val = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
 	unsigned int i;
 
 	val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;

From c675c7cef37bad776609e6923fe64019c10e2945 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Fri, 4 Jul 2014 15:54:14 +0100
Subject: [PATCH 0622/1185] arm64: KVM: allow export and import of generic
 timer regs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For correct guest suspend/resume behaviour we need to ensure we include
the generic timer registers for 64 bit guests. As CONFIG_KVM_ARM_TIMER is
always set for arm64 we don't need to worry about null implementations.
However I have re-jigged the kvm_arm_timer_set/get_reg declarations to
be in the common include/kvm/arm_arch_timer.h headers.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 1df08ba0aa95f1a8832b7162eec51069bd9be7ae)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h |  3 --
 arch/arm/kvm/guest.c            | 10 -----
 arch/arm64/kvm/guest.c          | 68 ++++++++++++++++++++++++++++++++-
 include/kvm/arm_arch_timer.h    | 14 +++++++
 4 files changed, 81 insertions(+), 14 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 193ceaf01bfd..dc4e3edf39cc 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -228,7 +228,4 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
-u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
-int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
-
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index b23a59c1c522..986e625b5dbd 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -124,16 +124,6 @@ static bool is_timer_reg(u64 index)
 	return false;
 }
 
-int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
-{
-	return 0;
-}
-
-u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
-{
-	return 0;
-}
-
 #else
 
 #define NUM_TIMER_REGS 3
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 60b5c31f3c10..8d1ec2887a26 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -135,6 +135,59 @@ static unsigned long num_core_regs(void)
 	return sizeof(struct kvm_regs) / sizeof(__u32);
 }
 
+/**
+ * ARM64 versions of the TIMER registers, always available on arm64
+ */
+
+#define NUM_TIMER_REGS 3
+
+static bool is_timer_reg(u64 index)
+{
+	switch (index) {
+	case KVM_REG_ARM_TIMER_CTL:
+	case KVM_REG_ARM_TIMER_CNT:
+	case KVM_REG_ARM_TIMER_CVAL:
+		return true;
+	}
+	return false;
+}
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+	int ret;
+
+	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+	if (ret != 0)
+		return ret;
+
+	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
+}
+
+static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+
+	val = kvm_arm_timer_get_reg(vcpu, reg->id);
+	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+}
+
 /**
  * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
  *
@@ -142,7 +195,8 @@ static unsigned long num_core_regs(void)
  */
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
 {
-	return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu);
+	return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu)
+                + NUM_TIMER_REGS;
 }
 
 /**
@@ -154,6 +208,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 {
 	unsigned int i;
 	const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE;
+	int ret;
 
 	for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
 		if (put_user(core_reg | i, uindices))
@@ -161,6 +216,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 		uindices++;
 	}
 
+	ret = copy_timer_indices(vcpu, uindices);
+	if (ret)
+		return ret;
+	uindices += NUM_TIMER_REGS;
+
 	return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
 }
 
@@ -174,6 +234,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return get_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return get_timer_reg(vcpu, reg);
+
 	return kvm_arm_sys_reg_get_reg(vcpu, reg);
 }
 
@@ -187,6 +250,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return set_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return set_timer_reg(vcpu, reg);
+
 	return kvm_arm_sys_reg_set_reg(vcpu, reg);
 }
 
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 6d9aeddc09bf..ad9db6045b2f 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -67,6 +67,10 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
 void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
+
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
+int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
+
 #else
 static inline int kvm_timer_hyp_init(void)
 {
@@ -84,6 +88,16 @@ static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
 static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {}
 static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {}
 static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {}
+
+static inline int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
+{
+	return 0;
+}
+
+static inline u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
+{
+	return 0;
+}
 #endif
 
 #endif

From 7f124577d6247ae088a0acb0762297a5bdaeb9a3 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 30 May 2013 10:20:36 +0100
Subject: [PATCH 0623/1185] KVM: arm/arm64: vgic: move GICv2 registers to their
 own structure

In order to make way for the GICv3 registers, move the v2-specific
registers to their own structure.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit eede821dbfd58df89edb072da64e006321eaef58)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kernel/asm-offsets.c   | 14 ++++-----
 arch/arm/kvm/interrupts_head.S  | 26 +++++++--------
 arch/arm64/kernel/asm-offsets.c | 14 ++++-----
 arch/arm64/kvm/hyp.S            | 26 +++++++--------
 include/kvm/arm_vgic.h          | 20 +++++++-----
 virt/kvm/arm/vgic.c             | 56 ++++++++++++++++-----------------
 6 files changed, 81 insertions(+), 75 deletions(-)

diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index aa2acc1dd986..776d9186e9c1 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -176,13 +176,13 @@ int main(void)
   DEFINE(VCPU_HYP_PC,		offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
 #ifdef CONFIG_KVM_ARM_VGIC
   DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
-  DEFINE(VGIC_CPU_HCR,		offsetof(struct vgic_cpu, vgic_hcr));
-  DEFINE(VGIC_CPU_VMCR,		offsetof(struct vgic_cpu, vgic_vmcr));
-  DEFINE(VGIC_CPU_MISR,		offsetof(struct vgic_cpu, vgic_misr));
-  DEFINE(VGIC_CPU_EISR,		offsetof(struct vgic_cpu, vgic_eisr));
-  DEFINE(VGIC_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_elrsr));
-  DEFINE(VGIC_CPU_APR,		offsetof(struct vgic_cpu, vgic_apr));
-  DEFINE(VGIC_CPU_LR,		offsetof(struct vgic_cpu, vgic_lr));
+  DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
+  DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
+  DEFINE(VGIC_V2_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
+  DEFINE(VGIC_V2_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
+  DEFINE(VGIC_V2_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
+  DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
+  DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
 #ifdef CONFIG_KVM_ARM_TIMER
   DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 76af93025574..e4eaf30205c5 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -421,14 +421,14 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	ldr	r9, [r2, #GICH_ELRSR1]
 	ldr	r10, [r2, #GICH_APR]
 
-	str	r3, [r11, #VGIC_CPU_HCR]
-	str	r4, [r11, #VGIC_CPU_VMCR]
-	str	r5, [r11, #VGIC_CPU_MISR]
-	str	r6, [r11, #VGIC_CPU_EISR]
-	str	r7, [r11, #(VGIC_CPU_EISR + 4)]
-	str	r8, [r11, #VGIC_CPU_ELRSR]
-	str	r9, [r11, #(VGIC_CPU_ELRSR + 4)]
-	str	r10, [r11, #VGIC_CPU_APR]
+	str	r3, [r11, #VGIC_V2_CPU_HCR]
+	str	r4, [r11, #VGIC_V2_CPU_VMCR]
+	str	r5, [r11, #VGIC_V2_CPU_MISR]
+	str	r6, [r11, #VGIC_V2_CPU_EISR]
+	str	r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
+	str	r8, [r11, #VGIC_V2_CPU_ELRSR]
+	str	r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+	str	r10, [r11, #VGIC_V2_CPU_APR]
 
 	/* Clear GICH_HCR */
 	mov	r5, #0
@@ -436,7 +436,7 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 	/* Save list registers */
 	add	r2, r2, #GICH_LR0
-	add	r3, r11, #VGIC_CPU_LR
+	add	r3, r11, #VGIC_V2_CPU_LR
 	ldr	r4, [r11, #VGIC_CPU_NR_LR]
 1:	ldr	r6, [r2], #4
 	str	r6, [r3], #4
@@ -463,9 +463,9 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	add	r11, vcpu, #VCPU_VGIC_CPU
 
 	/* We only restore a minimal set of registers */
-	ldr	r3, [r11, #VGIC_CPU_HCR]
-	ldr	r4, [r11, #VGIC_CPU_VMCR]
-	ldr	r8, [r11, #VGIC_CPU_APR]
+	ldr	r3, [r11, #VGIC_V2_CPU_HCR]
+	ldr	r4, [r11, #VGIC_V2_CPU_VMCR]
+	ldr	r8, [r11, #VGIC_V2_CPU_APR]
 
 	str	r3, [r2, #GICH_HCR]
 	str	r4, [r2, #GICH_VMCR]
@@ -473,7 +473,7 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 	/* Restore list registers */
 	add	r2, r2, #GICH_LR0
-	add	r3, r11, #VGIC_CPU_LR
+	add	r3, r11, #VGIC_V2_CPU_LR
 	ldr	r4, [r11, #VGIC_CPU_NR_LR]
 1:	ldr	r6, [r3], #4
 	str	r6, [r2], #4
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 666e231d410b..dcfd8a616a94 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -127,13 +127,13 @@ int main(void)
   DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
   DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
   DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
-  DEFINE(VGIC_CPU_HCR,		offsetof(struct vgic_cpu, vgic_hcr));
-  DEFINE(VGIC_CPU_VMCR,		offsetof(struct vgic_cpu, vgic_vmcr));
-  DEFINE(VGIC_CPU_MISR,		offsetof(struct vgic_cpu, vgic_misr));
-  DEFINE(VGIC_CPU_EISR,		offsetof(struct vgic_cpu, vgic_eisr));
-  DEFINE(VGIC_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_elrsr));
-  DEFINE(VGIC_CPU_APR,		offsetof(struct vgic_cpu, vgic_apr));
-  DEFINE(VGIC_CPU_LR,		offsetof(struct vgic_cpu, vgic_lr));
+  DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
+  DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
+  DEFINE(VGIC_V2_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
+  DEFINE(VGIC_V2_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
+  DEFINE(VGIC_V2_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
+  DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
+  DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
   DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
   DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index b0d1512acf08..877d82a134bc 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -412,14 +412,14 @@ CPU_BE(	rev	w9,  w9  )
 CPU_BE(	rev	w10, w10 )
 CPU_BE(	rev	w11, w11 )
 
-	str	w4, [x3, #VGIC_CPU_HCR]
-	str	w5, [x3, #VGIC_CPU_VMCR]
-	str	w6, [x3, #VGIC_CPU_MISR]
-	str	w7, [x3, #VGIC_CPU_EISR]
-	str	w8, [x3, #(VGIC_CPU_EISR + 4)]
-	str	w9, [x3, #VGIC_CPU_ELRSR]
-	str	w10, [x3, #(VGIC_CPU_ELRSR + 4)]
-	str	w11, [x3, #VGIC_CPU_APR]
+	str	w4, [x3, #VGIC_V2_CPU_HCR]
+	str	w5, [x3, #VGIC_V2_CPU_VMCR]
+	str	w6, [x3, #VGIC_V2_CPU_MISR]
+	str	w7, [x3, #VGIC_V2_CPU_EISR]
+	str	w8, [x3, #(VGIC_V2_CPU_EISR + 4)]
+	str	w9, [x3, #VGIC_V2_CPU_ELRSR]
+	str	w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)]
+	str	w11, [x3, #VGIC_V2_CPU_APR]
 
 	/* Clear GICH_HCR */
 	str	wzr, [x2, #GICH_HCR]
@@ -427,7 +427,7 @@ CPU_BE(	rev	w11, w11 )
 	/* Save list registers */
 	add	x2, x2, #GICH_LR0
 	ldr	w4, [x3, #VGIC_CPU_NR_LR]
-	add	x3, x3, #VGIC_CPU_LR
+	add	x3, x3, #VGIC_V2_CPU_LR
 1:	ldr	w5, [x2], #4
 CPU_BE(	rev	w5, w5 )
 	str	w5, [x3], #4
@@ -452,9 +452,9 @@ CPU_BE(	rev	w5, w5 )
 	add	x3, x0, #VCPU_VGIC_CPU
 
 	/* We only restore a minimal set of registers */
-	ldr	w4, [x3, #VGIC_CPU_HCR]
-	ldr	w5, [x3, #VGIC_CPU_VMCR]
-	ldr	w6, [x3, #VGIC_CPU_APR]
+	ldr	w4, [x3, #VGIC_V2_CPU_HCR]
+	ldr	w5, [x3, #VGIC_V2_CPU_VMCR]
+	ldr	w6, [x3, #VGIC_V2_CPU_APR]
 CPU_BE(	rev	w4, w4 )
 CPU_BE(	rev	w5, w5 )
 CPU_BE(	rev	w6, w6 )
@@ -466,7 +466,7 @@ CPU_BE(	rev	w6, w6 )
 	/* Restore list registers */
 	add	x2, x2, #GICH_LR0
 	ldr	w4, [x3, #VGIC_CPU_NR_LR]
-	add	x3, x3, #VGIC_CPU_LR
+	add	x3, x3, #VGIC_V2_CPU_LR
 1:	ldr	w5, [x3], #4
 CPU_BE(	rev	w5, w5 )
 	str	w5, [x2], #4
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f27000f55a83..f738e5a69ee9 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -110,6 +110,16 @@ struct vgic_dist {
 #endif
 };
 
+struct vgic_v2_cpu_if {
+	u32		vgic_hcr;
+	u32		vgic_vmcr;
+	u32		vgic_misr;	/* Saved only */
+	u32		vgic_eisr[2];	/* Saved only */
+	u32		vgic_elrsr[2];	/* Saved only */
+	u32		vgic_apr;
+	u32		vgic_lr[VGIC_MAX_LRS];
+};
+
 struct vgic_cpu {
 #ifdef CONFIG_KVM_ARM_VGIC
 	/* per IRQ to LR mapping */
@@ -126,13 +136,9 @@ struct vgic_cpu {
 	int		nr_lr;
 
 	/* CPU vif control registers for world switch */
-	u32		vgic_hcr;
-	u32		vgic_vmcr;
-	u32		vgic_misr;	/* Saved only */
-	u32		vgic_eisr[2];	/* Saved only */
-	u32		vgic_elrsr[2];	/* Saved only */
-	u32		vgic_apr;
-	u32		vgic_lr[VGIC_MAX_LRS];
+	union {
+		struct vgic_v2_cpu_if	vgic_v2;
+	};
 #endif
 };
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 56ff9bebb577..0ba1ab0721fd 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -601,7 +601,7 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
 static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu)
 {
 	clear_bit(lr_nr, vgic_cpu->lr_used);
-	vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE;
+	vgic_cpu->vgic_v2.vgic_lr[lr_nr] &= ~GICH_LR_STATE;
 	vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
 }
 
@@ -626,7 +626,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 	u32 *lr;
 
 	for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
-		lr = &vgic_cpu->vgic_lr[i];
+		lr = &vgic_cpu->vgic_v2.vgic_lr[i];
 		irq = LR_IRQID(*lr);
 		source_cpu = LR_CPUID(*lr);
 
@@ -1007,7 +1007,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 	int lr;
 
 	for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
-		int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
+		int irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID;
 
 		if (!vgic_irq_is_enabled(vcpu, irq)) {
 			vgic_retire_lr(lr, irq, vgic_cpu);
@@ -1037,11 +1037,11 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 
 	/* Do we have an active interrupt for the same CPUID? */
 	if (lr != LR_EMPTY &&
-	    (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) {
+	    (LR_CPUID(vgic_cpu->vgic_v2.vgic_lr[lr]) == sgi_source_id)) {
 		kvm_debug("LR%d piggyback for IRQ%d %x\n",
-			  lr, irq, vgic_cpu->vgic_lr[lr]);
+			  lr, irq, vgic_cpu->vgic_v2.vgic_lr[lr]);
 		BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
-		vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT;
+		vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_PENDING_BIT;
 		return true;
 	}
 
@@ -1052,12 +1052,12 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 		return false;
 
 	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
-	vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
+	vgic_cpu->vgic_v2.vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
 	vgic_cpu->vgic_irq_lr_map[irq] = lr;
 	set_bit(lr, vgic_cpu->lr_used);
 
 	if (!vgic_irq_is_edge(vcpu, irq))
-		vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI;
+		vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_EOI;
 
 	return true;
 }
@@ -1155,9 +1155,9 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 
 epilog:
 	if (overflow) {
-		vgic_cpu->vgic_hcr |= GICH_HCR_UIE;
+		vgic_cpu->vgic_v2.vgic_hcr |= GICH_HCR_UIE;
 	} else {
-		vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
+		vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
 		/*
 		 * We're about to run this VCPU, and we've consumed
 		 * everything the distributor had in store for
@@ -1173,21 +1173,21 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	bool level_pending = false;
 
-	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
+	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_v2.vgic_misr);
 
-	if (vgic_cpu->vgic_misr & GICH_MISR_EOI) {
+	if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_EOI) {
 		/*
 		 * Some level interrupts have been EOIed. Clear their
 		 * active bit.
 		 */
 		int lr, irq;
 
-		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
+		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr,
 				 vgic_cpu->nr_lr) {
-			irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
+			irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID;
 
 			vgic_irq_clear_active(vcpu, irq);
-			vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI;
+			vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_EOI;
 
 			/* Any additional pending interrupt? */
 			if (vgic_dist_irq_is_pending(vcpu, irq)) {
@@ -1201,13 +1201,13 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 			 * Despite being EOIed, the LR may not have
 			 * been marked as empty.
 			 */
-			set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr);
-			vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT;
+			set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr);
+			vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT;
 		}
 	}
 
-	if (vgic_cpu->vgic_misr & GICH_MISR_U)
-		vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
+	if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_U)
+		vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
 
 	return level_pending;
 }
@@ -1226,21 +1226,21 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	level_pending = vgic_process_maintenance(vcpu);
 
 	/* Clear mappings for empty LRs */
-	for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr,
+	for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr,
 			 vgic_cpu->nr_lr) {
 		int irq;
 
 		if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
 			continue;
 
-		irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
+		irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID;
 
 		BUG_ON(irq >= VGIC_NR_IRQS);
 		vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
 	}
 
 	/* Check if we still have something up our sleeve... */
-	pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr,
+	pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr,
 				      vgic_cpu->nr_lr);
 	if (level_pending || pending < vgic_cpu->nr_lr)
 		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
@@ -1436,10 +1436,10 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	 * points to their reset values. Anything else resets to zero
 	 * anyway.
 	 */
-	vgic_cpu->vgic_vmcr = 0;
+	vgic_cpu->vgic_v2.vgic_vmcr = 0;
 
 	vgic_cpu->nr_lr = vgic_nr_lr;
-	vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */
+	vgic_cpu->vgic_v2.vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */
 
 	return 0;
 }
@@ -1746,15 +1746,15 @@ static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
 	}
 
 	if (!mmio->is_write) {
-		reg = (vgic_cpu->vgic_vmcr & mask) >> shift;
+		reg = (vgic_cpu->vgic_v2.vgic_vmcr & mask) >> shift;
 		mmio_data_write(mmio, ~0, reg);
 	} else {
 		reg = mmio_data_read(mmio, ~0);
 		reg = (reg << shift) & mask;
-		if (reg != (vgic_cpu->vgic_vmcr & mask))
+		if (reg != (vgic_cpu->vgic_v2.vgic_vmcr & mask))
 			updated = true;
-		vgic_cpu->vgic_vmcr &= ~mask;
-		vgic_cpu->vgic_vmcr |= reg;
+		vgic_cpu->vgic_v2.vgic_vmcr &= ~mask;
+		vgic_cpu->vgic_v2.vgic_vmcr |= reg;
 	}
 	return updated;
 }

From 9f92df7009267f1a105e1338def7e99da648417c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 3 Jun 2013 15:55:02 +0100
Subject: [PATCH 0624/1185] KVM: ARM: vgic: introduce vgic_ops and LR
 manipulation primitives

In order to split the various register manipulations from the main vgic
code, introduce a vgic_ops structure, and start by abstracting the
LR manipulation code with a couple of accessors.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 8d5c6b06a5d5f8ebcf40558e566781d572920740)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  18 +++++
 virt/kvm/arm/vgic.c    | 162 ++++++++++++++++++++++++++++-------------
 2 files changed, 128 insertions(+), 52 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f738e5a69ee9..17bbe51b79a1 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -68,6 +68,24 @@ struct vgic_bytemap {
 	u32 shared[VGIC_NR_SHARED_IRQS  / 4];
 };
 
+struct kvm_vcpu;
+
+#define LR_STATE_PENDING	(1 << 0)
+#define LR_STATE_ACTIVE		(1 << 1)
+#define LR_STATE_MASK		(3 << 0)
+#define LR_EOI_INT		(1 << 2)
+
+struct vgic_lr {
+	u16	irq;
+	u8	source;
+	u8	state;
+};
+
+struct vgic_ops {
+	struct vgic_lr	(*get_lr)(const struct kvm_vcpu *, int);
+	void	(*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
+};
+
 struct vgic_dist {
 #ifdef CONFIG_KVM_ARM_VGIC
 	spinlock_t		lock;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 0ba1ab0721fd..11408fee600e 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -94,9 +94,12 @@ static struct device_node *vgic_node;
 #define ACCESS_WRITE_MASK(x)	((x) & (3 << 1))
 
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
+static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
 static void vgic_update_state(struct kvm *kvm);
 static void vgic_kick_vcpus(struct kvm *kvm);
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
+static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
+static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
 static u32 vgic_nr_lr;
 
 static unsigned int vgic_maint_irq;
@@ -593,18 +596,6 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
 	return false;
 }
 
-#define LR_CPUID(lr)	\
-	(((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
-#define LR_IRQID(lr)	\
-	((lr) & GICH_LR_VIRTUALID)
-
-static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu)
-{
-	clear_bit(lr_nr, vgic_cpu->lr_used);
-	vgic_cpu->vgic_v2.vgic_lr[lr_nr] &= ~GICH_LR_STATE;
-	vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
-}
-
 /**
  * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
  * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
@@ -622,13 +613,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	int vcpu_id = vcpu->vcpu_id;
-	int i, irq, source_cpu;
-	u32 *lr;
+	int i;
 
 	for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
-		lr = &vgic_cpu->vgic_v2.vgic_lr[i];
-		irq = LR_IRQID(*lr);
-		source_cpu = LR_CPUID(*lr);
+		struct vgic_lr lr = vgic_get_lr(vcpu, i);
 
 		/*
 		 * There are three options for the state bits:
@@ -640,7 +628,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 * If the LR holds only an active interrupt (not pending) then
 		 * just leave it alone.
 		 */
-		if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT)
+		if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE)
 			continue;
 
 		/*
@@ -649,18 +637,19 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 * is fine, then we are only setting a few bits that were
 		 * already set.
 		 */
-		vgic_dist_irq_set(vcpu, irq);
-		if (irq < VGIC_NR_SGIS)
-			dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu;
-		*lr &= ~GICH_LR_PENDING_BIT;
+		vgic_dist_irq_set(vcpu, lr.irq);
+		if (lr.irq < VGIC_NR_SGIS)
+			dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source;
+		lr.state &= ~LR_STATE_PENDING;
+		vgic_set_lr(vcpu, i, lr);
 
 		/*
 		 * If there's no state left on the LR (it could still be
 		 * active), then the LR does not hold any useful info and can
 		 * be marked as free for other use.
 		 */
-		if (!(*lr & GICH_LR_STATE))
-			vgic_retire_lr(i, irq, vgic_cpu);
+		if (!(lr.state & LR_STATE_MASK))
+			vgic_retire_lr(i, lr.irq, vcpu);
 
 		/* Finally update the VGIC state. */
 		vgic_update_state(vcpu->kvm);
@@ -989,8 +978,69 @@ static void vgic_update_state(struct kvm *kvm)
 	}
 }
 
-#define MK_LR_PEND(src, irq)	\
-	(GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq))
+static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+	struct vgic_lr lr_desc;
+	u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr];
+
+	lr_desc.irq	= val & GICH_LR_VIRTUALID;
+	if (lr_desc.irq <= 15)
+		lr_desc.source	= (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
+	else
+		lr_desc.source = 0;
+	lr_desc.state	= 0;
+
+	if (val & GICH_LR_PENDING_BIT)
+		lr_desc.state |= LR_STATE_PENDING;
+	if (val & GICH_LR_ACTIVE_BIT)
+		lr_desc.state |= LR_STATE_ACTIVE;
+	if (val & GICH_LR_EOI)
+		lr_desc.state |= LR_EOI_INT;
+
+	return lr_desc;
+}
+
+static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
+			   struct vgic_lr lr_desc)
+{
+	u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;
+
+	if (lr_desc.state & LR_STATE_PENDING)
+		lr_val |= GICH_LR_PENDING_BIT;
+	if (lr_desc.state & LR_STATE_ACTIVE)
+		lr_val |= GICH_LR_ACTIVE_BIT;
+	if (lr_desc.state & LR_EOI_INT)
+		lr_val |= GICH_LR_EOI;
+
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
+}
+
+static const struct vgic_ops vgic_ops = {
+	.get_lr			= vgic_v2_get_lr,
+	.set_lr			= vgic_v2_set_lr,
+};
+
+static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+	return vgic_ops.get_lr(vcpu, lr);
+}
+
+static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
+			       struct vgic_lr vlr)
+{
+	vgic_ops.set_lr(vcpu, lr, vlr);
+}
+
+static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
+{
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
+
+	vlr.state = 0;
+	vgic_set_lr(vcpu, lr_nr, vlr);
+	clear_bit(lr_nr, vgic_cpu->lr_used);
+	vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+}
 
 /*
  * An interrupt may have been disabled after being made pending on the
@@ -1007,12 +1057,12 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 	int lr;
 
 	for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
-		int irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID;
+		struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
-		if (!vgic_irq_is_enabled(vcpu, irq)) {
-			vgic_retire_lr(lr, irq, vgic_cpu);
-			if (vgic_irq_is_active(vcpu, irq))
-				vgic_irq_clear_active(vcpu, irq);
+		if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
+			vgic_retire_lr(lr, vlr.irq, vcpu);
+			if (vgic_irq_is_active(vcpu, vlr.irq))
+				vgic_irq_clear_active(vcpu, vlr.irq);
 		}
 	}
 }
@@ -1024,6 +1074,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	struct vgic_lr vlr;
 	int lr;
 
 	/* Sanitize the input... */
@@ -1036,13 +1087,15 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 	lr = vgic_cpu->vgic_irq_lr_map[irq];
 
 	/* Do we have an active interrupt for the same CPUID? */
-	if (lr != LR_EMPTY &&
-	    (LR_CPUID(vgic_cpu->vgic_v2.vgic_lr[lr]) == sgi_source_id)) {
-		kvm_debug("LR%d piggyback for IRQ%d %x\n",
-			  lr, irq, vgic_cpu->vgic_v2.vgic_lr[lr]);
-		BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
-		vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_PENDING_BIT;
-		return true;
+	if (lr != LR_EMPTY) {
+		vlr = vgic_get_lr(vcpu, lr);
+		if (vlr.source == sgi_source_id) {
+			kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
+			BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
+			vlr.state |= LR_STATE_PENDING;
+			vgic_set_lr(vcpu, lr, vlr);
+			return true;
+		}
 	}
 
 	/* Try to use another LR for this interrupt */
@@ -1052,12 +1105,16 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 		return false;
 
 	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
-	vgic_cpu->vgic_v2.vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
 	vgic_cpu->vgic_irq_lr_map[irq] = lr;
 	set_bit(lr, vgic_cpu->lr_used);
 
+	vlr.irq = irq;
+	vlr.source = sgi_source_id;
+	vlr.state = LR_STATE_PENDING;
 	if (!vgic_irq_is_edge(vcpu, irq))
-		vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_EOI;
+		vlr.state |= LR_EOI_INT;
+
+	vgic_set_lr(vcpu, lr, vlr);
 
 	return true;
 }
@@ -1180,21 +1237,23 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 		 * Some level interrupts have been EOIed. Clear their
 		 * active bit.
 		 */
-		int lr, irq;
+		int lr;
 
 		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr,
 				 vgic_cpu->nr_lr) {
-			irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID;
+			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
-			vgic_irq_clear_active(vcpu, irq);
-			vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_EOI;
+			vgic_irq_clear_active(vcpu, vlr.irq);
+			WARN_ON(vlr.state & LR_STATE_MASK);
+			vlr.state = 0;
+			vgic_set_lr(vcpu, lr, vlr);
 
 			/* Any additional pending interrupt? */
-			if (vgic_dist_irq_is_pending(vcpu, irq)) {
-				vgic_cpu_irq_set(vcpu, irq);
+			if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) {
+				vgic_cpu_irq_set(vcpu, vlr.irq);
 				level_pending = true;
 			} else {
-				vgic_cpu_irq_clear(vcpu, irq);
+				vgic_cpu_irq_clear(vcpu, vlr.irq);
 			}
 
 			/*
@@ -1202,7 +1261,6 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 			 * been marked as empty.
 			 */
 			set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr);
-			vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT;
 		}
 	}
 
@@ -1228,15 +1286,15 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	/* Clear mappings for empty LRs */
 	for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr,
 			 vgic_cpu->nr_lr) {
-		int irq;
+		struct vgic_lr vlr;
 
 		if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
 			continue;
 
-		irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID;
+		vlr = vgic_get_lr(vcpu, lr);
 
-		BUG_ON(irq >= VGIC_NR_IRQS);
-		vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+		BUG_ON(vlr.irq >= VGIC_NR_IRQS);
+		vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
 	}
 
 	/* Check if we still have something up our sleeve... */

From 32d98f25d20daedfa49c32a73e57630bdc0e74a0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 4 Jun 2013 10:29:39 +0100
Subject: [PATCH 0625/1185] KVM: ARM: vgic: abstract access to the ELRSR bitmap

Move the GICH_ELRSR access to its own functions, and add them to
the vgic_ops structure.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 69bb2c9fbc11d9d4358fbb798db15c9092eb4d8c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  2 ++
 virt/kvm/arm/vgic.c    | 46 +++++++++++++++++++++++++++++++++++++-----
 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 17bbe51b79a1..38864f5e47bc 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -84,6 +84,8 @@ struct vgic_lr {
 struct vgic_ops {
 	struct vgic_lr	(*get_lr)(const struct kvm_vcpu *, int);
 	void	(*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
+	void	(*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
+	u64	(*get_elrsr)(const struct kvm_vcpu *vcpu);
 };
 
 struct vgic_dist {
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 11408fee600e..6dcc974fa2b4 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1015,9 +1015,32 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
 }
 
+static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+				  struct vgic_lr lr_desc)
+{
+	if (!(lr_desc.state & LR_STATE_MASK))
+		set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
+}
+
+static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
+{
+	u64 val;
+
+#if BITS_PER_LONG == 64
+	val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1];
+	val <<= 32;
+	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0];
+#else
+	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
+#endif
+	return val;
+}
+
 static const struct vgic_ops vgic_ops = {
 	.get_lr			= vgic_v2_get_lr,
 	.set_lr			= vgic_v2_set_lr,
+	.sync_lr_elrsr		= vgic_v2_sync_lr_elrsr,
+	.get_elrsr		= vgic_v2_get_elrsr,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1031,6 +1054,17 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
 	vgic_ops.set_lr(vcpu, lr, vlr);
 }
 
+static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+			       struct vgic_lr vlr)
+{
+	vgic_ops.sync_lr_elrsr(vcpu, lr, vlr);
+}
+
+static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
+{
+	return vgic_ops.get_elrsr(vcpu);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1260,7 +1294,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 			 * Despite being EOIed, the LR may not have
 			 * been marked as empty.
 			 */
-			set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr);
+			vgic_sync_lr_elrsr(vcpu, lr, vlr);
 		}
 	}
 
@@ -1278,14 +1312,17 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	u64 elrsr;
+	unsigned long *elrsr_ptr;
 	int lr, pending;
 	bool level_pending;
 
 	level_pending = vgic_process_maintenance(vcpu);
+	elrsr = vgic_get_elrsr(vcpu);
+	elrsr_ptr = (unsigned long *)&elrsr;
 
 	/* Clear mappings for empty LRs */
-	for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr,
-			 vgic_cpu->nr_lr) {
+	for_each_set_bit(lr, elrsr_ptr, vgic_cpu->nr_lr) {
 		struct vgic_lr vlr;
 
 		if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
@@ -1298,8 +1335,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	}
 
 	/* Check if we still have something up our sleeve... */
-	pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr,
-				      vgic_cpu->nr_lr);
+	pending = find_first_zero_bit(elrsr_ptr, vgic_cpu->nr_lr);
 	if (level_pending || pending < vgic_cpu->nr_lr)
 		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
 }

From b874bb5cb4795de150d1157a0cf10138122cc093 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 4 Jun 2013 10:33:43 +0100
Subject: [PATCH 0626/1185] KVM: ARM: vgic: abstract EISR bitmap access

Move the GICH_EISR access to its own function.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 8d6a0313c125c3c7b208b75695fe6ab00afab4c5)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  1 +
 virt/kvm/arm/vgic.c    | 25 +++++++++++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 38864f5e47bc..ccb9b59818f4 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -86,6 +86,7 @@ struct vgic_ops {
 	void	(*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
 	void	(*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
 	u64	(*get_elrsr)(const struct kvm_vcpu *vcpu);
+	u64	(*get_eisr)(const struct kvm_vcpu *vcpu);
 };
 
 struct vgic_dist {
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 6dcc974fa2b4..1e857e6e66b5 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1036,11 +1036,26 @@ static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
 	return val;
 }
 
+static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
+{
+	u64 val;
+
+#if BITS_PER_LONG == 64
+	val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1];
+	val <<= 32;
+	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0];
+#else
+	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
+#endif
+	return val;
+}
+
 static const struct vgic_ops vgic_ops = {
 	.get_lr			= vgic_v2_get_lr,
 	.set_lr			= vgic_v2_set_lr,
 	.sync_lr_elrsr		= vgic_v2_sync_lr_elrsr,
 	.get_elrsr		= vgic_v2_get_elrsr,
+	.get_eisr		= vgic_v2_get_eisr,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1065,6 +1080,11 @@ static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
 	return vgic_ops.get_elrsr(vcpu);
 }
 
+static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
+{
+	return vgic_ops.get_eisr(vcpu);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1271,10 +1291,11 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 		 * Some level interrupts have been EOIed. Clear their
 		 * active bit.
 		 */
+		u64 eisr = vgic_get_eisr(vcpu);
+		unsigned long *eisr_ptr = (unsigned long *)&eisr;
 		int lr;
 
-		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr,
-				 vgic_cpu->nr_lr) {
+		for_each_set_bit(lr, eisr_ptr, vgic_cpu->nr_lr) {
 			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
 			vgic_irq_clear_active(vcpu, vlr.irq);

From 3f79fb5a583821ca4c97d1b694285d36697e69c9 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 4 Jun 2013 11:02:10 +0100
Subject: [PATCH 0627/1185] KVM: ARM: vgic: abstract MISR decoding

Instead of directly dealing with the GICH_MISR bits, move the code to
its own function and use a couple of public flags to represent the
actual state.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 495dd859f304689a7cd5ef413c439cb090dc25e6)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  4 ++++
 virt/kvm/arm/vgic.c    | 26 +++++++++++++++++++++++---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index ccb9b59818f4..4857508b12e7 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -87,6 +87,7 @@ struct vgic_ops {
 	void	(*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
 	u64	(*get_elrsr)(const struct kvm_vcpu *vcpu);
 	u64	(*get_eisr)(const struct kvm_vcpu *vcpu);
+	u32	(*get_interrupt_status)(const struct kvm_vcpu *vcpu);
 };
 
 struct vgic_dist {
@@ -165,6 +166,9 @@ struct vgic_cpu {
 
 #define LR_EMPTY	0xff
 
+#define INT_STATUS_EOI		(1 << 0)
+#define INT_STATUS_UNDERFLOW	(1 << 1)
+
 struct kvm;
 struct kvm_vcpu;
 struct kvm_run;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 1e857e6e66b5..c0bcc9735424 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1050,12 +1050,26 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
 	return val;
 }
 
+static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
+{
+	u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
+	u32 ret = 0;
+
+	if (misr & GICH_MISR_EOI)
+		ret |= INT_STATUS_EOI;
+	if (misr & GICH_MISR_U)
+		ret |= INT_STATUS_UNDERFLOW;
+
+	return ret;
+}
+
 static const struct vgic_ops vgic_ops = {
 	.get_lr			= vgic_v2_get_lr,
 	.set_lr			= vgic_v2_set_lr,
 	.sync_lr_elrsr		= vgic_v2_sync_lr_elrsr,
 	.get_elrsr		= vgic_v2_get_elrsr,
 	.get_eisr		= vgic_v2_get_eisr,
+	.get_interrupt_status	= vgic_v2_get_interrupt_status,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1085,6 +1099,11 @@ static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
 	return vgic_ops.get_eisr(vcpu);
 }
 
+static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
+{
+	return vgic_ops.get_interrupt_status(vcpu);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1282,11 +1301,12 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	u32 status = vgic_get_interrupt_status(vcpu);
 	bool level_pending = false;
 
-	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_v2.vgic_misr);
+	kvm_debug("STATUS = %08x\n", status);
 
-	if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_EOI) {
+	if (status & INT_STATUS_EOI) {
 		/*
 		 * Some level interrupts have been EOIed. Clear their
 		 * active bit.
@@ -1319,7 +1339,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_U)
+	if (status & INT_STATUS_UNDERFLOW)
 		vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
 
 	return level_pending;

From 900f2c6eeaaa0daf752a06d654fed8d98d626f9f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 4 Jun 2013 11:24:17 +0100
Subject: [PATCH 0628/1185] KVM: ARM: vgic: move underflow handling to vgic_ops

Move the code dealing with LR underflow handling to its own functions,
and make them accessible through vgic_ops.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 909d9b5025f149af6cfc304a76ad6218e6622cc0)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  2 ++
 virt/kvm/arm/vgic.c    | 28 +++++++++++++++++++++++++---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 4857508b12e7..cdfa5d9567c6 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -88,6 +88,8 @@ struct vgic_ops {
 	u64	(*get_elrsr)(const struct kvm_vcpu *vcpu);
 	u64	(*get_eisr)(const struct kvm_vcpu *vcpu);
 	u32	(*get_interrupt_status)(const struct kvm_vcpu *vcpu);
+	void	(*enable_underflow)(struct kvm_vcpu *vcpu);
+	void	(*disable_underflow)(struct kvm_vcpu *vcpu);
 };
 
 struct vgic_dist {
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index c0bcc9735424..6d618e0b08a1 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1063,6 +1063,16 @@ static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
 	return ret;
 }
 
+static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE;
+}
+
+static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
+}
+
 static const struct vgic_ops vgic_ops = {
 	.get_lr			= vgic_v2_get_lr,
 	.set_lr			= vgic_v2_set_lr,
@@ -1070,6 +1080,8 @@ static const struct vgic_ops vgic_ops = {
 	.get_elrsr		= vgic_v2_get_elrsr,
 	.get_eisr		= vgic_v2_get_eisr,
 	.get_interrupt_status	= vgic_v2_get_interrupt_status,
+	.enable_underflow	= vgic_v2_enable_underflow,
+	.disable_underflow	= vgic_v2_disable_underflow,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1104,6 +1116,16 @@ static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
 	return vgic_ops.get_interrupt_status(vcpu);
 }
 
+static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu)
+{
+	vgic_ops.enable_underflow(vcpu);
+}
+
+static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
+{
+	vgic_ops.disable_underflow(vcpu);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1285,9 +1307,9 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 
 epilog:
 	if (overflow) {
-		vgic_cpu->vgic_v2.vgic_hcr |= GICH_HCR_UIE;
+		vgic_enable_underflow(vcpu);
 	} else {
-		vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
+		vgic_disable_underflow(vcpu);
 		/*
 		 * We're about to run this VCPU, and we've consumed
 		 * everything the distributor had in store for
@@ -1340,7 +1362,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 	}
 
 	if (status & INT_STATUS_UNDERFLOW)
-		vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
+		vgic_disable_underflow(vcpu);
 
 	return level_pending;
 }

From 32a0e35ba5210fab6b6379971bba24886b69952a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 4 Feb 2014 17:48:10 +0000
Subject: [PATCH 0629/1185] KVM: ARM: vgic: abstract VMCR access

Instead of directly messing with the GICH_VMCR bits for the CPU
interface save/restore code, add accessors that encode/decode the
entire set of registers exposed by VMCR.

Not the most efficient thing, but given that this code is only used
by the save/restore code, performance is far from being critical.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit beee38b9d0c0ea6cf2a7f35c3108f7d8281d4545)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  9 ++++++
 virt/kvm/arm/vgic.c    | 69 ++++++++++++++++++++++++++++++++----------
 2 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index cdfa5d9567c6..f51580043170 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -81,6 +81,13 @@ struct vgic_lr {
 	u8	state;
 };
 
+struct vgic_vmcr {
+	u32	ctlr;
+	u32	abpr;
+	u32	bpr;
+	u32	pmr;
+};
+
 struct vgic_ops {
 	struct vgic_lr	(*get_lr)(const struct kvm_vcpu *, int);
 	void	(*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
@@ -90,6 +97,8 @@ struct vgic_ops {
 	u32	(*get_interrupt_status)(const struct kvm_vcpu *vcpu);
 	void	(*enable_underflow)(struct kvm_vcpu *vcpu);
 	void	(*disable_underflow)(struct kvm_vcpu *vcpu);
+	void	(*get_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+	void	(*set_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 };
 
 struct vgic_dist {
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 6d618e0b08a1..5c706393956d 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -100,8 +100,10 @@ static void vgic_kick_vcpus(struct kvm *kvm);
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
-static u32 vgic_nr_lr;
+static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 
+static u32 vgic_nr_lr;
 static unsigned int vgic_maint_irq;
 
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
@@ -1073,6 +1075,28 @@ static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu)
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
 }
 
+static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+	u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
+
+	vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT;
+	vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+	vmcrp->bpr  = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT;
+	vmcrp->pmr  = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT;
+}
+
+static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+	u32 vmcr;
+
+	vmcr  = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
+	vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK;
+	vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK;
+	vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
+
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
+}
+
 static const struct vgic_ops vgic_ops = {
 	.get_lr			= vgic_v2_get_lr,
 	.set_lr			= vgic_v2_set_lr,
@@ -1082,6 +1106,8 @@ static const struct vgic_ops vgic_ops = {
 	.get_interrupt_status	= vgic_v2_get_interrupt_status,
 	.enable_underflow	= vgic_v2_enable_underflow,
 	.disable_underflow	= vgic_v2_disable_underflow,
+	.get_vmcr		= vgic_v2_get_vmcr,
+	.set_vmcr		= vgic_v2_set_vmcr,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1126,6 +1152,16 @@ static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
 	vgic_ops.disable_underflow(vcpu);
 }
 
+static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+	vgic_ops.get_vmcr(vcpu, vmcr);
+}
+
+static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+	vgic_ops.set_vmcr(vcpu, vmcr);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1879,39 +1915,40 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
 static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
 				 struct kvm_exit_mmio *mmio, phys_addr_t offset)
 {
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	u32 reg, mask = 0, shift = 0;
 	bool updated = false;
+	struct vgic_vmcr vmcr;
+	u32 *vmcr_field;
+	u32 reg;
+
+	vgic_get_vmcr(vcpu, &vmcr);
 
 	switch (offset & ~0x3) {
 	case GIC_CPU_CTRL:
-		mask = GICH_VMCR_CTRL_MASK;
-		shift = GICH_VMCR_CTRL_SHIFT;
+		vmcr_field = &vmcr.ctlr;
 		break;
 	case GIC_CPU_PRIMASK:
-		mask = GICH_VMCR_PRIMASK_MASK;
-		shift = GICH_VMCR_PRIMASK_SHIFT;
+		vmcr_field = &vmcr.pmr;
 		break;
 	case GIC_CPU_BINPOINT:
-		mask = GICH_VMCR_BINPOINT_MASK;
-		shift = GICH_VMCR_BINPOINT_SHIFT;
+		vmcr_field = &vmcr.bpr;
 		break;
 	case GIC_CPU_ALIAS_BINPOINT:
-		mask = GICH_VMCR_ALIAS_BINPOINT_MASK;
-		shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+		vmcr_field = &vmcr.abpr;
 		break;
+	default:
+		BUG();
 	}
 
 	if (!mmio->is_write) {
-		reg = (vgic_cpu->vgic_v2.vgic_vmcr & mask) >> shift;
+		reg = *vmcr_field;
 		mmio_data_write(mmio, ~0, reg);
 	} else {
 		reg = mmio_data_read(mmio, ~0);
-		reg = (reg << shift) & mask;
-		if (reg != (vgic_cpu->vgic_v2.vgic_vmcr & mask))
+		if (reg != *vmcr_field) {
+			*vmcr_field = reg;
+			vgic_set_vmcr(vcpu, &vmcr);
 			updated = true;
-		vgic_cpu->vgic_v2.vgic_vmcr &= ~mask;
-		vgic_cpu->vgic_v2.vgic_vmcr |= reg;
+		}
 	}
 	return updated;
 }

From 6d6775fa71660fd7c58741881d6bbbfd2be68f3a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 4 Jun 2013 11:36:38 +0100
Subject: [PATCH 0630/1185] KVM: ARM: vgic: introduce vgic_enable

Move the code dealing with enabling the VGIC on to vgic_ops.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit da8dafd1777cdd93091207952297d221a88e6479)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  1 +
 virt/kvm/arm/vgic.c    | 29 +++++++++++++++++++++--------
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f51580043170..2228973ea8e4 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -99,6 +99,7 @@ struct vgic_ops {
 	void	(*disable_underflow)(struct kvm_vcpu *vcpu);
 	void	(*get_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 	void	(*set_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+	void	(*enable)(struct kvm_vcpu *vcpu);
 };
 
 struct vgic_dist {
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 5c706393956d..70f674bb13a1 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1097,6 +1097,19 @@ static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
 }
 
+static void vgic_v2_enable(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * By forcing VMCR to zero, the GIC will restore the binary
+	 * points to their reset values. Anything else resets to zero
+	 * anyway.
+	 */
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
+
+	/* Get the show on the road... */
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
+}
+
 static const struct vgic_ops vgic_ops = {
 	.get_lr			= vgic_v2_get_lr,
 	.set_lr			= vgic_v2_set_lr,
@@ -1108,6 +1121,7 @@ static const struct vgic_ops vgic_ops = {
 	.disable_underflow	= vgic_v2_disable_underflow,
 	.get_vmcr		= vgic_v2_get_vmcr,
 	.set_vmcr		= vgic_v2_set_vmcr,
+	.enable			= vgic_v2_enable,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1162,6 +1176,11 @@ static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
 	vgic_ops.set_vmcr(vcpu, vmcr);
 }
 
+static inline void vgic_enable(struct kvm_vcpu *vcpu)
+{
+	vgic_ops.enable(vcpu);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1624,15 +1643,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 		vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
 	}
 
-	/*
-	 * By forcing VMCR to zero, the GIC will restore the binary
-	 * points to their reset values. Anything else resets to zero
-	 * anyway.
-	 */
-	vgic_cpu->vgic_v2.vgic_vmcr = 0;
-
 	vgic_cpu->nr_lr = vgic_nr_lr;
-	vgic_cpu->vgic_v2.vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */
+
+	vgic_enable(vcpu);
 
 	return 0;
 }

From 6e707c119719a9261ac7305194c0004d11bb594a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 18 Jun 2013 19:17:28 +0100
Subject: [PATCH 0631/1185] KVM: ARM: introduce vgic_params structure

Move all the data specific to a given GIC implementation into its own
little structure.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit ca85f623e37d096206e092ef037a145a60fa7f85)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h | 11 +++++++
 virt/kvm/arm/vgic.c    | 70 ++++++++++++++++++++----------------------
 2 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 2228973ea8e4..ce2e14226dbf 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -102,6 +102,17 @@ struct vgic_ops {
 	void	(*enable)(struct kvm_vcpu *vcpu);
 };
 
+struct vgic_params {
+	/* Physical address of vgic virtual cpu interface */
+	phys_addr_t	vcpu_base;
+	/* Number of list registers */
+	u32		nr_lr;
+	/* Interrupt number */
+	unsigned int	maint_irq;
+	/* Virtual control interface base address */
+	void __iomem	*vctrl_base;
+};
+
 struct vgic_dist {
 #ifdef CONFIG_KVM_ARM_VGIC
 	spinlock_t		lock;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 70f674bb13a1..f3a996d0a100 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -76,14 +76,6 @@
 #define IMPLEMENTER_ARM		0x43b
 #define GICC_ARCH_VERSION_V2	0x2
 
-/* Physical address of vgic virtual cpu interface */
-static phys_addr_t vgic_vcpu_base;
-
-/* Virtual control interface base address */
-static void __iomem *vgic_vctrl_base;
-
-static struct device_node *vgic_node;
-
 #define ACCESS_READ_VALUE	(1 << 0)
 #define ACCESS_READ_RAZ		(0 << 0)
 #define ACCESS_READ_MASK(x)	((x) & (1 << 0))
@@ -103,8 +95,7 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
 static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 
-static u32 vgic_nr_lr;
-static unsigned int vgic_maint_irq;
+static struct vgic_params vgic;
 
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
 				int cpuid, u32 offset)
@@ -1206,7 +1197,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	int lr;
 
-	for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
+	for_each_set_bit(lr, vgic_cpu->lr_used, vgic.nr_lr) {
 		struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
 		if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
@@ -1250,8 +1241,8 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 
 	/* Try to use another LR for this interrupt */
 	lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
-			       vgic_cpu->nr_lr);
-	if (lr >= vgic_cpu->nr_lr)
+			       vgic.nr_lr);
+	if (lr >= vgic.nr_lr)
 		return false;
 
 	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
@@ -1377,7 +1368,6 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 
 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 {
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	u32 status = vgic_get_interrupt_status(vcpu);
 	bool level_pending = false;
 
@@ -1392,7 +1382,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 		unsigned long *eisr_ptr = (unsigned long *)&eisr;
 		int lr;
 
-		for_each_set_bit(lr, eisr_ptr, vgic_cpu->nr_lr) {
+		for_each_set_bit(lr, eisr_ptr, vgic.nr_lr) {
 			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
 			vgic_irq_clear_active(vcpu, vlr.irq);
@@ -1440,7 +1430,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	elrsr_ptr = (unsigned long *)&elrsr;
 
 	/* Clear mappings for empty LRs */
-	for_each_set_bit(lr, elrsr_ptr, vgic_cpu->nr_lr) {
+	for_each_set_bit(lr, elrsr_ptr, vgic.nr_lr) {
 		struct vgic_lr vlr;
 
 		if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
@@ -1453,8 +1443,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	}
 
 	/* Check if we still have something up our sleeve... */
-	pending = find_first_zero_bit(elrsr_ptr, vgic_cpu->nr_lr);
-	if (level_pending || pending < vgic_cpu->nr_lr)
+	pending = find_first_zero_bit(elrsr_ptr, vgic.nr_lr);
+	if (level_pending || pending < vgic.nr_lr)
 		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
 }
 
@@ -1643,7 +1633,12 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 		vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
 	}
 
-	vgic_cpu->nr_lr = vgic_nr_lr;
+	/*
+	 * Store the number of LRs per vcpu, so we don't have to go
+	 * all the way to the distributor structure to find out. Only
+	 * assembly code should use this one.
+	 */
+	vgic_cpu->nr_lr = vgic.nr_lr;
 
 	vgic_enable(vcpu);
 
@@ -1652,7 +1647,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 
 static void vgic_init_maintenance_interrupt(void *info)
 {
-	enable_percpu_irq(vgic_maint_irq, 0);
+	enable_percpu_irq(vgic.maint_irq, 0);
 }
 
 static int vgic_cpu_notify(struct notifier_block *self,
@@ -1665,7 +1660,7 @@ static int vgic_cpu_notify(struct notifier_block *self,
 		break;
 	case CPU_DYING:
 	case CPU_DYING_FROZEN:
-		disable_percpu_irq(vgic_maint_irq);
+		disable_percpu_irq(vgic.maint_irq);
 		break;
 	}
 
@@ -1681,6 +1676,7 @@ int kvm_vgic_hyp_init(void)
 	int ret;
 	struct resource vctrl_res;
 	struct resource vcpu_res;
+	struct device_node *vgic_node;
 
 	vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
 	if (!vgic_node) {
@@ -1688,17 +1684,17 @@ int kvm_vgic_hyp_init(void)
 		return -ENODEV;
 	}
 
-	vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0);
-	if (!vgic_maint_irq) {
+	vgic.maint_irq = irq_of_parse_and_map(vgic_node, 0);
+	if (!vgic.maint_irq) {
 		kvm_err("error getting vgic maintenance irq from DT\n");
 		ret = -ENXIO;
 		goto out;
 	}
 
-	ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler,
+	ret = request_percpu_irq(vgic.maint_irq, vgic_maintenance_handler,
 				 "vgic", kvm_get_running_vcpus());
 	if (ret) {
-		kvm_err("Cannot register interrupt %d\n", vgic_maint_irq);
+		kvm_err("Cannot register interrupt %d\n", vgic.maint_irq);
 		goto out;
 	}
 
@@ -1714,18 +1710,18 @@ int kvm_vgic_hyp_init(void)
 		goto out_free_irq;
 	}
 
-	vgic_vctrl_base = of_iomap(vgic_node, 2);
-	if (!vgic_vctrl_base) {
+	vgic.vctrl_base = of_iomap(vgic_node, 2);
+	if (!vgic.vctrl_base) {
 		kvm_err("Cannot ioremap VCTRL\n");
 		ret = -ENOMEM;
 		goto out_free_irq;
 	}
 
-	vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR);
-	vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1;
+	vgic.nr_lr = readl_relaxed(vgic.vctrl_base + GICH_VTR);
+	vgic.nr_lr = (vgic.nr_lr & 0x3f) + 1;
 
-	ret = create_hyp_io_mappings(vgic_vctrl_base,
-				     vgic_vctrl_base + resource_size(&vctrl_res),
+	ret = create_hyp_io_mappings(vgic.vctrl_base,
+				     vgic.vctrl_base + resource_size(&vctrl_res),
 				     vctrl_res.start);
 	if (ret) {
 		kvm_err("Cannot map VCTRL into hyp\n");
@@ -1733,7 +1729,7 @@ int kvm_vgic_hyp_init(void)
 	}
 
 	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
-		 vctrl_res.start, vgic_maint_irq);
+		 vctrl_res.start, vgic.maint_irq);
 	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
 
 	if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
@@ -1741,14 +1737,14 @@ int kvm_vgic_hyp_init(void)
 		ret = -ENXIO;
 		goto out_unmap;
 	}
-	vgic_vcpu_base = vcpu_res.start;
+	vgic.vcpu_base = vcpu_res.start;
 
 	goto out;
 
 out_unmap:
-	iounmap(vgic_vctrl_base);
+	iounmap(vgic.vctrl_base);
 out_free_irq:
-	free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus());
+	free_percpu_irq(vgic.maint_irq, kvm_get_running_vcpus());
 out:
 	of_node_put(vgic_node);
 	return ret;
@@ -1783,7 +1779,7 @@ int kvm_vgic_init(struct kvm *kvm)
 	}
 
 	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
-				    vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE);
+				    vgic.vcpu_base, KVM_VGIC_V2_CPU_SIZE);
 	if (ret) {
 		kvm_err("Unable to remap VGIC CPU to VCPU\n");
 		goto out;
@@ -1829,7 +1825,7 @@ int kvm_vgic_create(struct kvm *kvm)
 	}
 
 	spin_lock_init(&kvm->arch.vgic.lock);
-	kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
+	kvm->arch.vgic.vctrl_base = vgic.vctrl_base;
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
 

From ab8bb40339927ab4e5875330461a9a9ffaf42a56 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 4 Feb 2014 18:13:03 +0000
Subject: [PATCH 0632/1185] KVM: ARM: vgic: split GICv2 backend from the main
 vgic code

Brutally hack the innocent vgic code, and move the GICv2 specific code
to its own file, using vgic_ops and vgic_params as a way to pass
information between the two blocks.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 8f186d522c69bb18dd9b93a634da4953228c67d4)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/Makefile   |   1 +
 arch/arm64/kvm/Makefile |   2 +-
 include/kvm/arm_vgic.h  |  11 +-
 virt/kvm/arm/vgic-v2.c  | 248 +++++++++++++++++++++++++++++++++++++
 virt/kvm/arm/vgic.c     | 267 +++++++---------------------------------
 5 files changed, 304 insertions(+), 225 deletions(-)
 create mode 100644 virt/kvm/arm/vgic-v2.c

diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 789bca9e64a7..f7057ed045b6 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -21,4 +21,5 @@ obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
 obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 72a9fd583ad3..7e92952d139e 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -19,5 +19,5 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o $(KVM)/arm/vgic-v2.o
 kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index ce2e14226dbf..d8d52a9ca6a1 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -32,7 +32,8 @@
 #define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
 #define VGIC_NR_SHARED_IRQS	(VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
 #define VGIC_MAX_CPUS		KVM_MAX_VCPUS
-#define VGIC_MAX_LRS		(1 << 6)
+
+#define VGIC_V2_MAX_LRS		(1 << 6)
 
 /* Sanity checks... */
 #if (VGIC_MAX_CPUS > 8)
@@ -162,7 +163,7 @@ struct vgic_v2_cpu_if {
 	u32		vgic_eisr[2];	/* Saved only */
 	u32		vgic_elrsr[2];	/* Saved only */
 	u32		vgic_apr;
-	u32		vgic_lr[VGIC_MAX_LRS];
+	u32		vgic_lr[VGIC_V2_MAX_LRS];
 };
 
 struct vgic_cpu {
@@ -175,7 +176,7 @@ struct vgic_cpu {
 	DECLARE_BITMAP(	pending_shared, VGIC_NR_SHARED_IRQS);
 
 	/* Bitmap of used/free list registers */
-	DECLARE_BITMAP(	lr_used, VGIC_MAX_LRS);
+	DECLARE_BITMAP(	lr_used, VGIC_V2_MAX_LRS);
 
 	/* Number of list registers on this CPU */
 	int		nr_lr;
@@ -214,6 +215,10 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
 #define vgic_initialized(k)	((k)->arch.vgic.ready)
 
+int vgic_v2_probe(struct device_node *vgic_node,
+		  const struct vgic_ops **ops,
+		  const struct vgic_params **params);
+
 #else
 static inline int kvm_vgic_hyp_init(void)
 {
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
new file mode 100644
index 000000000000..940418ebd0d0
--- /dev/null
+++ b/virt/kvm/arm/vgic-v2.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2012,2013 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+	struct vgic_lr lr_desc;
+	u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr];
+
+	lr_desc.irq	= val & GICH_LR_VIRTUALID;
+	if (lr_desc.irq <= 15)
+		lr_desc.source	= (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
+	else
+		lr_desc.source = 0;
+	lr_desc.state	= 0;
+
+	if (val & GICH_LR_PENDING_BIT)
+		lr_desc.state |= LR_STATE_PENDING;
+	if (val & GICH_LR_ACTIVE_BIT)
+		lr_desc.state |= LR_STATE_ACTIVE;
+	if (val & GICH_LR_EOI)
+		lr_desc.state |= LR_EOI_INT;
+
+	return lr_desc;
+}
+
+static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
+			   struct vgic_lr lr_desc)
+{
+	u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;
+
+	if (lr_desc.state & LR_STATE_PENDING)
+		lr_val |= GICH_LR_PENDING_BIT;
+	if (lr_desc.state & LR_STATE_ACTIVE)
+		lr_val |= GICH_LR_ACTIVE_BIT;
+	if (lr_desc.state & LR_EOI_INT)
+		lr_val |= GICH_LR_EOI;
+
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
+}
+
+static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+				  struct vgic_lr lr_desc)
+{
+	if (!(lr_desc.state & LR_STATE_MASK))
+		set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
+}
+
+static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
+{
+	u64 val;
+
+#if BITS_PER_LONG == 64
+	val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1];
+	val <<= 32;
+	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0];
+#else
+	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
+#endif
+	return val;
+}
+
+static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
+{
+	u64 val;
+
+#if BITS_PER_LONG == 64
+	val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1];
+	val <<= 32;
+	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0];
+#else
+	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
+#endif
+	return val;
+}
+
+static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
+{
+	u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
+	u32 ret = 0;
+
+	if (misr & GICH_MISR_EOI)
+		ret |= INT_STATUS_EOI;
+	if (misr & GICH_MISR_U)
+		ret |= INT_STATUS_UNDERFLOW;
+
+	return ret;
+}
+
+static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE;
+}
+
+static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
+}
+
+static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+	u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
+
+	vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT;
+	vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+	vmcrp->bpr  = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT;
+	vmcrp->pmr  = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT;
+}
+
+static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+	u32 vmcr;
+
+	vmcr  = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
+	vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK;
+	vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK;
+	vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
+
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
+}
+
+static void vgic_v2_enable(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * By forcing VMCR to zero, the GIC will restore the binary
+	 * points to their reset values. Anything else resets to zero
+	 * anyway.
+	 */
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
+
+	/* Get the show on the road... */
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
+}
+
+static const struct vgic_ops vgic_v2_ops = {
+	.get_lr			= vgic_v2_get_lr,
+	.set_lr			= vgic_v2_set_lr,
+	.sync_lr_elrsr		= vgic_v2_sync_lr_elrsr,
+	.get_elrsr		= vgic_v2_get_elrsr,
+	.get_eisr		= vgic_v2_get_eisr,
+	.get_interrupt_status	= vgic_v2_get_interrupt_status,
+	.enable_underflow	= vgic_v2_enable_underflow,
+	.disable_underflow	= vgic_v2_disable_underflow,
+	.get_vmcr		= vgic_v2_get_vmcr,
+	.set_vmcr		= vgic_v2_set_vmcr,
+	.enable			= vgic_v2_enable,
+};
+
+static struct vgic_params vgic_v2_params;
+
+/**
+ * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
+ * @vgic_node:	pointer to the DT node
+ * @ops: 	address of a pointer to the GICv2 operations
+ * @params:	address of a pointer to HW-specific parameters
+ *
+ * Returns 0 if a GICv2 has been found, with the low level operations
+ * in *ops and the HW parameters in *params. Returns an error code
+ * otherwise.
+ */
+int vgic_v2_probe(struct device_node *vgic_node,
+		  const struct vgic_ops **ops,
+		  const struct vgic_params **params)
+{
+	int ret;
+	struct resource vctrl_res;
+	struct resource vcpu_res;
+	struct vgic_params *vgic = &vgic_v2_params;
+
+	vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
+	if (!vgic->maint_irq) {
+		kvm_err("error getting vgic maintenance irq from DT\n");
+		ret = -ENXIO;
+		goto out;
+	}
+
+	ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
+	if (ret) {
+		kvm_err("Cannot obtain GICH resource\n");
+		goto out;
+	}
+
+	vgic->vctrl_base = of_iomap(vgic_node, 2);
+	if (!vgic->vctrl_base) {
+		kvm_err("Cannot ioremap GICH\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR);
+	vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
+
+	ret = create_hyp_io_mappings(vgic->vctrl_base,
+				     vgic->vctrl_base + resource_size(&vctrl_res),
+				     vctrl_res.start);
+	if (ret) {
+		kvm_err("Cannot map VCTRL into hyp\n");
+		goto out_unmap;
+	}
+
+	if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
+		kvm_err("Cannot obtain GICV resource\n");
+		ret = -ENXIO;
+		goto out_unmap;
+	}
+	vgic->vcpu_base = vcpu_res.start;
+
+	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
+		 vctrl_res.start, vgic->maint_irq);
+
+	*ops = &vgic_v2_ops;
+	*params = vgic;
+	goto out;
+
+out_unmap:
+	iounmap(vgic->vctrl_base);
+out:
+	of_node_put(vgic_node);
+	return ret;
+}
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index f3a996d0a100..e4b9cbbbee4c 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -95,7 +95,8 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
 static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 
-static struct vgic_params vgic;
+static const struct vgic_ops *vgic_ops;
+static const struct vgic_params *vgic;
 
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
 				int cpuid, u32 offset)
@@ -971,205 +972,61 @@ static void vgic_update_state(struct kvm *kvm)
 	}
 }
 
-static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
-{
-	struct vgic_lr lr_desc;
-	u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr];
-
-	lr_desc.irq	= val & GICH_LR_VIRTUALID;
-	if (lr_desc.irq <= 15)
-		lr_desc.source	= (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
-	else
-		lr_desc.source = 0;
-	lr_desc.state	= 0;
-
-	if (val & GICH_LR_PENDING_BIT)
-		lr_desc.state |= LR_STATE_PENDING;
-	if (val & GICH_LR_ACTIVE_BIT)
-		lr_desc.state |= LR_STATE_ACTIVE;
-	if (val & GICH_LR_EOI)
-		lr_desc.state |= LR_EOI_INT;
-
-	return lr_desc;
-}
-
-static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
-			   struct vgic_lr lr_desc)
-{
-	u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;
-
-	if (lr_desc.state & LR_STATE_PENDING)
-		lr_val |= GICH_LR_PENDING_BIT;
-	if (lr_desc.state & LR_STATE_ACTIVE)
-		lr_val |= GICH_LR_ACTIVE_BIT;
-	if (lr_desc.state & LR_EOI_INT)
-		lr_val |= GICH_LR_EOI;
-
-	vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
-}
-
-static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
-				  struct vgic_lr lr_desc)
-{
-	if (!(lr_desc.state & LR_STATE_MASK))
-		set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
-}
-
-static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
-{
-	u64 val;
-
-#if BITS_PER_LONG == 64
-	val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1];
-	val <<= 32;
-	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0];
-#else
-	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
-#endif
-	return val;
-}
-
-static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
-{
-	u64 val;
-
-#if BITS_PER_LONG == 64
-	val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1];
-	val <<= 32;
-	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0];
-#else
-	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
-#endif
-	return val;
-}
-
-static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
-{
-	u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
-	u32 ret = 0;
-
-	if (misr & GICH_MISR_EOI)
-		ret |= INT_STATUS_EOI;
-	if (misr & GICH_MISR_U)
-		ret |= INT_STATUS_UNDERFLOW;
-
-	return ret;
-}
-
-static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE;
-}
-
-static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
-}
-
-static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
-{
-	u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
-
-	vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT;
-	vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT;
-	vmcrp->bpr  = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT;
-	vmcrp->pmr  = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT;
-}
-
-static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
-{
-	u32 vmcr;
-
-	vmcr  = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
-	vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK;
-	vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK;
-	vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
-
-	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
-}
-
-static void vgic_v2_enable(struct kvm_vcpu *vcpu)
-{
-	/*
-	 * By forcing VMCR to zero, the GIC will restore the binary
-	 * points to their reset values. Anything else resets to zero
-	 * anyway.
-	 */
-	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
-
-	/* Get the show on the road... */
-	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
-}
-
-static const struct vgic_ops vgic_ops = {
-	.get_lr			= vgic_v2_get_lr,
-	.set_lr			= vgic_v2_set_lr,
-	.sync_lr_elrsr		= vgic_v2_sync_lr_elrsr,
-	.get_elrsr		= vgic_v2_get_elrsr,
-	.get_eisr		= vgic_v2_get_eisr,
-	.get_interrupt_status	= vgic_v2_get_interrupt_status,
-	.enable_underflow	= vgic_v2_enable_underflow,
-	.disable_underflow	= vgic_v2_disable_underflow,
-	.get_vmcr		= vgic_v2_get_vmcr,
-	.set_vmcr		= vgic_v2_set_vmcr,
-	.enable			= vgic_v2_enable,
-};
-
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
 {
-	return vgic_ops.get_lr(vcpu, lr);
+	return vgic_ops->get_lr(vcpu, lr);
 }
 
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
 			       struct vgic_lr vlr)
 {
-	vgic_ops.set_lr(vcpu, lr, vlr);
+	vgic_ops->set_lr(vcpu, lr, vlr);
 }
 
 static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
 			       struct vgic_lr vlr)
 {
-	vgic_ops.sync_lr_elrsr(vcpu, lr, vlr);
+	vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
 }
 
 static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
 {
-	return vgic_ops.get_elrsr(vcpu);
+	return vgic_ops->get_elrsr(vcpu);
 }
 
 static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
 {
-	return vgic_ops.get_eisr(vcpu);
+	return vgic_ops->get_eisr(vcpu);
 }
 
 static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
 {
-	return vgic_ops.get_interrupt_status(vcpu);
+	return vgic_ops->get_interrupt_status(vcpu);
 }
 
 static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu)
 {
-	vgic_ops.enable_underflow(vcpu);
+	vgic_ops->enable_underflow(vcpu);
 }
 
 static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
 {
-	vgic_ops.disable_underflow(vcpu);
+	vgic_ops->disable_underflow(vcpu);
 }
 
 static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
 {
-	vgic_ops.get_vmcr(vcpu, vmcr);
+	vgic_ops->get_vmcr(vcpu, vmcr);
 }
 
 static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
 {
-	vgic_ops.set_vmcr(vcpu, vmcr);
+	vgic_ops->set_vmcr(vcpu, vmcr);
 }
 
 static inline void vgic_enable(struct kvm_vcpu *vcpu)
 {
-	vgic_ops.enable(vcpu);
+	vgic_ops->enable(vcpu);
 }
 
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
@@ -1197,7 +1054,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	int lr;
 
-	for_each_set_bit(lr, vgic_cpu->lr_used, vgic.nr_lr) {
+	for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) {
 		struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
 		if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
@@ -1241,8 +1098,8 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 
 	/* Try to use another LR for this interrupt */
 	lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
-			       vgic.nr_lr);
-	if (lr >= vgic.nr_lr)
+			       vgic->nr_lr);
+	if (lr >= vgic->nr_lr)
 		return false;
 
 	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
@@ -1382,7 +1239,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 		unsigned long *eisr_ptr = (unsigned long *)&eisr;
 		int lr;
 
-		for_each_set_bit(lr, eisr_ptr, vgic.nr_lr) {
+		for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
 			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
 			vgic_irq_clear_active(vcpu, vlr.irq);
@@ -1430,7 +1287,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	elrsr_ptr = (unsigned long *)&elrsr;
 
 	/* Clear mappings for empty LRs */
-	for_each_set_bit(lr, elrsr_ptr, vgic.nr_lr) {
+	for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) {
 		struct vgic_lr vlr;
 
 		if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
@@ -1443,8 +1300,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	}
 
 	/* Check if we still have something up our sleeve... */
-	pending = find_first_zero_bit(elrsr_ptr, vgic.nr_lr);
-	if (level_pending || pending < vgic.nr_lr)
+	pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
+	if (level_pending || pending < vgic->nr_lr)
 		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
 }
 
@@ -1638,7 +1495,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	 * all the way to the distributor structure to find out. Only
 	 * assembly code should use this one.
 	 */
-	vgic_cpu->nr_lr = vgic.nr_lr;
+	vgic_cpu->nr_lr = vgic->nr_lr;
 
 	vgic_enable(vcpu);
 
@@ -1647,7 +1504,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 
 static void vgic_init_maintenance_interrupt(void *info)
 {
-	enable_percpu_irq(vgic.maint_irq, 0);
+	enable_percpu_irq(vgic->maint_irq, 0);
 }
 
 static int vgic_cpu_notify(struct notifier_block *self,
@@ -1660,7 +1517,7 @@ static int vgic_cpu_notify(struct notifier_block *self,
 		break;
 	case CPU_DYING:
 	case CPU_DYING_FROZEN:
-		disable_percpu_irq(vgic.maint_irq);
+		disable_percpu_irq(vgic->maint_irq);
 		break;
 	}
 
@@ -1671,31 +1528,36 @@ static struct notifier_block vgic_cpu_nb = {
 	.notifier_call = vgic_cpu_notify,
 };
 
+static const struct of_device_id vgic_ids[] = {
+	{ .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
+	{},
+};
+
 int kvm_vgic_hyp_init(void)
 {
-	int ret;
-	struct resource vctrl_res;
-	struct resource vcpu_res;
+	const struct of_device_id *matched_id;
+	int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
+			  const struct vgic_params **);
 	struct device_node *vgic_node;
+	int ret;
 
-	vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
+	vgic_node = of_find_matching_node_and_match(NULL,
+						    vgic_ids, &matched_id);
 	if (!vgic_node) {
-		kvm_err("error: no compatible vgic node in DT\n");
+		kvm_err("error: no compatible GIC node found\n");
 		return -ENODEV;
 	}
 
-	vgic.maint_irq = irq_of_parse_and_map(vgic_node, 0);
-	if (!vgic.maint_irq) {
-		kvm_err("error getting vgic maintenance irq from DT\n");
-		ret = -ENXIO;
-		goto out;
-	}
+	vgic_probe = matched_id->data;
+	ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
+	if (ret)
+		return ret;
 
-	ret = request_percpu_irq(vgic.maint_irq, vgic_maintenance_handler,
+	ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
 				 "vgic", kvm_get_running_vcpus());
 	if (ret) {
-		kvm_err("Cannot register interrupt %d\n", vgic.maint_irq);
-		goto out;
+		kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
+		return ret;
 	}
 
 	ret = __register_cpu_notifier(&vgic_cpu_nb);
@@ -1704,49 +1566,12 @@ int kvm_vgic_hyp_init(void)
 		goto out_free_irq;
 	}
 
-	ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
-	if (ret) {
-		kvm_err("Cannot obtain VCTRL resource\n");
-		goto out_free_irq;
-	}
-
-	vgic.vctrl_base = of_iomap(vgic_node, 2);
-	if (!vgic.vctrl_base) {
-		kvm_err("Cannot ioremap VCTRL\n");
-		ret = -ENOMEM;
-		goto out_free_irq;
-	}
-
-	vgic.nr_lr = readl_relaxed(vgic.vctrl_base + GICH_VTR);
-	vgic.nr_lr = (vgic.nr_lr & 0x3f) + 1;
-
-	ret = create_hyp_io_mappings(vgic.vctrl_base,
-				     vgic.vctrl_base + resource_size(&vctrl_res),
-				     vctrl_res.start);
-	if (ret) {
-		kvm_err("Cannot map VCTRL into hyp\n");
-		goto out_unmap;
-	}
-
-	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
-		 vctrl_res.start, vgic.maint_irq);
 	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
 
-	if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
-		kvm_err("Cannot obtain VCPU resource\n");
-		ret = -ENXIO;
-		goto out_unmap;
-	}
-	vgic.vcpu_base = vcpu_res.start;
+	return 0;
 
-	goto out;
-
-out_unmap:
-	iounmap(vgic.vctrl_base);
 out_free_irq:
-	free_percpu_irq(vgic.maint_irq, kvm_get_running_vcpus());
-out:
-	of_node_put(vgic_node);
+	free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
 	return ret;
 }
 
@@ -1779,7 +1604,7 @@ int kvm_vgic_init(struct kvm *kvm)
 	}
 
 	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
-				    vgic.vcpu_base, KVM_VGIC_V2_CPU_SIZE);
+				    vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE);
 	if (ret) {
 		kvm_err("Unable to remap VGIC CPU to VCPU\n");
 		goto out;
@@ -1825,7 +1650,7 @@ int kvm_vgic_create(struct kvm *kvm)
 	}
 
 	spin_lock_init(&kvm->arch.vgic.lock);
-	kvm->arch.vgic.vctrl_base = vgic.vctrl_base;
+	kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
 

From 79f17b6074e34191952a9249b061012a23f3888a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 15 May 2014 10:03:25 +0100
Subject: [PATCH 0633/1185] KVM: ARM: vgic: revisit implementation of
 irqchip_in_kernel

So far, irqchip_in_kernel() was implemented by testing the value of
vctrl_base, which worked fine with GICv2.

With GICv3, this field is useless, as we're using system registers
instead of a memory mapped interface. To solve this, add a boolean
flag indicating if we're using a vgic or not.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit f982cf4e9c37b19478c7bc6e0484a43a7e78cf57)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h | 3 ++-
 virt/kvm/arm/vgic.c    | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index d8d52a9ca6a1..f6b9fec6fcac 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -117,6 +117,7 @@ struct vgic_params {
 struct vgic_dist {
 #ifdef CONFIG_KVM_ARM_VGIC
 	spinlock_t		lock;
+	bool			in_kernel;
 	bool			ready;
 
 	/* Virtual control interface mapping */
@@ -212,7 +213,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		      struct kvm_exit_mmio *mmio);
 
-#define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
+#define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
 #define vgic_initialized(k)	((k)->arch.vgic.ready)
 
 int vgic_v2_probe(struct device_node *vgic_node,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index e4b9cbbbee4c..1348e74c4d11 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1650,6 +1650,7 @@ int kvm_vgic_create(struct kvm *kvm)
 	}
 
 	spin_lock_init(&kvm->arch.vgic.lock);
+	kvm->arch.vgic.in_kernel = true;
 	kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;

From cab0beb93ff52f5a47cf9a1a03c9c73d473d28d6 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 26 Jun 2013 15:16:40 +0100
Subject: [PATCH 0634/1185] arm64: KVM: remove __kvm_hyp_code_{start,end} from
 hyp.S

We already have __hyp_text_{start,end} to express the boundaries
of the HYP text section, and __kvm_hyp_code_{start,end} are getting
in the way of a more modular world switch code.

Just turn __kvm_hyp_code_{start,end} into #defines mapping to the
linker-emitted symbols.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 45451914c875bba44903ce4f1445e047b7992bf7)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h | 6 ++++--
 arch/arm64/include/asm/virt.h    | 4 ++++
 arch/arm64/kvm/hyp.S             | 6 ------
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 9fcd54b1e16d..d0bfc4ba82c0 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -18,6 +18,8 @@
 #ifndef __ARM_KVM_ASM_H__
 #define __ARM_KVM_ASM_H__
 
+#include <asm/virt.h>
+
 /*
  * 0 is reserved as an invalid value.
  * Order *must* be kept in sync with the hyp switch code.
@@ -96,8 +98,8 @@ extern char __kvm_hyp_init_end[];
 
 extern char __kvm_hyp_vector[];
 
-extern char __kvm_hyp_code_start[];
-extern char __kvm_hyp_code_end[];
+#define	__kvm_hyp_code_start	__hyp_text_start
+#define	__kvm_hyp_code_end	__hyp_text_end
 
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 215ad4649dd7..7a5df5252dd7 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -50,6 +50,10 @@ static inline bool is_hyp_mode_mismatched(void)
 	return __boot_cpu_mode[0] != __boot_cpu_mode[1];
 }
 
+/* The section containing the hypervisor text */
+extern char __hyp_text_start[];
+extern char __hyp_text_end[];
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* ! __ASM__VIRT_H */
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 877d82a134bc..9c5d0acb3654 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -36,9 +36,6 @@
 	.pushsection	.hyp.text, "ax"
 	.align	PAGE_SHIFT
 
-__kvm_hyp_code_start:
-	.globl __kvm_hyp_code_start
-
 .macro save_common_regs
 	// x2: base address for cpu context
 	// x3: tmp register
@@ -880,7 +877,4 @@ ENTRY(__kvm_hyp_vector)
 	ventry	el1_error_invalid		// Error 32-bit EL1
 ENDPROC(__kvm_hyp_vector)
 
-__kvm_hyp_code_end:
-	.globl	__kvm_hyp_code_end
-
 	.popsection

From fd17281069b65b1672718613d8c89cc9a409bfcd Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 21 Jun 2013 11:57:56 +0100
Subject: [PATCH 0635/1185] arm64: KVM: split GICv2 world switch from hyp code

Move the GICv2 world switch code into its own file, and add the
necessary indirection to the arm64 switch code.

Also introduce a new type field to the vgic_params structure.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 1a9b13056dde7e3092304d6041ccc60a913042ea)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h   |   5 ++
 arch/arm64/include/asm/kvm_asm.h  |   4 +
 arch/arm64/include/asm/kvm_host.h |  21 +++++
 arch/arm64/kernel/asm-offsets.c   |   3 +
 arch/arm64/kvm/Makefile           |   4 +-
 arch/arm64/kvm/hyp.S              | 104 ++++-------------------
 arch/arm64/kvm/vgic-v2-switch.S   | 133 ++++++++++++++++++++++++++++++
 include/kvm/arm_vgic.h            |   7 +-
 virt/kvm/arm/vgic-v2.c            |   1 +
 virt/kvm/arm/vgic.c               |   3 +
 10 files changed, 195 insertions(+), 90 deletions(-)
 create mode 100644 arch/arm64/kvm/vgic-v2-switch.S

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index dc4e3edf39cc..6dfb404f6c46 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -225,6 +225,11 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext)
 	return 0;
 }
 
+static inline void vgic_arch_setup(const struct vgic_params *vgic)
+{
+	BUG_ON(vgic->type != VGIC_V2);
+}
+
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index d0bfc4ba82c0..6252264341c8 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -105,6 +105,10 @@ extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+extern char __save_vgic_v2_state[];
+extern char __restore_vgic_v2_state[];
+
 #endif
 
 #endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 92242ce06309..4c182d0aae70 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -200,4 +200,25 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 		     hyp_stack_ptr, vector_ptr);
 }
 
+struct vgic_sr_vectors {
+	void	*save_vgic;
+	void	*restore_vgic;
+};
+
+static inline void vgic_arch_setup(const struct vgic_params *vgic)
+{
+	extern struct vgic_sr_vectors __vgic_sr_vectors;
+
+	switch(vgic->type)
+	{
+	case VGIC_V2:
+		__vgic_sr_vectors.save_vgic	= __save_vgic_v2_state;
+		__vgic_sr_vectors.restore_vgic	= __restore_vgic_v2_state;
+		break;
+
+	default:
+		BUG();
+	}
+}
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index dcfd8a616a94..9ff0b2b97392 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -127,6 +127,9 @@ int main(void)
   DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
   DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
   DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
+  DEFINE(VGIC_SAVE_FN,		offsetof(struct vgic_sr_vectors, save_vgic));
+  DEFINE(VGIC_RESTORE_FN,	offsetof(struct vgic_sr_vectors, restore_vgic));
+  DEFINE(VGIC_SR_VECTOR_SZ,	sizeof(struct vgic_sr_vectors));
   DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
   DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
   DEFINE(VGIC_V2_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 7e92952d139e..daf24dc59e2c 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -19,5 +19,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
 kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 9c5d0acb3654..56df9a352a81 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -16,7 +16,6 @@
  */
 
 #include <linux/linkage.h>
-#include <linux/irqchip/arm-gic.h>
 
 #include <asm/assembler.h>
 #include <asm/memory.h>
@@ -376,100 +375,23 @@
 .endm
 
 /*
- * Save the VGIC CPU state into memory
- * x0: Register pointing to VCPU struct
- * Do not corrupt x1!!!
+ * Call into the vgic backend for state saving
  */
 .macro save_vgic_state
-	/* Get VGIC VCTRL base into x2 */
-	ldr	x2, [x0, #VCPU_KVM]
-	kern_hyp_va	x2
-	ldr	x2, [x2, #KVM_VGIC_VCTRL]
-	kern_hyp_va	x2
-	cbz	x2, 2f		// disabled
-
-	/* Compute the address of struct vgic_cpu */
-	add	x3, x0, #VCPU_VGIC_CPU
-
-	/* Save all interesting registers */
-	ldr	w4, [x2, #GICH_HCR]
-	ldr	w5, [x2, #GICH_VMCR]
-	ldr	w6, [x2, #GICH_MISR]
-	ldr	w7, [x2, #GICH_EISR0]
-	ldr	w8, [x2, #GICH_EISR1]
-	ldr	w9, [x2, #GICH_ELRSR0]
-	ldr	w10, [x2, #GICH_ELRSR1]
-	ldr	w11, [x2, #GICH_APR]
-CPU_BE(	rev	w4,  w4  )
-CPU_BE(	rev	w5,  w5  )
-CPU_BE(	rev	w6,  w6  )
-CPU_BE(	rev	w7,  w7  )
-CPU_BE(	rev	w8,  w8  )
-CPU_BE(	rev	w9,  w9  )
-CPU_BE(	rev	w10, w10 )
-CPU_BE(	rev	w11, w11 )
-
-	str	w4, [x3, #VGIC_V2_CPU_HCR]
-	str	w5, [x3, #VGIC_V2_CPU_VMCR]
-	str	w6, [x3, #VGIC_V2_CPU_MISR]
-	str	w7, [x3, #VGIC_V2_CPU_EISR]
-	str	w8, [x3, #(VGIC_V2_CPU_EISR + 4)]
-	str	w9, [x3, #VGIC_V2_CPU_ELRSR]
-	str	w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)]
-	str	w11, [x3, #VGIC_V2_CPU_APR]
-
-	/* Clear GICH_HCR */
-	str	wzr, [x2, #GICH_HCR]
-
-	/* Save list registers */
-	add	x2, x2, #GICH_LR0
-	ldr	w4, [x3, #VGIC_CPU_NR_LR]
-	add	x3, x3, #VGIC_V2_CPU_LR
-1:	ldr	w5, [x2], #4
-CPU_BE(	rev	w5, w5 )
-	str	w5, [x3], #4
-	sub	w4, w4, #1
-	cbnz	w4, 1b
-2:
+	adr	x24, __vgic_sr_vectors
+	ldr	x24, [x24, VGIC_SAVE_FN]
+	kern_hyp_va	x24
+	blr	x24
 .endm
 
 /*
- * Restore the VGIC CPU state from memory
- * x0: Register pointing to VCPU struct
+ * Call into the vgic backend for state restoring
  */
 .macro restore_vgic_state
-	/* Get VGIC VCTRL base into x2 */
-	ldr	x2, [x0, #VCPU_KVM]
-	kern_hyp_va	x2
-	ldr	x2, [x2, #KVM_VGIC_VCTRL]
-	kern_hyp_va	x2
-	cbz	x2, 2f		// disabled
-
-	/* Compute the address of struct vgic_cpu */
-	add	x3, x0, #VCPU_VGIC_CPU
-
-	/* We only restore a minimal set of registers */
-	ldr	w4, [x3, #VGIC_V2_CPU_HCR]
-	ldr	w5, [x3, #VGIC_V2_CPU_VMCR]
-	ldr	w6, [x3, #VGIC_V2_CPU_APR]
-CPU_BE(	rev	w4, w4 )
-CPU_BE(	rev	w5, w5 )
-CPU_BE(	rev	w6, w6 )
-
-	str	w4, [x2, #GICH_HCR]
-	str	w5, [x2, #GICH_VMCR]
-	str	w6, [x2, #GICH_APR]
-
-	/* Restore list registers */
-	add	x2, x2, #GICH_LR0
-	ldr	w4, [x3, #VGIC_CPU_NR_LR]
-	add	x3, x3, #VGIC_V2_CPU_LR
-1:	ldr	w5, [x3], #4
-CPU_BE(	rev	w5, w5 )
-	str	w5, [x2], #4
-	sub	w4, w4, #1
-	cbnz	w4, 1b
-2:
+	adr	x24, __vgic_sr_vectors
+	ldr	x24, [x24, #VGIC_RESTORE_FN]
+	kern_hyp_va	x24
+	blr	x24
 .endm
 
 .macro save_timer_state
@@ -650,6 +572,12 @@ ENTRY(__kvm_flush_vm_context)
 	ret
 ENDPROC(__kvm_flush_vm_context)
 
+	// struct vgic_sr_vectors __vgic_sr_vectors;
+	.align 3
+ENTRY(__vgic_sr_vectors)
+	.skip	VGIC_SR_VECTOR_SZ
+ENDPROC(__vgic_sr_vectors)
+
 __kvm_hyp_panic:
 	// Guess the context by looking at VTTBR:
 	// If zero, then we're already a host.
diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S
new file mode 100644
index 000000000000..ae211772f991
--- /dev/null
+++ b/arch/arm64/kvm/vgic-v2-switch.S
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+	.text
+	.pushsection	.hyp.text, "ax"
+
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+ENTRY(__save_vgic_v2_state)
+__save_vgic_v2_state:
+	/* Get VGIC VCTRL base into x2 */
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va	x2
+	ldr	x2, [x2, #KVM_VGIC_VCTRL]
+	kern_hyp_va	x2
+	cbz	x2, 2f		// disabled
+
+	/* Compute the address of struct vgic_cpu */
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	/* Save all interesting registers */
+	ldr	w4, [x2, #GICH_HCR]
+	ldr	w5, [x2, #GICH_VMCR]
+	ldr	w6, [x2, #GICH_MISR]
+	ldr	w7, [x2, #GICH_EISR0]
+	ldr	w8, [x2, #GICH_EISR1]
+	ldr	w9, [x2, #GICH_ELRSR0]
+	ldr	w10, [x2, #GICH_ELRSR1]
+	ldr	w11, [x2, #GICH_APR]
+CPU_BE(	rev	w4,  w4  )
+CPU_BE(	rev	w5,  w5  )
+CPU_BE(	rev	w6,  w6  )
+CPU_BE(	rev	w7,  w7  )
+CPU_BE(	rev	w8,  w8  )
+CPU_BE(	rev	w9,  w9  )
+CPU_BE(	rev	w10, w10 )
+CPU_BE(	rev	w11, w11 )
+
+	str	w4, [x3, #VGIC_V2_CPU_HCR]
+	str	w5, [x3, #VGIC_V2_CPU_VMCR]
+	str	w6, [x3, #VGIC_V2_CPU_MISR]
+	str	w7, [x3, #VGIC_V2_CPU_EISR]
+	str	w8, [x3, #(VGIC_V2_CPU_EISR + 4)]
+	str	w9, [x3, #VGIC_V2_CPU_ELRSR]
+	str	w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)]
+	str	w11, [x3, #VGIC_V2_CPU_APR]
+
+	/* Clear GICH_HCR */
+	str	wzr, [x2, #GICH_HCR]
+
+	/* Save list registers */
+	add	x2, x2, #GICH_LR0
+	ldr	w4, [x3, #VGIC_CPU_NR_LR]
+	add	x3, x3, #VGIC_V2_CPU_LR
+1:	ldr	w5, [x2], #4
+CPU_BE(	rev	w5, w5 )
+	str	w5, [x3], #4
+	sub	w4, w4, #1
+	cbnz	w4, 1b
+2:
+	ret
+ENDPROC(__save_vgic_v2_state)
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ */
+ENTRY(__restore_vgic_v2_state)
+__restore_vgic_v2_state:
+	/* Get VGIC VCTRL base into x2 */
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va	x2
+	ldr	x2, [x2, #KVM_VGIC_VCTRL]
+	kern_hyp_va	x2
+	cbz	x2, 2f		// disabled
+
+	/* Compute the address of struct vgic_cpu */
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	/* We only restore a minimal set of registers */
+	ldr	w4, [x3, #VGIC_V2_CPU_HCR]
+	ldr	w5, [x3, #VGIC_V2_CPU_VMCR]
+	ldr	w6, [x3, #VGIC_V2_CPU_APR]
+CPU_BE(	rev	w4, w4 )
+CPU_BE(	rev	w5, w5 )
+CPU_BE(	rev	w6, w6 )
+
+	str	w4, [x2, #GICH_HCR]
+	str	w5, [x2, #GICH_VMCR]
+	str	w6, [x2, #GICH_APR]
+
+	/* Restore list registers */
+	add	x2, x2, #GICH_LR0
+	ldr	w4, [x3, #VGIC_CPU_NR_LR]
+	add	x3, x3, #VGIC_V2_CPU_LR
+1:	ldr	w5, [x3], #4
+CPU_BE(	rev	w5, w5 )
+	str	w5, [x2], #4
+	sub	w4, w4, #1
+	cbnz	w4, 1b
+2:
+	ret
+ENDPROC(__restore_vgic_v2_state)
+
+	.popsection
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f6b9fec6fcac..65f1121a3beb 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -24,7 +24,6 @@
 #include <linux/irqreturn.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
-#include <linux/irqchip/arm-gic.h>
 
 #define VGIC_NR_IRQS		256
 #define VGIC_NR_SGIS		16
@@ -71,6 +70,10 @@ struct vgic_bytemap {
 
 struct kvm_vcpu;
 
+enum vgic_type {
+	VGIC_V2,		/* Good ol' GICv2 */
+};
+
 #define LR_STATE_PENDING	(1 << 0)
 #define LR_STATE_ACTIVE		(1 << 1)
 #define LR_STATE_MASK		(3 << 0)
@@ -104,6 +107,8 @@ struct vgic_ops {
 };
 
 struct vgic_params {
+	/* vgic type */
+	enum vgic_type	type;
 	/* Physical address of vgic virtual cpu interface */
 	phys_addr_t	vcpu_base;
 	/* Number of list registers */
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 940418ebd0d0..d6c9c142f813 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -236,6 +236,7 @@ int vgic_v2_probe(struct device_node *vgic_node,
 	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
 		 vctrl_res.start, vgic->maint_irq);
 
+	vgic->type = VGIC_V2;
 	*ops = &vgic_v2_ops;
 	*params = vgic;
 	goto out;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 1348e74c4d11..7867b9a1f694 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1568,6 +1568,9 @@ int kvm_vgic_hyp_init(void)
 
 	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
 
+	/* Call back into arch code for setup */
+	vgic_arch_setup(vgic);
+
 	return 0;
 
 out_free_irq:

From c292a45791af6d60cc7c644809f84810a38e1771 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 9 Aug 2013 18:19:11 +0100
Subject: [PATCH 0636/1185] arm64: KVM: move HCR_EL2.{IMO,FMO} manipulation
 into the vgic switch code

GICv3 requires the IMO and FMO bits to be tightly coupled with some
of the interrupt controller's register switch.

In order to have similar code paths, move the manipulation of these
bits to the GICv2 switch code.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit ac3c3747e2db2f326ffc601651de544cdd33a8e9)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_arm.h |  5 +++--
 arch/arm64/kvm/hyp.S             | 17 ++++++++++++-----
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 3d6903006a8a..cc83520459ed 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -76,9 +76,10 @@
  */
 #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
 			 HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
-			 HCR_AMO | HCR_IMO | HCR_FMO | \
-			 HCR_SWIO | HCR_TIDCP | HCR_RW)
+			 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+#define HCR_INT_OVERRIDE   (HCR_FMO | HCR_IMO)
+
 
 /* Hyp System Control Register (SCTLR_EL2) bits */
 #define SCTLR_EL2_EE	(1 << 25)
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 56df9a352a81..5945f3bdea7a 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -335,11 +335,8 @@
 .endm
 
 .macro activate_traps
-	ldr	x2, [x0, #VCPU_IRQ_LINES]
-	ldr	x1, [x0, #VCPU_HCR_EL2]
-	orr	x2, x2, x1
-	msr	hcr_el2, x2
-
+	ldr     x2, [x0, #VCPU_HCR_EL2]
+	msr     hcr_el2, x2
 	ldr	x2, =(CPTR_EL2_TTA)
 	msr	cptr_el2, x2
 
@@ -382,12 +379,22 @@
 	ldr	x24, [x24, VGIC_SAVE_FN]
 	kern_hyp_va	x24
 	blr	x24
+	mrs	x24, hcr_el2
+	mov	x25, #HCR_INT_OVERRIDE
+	mvn	x25, x25		// bitwise NOT: mask clears only HCR_INT_OVERRIDE (neg also cleared bits 0-2)
+	and	x24, x24, x25
+	msr	hcr_el2, x24
 .endm
 
 /*
  * Call into the vgic backend for state restoring
  */
 .macro restore_vgic_state
+	mrs	x24, hcr_el2
+	ldr	x25, [x0, #VCPU_IRQ_LINES]
+	orr	x24, x24, #HCR_INT_OVERRIDE
+	orr	x24, x24, x25
+	msr	hcr_el2, x24
 	adr	x24, __vgic_sr_vectors
 	ldr	x24, [x24, #VGIC_RESTORE_FN]
 	kern_hyp_va	x24

From 9a35d57d6984382050d493e2fa1140c9a6b78671 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 12 Jul 2013 15:15:23 +0100
Subject: [PATCH 0637/1185] KVM: ARM: vgic: add the GICv3 backend

Introduce the support code for emulating a GICv2 on top of GICv3
hardware.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit b2fb1c0d378399e1427a91bb991c094f2ca09a2f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h |   2 +
 arch/arm64/kvm/vgic-v3-switch.S  |  29 ++++
 include/kvm/arm_vgic.h           |  28 ++++
 virt/kvm/arm/vgic-v3.c           | 231 +++++++++++++++++++++++++++++++
 4 files changed, 290 insertions(+)
 create mode 100644 arch/arm64/kvm/vgic-v3-switch.S
 create mode 100644 virt/kvm/arm/vgic-v3.c

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 6252264341c8..ed4987bf9ac7 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -106,6 +106,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
+extern u64 __vgic_v3_get_ich_vtr_el2(void);
+
 extern char __save_vgic_v2_state[];
 extern char __restore_vgic_v2_state[];
 
diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
new file mode 100644
index 000000000000..9fbf27350c84
--- /dev/null
+++ b/arch/arm64/kvm/vgic-v3-switch.S
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic-v3.h>
+
+	.text
+	.pushsection	.hyp.text, "ax"
+
+ENTRY(__vgic_v3_get_ich_vtr_el2)
+	mrs	x0, ICH_VTR_EL2
+	ret
+ENDPROC(__vgic_v3_get_ich_vtr_el2)
+
+	.popsection
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 65f1121a3beb..35b0c121bb65 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -33,6 +33,7 @@
 #define VGIC_MAX_CPUS		KVM_MAX_VCPUS
 
 #define VGIC_V2_MAX_LRS		(1 << 6)
+#define VGIC_V3_MAX_LRS		16
 
 /* Sanity checks... */
 #if (VGIC_MAX_CPUS > 8)
@@ -72,6 +73,7 @@ struct kvm_vcpu;
 
 enum vgic_type {
 	VGIC_V2,		/* Good ol' GICv2 */
+	VGIC_V3,		/* New fancy GICv3 */
 };
 
 #define LR_STATE_PENDING	(1 << 0)
@@ -172,6 +174,19 @@ struct vgic_v2_cpu_if {
 	u32		vgic_lr[VGIC_V2_MAX_LRS];
 };
 
+struct vgic_v3_cpu_if {
+#ifdef CONFIG_ARM_GIC_V3
+	u32		vgic_hcr;
+	u32		vgic_vmcr;
+	u32		vgic_misr;	/* Saved only */
+	u32		vgic_eisr;	/* Saved only */
+	u32		vgic_elrsr;	/* Saved only */
+	u32		vgic_ap0r[4];
+	u32		vgic_ap1r[4];
+	u64		vgic_lr[VGIC_V3_MAX_LRS];
+#endif
+};
+
 struct vgic_cpu {
 #ifdef CONFIG_KVM_ARM_VGIC
 	/* per IRQ to LR mapping */
@@ -190,6 +205,7 @@ struct vgic_cpu {
 	/* CPU vif control registers for world switch */
 	union {
 		struct vgic_v2_cpu_if	vgic_v2;
+		struct vgic_v3_cpu_if	vgic_v3;
 	};
 #endif
 };
@@ -224,6 +240,18 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 int vgic_v2_probe(struct device_node *vgic_node,
 		  const struct vgic_ops **ops,
 		  const struct vgic_params **params);
+#ifdef CONFIG_ARM_GIC_V3
+int vgic_v3_probe(struct device_node *vgic_node,
+		  const struct vgic_ops **ops,
+		  const struct vgic_params **params);
+#else
+static inline int vgic_v3_probe(struct device_node *vgic_node,
+				const struct vgic_ops **ops,
+				const struct vgic_params **params)
+{
+	return -ENODEV;
+}
+#endif
 
 #else
 static inline int kvm_vgic_hyp_init(void)
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
new file mode 100644
index 000000000000..f01d44685720
--- /dev/null
+++ b/virt/kvm/arm/vgic-v3.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (C) 2013 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+/* These are for GICv2 emulation only */
+#define GICH_LR_VIRTUALID		(0x3ffUL << 0)
+#define GICH_LR_PHYSID_CPUID_SHIFT	(10)
+#define GICH_LR_PHYSID_CPUID		(7UL << GICH_LR_PHYSID_CPUID_SHIFT)
+
+/*
+ * LRs are stored in reverse order in memory. Make sure we index them
+ * correctly: LR n lives at vgic_lr[VGIC_V3_MAX_LRS - 1 - n].
+ */
+#define LR_INDEX(lr)			(VGIC_V3_MAX_LRS - 1 - (lr))
+
+static u32 ich_vtr_el2;
+
+static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+	struct vgic_lr lr_desc;
+	u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)];
+
+	lr_desc.irq	= val & GICH_LR_VIRTUALID;
+	if (lr_desc.irq <= 15)
+		lr_desc.source	= (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
+	else
+		lr_desc.source = 0;
+	lr_desc.state	= 0;
+
+	if (val & ICH_LR_PENDING_BIT)
+		lr_desc.state |= LR_STATE_PENDING;
+	if (val & ICH_LR_ACTIVE_BIT)
+		lr_desc.state |= LR_STATE_ACTIVE;
+	if (val & ICH_LR_EOI)
+		lr_desc.state |= LR_EOI_INT;
+
+	return lr_desc;
+}
+
+static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
+			   struct vgic_lr lr_desc)
+{
+	u64 lr_val = (((u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) |
+		      lr_desc.irq);
+
+	if (lr_desc.state & LR_STATE_PENDING)
+		lr_val |= ICH_LR_PENDING_BIT;
+	if (lr_desc.state & LR_STATE_ACTIVE)
+		lr_val |= ICH_LR_ACTIVE_BIT;
+	if (lr_desc.state & LR_EOI_INT)
+		lr_val |= ICH_LR_EOI;
+
+	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
+}
+
+static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+				  struct vgic_lr lr_desc)
+{
+	if (!(lr_desc.state & LR_STATE_MASK))
+		vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
+}
+
+static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr;
+}
+
+static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr;
+}
+
+static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu)
+{
+	u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr;
+	u32 ret = 0;
+
+	if (misr & ICH_MISR_EOI)
+		ret |= INT_STATUS_EOI;
+	if (misr & ICH_MISR_U)
+		ret |= INT_STATUS_UNDERFLOW;
+
+	return ret;
+}
+
+static void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+	u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr;
+
+	vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT;
+	vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
+	vmcrp->bpr  = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
+	vmcrp->pmr  = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
+}
+
+static void vgic_v3_enable_underflow(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr |= ICH_HCR_UIE;
+}
+
+static void vgic_v3_disable_underflow(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr &= ~ICH_HCR_UIE;
+}
+
+static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+	u32 vmcr;
+
+	vmcr  = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK;
+	vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
+	vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
+	vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
+
+	vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr;
+}
+
+static void vgic_v3_enable(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * By forcing VMCR to zero, the GIC will restore the binary
+	 * points to their reset values. Anything else resets to zero
+	 * anyway.
+	 */
+	vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = 0;
+
+	/* Get the show on the road... */
+	vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = ICH_HCR_EN;
+}
+
+static const struct vgic_ops vgic_v3_ops = {
+	.get_lr			= vgic_v3_get_lr,
+	.set_lr			= vgic_v3_set_lr,
+	.sync_lr_elrsr		= vgic_v3_sync_lr_elrsr,
+	.get_elrsr		= vgic_v3_get_elrsr,
+	.get_eisr		= vgic_v3_get_eisr,
+	.get_interrupt_status	= vgic_v3_get_interrupt_status,
+	.enable_underflow	= vgic_v3_enable_underflow,
+	.disable_underflow	= vgic_v3_disable_underflow,
+	.get_vmcr		= vgic_v3_get_vmcr,
+	.set_vmcr		= vgic_v3_set_vmcr,
+	.enable			= vgic_v3_enable,
+};
+
+static struct vgic_params vgic_v3_params;
+
+/**
+ * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
+ * @vgic_node:	pointer to the DT node
+ * @ops: 	address of a pointer to the GICv3 operations
+ * @params:	address of a pointer to HW-specific parameters
+ *
+ * Returns 0 if a GICv3 has been found, with the low level operations
+ * in *ops and the HW parameters in *params. Returns an error code
+ * otherwise.
+ */
+int vgic_v3_probe(struct device_node *vgic_node,
+		  const struct vgic_ops **ops,
+		  const struct vgic_params **params)
+{
+	int ret = 0;
+	u32 gicv_idx;
+	struct resource vcpu_res;
+	struct vgic_params *vgic = &vgic_v3_params;
+
+	vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
+	if (!vgic->maint_irq) {
+		kvm_err("error getting vgic maintenance irq from DT\n");
+		ret = -ENXIO;
+		goto out;
+	}
+
+	ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
+
+	/*
+	 * The ListRegs field is 5 bits, but there is an architectural
+	 * maximum of 16 list registers. Just ignore bit 4...
+	 */
+	vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
+
+	if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
+		gicv_idx = 1;
+
+	gicv_idx += 3; /* Also skip GICD, GICC, GICH */
+	if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
+		kvm_err("Cannot obtain GICV region\n");
+		ret = -ENXIO;
+		goto out;
+	}
+	vgic->vcpu_base = vcpu_res.start;
+	vgic->vctrl_base = NULL;
+	vgic->type = VGIC_V3;
+
+	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
+		 vcpu_res.start, vgic->maint_irq);
+
+	*ops = &vgic_v3_ops;
+	*params = vgic;
+
+out:
+	of_node_put(vgic_node);
+	return ret;
+}

From 825ae8cc53eeb522fb8c8bf9095fbee898cc1766 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 9 Jul 2013 10:45:49 +0100
Subject: [PATCH 0638/1185] arm64: KVM: vgic: add GICv3 world switch

Introduce the GICv3 world switch code used to save/restore the
GICv3 context.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 754d37726010d872f1f714a8ce8920acdfa4978c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h |   2 +
 arch/arm64/kernel/asm-offsets.c  |   8 ++
 arch/arm64/kvm/vgic-v3-switch.S  | 238 +++++++++++++++++++++++++++++++
 3 files changed, 248 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index ed4987bf9ac7..a28c35b337ec 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -110,6 +110,8 @@ extern u64 __vgic_v3_get_ich_vtr_el2(void);
 
 extern char __save_vgic_v2_state[];
 extern char __restore_vgic_v2_state[];
+extern char __save_vgic_v3_state[];
+extern char __restore_vgic_v3_state[];
 
 #endif
 
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 9ff0b2b97392..65ebb2ccde5f 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -137,6 +137,14 @@ int main(void)
   DEFINE(VGIC_V2_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
   DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
   DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
+  DEFINE(VGIC_V3_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
+  DEFINE(VGIC_V3_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
+  DEFINE(VGIC_V3_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
+  DEFINE(VGIC_V3_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v3.vgic_eisr));
+  DEFINE(VGIC_V3_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr));
+  DEFINE(VGIC_V3_CPU_AP0R,	offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r));
+  DEFINE(VGIC_V3_CPU_AP1R,	offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r));
+  DEFINE(VGIC_V3_CPU_LR,	offsetof(struct vgic_cpu, vgic_v3.vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
   DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
   DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
index 9fbf27350c84..21e68f606a8f 100644
--- a/arch/arm64/kvm/vgic-v3-switch.S
+++ b/arch/arm64/kvm/vgic-v3-switch.S
@@ -18,9 +18,247 @@
 #include <linux/linkage.h>
 #include <linux/irqchip/arm-gic-v3.h>
 
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
 	.text
 	.pushsection	.hyp.text, "ax"
 
+/*
+ * We store LRs in reverse order to let the CPU deal with streaming
+ * access. Use this macro to make it look saner...
+ */
+#define LR_OFFSET(n)	(VGIC_V3_CPU_LR + (15 - n) * 8)
+
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+.macro	save_vgic_v3_state
+	// Compute the address of struct vgic_cpu
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	// Make sure stores to the GIC via the memory mapped interface
+	// are now visible to the system register interface
+	dsb	st
+
+	// Save all interesting registers
+	mrs	x4, ICH_HCR_EL2
+	mrs	x5, ICH_VMCR_EL2
+	mrs	x6, ICH_MISR_EL2
+	mrs	x7, ICH_EISR_EL2
+	mrs	x8, ICH_ELSR_EL2
+
+	str	w4, [x3, #VGIC_V3_CPU_HCR]
+	str	w5, [x3, #VGIC_V3_CPU_VMCR]
+	str	w6, [x3, #VGIC_V3_CPU_MISR]
+	str	w7, [x3, #VGIC_V3_CPU_EISR]
+	str	w8, [x3, #VGIC_V3_CPU_ELRSR]
+
+	msr	ICH_HCR_EL2, xzr
+
+	mrs	x21, ICH_VTR_EL2
+	mvn	w22, w21
+	ubfiz	w23, w22, 2, 4	// w23 = (15 - ListRegs) * 4
+
+	adr	x24, 1f
+	add	x24, x24, x23
+	br	x24
+
+1:
+	mrs	x20, ICH_LR15_EL2
+	mrs	x19, ICH_LR14_EL2
+	mrs	x18, ICH_LR13_EL2
+	mrs	x17, ICH_LR12_EL2
+	mrs	x16, ICH_LR11_EL2
+	mrs	x15, ICH_LR10_EL2
+	mrs	x14, ICH_LR9_EL2
+	mrs	x13, ICH_LR8_EL2
+	mrs	x12, ICH_LR7_EL2
+	mrs	x11, ICH_LR6_EL2
+	mrs	x10, ICH_LR5_EL2
+	mrs	x9, ICH_LR4_EL2
+	mrs	x8, ICH_LR3_EL2
+	mrs	x7, ICH_LR2_EL2
+	mrs	x6, ICH_LR1_EL2
+	mrs	x5, ICH_LR0_EL2
+
+	adr	x24, 1f
+	add	x24, x24, x23
+	br	x24
+
+1:
+	str	x20, [x3, #LR_OFFSET(15)]
+	str	x19, [x3, #LR_OFFSET(14)]
+	str	x18, [x3, #LR_OFFSET(13)]
+	str	x17, [x3, #LR_OFFSET(12)]
+	str	x16, [x3, #LR_OFFSET(11)]
+	str	x15, [x3, #LR_OFFSET(10)]
+	str	x14, [x3, #LR_OFFSET(9)]
+	str	x13, [x3, #LR_OFFSET(8)]
+	str	x12, [x3, #LR_OFFSET(7)]
+	str	x11, [x3, #LR_OFFSET(6)]
+	str	x10, [x3, #LR_OFFSET(5)]
+	str	x9, [x3, #LR_OFFSET(4)]
+	str	x8, [x3, #LR_OFFSET(3)]
+	str	x7, [x3, #LR_OFFSET(2)]
+	str	x6, [x3, #LR_OFFSET(1)]
+	str	x5, [x3, #LR_OFFSET(0)]
+
+	tbnz	w21, #29, 6f	// 6 bits
+	tbz	w21, #30, 5f	// 5 bits
+				// 7 bits
+	mrs	x20, ICH_AP0R3_EL2
+	str	w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
+	mrs	x19, ICH_AP0R2_EL2
+	str	w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
+6:	mrs	x18, ICH_AP0R1_EL2
+	str	w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
+5:	mrs	x17, ICH_AP0R0_EL2
+	str	w17, [x3, #VGIC_V3_CPU_AP0R]
+
+	tbnz	w21, #29, 6f	// 6 bits
+	tbz	w21, #30, 5f	// 5 bits
+				// 7 bits
+	mrs	x20, ICH_AP1R3_EL2
+	str	w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
+	mrs	x19, ICH_AP1R2_EL2
+	str	w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
+6:	mrs	x18, ICH_AP1R1_EL2
+	str	w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
+5:	mrs	x17, ICH_AP1R0_EL2
+	str	w17, [x3, #VGIC_V3_CPU_AP1R]
+
+	// Restore SRE_EL1 access and re-enable SRE at EL1.
+	mrs	x5, ICC_SRE_EL2
+	orr	x5, x5, #ICC_SRE_EL2_ENABLE
+	msr	ICC_SRE_EL2, x5
+	isb
+	mov	x5, #1
+	msr	ICC_SRE_EL1, x5
+.endm
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ */
+.macro	restore_vgic_v3_state
+	// Disable SRE_EL1 access. Necessary, otherwise
+	// ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens...
+	msr	ICC_SRE_EL1, xzr
+	isb
+
+	// Compute the address of struct vgic_cpu
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	// Restore all interesting registers
+	ldr	w4, [x3, #VGIC_V3_CPU_HCR]
+	ldr	w5, [x3, #VGIC_V3_CPU_VMCR]
+
+	msr	ICH_HCR_EL2, x4
+	msr	ICH_VMCR_EL2, x5
+
+	mrs	x21, ICH_VTR_EL2
+
+	tbnz	w21, #29, 6f	// 6 bits
+	tbz	w21, #30, 5f	// 5 bits
+				// 7 bits
+	ldr	w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
+	msr	ICH_AP1R3_EL2, x20
+	ldr	w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
+	msr	ICH_AP1R2_EL2, x19
+6:	ldr	w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
+	msr	ICH_AP1R1_EL2, x18
+5:	ldr	w17, [x3, #VGIC_V3_CPU_AP1R]
+	msr	ICH_AP1R0_EL2, x17
+
+	tbnz	w21, #29, 6f	// 6 bits
+	tbz	w21, #30, 5f	// 5 bits
+				// 7 bits
+	ldr	w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
+	msr	ICH_AP0R3_EL2, x20
+	ldr	w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
+	msr	ICH_AP0R2_EL2, x19
+6:	ldr	w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
+	msr	ICH_AP0R1_EL2, x18
+5:	ldr	w17, [x3, #VGIC_V3_CPU_AP0R]
+	msr	ICH_AP0R0_EL2, x17
+
+	and	w22, w21, #0xf
+	mvn	w22, w21
+	ubfiz	w23, w22, 2, 4	// w23 = (15 - ListRegs) * 4
+
+	adr	x24, 1f
+	add	x24, x24, x23
+	br	x24
+
+1:
+	ldr	x20, [x3, #LR_OFFSET(15)]
+	ldr	x19, [x3, #LR_OFFSET(14)]
+	ldr	x18, [x3, #LR_OFFSET(13)]
+	ldr	x17, [x3, #LR_OFFSET(12)]
+	ldr	x16, [x3, #LR_OFFSET(11)]
+	ldr	x15, [x3, #LR_OFFSET(10)]
+	ldr	x14, [x3, #LR_OFFSET(9)]
+	ldr	x13, [x3, #LR_OFFSET(8)]
+	ldr	x12, [x3, #LR_OFFSET(7)]
+	ldr	x11, [x3, #LR_OFFSET(6)]
+	ldr	x10, [x3, #LR_OFFSET(5)]
+	ldr	x9, [x3, #LR_OFFSET(4)]
+	ldr	x8, [x3, #LR_OFFSET(3)]
+	ldr	x7, [x3, #LR_OFFSET(2)]
+	ldr	x6, [x3, #LR_OFFSET(1)]
+	ldr	x5, [x3, #LR_OFFSET(0)]
+
+	adr	x24, 1f
+	add	x24, x24, x23
+	br	x24
+
+1:
+	msr	ICH_LR15_EL2, x20
+	msr	ICH_LR14_EL2, x19
+	msr	ICH_LR13_EL2, x18
+	msr	ICH_LR12_EL2, x17
+	msr	ICH_LR11_EL2, x16
+	msr	ICH_LR10_EL2, x15
+	msr	ICH_LR9_EL2,  x14
+	msr	ICH_LR8_EL2,  x13
+	msr	ICH_LR7_EL2,  x12
+	msr	ICH_LR6_EL2,  x11
+	msr	ICH_LR5_EL2,  x10
+	msr	ICH_LR4_EL2,   x9
+	msr	ICH_LR3_EL2,   x8
+	msr	ICH_LR2_EL2,   x7
+	msr	ICH_LR1_EL2,   x6
+	msr	ICH_LR0_EL2,   x5
+
+	// Ensure that the above will have reached the
+	// (re)distributors. This ensures the guest will read
+	// the correct values from the memory-mapped interface.
+	isb
+	dsb	sy
+
+	// Prevent the guest from touching the GIC system registers
+	mrs	x5, ICC_SRE_EL2
+	and	x5, x5, #~ICC_SRE_EL2_ENABLE
+	msr	ICC_SRE_EL2, x5
+.endm
+
+ENTRY(__save_vgic_v3_state)
+	save_vgic_v3_state
+	ret
+ENDPROC(__save_vgic_v3_state)
+
+ENTRY(__restore_vgic_v3_state)
+	restore_vgic_v3_state
+	ret
+ENDPROC(__restore_vgic_v3_state)
+
 ENTRY(__vgic_v3_get_ich_vtr_el2)
 	mrs	x0, ICH_VTR_EL2
 	ret

From 0e705de508452a4437225048231f8e7aa5f56d2c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 9 Jul 2013 10:45:49 +0100
Subject: [PATCH 0639/1185] arm64: KVM: vgic: enable GICv2 emulation on top of
 GICv3 hardware

Add the last missing bits that enable GICv2 emulation on top of
GICv3 hardware.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 67b2abfedb7b861bead93400fa315c5c30879d51)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h | 7 +++++++
 arch/arm64/kvm/Makefile           | 2 ++
 virt/kvm/arm/vgic.c               | 1 +
 3 files changed, 10 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 4c182d0aae70..4ae9213aa997 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -216,6 +216,13 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 		__vgic_sr_vectors.restore_vgic	= __restore_vgic_v2_state;
 		break;
 
+#ifdef CONFIG_ARM_GIC_V3
+	case VGIC_V3:
+		__vgic_sr_vectors.save_vgic	= __save_vgic_v3_state;
+		__vgic_sr_vectors.restore_vgic	= __restore_vgic_v3_state;
+		break;
+#endif
+
 	default:
 		BUG();
 	}
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index daf24dc59e2c..32a096174b94 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -22,4 +22,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
 kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 7867b9a1f694..795ab482333d 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1530,6 +1530,7 @@ static struct notifier_block vgic_cpu_nb = {
 
 static const struct of_device_id vgic_ids[] = {
 	{ .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
+	{ .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
 	{},
 };
 

From ff815e50964d2593064d47c9dbd4a3bb03ea3e18 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:01 -0700
Subject: [PATCH 0640/1185] ARM: KVM: fix vgic V7 assembler code to work in BE
 image

The vgic h/w registers are little endian; when BE asm code
reads/writes from/to them, it needs to do byteswap after/before.
Byteswap code uses ARM_BE8 wrapper to add swap only if
CONFIG_CPU_BIG_ENDIAN is configured.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 64054c25cf7e060cd6780744fefe7ed3990e4f21)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/interrupts_head.S | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index e4eaf30205c5..68d99c69639c 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -1,4 +1,5 @@
 #include <linux/irqchip/arm-gic.h>
+#include <asm/assembler.h>
 
 #define VCPU_USR_REG(_reg_nr)	(VCPU_USR_REGS + (_reg_nr * 4))
 #define VCPU_USR_SP		(VCPU_USR_REG(13))
@@ -420,6 +421,14 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	ldr	r8, [r2, #GICH_ELRSR0]
 	ldr	r9, [r2, #GICH_ELRSR1]
 	ldr	r10, [r2, #GICH_APR]
+ARM_BE8(rev	r3, r3	)
+ARM_BE8(rev	r4, r4	)
+ARM_BE8(rev	r5, r5	)
+ARM_BE8(rev	r6, r6	)
+ARM_BE8(rev	r7, r7	)
+ARM_BE8(rev	r8, r8	)
+ARM_BE8(rev	r9, r9	)
+ARM_BE8(rev	r10, r10	)
 
 	str	r3, [r11, #VGIC_V2_CPU_HCR]
 	str	r4, [r11, #VGIC_V2_CPU_VMCR]
@@ -439,6 +448,7 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	add	r3, r11, #VGIC_V2_CPU_LR
 	ldr	r4, [r11, #VGIC_CPU_NR_LR]
 1:	ldr	r6, [r2], #4
+ARM_BE8(rev	r6, r6	)
 	str	r6, [r3], #4
 	subs	r4, r4, #1
 	bne	1b
@@ -466,6 +476,9 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	ldr	r3, [r11, #VGIC_V2_CPU_HCR]
 	ldr	r4, [r11, #VGIC_V2_CPU_VMCR]
 	ldr	r8, [r11, #VGIC_V2_CPU_APR]
+ARM_BE8(rev	r3, r3	)
+ARM_BE8(rev	r4, r4	)
+ARM_BE8(rev	r8, r8	)
 
 	str	r3, [r2, #GICH_HCR]
 	str	r4, [r2, #GICH_VMCR]
@@ -476,6 +489,7 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	add	r3, r11, #VGIC_V2_CPU_LR
 	ldr	r4, [r11, #VGIC_CPU_NR_LR]
 1:	ldr	r6, [r3], #4
+ARM_BE8(rev	r6, r6  )
 	str	r6, [r2], #4
 	subs	r4, r4, #1
 	bne	1b

From af39d18ecef850d57705c56cda31da0d855424ea Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:02 -0700
Subject: [PATCH 0641/1185] ARM: KVM: handle 64bit values passed to mcrr or
 from mrrc instructions in BE case

In some cases the mcrr and mrrc instructions in combination with the ldrd
and strd instructions need to deal with 64bit value in memory. The ldrd
and strd instructions already handle endianness within word (register)
boundaries but to get effect of the whole 64bit value represented correctly,
rr_lo_hi macro is introduced and is used to swap registers positions when
the mcrr and mrrc instructions are used. That has the effect of swapping
two words.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 19b0e60a63f758a28329aa40f4270a6c98c2dcb7)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_asm.h | 18 ++++++++++++++++++
 arch/arm/kvm/init.S            |  4 ++--
 arch/arm/kvm/interrupts.S      |  4 ++--
 arch/arm/kvm/interrupts_head.S |  6 +++---
 4 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 53b3c4a50d5c..3a67bec72d0c 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -61,6 +61,24 @@
 #define ARM_EXCEPTION_FIQ	  6
 #define ARM_EXCEPTION_HVC	  7
 
+/*
+ * The rr_lo_hi macro swaps a pair of registers depending on
+ * current endianness. It is used in conjunction with ldrd and strd
+ * instructions that load/store a 64-bit value from/to memory to/from
+ * a pair of registers which are used with the mrrc and mcrr instructions.
+ * If used with the ldrd/strd instructions, the a1 parameter is the first
+ * source/destination register and the a2 parameter is the second
+ * source/destination register. Note that the ldrd/strd instructions
+ * already swap the bytes within the words correctly according to the
+ * endianness setting, but the order of the registers need to be effectively
+ * swapped when used with the mrrc/mcrr instructions.
+ */
+#ifdef CONFIG_CPU_ENDIAN_BE8
+#define rr_lo_hi(a1, a2) a2, a1
+#else
+#define rr_lo_hi(a1, a2) a1, a2
+#endif
+
 #ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 1b9844d369cc..2cc14dfad049 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -71,7 +71,7 @@ __do_hyp_init:
 	bne	phase2			@ Yes, second stage init
 
 	@ Set the HTTBR to point to the hypervisor PGD pointer passed
-	mcrr	p15, 4, r2, r3, c2
+	mcrr	p15, 4, rr_lo_hi(r2, r3), c2
 
 	@ Set the HTCR and VTCR to the same shareability and cacheability
 	@ settings as the non-secure TTBCR and with T0SZ == 0.
@@ -137,7 +137,7 @@ phase2:
 	mov	pc, r0
 
 target:	@ We're now in the trampoline code, switch page tables
-	mcrr	p15, 4, r2, r3, c2
+	mcrr	p15, 4, rr_lo_hi(r2, r3), c2
 	isb
 
 	@ Invalidate the old TLBs
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 0d68d4073068..24d4e65806a7 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -52,7 +52,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
 	dsb	ishst
 	add	r0, r0, #KVM_VTTBR
 	ldrd	r2, r3, [r0]
-	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
+	mcrr	p15, 6, rr_lo_hi(r2, r3), c2	@ Write VTTBR
 	isb
 	mcr     p15, 0, r0, c8, c3, 0	@ TLBIALLIS (rt ignored)
 	dsb	ish
@@ -135,7 +135,7 @@ ENTRY(__kvm_vcpu_run)
 	ldr	r1, [vcpu, #VCPU_KVM]
 	add	r1, r1, #KVM_VTTBR
 	ldrd	r2, r3, [r1]
-	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
+	mcrr	p15, 6, rr_lo_hi(r2, r3), c2	@ Write VTTBR
 
 	@ We're all done, just restore the GPRs and go to the guest
 	restore_guest_regs
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 68d99c69639c..98c8c5b9a87f 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -520,7 +520,7 @@ ARM_BE8(rev	r6, r6  )
 	mcr	p15, 0, r2, c14, c3, 1	@ CNTV_CTL
 	isb
 
-	mrrc	p15, 3, r2, r3, c14	@ CNTV_CVAL
+	mrrc	p15, 3, rr_lo_hi(r2, r3), c14	@ CNTV_CVAL
 	ldr	r4, =VCPU_TIMER_CNTV_CVAL
 	add	r5, vcpu, r4
 	strd	r2, r3, [r5]
@@ -560,12 +560,12 @@ ARM_BE8(rev	r6, r6  )
 
 	ldr	r2, [r4, #KVM_TIMER_CNTVOFF]
 	ldr	r3, [r4, #(KVM_TIMER_CNTVOFF + 4)]
-	mcrr	p15, 4, r2, r3, c14	@ CNTVOFF
+	mcrr	p15, 4, rr_lo_hi(r2, r3), c14	@ CNTVOFF
 
 	ldr	r4, =VCPU_TIMER_CNTV_CVAL
 	add	r5, vcpu, r4
 	ldrd	r2, r3, [r5]
-	mcrr	p15, 3, r2, r3, c14	@ CNTV_CVAL
+	mcrr	p15, 3, rr_lo_hi(r2, r3), c14	@ CNTV_CVAL
 	isb
 
 	ldr	r2, [vcpu, #VCPU_TIMER_CNTV_CTL]

From b76b02ab6245e8641cc4acdc16b0050132bd9065 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:03 -0700
Subject: [PATCH 0642/1185] ARM: KVM: __kvm_vcpu_run function return result fix
 in BE case

The __kvm_vcpu_run function returns a 64-bit result in two registers,
which has to be adjusted for BE case.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 6d7311b520864531c81f0e0237e96146d8057d77)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/interrupts.S | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 24d4e65806a7..01dcb0e752d9 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -199,8 +199,13 @@ after_vfp_restore:
 
 	restore_host_regs
 	clrex				@ Clear exclusive monitor
+#ifndef CONFIG_CPU_ENDIAN_BE8
 	mov	r0, r1			@ Return the return code
 	mov	r1, #0			@ Clear upper bits in return value
+#else
+	@ r1 already has return code
+	mov	r0, #0			@ Clear upper bits in return value
+#endif /* CONFIG_CPU_ENDIAN_BE8 */
 	bx	lr			@ return to IOCTL
 
 /********************************************************************

From df5bce90aca5ced2e4ccee2a9ef913b064a47aea Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:04 -0700
Subject: [PATCH 0643/1185] ARM: KVM: vgic mmio should hold data as LE bytes
 array in BE case

According to recent clarifications of mmio.data array meaning -
the mmio.data array should hold bytes as they would appear in
memory. Vgic is little endian device. And in case of BE image
kernel side that emulates vgic, holds data in BE form. So we
need to byteswap cpu<->le32 vgic registers when we read/write them
from mmio.data[].

Change has no effect in LE case because cpu already runs in le32.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 1c9f04717ca8326e8df759d5dda9cd1b3d968b5b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 795ab482333d..b0edc8c670f8 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -238,12 +238,12 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
 
 static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
 {
-	return *((u32 *)mmio->data) & mask;
+	return le32_to_cpu(*((u32 *)mmio->data)) & mask;
 }
 
 static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
 {
-	*((u32 *)mmio->data) = value & mask;
+	*((u32 *)mmio->data) = cpu_to_le32(value & mask);
 }
 
 /**

From d6ff09058017a8dfd96fa24502691c83df6566e7 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:05 -0700
Subject: [PATCH 0644/1185] ARM: KVM: MMIO support BE host running LE code

If the status register E bit is not set (LE mode) and the host runs in
BE mode, we need to byteswap the data so that read/write is emulated correctly.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 27f194fd360a96cc64bebb2d69dd5abd67984b8a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_emulate.h | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 0fa90c962ac8..69b746955fca 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -185,9 +185,16 @@ static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
 		default:
 			return be32_to_cpu(data);
 		}
+	} else {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return le16_to_cpu(data & 0xffff);
+		default:
+			return le32_to_cpu(data);
+		}
 	}
-
-	return data;		/* Leave LE untouched */
 }
 
 static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
@@ -203,9 +210,16 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
 		default:
 			return cpu_to_be32(data);
 		}
+	} else {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return cpu_to_le16(data & 0xffff);
+		default:
+			return cpu_to_le32(data);
+		}
 	}
-
-	return data;		/* Leave LE untouched */
 }
 
 #endif /* __ARM_KVM_EMULATE_H__ */

From 1f5f8779f88483402671d58f0e307934218e22f6 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:06 -0700
Subject: [PATCH 0645/1185] ARM: KVM: one_reg coproc set and get BE fixes

Fix code that handles KVM_SET_ONE_REG, KVM_GET_ONE_REG ioctls to work in BE
image. Before this fix get/set_one_reg functions worked correctly only in
LE case - reg_from_user was taking 'void *' kernel address that actually could
be target/source memory of either 4 bytes size or 8 bytes size, and code copied
from/to user memory that could hold either 4 bytes register, 8 byte register
or pair of 4 bytes registers.

In order to work in an endian-agnostic way, the reg_from_user and reg_to_user
functions should copy a register value only to/from a kernel variable whose size
matches the register size. In the few places where a size mismatch existed, fix
the issue on the macro caller side.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 73891f72c414afff6da6f01e7af2ff5a44a8b823)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/coproc.c | 88 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 79 insertions(+), 9 deletions(-)

diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index c58a35116f63..37a0fe1bb9bb 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -44,6 +44,31 @@ static u32 cache_levels;
 /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
 #define CSSELR_MAX 12
 
+/*
+ * kvm_vcpu_arch.cp15 holds cp15 registers as an array of u32, but some
+ * of cp15 registers can be viewed either as couple of two u32 registers
+ * or one u64 register. Current u64 register encoding is that least
+ * significant u32 word is followed by most significant u32 word.
+ */
+static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu,
+				       const struct coproc_reg *r,
+				       u64 val)
+{
+	vcpu->arch.cp15[r->reg] = val & 0xffffffff;
+	vcpu->arch.cp15[r->reg + 1] = val >> 32;
+}
+
+static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu,
+				      const struct coproc_reg *r)
+{
+	u64 val;
+
+	val = vcpu->arch.cp15[r->reg + 1];
+	val = val << 32;
+	val = val | vcpu->arch.cp15[r->reg];
+	return val;
+}
+
 int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	kvm_inject_undefined(vcpu);
@@ -682,17 +707,23 @@ static struct coproc_reg invariant_cp15[] = {
 	{ CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR },
 };
 
+/*
+ * Reads a register value from a userspace address to a kernel
+ * variable. Make sure that register size matches sizeof(*__val).
+ */
 static int reg_from_user(void *val, const void __user *uaddr, u64 id)
 {
-	/* This Just Works because we are little endian. */
 	if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
 		return -EFAULT;
 	return 0;
 }
 
+/*
+ * Writes a register value to a userspace address from a kernel variable.
+ * Make sure that register size matches sizeof(*__val).
+ */
 static int reg_to_user(void __user *uaddr, const void *val, u64 id)
 {
-	/* This Just Works because we are little endian. */
 	if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
 		return -EFAULT;
 	return 0;
@@ -702,6 +733,7 @@ static int get_invariant_cp15(u64 id, void __user *uaddr)
 {
 	struct coproc_params params;
 	const struct coproc_reg *r;
+	int ret;
 
 	if (!index_to_params(id, &params))
 		return -ENOENT;
@@ -710,7 +742,15 @@ static int get_invariant_cp15(u64 id, void __user *uaddr)
 	if (!r)
 		return -ENOENT;
 
-	return reg_to_user(uaddr, &r->val, id);
+	ret = -ENOENT;
+	if (KVM_REG_SIZE(id) == 4) {
+		u32 val = r->val;
+
+		ret = reg_to_user(uaddr, &val, id);
+	} else if (KVM_REG_SIZE(id) == 8) {
+		ret = reg_to_user(uaddr, &r->val, id);
+	}
+	return ret;
 }
 
 static int set_invariant_cp15(u64 id, void __user *uaddr)
@@ -718,7 +758,7 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
 	struct coproc_params params;
 	const struct coproc_reg *r;
 	int err;
-	u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
+	u64 val;
 
 	if (!index_to_params(id, &params))
 		return -ENOENT;
@@ -726,7 +766,16 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
 	if (!r)
 		return -ENOENT;
 
-	err = reg_from_user(&val, uaddr, id);
+	err = -ENOENT;
+	if (KVM_REG_SIZE(id) == 4) {
+		u32 val32;
+
+		err = reg_from_user(&val32, uaddr, id);
+		if (!err)
+			val = val32;
+	} else if (KVM_REG_SIZE(id) == 8) {
+		err = reg_from_user(&val, uaddr, id);
+	}
 	if (err)
 		return err;
 
@@ -1004,6 +1053,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
 	const struct coproc_reg *r;
 	void __user *uaddr = (void __user *)(long)reg->addr;
+	int ret;
 
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
 		return demux_c15_get(reg->id, uaddr);
@@ -1015,14 +1065,24 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if (!r)
 		return get_invariant_cp15(reg->id, uaddr);
 
-	/* Note: copies two regs if size is 64 bit. */
-	return reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
+	ret = -ENOENT;
+	if (KVM_REG_SIZE(reg->id) == 8) {
+		u64 val;
+
+		val = vcpu_cp15_reg64_get(vcpu, r);
+		ret = reg_to_user(uaddr, &val, reg->id);
+	} else if (KVM_REG_SIZE(reg->id) == 4) {
+		ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
+	}
+
+	return ret;
 }
 
 int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
 	const struct coproc_reg *r;
 	void __user *uaddr = (void __user *)(long)reg->addr;
+	int ret;
 
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
 		return demux_c15_set(reg->id, uaddr);
@@ -1034,8 +1094,18 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if (!r)
 		return set_invariant_cp15(reg->id, uaddr);
 
-	/* Note: copies two regs if size is 64 bit */
-	return reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
+	ret = -ENOENT;
+	if (KVM_REG_SIZE(reg->id) == 8) {
+		u64 val;
+
+		ret = reg_from_user(&val, uaddr, reg->id);
+		if (!ret)
+			vcpu_cp15_reg64_set(vcpu, r, val);
+	} else if (KVM_REG_SIZE(reg->id) == 4) {
+		ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
+	}
+
+	return ret;
 }
 
 static unsigned int num_demux_regs(void)

From 7c1537fb45e64848a4c6f19b66fec1671d608d1b Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:07 -0700
Subject: [PATCH 0646/1185] ARM: KVM: enable KVM in Kconfig on big-endian
 systems

Previous patches addressed ARMv7 big-endian virtualization and
KVM-related issues, so enable ARM_VIRT_EXT for big-endian
now.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit f5aa462147a209dab40b02f0f70234784b913bf9)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 4be5bb150bdd..466bd299b1a8 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -23,7 +23,7 @@ config KVM
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select KVM_MMIO
 	select KVM_ARM_HOST
-	depends on ARM_VIRT_EXT && ARM_LPAE && !CPU_BIG_ENDIAN
+	depends on ARM_VIRT_EXT && ARM_LPAE
 	---help---
 	  Support hosting virtualized guest machines. You will also
 	  need to select one or more of the processor modules below.

From 5d129b5dba663c42acca6a4711cc8732f692a18a Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:08 -0700
Subject: [PATCH 0647/1185] ARM64: KVM: MMIO support BE host running LE code

When the guest CPU runs in LE mode and the host runs in BE mode,
we need to byteswap data so that read/write is emulated correctly.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit b30070862edbdb252f9d0d3a1e61b8dc4c68e3d2)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_emulate.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index dd8ecfc3f995..fdc3e21abd8d 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -213,6 +213,17 @@ static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
 		default:
 			return be64_to_cpu(data);
 		}
+	} else {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return le16_to_cpu(data & 0xffff);
+		case 4:
+			return le32_to_cpu(data & 0xffffffff);
+		default:
+			return le64_to_cpu(data);
+		}
 	}
 
 	return data;		/* Leave LE untouched */
@@ -233,6 +244,17 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
 		default:
 			return cpu_to_be64(data);
 		}
+	} else {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return cpu_to_le16(data & 0xffff);
+		case 4:
+			return cpu_to_le32(data & 0xffffffff);
+		default:
+			return cpu_to_le64(data);
+		}
 	}
 
 	return data;		/* Leave LE untouched */

From e51ab0ff6436524131e948a9a31258bb859b1ec5 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:09 -0700
Subject: [PATCH 0648/1185] ARM64: KVM: store kvm_vcpu_fault_info esr_el2 as
 word

The esr_el2 field of struct kvm_vcpu_fault_info has u32 type.
It should be stored as a word. Current code works in the LE case
because the existing code puts the least significant word of x1 into
esr_el2, and it puts the most significant word of x1 into the next
field, which accidentally is OK because it is updated again
by the next instruction. But the existing code breaks in the BE case.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit ba083d20d8cfa9e999043cd89c4ebc964ccf8927)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 5945f3bdea7a..7874e022d077 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -762,7 +762,7 @@ el1_trap:
 	mrs	x2, far_el2
 
 2:	mrs	x0, tpidr_el2
-	str	x1, [x0, #VCPU_ESR_EL2]
+	str	w1, [x0, #VCPU_ESR_EL2]
 	str	x2, [x0, #VCPU_FAR_EL2]
 	str	x3, [x0, #VCPU_HPFAR_EL2]
 

From df032d7ed86bcaf850b549698c62eb182c2cb4e7 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:10 -0700
Subject: [PATCH 0649/1185] ARM64: KVM: fix vgic_bitmap_get_reg function for BE
 64bit case

Fix vgic_bitmap_get_reg function to return 'right' word address of
'unsigned long' bitmap value in case of BE 64bit image.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 9662fb4854e1319b4affda47f279c3f210316def)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index b0edc8c670f8..ede8f6466c95 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -98,14 +98,34 @@ static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 static const struct vgic_ops *vgic_ops;
 static const struct vgic_params *vgic;
 
+/*
+ * struct vgic_bitmap contains unions that provide two views of
+ * the same data. In one case it is an array of registers of
+ * u32's, and in the other case it is a bitmap of unsigned
+ * longs.
+ *
+ * This does not work on 64-bit BE systems, because the bitmap access
+ * will store two consecutive 32-bit words with the higher-addressed
+ * register's bits at the lower index and the lower-addressed register's
+ * bits at the higher index.
+ *
+ * Therefore, swizzle the register index when accessing the 32-bit word
+ * registers to access the right register's value.
+ */
+#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64
+#define REG_OFFSET_SWIZZLE	1
+#else
+#define REG_OFFSET_SWIZZLE	0
+#endif
+
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
 				int cpuid, u32 offset)
 {
 	offset >>= 2;
 	if (!offset)
-		return x->percpu[cpuid].reg;
+		return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE);
 	else
-		return x->shared.reg + offset - 1;
+		return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
 }
 
 static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,

From 21c40b1a24a48d76af181a1b18315a4f0e61b37e Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:12 -0700
Subject: [PATCH 0650/1185] ARM64: KVM: set and get of sys registers in BE case

Since the size of all sys registers is always 8 bytes, the current
code is actually endian agnostic. Just clean it up a bit.
Removed comment about little endian. Change type of pointer
from 'void *' to 'u64 *' to enforce stronger type checking.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 26c99af1018c35020cfad1d20f02acb224807655)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 34f25a590bd7..f0ceceffa95a 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -776,17 +776,15 @@ static struct sys_reg_desc invariant_sys_regs[] = {
 	  NULL, get_ctr_el0 },
 };
 
-static int reg_from_user(void *val, const void __user *uaddr, u64 id)
+static int reg_from_user(u64 *val, const void __user *uaddr, u64 id)
 {
-	/* This Just Works because we are little endian. */
 	if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
 		return -EFAULT;
 	return 0;
 }
 
-static int reg_to_user(void __user *uaddr, const void *val, u64 id)
+static int reg_to_user(void __user *uaddr, const u64 *val, u64 id)
 {
-	/* This Just Works because we are little endian. */
 	if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
 		return -EFAULT;
 	return 0;

From 5208e0ff0d5097acc955d60d4daa395893a772ce Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Wed, 2 Jul 2014 17:19:30 +0100
Subject: [PATCH 0651/1185] ARM64: KVM: fix big endian issue in access_vm_reg
 for 32bit guest

Fix issue with 32bit guests running on top of BE KVM host.
Indexes of high and low words of 64bit cp15 register are
swapped in case of big endian code, since 64bit cp15 state is
restored or saved with double word write or read instruction.

Define helper macro to access low words of 64bit cp15 register.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit f0a3eaff71b8bd5d5acfda1f0cf3eedf49755622)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h |  6 ++++++
 arch/arm64/kvm/sys_regs.c         | 10 ++++------
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 4ae9213aa997..503c70661636 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -140,6 +140,12 @@ struct kvm_vcpu_arch {
 #define vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
 #define vcpu_cp15(v,r)		((v)->arch.ctxt.cp15[(r)])
 
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 1)])
+#else
+#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 0)])
+#endif
+
 struct kvm_vm_stat {
 	u32 remote_tlb_flush;
 };
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index f0ceceffa95a..56288f31c12d 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -134,13 +134,11 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
 	BUG_ON(!p->is_write);
 
 	val = *vcpu_reg(vcpu, p->Rt);
-	if (!p->is_aarch32) {
+	if (!p->is_aarch32 || !p->is_32bit)
 		vcpu_sys_reg(vcpu, r->reg) = val;
-	} else {
-		vcpu_cp15(vcpu, r->reg) = val & 0xffffffffUL;
-		if (!p->is_32bit)
-			vcpu_cp15(vcpu, r->reg + 1) = val >> 32;
-	}
+	else
+		vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL;
+
 	return true;
 }
 

From 129fb6b2b94c46e279406736a959cc39a23500c6 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 24 Apr 2014 10:21:16 +0100
Subject: [PATCH 0652/1185] arm64: KVM: rename pm_fake handler to trap_raz_wi

pm_fake doesn't quite describe what the handler does (ignoring writes
and returning 0 for reads).

As we're about to use it (a lot) in a different context, rename it
with an (admittedly cryptic) name that makes sense for all users.

Reviewed-by: Anup Patel <anup.patel@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 7609c1251f9d8bbcd6a05ba22153e50cf4f88cff)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 83 ++++++++++++++++++++-------------------
 1 file changed, 43 insertions(+), 40 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 56288f31c12d..492ba301e10c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -161,18 +161,9 @@ static bool access_sctlr(struct kvm_vcpu *vcpu,
 	return true;
 }
 
-/*
- * We could trap ID_DFR0 and tell the guest we don't support performance
- * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
- * NAKed, so it will read the PMCR anyway.
- *
- * Therefore we tell the guest we have 0 counters.  Unfortunately, we
- * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
- * all PM registers, which doesn't crash the guest kernel at least.
- */
-static bool pm_fake(struct kvm_vcpu *vcpu,
-		    const struct sys_reg_params *p,
-		    const struct sys_reg_desc *r)
+static bool trap_raz_wi(struct kvm_vcpu *vcpu,
+			const struct sys_reg_params *p,
+			const struct sys_reg_desc *r)
 {
 	if (p->is_write)
 		return ignore_write(vcpu, p);
@@ -199,6 +190,17 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 /*
  * Architected system registers.
  * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ *
+ * We could trap ID_DFR0 and tell the guest we don't support performance
+ * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
+ * NAKed, so it will read the PMCR anyway.
+ *
+ * Therefore we tell the guest we have 0 counters.  Unfortunately, we
+ * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
+ * all PM registers, which doesn't crash the guest kernel at least.
+ *
+ * Same goes for the whole debug infrastructure, which probably breaks
+ * some guest functionnality. This should be fixed.
  */
 static const struct sys_reg_desc sys_reg_descs[] = {
 	/* DC ISW */
@@ -258,10 +260,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
 	/* PMINTENSET_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMINTENCLR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
-	  pm_fake },
+	  trap_raz_wi },
 
 	/* MAIR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
@@ -290,43 +292,43 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
 	/* PMCR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMCNTENSET_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMCNTENCLR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMOVSCLR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMSWINC_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMSELR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMCEID0_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMCEID1_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMCCNTR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMXEVTYPER_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMXEVCNTR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMUSERENR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMOVSSET_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
-	  pm_fake },
+	  trap_raz_wi },
 
 	/* TPIDR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010),
@@ -372,19 +374,20 @@ static const struct sys_reg_desc cp15_regs[] = {
 	{ Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
 	{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
 
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 3), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 5), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 6), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 7), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(13), Op2( 0), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(13), Op2( 1), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(13), Op2( 2), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake },
+	/* PMU */
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi },
 
 	{ Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
 	{ Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },

From efacbc423cf057a9051ca0fd240e68f5596700e7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 7 May 2014 12:13:14 +0100
Subject: [PATCH 0653/1185] arm64: move DBG_MDSCR_* to asm/debug-monitors.h

In order to be able to use the DBG_MDSCR_* macros from the KVM code,
move the relevant definitions to the obvious include file.

Also move the debug_el enum to a portion of the file that is guarded
by #ifndef __ASSEMBLY__ in order to use that file from assembly code.

Acked-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 51ba248164d0eeb8b4f94d405430c18a56c6ac9a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/debug-monitors.h | 19 ++++++++++++++-----
 arch/arm64/kernel/debug-monitors.c      |  9 ---------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 7c951a510b54..aab72ce22348 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -18,6 +18,15 @@
 
 #ifdef __KERNEL__
 
+/* Low-level stepping controls. */
+#define DBG_MDSCR_SS		(1 << 0)
+#define DBG_SPSR_SS		(1 << 21)
+
+/* MDSCR_EL1 enabling bits */
+#define DBG_MDSCR_KDE		(1 << 13)
+#define DBG_MDSCR_MDE		(1 << 15)
+#define DBG_MDSCR_MASK		~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
+
 #define	DBG_ESR_EVT(x)		(((x) >> 27) & 0x7)
 
 /* AArch64 */
@@ -73,11 +82,6 @@
 
 #define CACHE_FLUSH_IS_SAFE		1
 
-enum debug_el {
-	DBG_ACTIVE_EL0 = 0,
-	DBG_ACTIVE_EL1,
-};
-
 /* AArch32 */
 #define DBG_ESR_EVT_BKPT	0x4
 #define DBG_ESR_EVT_VECC	0x5
@@ -115,6 +119,11 @@ void unregister_break_hook(struct break_hook *hook);
 
 u8 debug_monitors_arch(void);
 
+enum debug_el {
+	DBG_ACTIVE_EL0 = 0,
+	DBG_ACTIVE_EL1,
+};
+
 void enable_debug_monitors(enum debug_el el);
 void disable_debug_monitors(enum debug_el el);
 
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 553a120fc838..fea84694fce4 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -31,15 +31,6 @@
 #include <asm/cputype.h>
 #include <asm/system_misc.h>
 
-/* Low-level stepping controls. */
-#define DBG_MDSCR_SS		(1 << 0)
-#define DBG_SPSR_SS		(1 << 21)
-
-/* MDSCR_EL1 enabling bits */
-#define DBG_MDSCR_KDE		(1 << 13)
-#define DBG_MDSCR_MDE		(1 << 15)
-#define DBG_MDSCR_MASK		~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
-
 /* Determine debug architecture. */
 u8 debug_monitors_arch(void)
 {

From ad4686e6b3c82d7cf3f79853cc12f655ce444668 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 24 Apr 2014 10:24:46 +0100
Subject: [PATCH 0654/1185] arm64: KVM: add trap handlers for AArch64 debug
 registers

Add handlers for all the AArch64 debug registers that are accessible
from EL0 or EL1. The trapping code keeps track of the state of the
debug registers, allowing for the switch code to implement a lazy
switching strategy.

Reviewed-by: Anup Patel <anup.patel@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 0c557ed4983b7abe152212b5b1726c2a789b2c61)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h  |  28 ++++--
 arch/arm64/include/asm/kvm_host.h |   3 +
 arch/arm64/kvm/sys_regs.c         | 150 +++++++++++++++++++++++++++++-
 3 files changed, 172 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index a28c35b337ec..660f75c48bbb 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -45,14 +45,25 @@
 #define	AMAIR_EL1	19	/* Aux Memory Attribute Indirection Register */
 #define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
 #define	PAR_EL1		21	/* Physical Address Register */
+#define MDSCR_EL1	22	/* Monitor Debug System Control Register */
+#define DBGBCR0_EL1	23	/* Debug Breakpoint Control Registers (0-15) */
+#define DBGBCR15_EL1	38
+#define DBGBVR0_EL1	39	/* Debug Breakpoint Value Registers (0-15) */
+#define DBGBVR15_EL1	54
+#define DBGWCR0_EL1	55	/* Debug Watchpoint Control Registers (0-15) */
+#define DBGWCR15_EL1	70
+#define DBGWVR0_EL1	71	/* Debug Watchpoint Value Registers (0-15) */
+#define DBGWVR15_EL1	86
+#define MDCCINT_EL1	87	/* Monitor Debug Comms Channel Interrupt Enable Reg */
+
 /* 32bit specific registers. Keep them at the end of the range */
-#define	DACR32_EL2	22	/* Domain Access Control Register */
-#define	IFSR32_EL2	23	/* Instruction Fault Status Register */
-#define	FPEXC32_EL2	24	/* Floating-Point Exception Control Register */
-#define	DBGVCR32_EL2	25	/* Debug Vector Catch Register */
-#define	TEECR32_EL1	26	/* ThumbEE Configuration Register */
-#define	TEEHBR32_EL1	27	/* ThumbEE Handler Base Register */
-#define	NR_SYS_REGS	28
+#define	DACR32_EL2	88	/* Domain Access Control Register */
+#define	IFSR32_EL2	89	/* Instruction Fault Status Register */
+#define	FPEXC32_EL2	90	/* Floating-Point Exception Control Register */
+#define	DBGVCR32_EL2	91	/* Debug Vector Catch Register */
+#define	TEECR32_EL1	92	/* ThumbEE Configuration Register */
+#define	TEEHBR32_EL1	93	/* ThumbEE Handler Base Register */
+#define	NR_SYS_REGS	94
 
 /* 32bit mapping */
 #define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
@@ -89,6 +100,9 @@
 #define ARM_EXCEPTION_IRQ	  0
 #define ARM_EXCEPTION_TRAP	  1
 
+#define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
+#define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
+
 #ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 503c70661636..8e410f761918 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -101,6 +101,9 @@ struct kvm_vcpu_arch {
 	/* Exception Information */
 	struct kvm_vcpu_fault_info fault;
 
+	/* Debug state */
+	u64 debug_flags;
+
 	/* Pointer to host CPU context */
 	kvm_cpu_context_t *host_cpu_context;
 
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 492ba301e10c..d53ce430b178 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -30,6 +30,7 @@
 #include <asm/kvm_mmu.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
+#include <asm/debug-monitors.h>
 #include <trace/events/kvm.h>
 
 #include "sys_regs.h"
@@ -171,6 +172,73 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
 		return read_zero(vcpu, p);
 }
 
+static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
+			   const struct sys_reg_params *p,
+			   const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		return ignore_write(vcpu, p);
+	} else {
+		*vcpu_reg(vcpu, p->Rt) = (1 << 3);
+		return true;
+	}
+}
+
+static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
+				   const struct sys_reg_params *p,
+				   const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		return ignore_write(vcpu, p);
+	} else {
+		u32 val;
+		asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val));
+		*vcpu_reg(vcpu, p->Rt) = val;
+		return true;
+	}
+}
+
+/*
+ * We want to avoid world-switching all the DBG registers all the
+ * time:
+ * 
+ * - If we've touched any debug register, it is likely that we're
+ *   going to touch more of them. It then makes sense to disable the
+ *   traps and start doing the save/restore dance
+ * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is
+ *   then mandatory to save/restore the registers, as the guest
+ *   depends on them.
+ * 
+ * For this, we use a DIRTY bit, indicating the guest has modified the
+ * debug registers, used as follow:
+ *
+ * On guest entry:
+ * - If the dirty bit is set (because we're coming back from trapping),
+ *   disable the traps, save host registers, restore guest registers.
+ * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set),
+ *   set the dirty bit, disable the traps, save host registers,
+ *   restore guest registers.
+ * - Otherwise, enable the traps
+ *
+ * On guest exit:
+ * - If the dirty bit is set, save guest registers, restore host
+ *   registers and clear the dirty bit. This ensure that the host can
+ *   now use the debug registers.
+ */
+static bool trap_debug_regs(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_params *p,
+			    const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+	} else {
+		*vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
+	}
+
+	return true;
+}
+
 static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 {
 	u64 amair;
@@ -187,6 +255,21 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 	vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff);
 }
 
+/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
+#define DBG_BCR_BVR_WCR_WVR_EL1(n)					\
+	/* DBGBVRn_EL1 */						\
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100),	\
+	  trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 },		\
+	/* DBGBCRn_EL1 */						\
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101),	\
+	  trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 },		\
+	/* DBGWVRn_EL1 */						\
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110),	\
+	  trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 },		\
+	/* DBGWCRn_EL1 */						\
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111),	\
+	  trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 }
+
 /*
  * Architected system registers.
  * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -199,8 +282,12 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
  * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
  * all PM registers, which doesn't crash the guest kernel at least.
  *
- * Same goes for the whole debug infrastructure, which probably breaks
- * some guest functionnality. This should be fixed.
+ * Debug handling: We do trap most, if not all debug related system
+ * registers. The implementation is good enough to ensure that a guest
+ * can use these with minimal performance degradation. The drawback is
+ * that we don't implement any of the external debug, none of the
+ * OSlock protocol. This should be revisited if we ever encounter a
+ * more demanding guest...
  */
 static const struct sys_reg_desc sys_reg_descs[] = {
 	/* DC ISW */
@@ -213,12 +300,71 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
 	  access_dcsw },
 
+	DBG_BCR_BVR_WCR_WVR_EL1(0),
+	DBG_BCR_BVR_WCR_WVR_EL1(1),
+	/* MDCCINT_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
+	  trap_debug_regs, reset_val, MDCCINT_EL1, 0 },
+	/* MDSCR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
+	  trap_debug_regs, reset_val, MDSCR_EL1, 0 },
+	DBG_BCR_BVR_WCR_WVR_EL1(2),
+	DBG_BCR_BVR_WCR_WVR_EL1(3),
+	DBG_BCR_BVR_WCR_WVR_EL1(4),
+	DBG_BCR_BVR_WCR_WVR_EL1(5),
+	DBG_BCR_BVR_WCR_WVR_EL1(6),
+	DBG_BCR_BVR_WCR_WVR_EL1(7),
+	DBG_BCR_BVR_WCR_WVR_EL1(8),
+	DBG_BCR_BVR_WCR_WVR_EL1(9),
+	DBG_BCR_BVR_WCR_WVR_EL1(10),
+	DBG_BCR_BVR_WCR_WVR_EL1(11),
+	DBG_BCR_BVR_WCR_WVR_EL1(12),
+	DBG_BCR_BVR_WCR_WVR_EL1(13),
+	DBG_BCR_BVR_WCR_WVR_EL1(14),
+	DBG_BCR_BVR_WCR_WVR_EL1(15),
+
+	/* MDRAR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
+	  trap_raz_wi },
+	/* OSLAR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100),
+	  trap_raz_wi },
+	/* OSLSR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100),
+	  trap_oslsr_el1 },
+	/* OSDLR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100),
+	  trap_raz_wi },
+	/* DBGPRCR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100),
+	  trap_raz_wi },
+	/* DBGCLAIMSET_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110),
+	  trap_raz_wi },
+	/* DBGCLAIMCLR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110),
+	  trap_raz_wi },
+	/* DBGAUTHSTATUS_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110),
+	  trap_dbgauthstatus_el1 },
+
 	/* TEECR32_EL1 */
 	{ Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
 	  NULL, reset_val, TEECR32_EL1, 0 },
 	/* TEEHBR32_EL1 */
 	{ Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000),
 	  NULL, reset_val, TEEHBR32_EL1, 0 },
+
+	/* MDCCSR_EL1 */
+	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000),
+	  trap_raz_wi },
+	/* DBGDTR_EL0 */
+	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000),
+	  trap_raz_wi },
+	/* DBGDTR[TR]X_EL0 */
+	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000),
+	  trap_raz_wi },
+
 	/* DBGVCR32_EL2 */
 	{ Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000),
 	  NULL, reset_val, DBGVCR32_EL2, 0 },

From d62af886035cc34e73e82e06179951f6c4e72c6d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 24 Apr 2014 10:27:13 +0100
Subject: [PATCH 0655/1185] arm64: KVM: common infrastructure for handling
 AArch32 CP14/CP15

As we're about to trap a bunch of CP14 registers, let's rework
the CP15 handling so it can be generalized and work with multiple
tables.

Reviewed-by: Anup Patel <anup.patel@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 72564016aae45f42e488f926bc803f9a2e1c771c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h    |   2 +-
 arch/arm64/include/asm/kvm_coproc.h |   3 +-
 arch/arm64/include/asm/kvm_host.h   |  13 ++-
 arch/arm64/kvm/handle_exit.c        |   4 +-
 arch/arm64/kvm/sys_regs.c           | 133 +++++++++++++++++++++++-----
 5 files changed, 124 insertions(+), 31 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 660f75c48bbb..69027ded5006 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -95,7 +95,7 @@
 #define c10_AMAIR0	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
 #define c10_AMAIR1	(c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
 #define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
-#define NR_CP15_REGS	(NR_SYS_REGS * 2)
+#define NR_COPRO_REGS	(NR_SYS_REGS * 2)
 
 #define ARM_EXCEPTION_IRQ	  0
 #define ARM_EXCEPTION_TRAP	  1
diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h
index 9a59301cd014..0b52377a6c11 100644
--- a/arch/arm64/include/asm/kvm_coproc.h
+++ b/arch/arm64/include/asm/kvm_coproc.h
@@ -39,7 +39,8 @@ void kvm_register_target_sys_reg_table(unsigned int target,
 				       struct kvm_sys_reg_target_table *table);
 
 int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8e410f761918..79812be4f25f 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -86,7 +86,7 @@ struct kvm_cpu_context {
 	struct kvm_regs	gp_regs;
 	union {
 		u64 sys_regs[NR_SYS_REGS];
-		u32 cp15[NR_CP15_REGS];
+		u32 copro[NR_COPRO_REGS];
 	};
 };
 
@@ -141,12 +141,17 @@ struct kvm_vcpu_arch {
 
 #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
 #define vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
-#define vcpu_cp15(v,r)		((v)->arch.ctxt.cp15[(r)])
+/*
+ * CP14 and CP15 live in the same array, as they are backed by the
+ * same system registers.
+ */
+#define vcpu_cp14(v,r)		((v)->arch.ctxt.copro[(r)])
+#define vcpu_cp15(v,r)		((v)->arch.ctxt.copro[(r)])
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
-#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 1)])
+#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 1)])
 #else
-#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 0)])
+#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 0)])
 #endif
 
 struct kvm_vm_stat {
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 182415e1a952..e28be510380c 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -73,9 +73,9 @@ static exit_handle_fn arm_exit_handlers[] = {
 	[ESR_EL2_EC_WFI]	= kvm_handle_wfx,
 	[ESR_EL2_EC_CP15_32]	= kvm_handle_cp15_32,
 	[ESR_EL2_EC_CP15_64]	= kvm_handle_cp15_64,
-	[ESR_EL2_EC_CP14_MR]	= kvm_handle_cp14_access,
+	[ESR_EL2_EC_CP14_MR]	= kvm_handle_cp14_32,
 	[ESR_EL2_EC_CP14_LS]	= kvm_handle_cp14_load_store,
-	[ESR_EL2_EC_CP14_64]	= kvm_handle_cp14_access,
+	[ESR_EL2_EC_CP14_64]	= kvm_handle_cp14_64,
 	[ESR_EL2_EC_HVC32]	= handle_hvc,
 	[ESR_EL2_EC_SMC32]	= handle_smc,
 	[ESR_EL2_EC_HVC64]	= handle_hvc,
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index d53ce430b178..266afd972ad3 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -494,6 +494,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	  NULL, reset_val, FPEXC32_EL2, 0x70 },
 };
 
+/* Trapped cp14 registers */
+static const struct sys_reg_desc cp14_regs[] = {
+};
+
 /*
  * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
  * depending on the way they are accessed (as a 32bit or a 64bit
@@ -601,26 +605,29 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 1;
 }
 
-int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
+/*
+ * emulate_cp --  tries to match a sys_reg access in a handling table, and
+ *                call the corresponding trap handler.
+ *
+ * @params: pointer to the descriptor of the access
+ * @table: array of trap descriptors
+ * @num: size of the trap descriptor array
+ *
+ * Return 0 if the access has been handled, and -1 if not.
+ */
+static int emulate_cp(struct kvm_vcpu *vcpu,
+		      const struct sys_reg_params *params,
+		      const struct sys_reg_desc *table,
+		      size_t num)
 {
-	kvm_inject_undefined(vcpu);
-	return 1;
-}
+	const struct sys_reg_desc *r;
 
-static void emulate_cp15(struct kvm_vcpu *vcpu,
-			 const struct sys_reg_params *params)
-{
-	size_t num;
-	const struct sys_reg_desc *table, *r;
+	if (!table)
+		return -1;	/* Not handled */
 
-	table = get_target_table(vcpu->arch.target, false, &num);
-
-	/* Search target-specific then generic table. */
 	r = find_reg(params, table, num);
-	if (!r)
-		r = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs));
 
-	if (likely(r)) {
+	if (r) {
 		/*
 		 * Not having an accessor means that we have
 		 * configured a trap that we don't know how to
@@ -632,22 +639,51 @@ static void emulate_cp15(struct kvm_vcpu *vcpu,
 		if (likely(r->access(vcpu, params, r))) {
 			/* Skip instruction, since it was emulated */
 			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
-			return;
 		}
-		/* If access function fails, it should complain. */
+
+		/* Handled */
+		return 0;
 	}
 
-	kvm_err("Unsupported guest CP15 access at: %08lx\n", *vcpu_pc(vcpu));
+	/* Not handled */
+	return -1;
+}
+
+static void unhandled_cp_access(struct kvm_vcpu *vcpu,
+				struct sys_reg_params *params)
+{
+	u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+	int cp;
+
+	switch(hsr_ec) {
+	case ESR_EL2_EC_CP15_32:
+	case ESR_EL2_EC_CP15_64:
+		cp = 15;
+		break;
+	case ESR_EL2_EC_CP14_MR:
+	case ESR_EL2_EC_CP14_64:
+		cp = 14;
+		break;
+	default:
+		WARN_ON((cp = -1));
+	}
+
+	kvm_err("Unsupported guest CP%d access at: %08lx\n",
+		cp, *vcpu_pc(vcpu));
 	print_sys_reg_instr(params);
 	kvm_inject_undefined(vcpu);
 }
 
 /**
- * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access
+ * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP14/CP15 access
  * @vcpu: The VCPU pointer
  * @run:  The kvm_run struct
  */
-int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_desc *global,
+			    size_t nr_global,
+			    const struct sys_reg_desc *target_specific,
+			    size_t nr_specific)
 {
 	struct sys_reg_params params;
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
@@ -676,8 +712,14 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		*vcpu_reg(vcpu, params.Rt) = val;
 	}
 
-	emulate_cp15(vcpu, &params);
+	if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
+		goto out;
+	if (!emulate_cp(vcpu, &params, global, nr_global))
+		goto out;
 
+	unhandled_cp_access(vcpu, &params);
+
+out:
 	/* Do the opposite hack for the read side */
 	if (!params.is_write) {
 		u64 val = *vcpu_reg(vcpu, params.Rt);
@@ -693,7 +735,11 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
  * @vcpu: The VCPU pointer
  * @run:  The kvm_run struct
  */
-int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_desc *global,
+			    size_t nr_global,
+			    const struct sys_reg_desc *target_specific,
+			    size_t nr_specific)
 {
 	struct sys_reg_params params;
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
@@ -708,10 +754,51 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	params.Op1 = (hsr >> 14) & 0x7;
 	params.Op2 = (hsr >> 17) & 0x7;
 
-	emulate_cp15(vcpu, &params);
+	if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
+		return 1;
+	if (!emulate_cp(vcpu, &params, global, nr_global))
+		return 1;
+
+	unhandled_cp_access(vcpu, &params);
 	return 1;
 }
 
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	const struct sys_reg_desc *target_specific;
+	size_t num;
+
+	target_specific = get_target_table(vcpu->arch.target, false, &num);
+	return kvm_handle_cp_64(vcpu,
+				cp15_regs, ARRAY_SIZE(cp15_regs),
+				target_specific, num);
+}
+
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	const struct sys_reg_desc *target_specific;
+	size_t num;
+
+	target_specific = get_target_table(vcpu->arch.target, false, &num);
+	return kvm_handle_cp_32(vcpu,
+				cp15_regs, ARRAY_SIZE(cp15_regs),
+				target_specific, num);
+}
+
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	return kvm_handle_cp_64(vcpu,
+				cp14_regs, ARRAY_SIZE(cp14_regs),
+				NULL, 0);
+}
+
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	return kvm_handle_cp_32(vcpu,
+				cp14_regs, ARRAY_SIZE(cp14_regs),
+				NULL, 0);
+}
+
 static int emulate_sys_reg(struct kvm_vcpu *vcpu,
 			   const struct sys_reg_params *params)
 {

From 693292e6a93a775a91421b16f87f632e5d7f077b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 24 Apr 2014 14:11:48 +0100
Subject: [PATCH 0656/1185] arm64: KVM: use separate tables for AArch32 32 and
 64bit traps

An interesting "feature" of the CP14 encoding is that there is
an overlap between 32 and 64bit registers, meaning they cannot
live in the same table as we did for CP15.

Create separate tables for 64bit CP14 and CP15 registers, and
let the top level handler use the right one.

Reviewed-by: Anup Patel <anup.patel@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit a9866ba0cddfc497335fa02a175c4578b96722ff)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 266afd972ad3..499a351fd1b9 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -498,13 +498,16 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 static const struct sys_reg_desc cp14_regs[] = {
 };
 
+/* Trapped cp14 64bit registers */
+static const struct sys_reg_desc cp14_64_regs[] = {
+};
+
 /*
  * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
  * depending on the way they are accessed (as a 32bit or a 64bit
  * register).
  */
 static const struct sys_reg_desc cp15_regs[] = {
-	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
@@ -545,6 +548,10 @@ static const struct sys_reg_desc cp15_regs[] = {
 	{ Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
 	{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
 
+};
+
+static const struct sys_reg_desc cp15_64_regs[] = {
+	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
 };
 
@@ -770,7 +777,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 	target_specific = get_target_table(vcpu->arch.target, false, &num);
 	return kvm_handle_cp_64(vcpu,
-				cp15_regs, ARRAY_SIZE(cp15_regs),
+				cp15_64_regs, ARRAY_SIZE(cp15_64_regs),
 				target_specific, num);
 }
 
@@ -788,7 +795,7 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
 int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	return kvm_handle_cp_64(vcpu,
-				cp14_regs, ARRAY_SIZE(cp14_regs),
+				cp14_64_regs, ARRAY_SIZE(cp14_64_regs),
 				NULL, 0);
 }
 

From 2c4bc5c6442250ebe7a1b08ed297fc48aef99547 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 7 May 2014 13:43:39 +0100
Subject: [PATCH 0657/1185] arm64: KVM: check ordering of all system register
 tables

We now have multiple tables for the various system registers
we trap. Make sure we check the order of all of them, as it is
critical that we get the order right (been there, done that...).

Reviewed-by: Anup Patel <anup.patel@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit e6a9551760623d1703487e8a16bb9c3ea8a7e7a8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 499a351fd1b9..8ab47c7326ed 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1308,14 +1308,32 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 	return write_demux_regids(uindices);
 }
 
+static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 1; i < n; i++) {
+		if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
+			kvm_err("sys_reg table %p out of order (%u)\n", table, i - 1);
+			return 1;	/* entry i-1 does not sort before entry i */
+		}
+	}
+
+	return 0;	/* every entry sorts strictly before its successor */
+}
+
 void kvm_sys_reg_table_init(void)
 {
 	unsigned int i;
 	struct sys_reg_desc clidr;
 
 	/* Make sure tables are unique and in order. */
-	for (i = 1; i < ARRAY_SIZE(sys_reg_descs); i++)
-		BUG_ON(cmp_sys_reg(&sys_reg_descs[i-1], &sys_reg_descs[i]) >= 0);
+	BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs)));
+	BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs)));
+	BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs)));
+	BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
+	BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs)));
+	BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)));
 
 	/* We abuse the reset function to overwrite the table itself. */
 	for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)

From c85f50a20b43754698af82b27279ca87cb04bd3a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 24 Apr 2014 10:31:37 +0100
Subject: [PATCH 0658/1185] arm64: KVM: add trap handlers for AArch32 debug
 registers

Add handlers for all the AArch32 debug registers that are accessible
from EL0 or EL1. The code follows the same strategy as the AArch64
counterpart with regards to tracking the dirty state of the debug
registers.

Reviewed-by: Anup Patel <anup.patel@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit bdfb4b389c8d8f07e2d5b8e1291e01c789ba4aad)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h |   9 ++
 arch/arm64/kvm/sys_regs.c        | 144 ++++++++++++++++++++++++++++++-
 2 files changed, 151 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 69027ded5006..483842180f8f 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -95,6 +95,15 @@
 #define c10_AMAIR0	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
 #define c10_AMAIR1	(c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
 #define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
+
+#define cp14_DBGDSCRext	(MDSCR_EL1 * 2)
+#define cp14_DBGBCR0	(DBGBCR0_EL1 * 2)
+#define cp14_DBGBVR0	(DBGBVR0_EL1 * 2)
+#define cp14_DBGBXVR0	(cp14_DBGBVR0 + 1)
+#define cp14_DBGWCR0	(DBGWCR0_EL1 * 2)
+#define cp14_DBGWVR0	(DBGWVR0_EL1 * 2)
+#define cp14_DBGDCCINT	(MDCCINT_EL1 * 2)
+
 #define NR_COPRO_REGS	(NR_SYS_REGS * 2)
 
 #define ARM_EXCEPTION_IRQ	  0
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 8ab47c7326ed..a4fd5267c65b 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -494,12 +494,153 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	  NULL, reset_val, FPEXC32_EL2, 0x70 },
 };
 
-/* Trapped cp14 registers */
+static bool trap_dbgidr(struct kvm_vcpu *vcpu,
+			const struct sys_reg_params *p,
+			const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		return ignore_write(vcpu, p);
+	} else {
+		u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
+		u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
+		u32 el3 = !!((pfr >> 12) & 0xf);
+
+		*vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) |
+					  (((dfr >> 12) & 0xf) << 24) |
+					  (((dfr >> 28) & 0xf) << 20) |
+					  (6 << 16) | (el3 << 14) | (el3 << 12));
+		return true;
+	}
+}
+
+static bool trap_debug32(struct kvm_vcpu *vcpu,
+			 const struct sys_reg_params *p,
+			 const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+	} else {
+		*vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg);
+	}
+
+	return true;
+}
+
+#define DBG_BCR_BVR_WCR_WVR(n)					\
+	/* DBGBVRn */						\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32,	\
+	  NULL, (cp14_DBGBVR0 + (n) * 2) },			\
+	/* DBGBCRn */						\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32,	\
+	  NULL, (cp14_DBGBCR0 + (n) * 2) },			\
+	/* DBGWVRn */						\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32,	\
+	  NULL, (cp14_DBGWVR0 + (n) * 2) },			\
+	/* DBGWCRn */						\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32,	\
+	  NULL, (cp14_DBGWCR0 + (n) * 2) }
+
+#define DBGBXVR(n)	/* DBGBXVRn */					\
+	{ Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32,	\
+	  NULL, (cp14_DBGBXVR0 + (n) * 2) }
+
+/*
+ * Trapped cp14 registers. We generally ignore most of the external
+ * debug, on the principle that they don't really make sense to a
+ * guest. Revisit this one day, should this principle change.
+ */
 static const struct sys_reg_desc cp14_regs[] = {
+	/* DBGIDR */
+	{ Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgidr },
+	/* DBGDTRRXext */
+	{ Op1( 0), CRn( 0), CRm( 0), Op2( 2), trap_raz_wi },
+
+	DBG_BCR_BVR_WCR_WVR(0),
+	/* DBGDSCRint */
+	{ Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi },
+	DBG_BCR_BVR_WCR_WVR(1),
+	/* DBGDCCINT */
+	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 },
+	/* DBGDSCRext */
+	{ Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 },
+	DBG_BCR_BVR_WCR_WVR(2),
+	/* DBGDTR[RT]Xint */
+	{ Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi },
+	/* DBGDTR[RT]Xext */
+	{ Op1( 0), CRn( 0), CRm( 3), Op2( 2), trap_raz_wi },
+	DBG_BCR_BVR_WCR_WVR(3),
+	DBG_BCR_BVR_WCR_WVR(4),
+	DBG_BCR_BVR_WCR_WVR(5),
+	/* DBGWFAR */
+	{ Op1( 0), CRn( 0), CRm( 6), Op2( 0), trap_raz_wi },
+	/* DBGOSECCR */
+	{ Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi },
+	DBG_BCR_BVR_WCR_WVR(6),
+	/* DBGVCR */
+	{ Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 },
+	DBG_BCR_BVR_WCR_WVR(7),
+	DBG_BCR_BVR_WCR_WVR(8),
+	DBG_BCR_BVR_WCR_WVR(9),
+	DBG_BCR_BVR_WCR_WVR(10),
+	DBG_BCR_BVR_WCR_WVR(11),
+	DBG_BCR_BVR_WCR_WVR(12),
+	DBG_BCR_BVR_WCR_WVR(13),
+	DBG_BCR_BVR_WCR_WVR(14),
+	DBG_BCR_BVR_WCR_WVR(15),
+
+	/* DBGDRAR (32bit) */
+	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), trap_raz_wi },
+
+	DBGBXVR(0),
+	/* DBGOSLAR */
+	{ Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi },
+	DBGBXVR(1),
+	/* DBGOSLSR */
+	{ Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 },
+	DBGBXVR(2),
+	DBGBXVR(3),
+	/* DBGOSDLR */
+	{ Op1( 0), CRn( 1), CRm( 3), Op2( 4), trap_raz_wi },
+	DBGBXVR(4),
+	/* DBGPRCR */
+	{ Op1( 0), CRn( 1), CRm( 4), Op2( 4), trap_raz_wi },
+	DBGBXVR(5),
+	DBGBXVR(6),
+	DBGBXVR(7),
+	DBGBXVR(8),
+	DBGBXVR(9),
+	DBGBXVR(10),
+	DBGBXVR(11),
+	DBGBXVR(12),
+	DBGBXVR(13),
+	DBGBXVR(14),
+	DBGBXVR(15),
+
+	/* DBGDSAR (32bit) */
+	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), trap_raz_wi },
+
+	/* DBGDEVID2 */
+	{ Op1( 0), CRn( 7), CRm( 0), Op2( 7), trap_raz_wi },
+	/* DBGDEVID1 */
+	{ Op1( 0), CRn( 7), CRm( 1), Op2( 7), trap_raz_wi },
+	/* DBGDEVID */
+	{ Op1( 0), CRn( 7), CRm( 2), Op2( 7), trap_raz_wi },
+	/* DBGCLAIMSET */
+	{ Op1( 0), CRn( 7), CRm( 8), Op2( 6), trap_raz_wi },
+	/* DBGCLAIMCLR */
+	{ Op1( 0), CRn( 7), CRm( 9), Op2( 6), trap_raz_wi },
+	/* DBGAUTHSTATUS */
+	{ Op1( 0), CRn( 7), CRm(14), Op2( 6), trap_dbgauthstatus_el1 },
 };
 
 /* Trapped cp14 64bit registers */
 static const struct sys_reg_desc cp14_64_regs[] = {
+	/* DBGDRAR (64bit) */
+	{ Op1( 0), CRm( 1), .access = trap_raz_wi },
+
+	/* DBGDSAR (64bit) */
+	{ Op1( 0), CRm( 2), .access = trap_raz_wi },
 };
 
 /*
@@ -547,7 +688,6 @@ static const struct sys_reg_desc cp15_regs[] = {
 	{ Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 },
 	{ Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
 	{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
-
 };
 
 static const struct sys_reg_desc cp15_64_regs[] = {

From 44af263503e0e31b1f88ce86df73e467f18e3198 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 7 May 2014 13:44:49 +0100
Subject: [PATCH 0659/1185] arm64: KVM: implement lazy world switch for debug
 registers

Implement switching of the debug registers. While the number
of registers is massive, CPUs usually don't implement them all
(A57 has 6 breakpoints and 4 watchpoints, which gives us a total
of 22 registers "only").

Also, we only save/restore them when MDSCR_EL1 has debug enabled,
or when we've flagged the debug registers as dirty. It means that
most of the time, we only save/restore MDSCR_EL1.

Reviewed-by: Anup Patel <anup.patel@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit b0e626b380872b663918230fafdac128c34fea56)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kernel/asm-offsets.c |   1 +
 arch/arm64/kvm/hyp.S            | 463 +++++++++++++++++++++++++++++++-
 2 files changed, 458 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 65ebb2ccde5f..825d76c21d84 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -118,6 +118,7 @@ int main(void)
   DEFINE(VCPU_ESR_EL2,		offsetof(struct kvm_vcpu, arch.fault.esr_el2));
   DEFINE(VCPU_FAR_EL2,		offsetof(struct kvm_vcpu, arch.fault.far_el2));
   DEFINE(VCPU_HPFAR_EL2,	offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
+  DEFINE(VCPU_DEBUG_FLAGS,	offsetof(struct kvm_vcpu, arch.debug_flags));
   DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
   DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
   DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 7874e022d077..100494b5c7d4 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -20,6 +20,7 @@
 #include <asm/assembler.h>
 #include <asm/memory.h>
 #include <asm/asm-offsets.h>
+#include <asm/debug-monitors.h>
 #include <asm/fpsimdmacros.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
@@ -211,6 +212,7 @@
 	mrs	x22, 	amair_el1
 	mrs	x23, 	cntkctl_el1
 	mrs	x24,	par_el1
+	mrs	x25,	mdscr_el1
 
 	stp	x4, x5, [x3]
 	stp	x6, x7, [x3, #16]
@@ -222,7 +224,202 @@
 	stp	x18, x19, [x3, #112]
 	stp	x20, x21, [x3, #128]
 	stp	x22, x23, [x3, #144]
-	str	x24, [x3, #160]
+	stp	x24, x25, [x3, #160]
+.endm
+
+.macro save_debug
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	mrs	x26, id_aa64dfr0_el1
+	ubfx	x24, x26, #12, #4	// Extract BRPs
+	ubfx	x25, x26, #20, #4	// Extract WRPs
+	mov	w26, #15
+	sub	w24, w26, w24		// How many BPs to skip
+	sub	w25, w26, w25		// How many WPs to skip
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	mrs	x20, dbgbcr15_el1
+	mrs	x19, dbgbcr14_el1
+	mrs	x18, dbgbcr13_el1
+	mrs	x17, dbgbcr12_el1
+	mrs	x16, dbgbcr11_el1
+	mrs	x15, dbgbcr10_el1
+	mrs	x14, dbgbcr9_el1
+	mrs	x13, dbgbcr8_el1
+	mrs	x12, dbgbcr7_el1
+	mrs	x11, dbgbcr6_el1
+	mrs	x10, dbgbcr5_el1
+	mrs	x9, dbgbcr4_el1
+	mrs	x8, dbgbcr3_el1
+	mrs	x7, dbgbcr2_el1
+	mrs	x6, dbgbcr1_el1
+	mrs	x5, dbgbcr0_el1
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+
+1:
+	str	x20, [x3, #(15 * 8)]
+	str	x19, [x3, #(14 * 8)]
+	str	x18, [x3, #(13 * 8)]
+	str	x17, [x3, #(12 * 8)]
+	str	x16, [x3, #(11 * 8)]
+	str	x15, [x3, #(10 * 8)]
+	str	x14, [x3, #(9 * 8)]
+	str	x13, [x3, #(8 * 8)]
+	str	x12, [x3, #(7 * 8)]
+	str	x11, [x3, #(6 * 8)]
+	str	x10, [x3, #(5 * 8)]
+	str	x9, [x3, #(4 * 8)]
+	str	x8, [x3, #(3 * 8)]
+	str	x7, [x3, #(2 * 8)]
+	str	x6, [x3, #(1 * 8)]
+	str	x5, [x3, #(0 * 8)]
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	mrs	x20, dbgbvr15_el1
+	mrs	x19, dbgbvr14_el1
+	mrs	x18, dbgbvr13_el1
+	mrs	x17, dbgbvr12_el1
+	mrs	x16, dbgbvr11_el1
+	mrs	x15, dbgbvr10_el1
+	mrs	x14, dbgbvr9_el1
+	mrs	x13, dbgbvr8_el1
+	mrs	x12, dbgbvr7_el1
+	mrs	x11, dbgbvr6_el1
+	mrs	x10, dbgbvr5_el1
+	mrs	x9, dbgbvr4_el1
+	mrs	x8, dbgbvr3_el1
+	mrs	x7, dbgbvr2_el1
+	mrs	x6, dbgbvr1_el1
+	mrs	x5, dbgbvr0_el1
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+
+1:
+	str	x20, [x3, #(15 * 8)]
+	str	x19, [x3, #(14 * 8)]
+	str	x18, [x3, #(13 * 8)]
+	str	x17, [x3, #(12 * 8)]
+	str	x16, [x3, #(11 * 8)]
+	str	x15, [x3, #(10 * 8)]
+	str	x14, [x3, #(9 * 8)]
+	str	x13, [x3, #(8 * 8)]
+	str	x12, [x3, #(7 * 8)]
+	str	x11, [x3, #(6 * 8)]
+	str	x10, [x3, #(5 * 8)]
+	str	x9, [x3, #(4 * 8)]
+	str	x8, [x3, #(3 * 8)]
+	str	x7, [x3, #(2 * 8)]
+	str	x6, [x3, #(1 * 8)]
+	str	x5, [x3, #(0 * 8)]
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	mrs	x20, dbgwcr15_el1
+	mrs	x19, dbgwcr14_el1
+	mrs	x18, dbgwcr13_el1
+	mrs	x17, dbgwcr12_el1
+	mrs	x16, dbgwcr11_el1
+	mrs	x15, dbgwcr10_el1
+	mrs	x14, dbgwcr9_el1
+	mrs	x13, dbgwcr8_el1
+	mrs	x12, dbgwcr7_el1
+	mrs	x11, dbgwcr6_el1
+	mrs	x10, dbgwcr5_el1
+	mrs	x9, dbgwcr4_el1
+	mrs	x8, dbgwcr3_el1
+	mrs	x7, dbgwcr2_el1
+	mrs	x6, dbgwcr1_el1
+	mrs	x5, dbgwcr0_el1
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+
+1:
+	str	x20, [x3, #(15 * 8)]
+	str	x19, [x3, #(14 * 8)]
+	str	x18, [x3, #(13 * 8)]
+	str	x17, [x3, #(12 * 8)]
+	str	x16, [x3, #(11 * 8)]
+	str	x15, [x3, #(10 * 8)]
+	str	x14, [x3, #(9 * 8)]
+	str	x13, [x3, #(8 * 8)]
+	str	x12, [x3, #(7 * 8)]
+	str	x11, [x3, #(6 * 8)]
+	str	x10, [x3, #(5 * 8)]
+	str	x9, [x3, #(4 * 8)]
+	str	x8, [x3, #(3 * 8)]
+	str	x7, [x3, #(2 * 8)]
+	str	x6, [x3, #(1 * 8)]
+	str	x5, [x3, #(0 * 8)]
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	mrs	x20, dbgwvr15_el1
+	mrs	x19, dbgwvr14_el1
+	mrs	x18, dbgwvr13_el1
+	mrs	x17, dbgwvr12_el1
+	mrs	x16, dbgwvr11_el1
+	mrs	x15, dbgwvr10_el1
+	mrs	x14, dbgwvr9_el1
+	mrs	x13, dbgwvr8_el1
+	mrs	x12, dbgwvr7_el1
+	mrs	x11, dbgwvr6_el1
+	mrs	x10, dbgwvr5_el1
+	mrs	x9, dbgwvr4_el1
+	mrs	x8, dbgwvr3_el1
+	mrs	x7, dbgwvr2_el1
+	mrs	x6, dbgwvr1_el1
+	mrs	x5, dbgwvr0_el1
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+
+1:
+	str	x20, [x3, #(15 * 8)]
+	str	x19, [x3, #(14 * 8)]
+	str	x18, [x3, #(13 * 8)]
+	str	x17, [x3, #(12 * 8)]
+	str	x16, [x3, #(11 * 8)]
+	str	x15, [x3, #(10 * 8)]
+	str	x14, [x3, #(9 * 8)]
+	str	x13, [x3, #(8 * 8)]
+	str	x12, [x3, #(7 * 8)]
+	str	x11, [x3, #(6 * 8)]
+	str	x10, [x3, #(5 * 8)]
+	str	x9, [x3, #(4 * 8)]
+	str	x8, [x3, #(3 * 8)]
+	str	x7, [x3, #(2 * 8)]
+	str	x6, [x3, #(1 * 8)]
+	str	x5, [x3, #(0 * 8)]
+
+	mrs	x21, mdccint_el1
+	str	x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
 .endm
 
 .macro restore_sysregs
@@ -241,7 +438,7 @@
 	ldp	x18, x19, [x3, #112]
 	ldp	x20, x21, [x3, #128]
 	ldp	x22, x23, [x3, #144]
-	ldr	x24, [x3, #160]
+	ldp	x24, x25, [x3, #160]
 
 	msr	vmpidr_el2,	x4
 	msr	csselr_el1,	x5
@@ -264,6 +461,198 @@
 	msr	amair_el1,	x22
 	msr	cntkctl_el1,	x23
 	msr	par_el1,	x24
+	msr	mdscr_el1,	x25
+.endm
+
+.macro restore_debug
+	// x2: base address for cpu context
+	// x3: tmp register; x5-x21 and x24-x26 are clobbered as well
+
+	mrs	x26, id_aa64dfr0_el1
+	ubfx	x24, x26, #12, #4	// Extract BRPs
+	ubfx	x25, x26, #20, #4	// Extract WRPs
+	mov	w26, #15
+	sub	w24, w26, w24		// How many BPs to skip
+	sub	w25, w26, w25		// How many WPs to skip
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)	// x3 = slot of DBGBCR0
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2	// 4 bytes per insn: skip x24 loads
+	br	x26
+1:
+	ldr	x20, [x3, #(15 * 8)]
+	ldr	x19, [x3, #(14 * 8)]
+	ldr	x18, [x3, #(13 * 8)]
+	ldr	x17, [x3, #(12 * 8)]
+	ldr	x16, [x3, #(11 * 8)]
+	ldr	x15, [x3, #(10 * 8)]
+	ldr	x14, [x3, #(9 * 8)]
+	ldr	x13, [x3, #(8 * 8)]
+	ldr	x12, [x3, #(7 * 8)]
+	ldr	x11, [x3, #(6 * 8)]
+	ldr	x10, [x3, #(5 * 8)]
+	ldr	x9, [x3, #(4 * 8)]
+	ldr	x8, [x3, #(3 * 8)]
+	ldr	x7, [x3, #(2 * 8)]
+	ldr	x6, [x3, #(1 * 8)]
+	ldr	x5, [x3, #(0 * 8)]
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2	// ... and skip as many register writes
+	br	x26
+1:
+	msr	dbgbcr15_el1, x20
+	msr	dbgbcr14_el1, x19
+	msr	dbgbcr13_el1, x18
+	msr	dbgbcr12_el1, x17
+	msr	dbgbcr11_el1, x16
+	msr	dbgbcr10_el1, x15
+	msr	dbgbcr9_el1, x14
+	msr	dbgbcr8_el1, x13
+	msr	dbgbcr7_el1, x12
+	msr	dbgbcr6_el1, x11
+	msr	dbgbcr5_el1, x10
+	msr	dbgbcr4_el1, x9
+	msr	dbgbcr3_el1, x8
+	msr	dbgbcr2_el1, x7
+	msr	dbgbcr1_el1, x6
+	msr	dbgbcr0_el1, x5
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)	// x3 = slot of DBGBVR0
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	ldr	x20, [x3, #(15 * 8)]
+	ldr	x19, [x3, #(14 * 8)]
+	ldr	x18, [x3, #(13 * 8)]
+	ldr	x17, [x3, #(12 * 8)]
+	ldr	x16, [x3, #(11 * 8)]
+	ldr	x15, [x3, #(10 * 8)]
+	ldr	x14, [x3, #(9 * 8)]
+	ldr	x13, [x3, #(8 * 8)]
+	ldr	x12, [x3, #(7 * 8)]
+	ldr	x11, [x3, #(6 * 8)]
+	ldr	x10, [x3, #(5 * 8)]
+	ldr	x9, [x3, #(4 * 8)]
+	ldr	x8, [x3, #(3 * 8)]
+	ldr	x7, [x3, #(2 * 8)]
+	ldr	x6, [x3, #(1 * 8)]
+	ldr	x5, [x3, #(0 * 8)]
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	msr	dbgbvr15_el1, x20
+	msr	dbgbvr14_el1, x19
+	msr	dbgbvr13_el1, x18
+	msr	dbgbvr12_el1, x17
+	msr	dbgbvr11_el1, x16
+	msr	dbgbvr10_el1, x15
+	msr	dbgbvr9_el1, x14
+	msr	dbgbvr8_el1, x13
+	msr	dbgbvr7_el1, x12
+	msr	dbgbvr6_el1, x11
+	msr	dbgbvr5_el1, x10
+	msr	dbgbvr4_el1, x9
+	msr	dbgbvr3_el1, x8
+	msr	dbgbvr2_el1, x7
+	msr	dbgbvr1_el1, x6
+	msr	dbgbvr0_el1, x5
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)	// x3 = slot of DBGWCR0
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2	// same trick with the WP skip count
+	br	x26
+1:
+	ldr	x20, [x3, #(15 * 8)]
+	ldr	x19, [x3, #(14 * 8)]
+	ldr	x18, [x3, #(13 * 8)]
+	ldr	x17, [x3, #(12 * 8)]
+	ldr	x16, [x3, #(11 * 8)]
+	ldr	x15, [x3, #(10 * 8)]
+	ldr	x14, [x3, #(9 * 8)]
+	ldr	x13, [x3, #(8 * 8)]
+	ldr	x12, [x3, #(7 * 8)]
+	ldr	x11, [x3, #(6 * 8)]
+	ldr	x10, [x3, #(5 * 8)]
+	ldr	x9, [x3, #(4 * 8)]
+	ldr	x8, [x3, #(3 * 8)]
+	ldr	x7, [x3, #(2 * 8)]
+	ldr	x6, [x3, #(1 * 8)]
+	ldr	x5, [x3, #(0 * 8)]
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	msr	dbgwcr15_el1, x20
+	msr	dbgwcr14_el1, x19
+	msr	dbgwcr13_el1, x18
+	msr	dbgwcr12_el1, x17
+	msr	dbgwcr11_el1, x16
+	msr	dbgwcr10_el1, x15
+	msr	dbgwcr9_el1, x14
+	msr	dbgwcr8_el1, x13
+	msr	dbgwcr7_el1, x12
+	msr	dbgwcr6_el1, x11
+	msr	dbgwcr5_el1, x10
+	msr	dbgwcr4_el1, x9
+	msr	dbgwcr3_el1, x8
+	msr	dbgwcr2_el1, x7
+	msr	dbgwcr1_el1, x6
+	msr	dbgwcr0_el1, x5
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)	// x3 = slot of DBGWVR0
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	ldr	x20, [x3, #(15 * 8)]
+	ldr	x19, [x3, #(14 * 8)]
+	ldr	x18, [x3, #(13 * 8)]
+	ldr	x17, [x3, #(12 * 8)]
+	ldr	x16, [x3, #(11 * 8)]
+	ldr	x15, [x3, #(10 * 8)]
+	ldr	x14, [x3, #(9 * 8)]
+	ldr	x13, [x3, #(8 * 8)]
+	ldr	x12, [x3, #(7 * 8)]
+	ldr	x11, [x3, #(6 * 8)]
+	ldr	x10, [x3, #(5 * 8)]
+	ldr	x9, [x3, #(4 * 8)]
+	ldr	x8, [x3, #(3 * 8)]
+	ldr	x7, [x3, #(2 * 8)]
+	ldr	x6, [x3, #(1 * 8)]
+	ldr	x5, [x3, #(0 * 8)]
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	msr	dbgwvr15_el1, x20
+	msr	dbgwvr14_el1, x19
+	msr	dbgwvr13_el1, x18
+	msr	dbgwvr12_el1, x17
+	msr	dbgwvr11_el1, x16
+	msr	dbgwvr10_el1, x15
+	msr	dbgwvr9_el1, x14
+	msr	dbgwvr8_el1, x13
+	msr	dbgwvr7_el1, x12
+	msr	dbgwvr6_el1, x11
+	msr	dbgwvr5_el1, x10
+	msr	dbgwvr4_el1, x9
+	msr	dbgwvr3_el1, x8
+	msr	dbgwvr2_el1, x7
+	msr	dbgwvr1_el1, x6
+	msr	dbgwvr0_el1, x5
+
+	ldr	x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
+	msr	mdccint_el1, x21
 .endm
 
 .macro skip_32bit_state tmp, target
@@ -278,6 +667,35 @@
 	tbz	\tmp, #12, \target
 .endm
 
+.macro skip_debug_state tmp, target
+	ldr	\tmp, [x0, #VCPU_DEBUG_FLAGS]	// load the debug flags word
+	tbz	\tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target	// dirty bit clear: go to target
+.endm
+
+.macro compute_debug_state target
+	// Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
+	// is set, we do a full save/restore cycle and disable trapping.
+	add	x25, x0, #VCPU_CONTEXT	// x25 and x26 are used as scratch
+
+	// Check the state of MDSCR_EL1
+	ldr	x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)]
+	and	x26, x25, #DBG_MDSCR_KDE
+	and	x25, x25, #DBG_MDSCR_MDE
+	adds	xzr, x25, x26		// Z set when the sum of both masked values is 0
+	b.eq	9998f		// Nothing to see there
+
+	// If any of the interesting bits was set, we must set the flag
+	mov	x26, #KVM_ARM64_DEBUG_DIRTY
+	str	x26, [x0, #VCPU_DEBUG_FLAGS]
+	b	9999f		// Fall out of the macro: do not branch to the target
+
+9998:
+	// Otherwise load the flags from memory in case we recently
+	// trapped
+	skip_debug_state x25, \target
+9999:
+.endm
+
 .macro save_guest_32bit_state
 	skip_32bit_state x3, 1f
 
@@ -293,10 +711,13 @@
 	mrs	x4, dacr32_el2
 	mrs	x5, ifsr32_el2
 	mrs	x6, fpexc32_el2
-	mrs	x7, dbgvcr32_el2
 	stp	x4, x5, [x3]
-	stp	x6, x7, [x3, #16]
+	str	x6, [x3, #16]
 
+	skip_debug_state x8, 2f
+	mrs	x7, dbgvcr32_el2
+	str	x7, [x3, #24]
+2:
 	skip_tee_state x8, 1f
 
 	add	x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
@@ -319,12 +740,15 @@
 
 	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
 	ldp	x4, x5, [x3]
-	ldp	x6, x7, [x3, #16]
+	ldr	x6, [x3, #16]
 	msr	dacr32_el2, x4
 	msr	ifsr32_el2, x5
 	msr	fpexc32_el2, x6
-	msr	dbgvcr32_el2, x7
 
+	skip_debug_state x8, 2f
+	ldr	x7, [x3, #24]
+	msr	dbgvcr32_el2, x7
+2:
 	skip_tee_state x8, 1f
 
 	add	x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
@@ -463,6 +887,14 @@ __restore_sysregs:
 	restore_sysregs
 	ret
 
+__save_debug:
+	save_debug
+	ret
+
+__restore_debug:
+	restore_debug
+	ret
+
 __save_fpsimd:
 	save_fpsimd
 	ret
@@ -494,6 +926,9 @@ ENTRY(__kvm_vcpu_run)
 	bl __save_fpsimd
 	bl __save_sysregs
 
+	compute_debug_state 1f
+	bl	__save_debug
+1:
 	activate_traps
 	activate_vm
 
@@ -505,6 +940,10 @@ ENTRY(__kvm_vcpu_run)
 
 	bl __restore_sysregs
 	bl __restore_fpsimd
+
+	skip_debug_state x3, 1f
+	bl	__restore_debug
+1:
 	restore_guest_32bit_state
 	restore_guest_regs
 
@@ -521,6 +960,10 @@ __kvm_vcpu_return:
 	save_guest_regs
 	bl __save_fpsimd
 	bl __save_sysregs
+
+	skip_debug_state x3, 1f
+	bl	__save_debug
+1:
 	save_guest_32bit_state
 
 	save_timer_state
@@ -535,6 +978,14 @@ __kvm_vcpu_return:
 
 	bl __restore_sysregs
 	bl __restore_fpsimd
+
+	skip_debug_state x3, 1f
+	// Clear the dirty flag for the next run, as all the state has
+	// already been saved. Note that we nuke the whole 64bit word.
+	// If we ever add more flags, we'll have to be more careful...
+	str	xzr, [x0, #VCPU_DEBUG_FLAGS]
+	bl	__restore_debug
+1:
 	restore_host_regs
 
 	mov	x0, x1

From 99999d6ea6ff7b0b02a4ac6abef0d49ec1fd9fcb Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 24 Apr 2014 10:32:03 +0100
Subject: [PATCH 0660/1185] arm64: KVM: enable trapping of all debug registers

Enable trapping of the debug registers, preventing guests from
messing with the host state (and allowing guests to use the debug
infrastructure as well).

Reviewed-by: Anup Patel <anup.patel@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit d329de09333aeee127aaf22eb7cee9c2dc4cf475)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp.S | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 100494b5c7d4..b72aa9f9215c 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -770,6 +770,14 @@
 	mrs	x2, mdcr_el2
 	and	x2, x2, #MDCR_EL2_HPMN_MASK
 	orr	x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR)
+	orr	x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA)
+
+	// Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap
+	// if not dirty.
+	ldr	x3, [x0, #VCPU_DEBUG_FLAGS]
+	tbnz	x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f
+	orr	x2, x2,  #MDCR_EL2_TDA
+1:
 	msr	mdcr_el2, x2
 .endm
 

From 38ca7b90842e554439e2291803cdb0eb850b4807 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 24 Jun 2014 19:43:15 +0100
Subject: [PATCH 0661/1185] ARM: make it easier to check the CPU part number
 correctly

Ensure that platform maintainers check the CPU part number in the right
manner: the CPU part number is meaningless without also checking the
CPU implement(e|o)r (choose your preferred spelling!)  Provide an
interface which returns both the implementer and part number together,
and update the definitions to include the implementer.

Mark the old function as being deprecated... indeed, using the old
function with the definitions will now always evaluate as false, so
people must update their un-merged code to the new function.  While
this could be avoided by adding new definitions, we'd also have to
create new names for them which would be awkward.

Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
(cherry picked from commit af040ffc9ba1e079ee4c0748aff64fa3d4716fa5)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/cputype.h   | 35 +++++++++++------
 arch/arm/include/asm/smp_scu.h   |  2 +-
 arch/arm/kernel/perf_event_cpu.c | 64 ++++++++++++++------------------
 arch/arm/kvm/guest.c             |  8 +---
 4 files changed, 53 insertions(+), 56 deletions(-)

diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index dba62cb1ad08..3392fe2d3174 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -43,15 +43,18 @@
 #define ARM_CPU_IMP_ARM			0x41
 #define ARM_CPU_IMP_INTEL		0x69
 
-#define ARM_CPU_PART_ARM1136		0xB360
-#define ARM_CPU_PART_ARM1156		0xB560
-#define ARM_CPU_PART_ARM1176		0xB760
-#define ARM_CPU_PART_ARM11MPCORE	0xB020
-#define ARM_CPU_PART_CORTEX_A8		0xC080
-#define ARM_CPU_PART_CORTEX_A9		0xC090
-#define ARM_CPU_PART_CORTEX_A5		0xC050
-#define ARM_CPU_PART_CORTEX_A15		0xC0F0
-#define ARM_CPU_PART_CORTEX_A7		0xC070
+/* ARM implemented processors */
+#define ARM_CPU_PART_ARM1136		0x4100b360
+#define ARM_CPU_PART_ARM1156		0x4100b560
+#define ARM_CPU_PART_ARM1176		0x4100b760
+#define ARM_CPU_PART_ARM11MPCORE	0x4100b020
+#define ARM_CPU_PART_CORTEX_A8		0x4100c080
+#define ARM_CPU_PART_CORTEX_A9		0x4100c090
+#define ARM_CPU_PART_CORTEX_A5		0x4100c050
+#define ARM_CPU_PART_CORTEX_A7		0x4100c070
+#define ARM_CPU_PART_CORTEX_A12		0x4100c0d0
+#define ARM_CPU_PART_CORTEX_A17		0x4100c0e0
+#define ARM_CPU_PART_CORTEX_A15		0x4100c0f0
 
 #define ARM_CPU_XSCALE_ARCH_MASK	0xe000
 #define ARM_CPU_XSCALE_ARCH_V1		0x2000
@@ -122,14 +125,24 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
 	return (read_cpuid_id() & 0xFF000000) >> 24;
 }
 
-static inline unsigned int __attribute_const__ read_cpuid_part_number(void)
+/*
+ * The CPU part number is meaningless without the CPU implementer:
+ * implementers are free to define part numbers that clash with those
+ * of other implementers. So keep ID bits 31:24 along with bits 15:4.
+ */
+static inline unsigned int __attribute_const__ read_cpuid_part(void)
+{
+	return read_cpuid_id() & 0xff00fff0;
+}
+
+static inline unsigned int __attribute_const__ __deprecated read_cpuid_part_number(void)
 {
 	return read_cpuid_id() & 0xFFF0;
 }
 
 static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void)
 {
-	return read_cpuid_part_number() & ARM_CPU_XSCALE_ARCH_MASK;
+	return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK;
 }
 
 static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h
index 18d169373612..1a292d8be988 100644
--- a/arch/arm/include/asm/smp_scu.h
+++ b/arch/arm/include/asm/smp_scu.h
@@ -11,7 +11,7 @@
 
 static inline bool scu_a9_has_base(void)
 {
-	return read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
+	return read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
 }
 
 static inline unsigned long scu_a9_get_base(void)
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 1f2740e3dbc0..0e9609657c79 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -201,49 +201,39 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = {
 static int probe_current_pmu(struct arm_pmu *pmu)
 {
 	int cpu = get_cpu();
-	unsigned long implementor = read_cpuid_implementor();
-	unsigned long part_number = read_cpuid_part_number();
 	int ret = -ENODEV;
 
 	pr_info("probing PMU on CPU %d\n", cpu);
 
+	switch (read_cpuid_part()) {
 	/* ARM Ltd CPUs. */
-	if (implementor == ARM_CPU_IMP_ARM) {
-		switch (part_number) {
-		case ARM_CPU_PART_ARM1136:
-		case ARM_CPU_PART_ARM1156:
-		case ARM_CPU_PART_ARM1176:
-			ret = armv6pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_ARM11MPCORE:
-			ret = armv6mpcore_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A8:
-			ret = armv7_a8_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A9:
-			ret = armv7_a9_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A5:
-			ret = armv7_a5_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A15:
-			ret = armv7_a15_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A7:
-			ret = armv7_a7_pmu_init(pmu);
-			break;
-		}
-	/* Intel CPUs [xscale]. */
-	} else if (implementor == ARM_CPU_IMP_INTEL) {
-		switch (xscale_cpu_arch_version()) {
-		case ARM_CPU_XSCALE_ARCH_V1:
-			ret = xscale1pmu_init(pmu);
-			break;
-		case ARM_CPU_XSCALE_ARCH_V2:
-			ret = xscale2pmu_init(pmu);
-			break;
+	case ARM_CPU_PART_ARM1136:
+	case ARM_CPU_PART_ARM1156:
+	case ARM_CPU_PART_ARM1176:
+		ret = armv6pmu_init(pmu);
+		break;
+	case ARM_CPU_PART_ARM11MPCORE:
+		ret = armv6mpcore_pmu_init(pmu);
+		break;
+	case ARM_CPU_PART_CORTEX_A8:
+		ret = armv7_a8_pmu_init(pmu);
+		break;
+	case ARM_CPU_PART_CORTEX_A9:
+		ret = armv7_a9_pmu_init(pmu);
+		break;
+
+	default:
+		if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) {
+			switch (xscale_cpu_arch_version()) {
+			case ARM_CPU_XSCALE_ARCH_V1:
+				ret = xscale1pmu_init(pmu);
+				break;
+			case ARM_CPU_XSCALE_ARCH_V2:
+				ret = xscale2pmu_init(pmu);
+				break;
+			}
 		}
+		break;
 	}
 
 	put_cpu();
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 986e625b5dbd..813e49258690 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -264,13 +264,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
 int __attribute_const__ kvm_target_cpu(void)
 {
-	unsigned long implementor = read_cpuid_implementor();
-	unsigned long part_number = read_cpuid_part_number();
-
-	if (implementor != ARM_CPU_IMP_ARM)
-		return -EINVAL;
-
-	switch (part_number) {
+	switch (read_cpuid_part()) {
 	case ARM_CPU_PART_CORTEX_A7:
 		return KVM_ARM_TARGET_CORTEX_A7;
 	case ARM_CPU_PART_CORTEX_A15:

From 2d05a876aea8c40db12b9b721be21773aad6057b Mon Sep 17 00:00:00 2001
From: Mark Rustad <mark.d.rustad@intel.com>
Date: Fri, 25 Jul 2014 06:27:03 -0700
Subject: [PATCH 0662/1185] kvm: Resolve missing-field-initializers warnings

Resolve missing-field-initializers warnings seen in W=2 kernel
builds by having macros generate more elaborated initializers.
That is enough to silence the warnings.

Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 25f97ff451a4aab534afc1290af97d23ea0b4fb3)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/irq_comm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index ced4a542a031..a228ee82bad2 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -323,13 +323,13 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
 
 #define IOAPIC_ROUTING_ENTRY(irq) \
 	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
-	  .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) }
+	  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
 #define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
 
 #ifdef CONFIG_X86
 #  define PIC_ROUTING_ENTRY(irq) \
 	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
-	  .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 }
+	  .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } }
 #  define ROUTING_ENTRY2(irq) \
 	IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
 #else

From 7b6de249336851f2fb8515b2f428bc9e56a8dbb1 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 14 Jul 2014 18:27:35 +0200
Subject: [PATCH 0663/1185] KVM: Rename and add argument to check_extension

In preparation to make the check_extension function available to VM scope
we add a struct kvm * argument to the function header and rename the function
accordingly. It will still be called from the /dev/kvm fd, but with a NULL
argument for struct kvm *.

Signed-off-by: Alexander Graf <agraf@suse.de>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 784aa3d7fb6f729c06d5836c9d9569f58e4d05ae)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c         | 2 +-
 arch/ia64/kvm/kvm-ia64.c   | 2 +-
 arch/powerpc/kvm/powerpc.c | 2 +-
 arch/s390/kvm/kvm-s390.c   | 2 +-
 arch/x86/kvm/x86.c         | 2 +-
 include/linux/kvm_host.h   | 2 +-
 virt/kvm/kvm_main.c        | 6 +++---
 7 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e2c2bfd4da95..0e3d3dc8eea2 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -174,7 +174,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	}
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 	switch (ext) {
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 53f44bee9ebb..b48aa69e35e1 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -190,7 +190,7 @@ void kvm_arch_check_processor_compat(void *rtn)
 	*(int *)rtn = 0;
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 
 	int r;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 750835a4ef70..aacf3e35404f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -300,7 +300,7 @@ void kvm_arch_sync_events(struct kvm *kvm)
 {
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 54612d0e79dd..9133f869b070 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -127,7 +127,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 805c8e92cf66..46a8c74fd431 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2500,7 +2500,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
 	return r;
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a3c83491a791..1fb1bd4593a5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -578,7 +578,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg);
 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf);
 
-int kvm_dev_ioctl_check_extension(long ext);
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext);
 
 int kvm_get_dirty_log(struct kvm *kvm,
 			struct kvm_dirty_log *log, int *is_dirty);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3db56912caed..e3315356b69a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2564,7 +2564,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 	return r;
 }
 
-static long kvm_dev_ioctl_check_extension_generic(long arg)
+static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 {
 	switch (arg) {
 	case KVM_CAP_USER_MEMORY:
@@ -2588,7 +2588,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
 	default:
 		break;
 	}
-	return kvm_dev_ioctl_check_extension(arg);
+	return kvm_vm_ioctl_check_extension(kvm, arg);
 }
 
 static long kvm_dev_ioctl(struct file *filp,
@@ -2607,7 +2607,7 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = kvm_dev_ioctl_create_vm(arg);
 		break;
 	case KVM_CHECK_EXTENSION:
-		r = kvm_dev_ioctl_check_extension_generic(arg);
+		r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
 		break;
 	case KVM_GET_VCPU_MMAP_SIZE:
 		r = -EINVAL;

From c509fab667c6a02b0763a4343c73d134c6a09328 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 14 Jul 2014 18:33:08 +0200
Subject: [PATCH 0664/1185] KVM: Allow KVM_CHECK_EXTENSION on the vm fd

The KVM_CHECK_EXTENSION is only available on the kvm fd today. Unfortunately
on PPC some of the capabilities change depending on the way a VM was created.

So instead we need a way to expose capabilities as VM ioctl, so that we can
see which VM type we're using (HV or PR). To enable this, add the
KVM_CHECK_EXTENSION ioctl to our vm ioctl portfolio.

Signed-off-by: Alexander Graf <agraf@suse.de>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 92b591a4c46b103ebd3fc0d03a084e1efd331253)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/api.txt |  7 ++--
 include/uapi/linux/kvm.h          |  1 +
 virt/kvm/kvm_main.c               | 58 +++++++++++++++++--------------
 3 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 8d135672b69a..257a1f1eecc7 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -148,9 +148,9 @@ of banks, as set via the KVM_X86_SETUP_MCE ioctl.
 
 4.4 KVM_CHECK_EXTENSION
 
-Capability: basic
+Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl
 Architectures: all
-Type: system ioctl
+Type: system ioctl, vm ioctl
 Parameters: extension identifier (KVM_CAP_*)
 Returns: 0 if unsupported; 1 (or some other positive integer) if supported
 
@@ -160,6 +160,9 @@ receives an integer that describes the extension availability.
 Generally 0 means no and 1 means yes, but some extensions may report
 additional information in the integer return value.
 
+Based on their initialization different VMs may have different capabilities.
+It is thus encouraged to use the vm ioctl to query for capabilities (available
+with KVM_CAP_CHECK_EXTENSION_VM on the vm fd)
 
 4.5 KVM_GET_VCPU_MMAP_SIZE
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 521e4c0a08ac..1af686a82703 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -679,6 +679,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_ARM_EL1_32BIT 93
 #define KVM_CAP_EXT_EMUL_CPUID 95
 #define KVM_CAP_ARM_PSCI_0_2 102
+#define KVM_CAP_CHECK_EXTENSION_VM 105
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e3315356b69a..1edb15da7acf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2317,6 +2317,34 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	return 0;
 }
 
+static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
+{
+	switch (arg) {
+	case KVM_CAP_USER_MEMORY:
+	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+	case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+	case KVM_CAP_SET_BOOT_CPU_ID:
+#endif
+	case KVM_CAP_INTERNAL_ERROR_DATA:
+#ifdef CONFIG_HAVE_KVM_MSI
+	case KVM_CAP_SIGNAL_MSI:
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+	case KVM_CAP_IRQFD_RESAMPLE:
+#endif
+	case KVM_CAP_CHECK_EXTENSION_VM:
+		return 1;
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+	case KVM_CAP_IRQ_ROUTING:
+		return KVM_MAX_IRQ_ROUTES;
+#endif
+	default:
+		break;
+	}
+	return kvm_vm_ioctl_check_extension(kvm, arg);
+}
+
 static long kvm_vm_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -2480,6 +2508,9 @@ static long kvm_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_CHECK_EXTENSION:
+		r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
+		break;
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 		if (r == -ENOTTY)
@@ -2564,33 +2595,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 	return r;
 }
 
-static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
-{
-	switch (arg) {
-	case KVM_CAP_USER_MEMORY:
-	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
-	case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
-#ifdef CONFIG_KVM_APIC_ARCHITECTURE
-	case KVM_CAP_SET_BOOT_CPU_ID:
-#endif
-	case KVM_CAP_INTERNAL_ERROR_DATA:
-#ifdef CONFIG_HAVE_KVM_MSI
-	case KVM_CAP_SIGNAL_MSI:
-#endif
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
-	case KVM_CAP_IRQFD_RESAMPLE:
-#endif
-		return 1;
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
-	case KVM_CAP_IRQ_ROUTING:
-		return KVM_MAX_IRQ_ROUTES;
-#endif
-	default:
-		break;
-	}
-	return kvm_vm_ioctl_check_extension(kvm, arg);
-}
-
 static long kvm_dev_ioctl(struct file *filp,
 			  unsigned int ioctl, unsigned long arg)
 {

From edff6ae9e04008800e518454cf7fa0efe3023252 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 25 Jul 2014 16:29:12 +0100
Subject: [PATCH 0665/1185] kvm: arm64: vgic: fix hyp panic with 64k pages on
 juno platform

If the physical address of GICV isn't page-aligned, then we end up
creating a stage-2 mapping of the page containing it, which causes us to
map neighbouring memory locations directly into the guest.

As an example, consider a platform with GICV at physical 0x2c02f000
running a 64k-page host kernel. If qemu maps this into the guest at
0x80010000, then guest physical addresses 0x80010000 - 0x8001efff will
map host physical region 0x2c020000 - 0x2c02efff. Accesses to these
physical regions may cause UNPREDICTABLE behaviour, for example, on the
Juno platform this will cause an SError exception to EL3, which brings
down the entire physical CPU resulting in RCU stalls / HYP panics / host
crashing / wasted weeks of debugging.

SBSA recommends that systems alias the 4k GICV across the bounding 64k
region, in which case GICV physical could be described as 0x2c020000 in
the above scenario.

This patch fixes the problem by failing the vgic probe if the physical
base address or the size of GICV aren't page-aligned. Note that this
generated a warning in dmesg about freeing enabled IRQs, so I had to
move the IRQ enabling later in the probe.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Joel Schopp <joel.schopp@amd.com>
Cc: Don Dutile <ddutile@redhat.com>
Acked-by: Peter Maydell <peter.maydell@linaro.org>
Acked-by: Joel Schopp <joel.schopp@amd.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 63afbe7a0ac184ef8485dac4914e87b211b5bfaa)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic-v2.c | 16 ++++++++++++++++
 virt/kvm/arm/vgic.c    |  4 ++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index d6c9c142f813..01124ef3690a 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -231,6 +231,22 @@ int vgic_v2_probe(struct device_node *vgic_node,
 		ret = -ENXIO;
 		goto out_unmap;
 	}
+
+	if (!PAGE_ALIGNED(vcpu_res.start)) {
+		kvm_err("GICV physical address 0x%llx not page aligned\n",
+			(unsigned long long)vcpu_res.start);
+		ret = -ENXIO;
+		goto out_unmap;
+	}
+
+	if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+		kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
+			(unsigned long long)resource_size(&vcpu_res),
+			PAGE_SIZE);
+		ret = -ENXIO;
+		goto out_unmap;
+	}
+
 	vgic->vcpu_base = vcpu_res.start;
 
 	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index ede8f6466c95..73eba793b17f 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1587,11 +1587,11 @@ int kvm_vgic_hyp_init(void)
 		goto out_free_irq;
 	}
 
-	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
-
 	/* Callback into for arch code for setup */
 	vgic_arch_setup(vgic);
 
+	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
+
 	return 0;
 
 out_free_irq:

From 45104cfeec66c1314a2ae5e4cb436a9ace725e59 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 31 Jul 2014 14:16:39 +0100
Subject: [PATCH 0666/1185] arm64: KVM: GICv3: move system register access to
 msr_s/mrs_s

Commit 72c583951526 (arm64: gicv3: Allow GICv3 compilation with
older binutils) changed the way we express the GICv3 system registers,
but couldn't change the occurrences used by KVM as the code wasn't
merged yet.

Just fix the accessors.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit f4c321eb268e932786c112e0b902ee942d91a336)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/vgic-v3-switch.S | 130 ++++++++++++++++----------------
 1 file changed, 65 insertions(+), 65 deletions(-)

diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
index 21e68f606a8f..d16046999e06 100644
--- a/arch/arm64/kvm/vgic-v3-switch.S
+++ b/arch/arm64/kvm/vgic-v3-switch.S
@@ -48,11 +48,11 @@
 	dsb	st
 
 	// Save all interesting registers
-	mrs	x4, ICH_HCR_EL2
-	mrs	x5, ICH_VMCR_EL2
-	mrs	x6, ICH_MISR_EL2
-	mrs	x7, ICH_EISR_EL2
-	mrs	x8, ICH_ELSR_EL2
+	mrs_s	x4, ICH_HCR_EL2
+	mrs_s	x5, ICH_VMCR_EL2
+	mrs_s	x6, ICH_MISR_EL2
+	mrs_s	x7, ICH_EISR_EL2
+	mrs_s	x8, ICH_ELSR_EL2
 
 	str	w4, [x3, #VGIC_V3_CPU_HCR]
 	str	w5, [x3, #VGIC_V3_CPU_VMCR]
@@ -60,9 +60,9 @@
 	str	w7, [x3, #VGIC_V3_CPU_EISR]
 	str	w8, [x3, #VGIC_V3_CPU_ELRSR]
 
-	msr	ICH_HCR_EL2, xzr
+	msr_s	ICH_HCR_EL2, xzr
 
-	mrs	x21, ICH_VTR_EL2
+	mrs_s	x21, ICH_VTR_EL2
 	mvn	w22, w21
 	ubfiz	w23, w22, 2, 4	// w23 = (15 - ListRegs) * 4
 
@@ -71,22 +71,22 @@
 	br	x24
 
 1:
-	mrs	x20, ICH_LR15_EL2
-	mrs	x19, ICH_LR14_EL2
-	mrs	x18, ICH_LR13_EL2
-	mrs	x17, ICH_LR12_EL2
-	mrs	x16, ICH_LR11_EL2
-	mrs	x15, ICH_LR10_EL2
-	mrs	x14, ICH_LR9_EL2
-	mrs	x13, ICH_LR8_EL2
-	mrs	x12, ICH_LR7_EL2
-	mrs	x11, ICH_LR6_EL2
-	mrs	x10, ICH_LR5_EL2
-	mrs	x9, ICH_LR4_EL2
-	mrs	x8, ICH_LR3_EL2
-	mrs	x7, ICH_LR2_EL2
-	mrs	x6, ICH_LR1_EL2
-	mrs	x5, ICH_LR0_EL2
+	mrs_s	x20, ICH_LR15_EL2
+	mrs_s	x19, ICH_LR14_EL2
+	mrs_s	x18, ICH_LR13_EL2
+	mrs_s	x17, ICH_LR12_EL2
+	mrs_s	x16, ICH_LR11_EL2
+	mrs_s	x15, ICH_LR10_EL2
+	mrs_s	x14, ICH_LR9_EL2
+	mrs_s	x13, ICH_LR8_EL2
+	mrs_s	x12, ICH_LR7_EL2
+	mrs_s	x11, ICH_LR6_EL2
+	mrs_s	x10, ICH_LR5_EL2
+	mrs_s	x9, ICH_LR4_EL2
+	mrs_s	x8, ICH_LR3_EL2
+	mrs_s	x7, ICH_LR2_EL2
+	mrs_s	x6, ICH_LR1_EL2
+	mrs_s	x5, ICH_LR0_EL2
 
 	adr	x24, 1f
 	add	x24, x24, x23
@@ -113,34 +113,34 @@
 	tbnz	w21, #29, 6f	// 6 bits
 	tbz	w21, #30, 5f	// 5 bits
 				// 7 bits
-	mrs	x20, ICH_AP0R3_EL2
+	mrs_s	x20, ICH_AP0R3_EL2
 	str	w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
-	mrs	x19, ICH_AP0R2_EL2
+	mrs_s	x19, ICH_AP0R2_EL2
 	str	w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
-6:	mrs	x18, ICH_AP0R1_EL2
+6:	mrs_s	x18, ICH_AP0R1_EL2
 	str	w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
-5:	mrs	x17, ICH_AP0R0_EL2
+5:	mrs_s	x17, ICH_AP0R0_EL2
 	str	w17, [x3, #VGIC_V3_CPU_AP0R]
 
 	tbnz	w21, #29, 6f	// 6 bits
 	tbz	w21, #30, 5f	// 5 bits
 				// 7 bits
-	mrs	x20, ICH_AP1R3_EL2
+	mrs_s	x20, ICH_AP1R3_EL2
 	str	w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
-	mrs	x19, ICH_AP1R2_EL2
+	mrs_s	x19, ICH_AP1R2_EL2
 	str	w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
-6:	mrs	x18, ICH_AP1R1_EL2
+6:	mrs_s	x18, ICH_AP1R1_EL2
 	str	w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
-5:	mrs	x17, ICH_AP1R0_EL2
+5:	mrs_s	x17, ICH_AP1R0_EL2
 	str	w17, [x3, #VGIC_V3_CPU_AP1R]
 
 	// Restore SRE_EL1 access and re-enable SRE at EL1.
-	mrs	x5, ICC_SRE_EL2
+	mrs_s	x5, ICC_SRE_EL2
 	orr	x5, x5, #ICC_SRE_EL2_ENABLE
-	msr	ICC_SRE_EL2, x5
+	msr_s	ICC_SRE_EL2, x5
 	isb
 	mov	x5, #1
-	msr	ICC_SRE_EL1, x5
+	msr_s	ICC_SRE_EL1, x5
 .endm
 
 /*
@@ -150,7 +150,7 @@
 .macro	restore_vgic_v3_state
 	// Disable SRE_EL1 access. Necessary, otherwise
 	// ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens...
-	msr	ICC_SRE_EL1, xzr
+	msr_s	ICC_SRE_EL1, xzr
 	isb
 
 	// Compute the address of struct vgic_cpu
@@ -160,34 +160,34 @@
 	ldr	w4, [x3, #VGIC_V3_CPU_HCR]
 	ldr	w5, [x3, #VGIC_V3_CPU_VMCR]
 
-	msr	ICH_HCR_EL2, x4
-	msr	ICH_VMCR_EL2, x5
+	msr_s	ICH_HCR_EL2, x4
+	msr_s	ICH_VMCR_EL2, x5
 
-	mrs	x21, ICH_VTR_EL2
+	mrs_s	x21, ICH_VTR_EL2
 
 	tbnz	w21, #29, 6f	// 6 bits
 	tbz	w21, #30, 5f	// 5 bits
 				// 7 bits
 	ldr	w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
-	msr	ICH_AP1R3_EL2, x20
+	msr_s	ICH_AP1R3_EL2, x20
 	ldr	w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
-	msr	ICH_AP1R2_EL2, x19
+	msr_s	ICH_AP1R2_EL2, x19
 6:	ldr	w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
-	msr	ICH_AP1R1_EL2, x18
+	msr_s	ICH_AP1R1_EL2, x18
 5:	ldr	w17, [x3, #VGIC_V3_CPU_AP1R]
-	msr	ICH_AP1R0_EL2, x17
+	msr_s	ICH_AP1R0_EL2, x17
 
 	tbnz	w21, #29, 6f	// 6 bits
 	tbz	w21, #30, 5f	// 5 bits
 				// 7 bits
 	ldr	w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
-	msr	ICH_AP0R3_EL2, x20
+	msr_s	ICH_AP0R3_EL2, x20
 	ldr	w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
-	msr	ICH_AP0R2_EL2, x19
+	msr_s	ICH_AP0R2_EL2, x19
 6:	ldr	w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
-	msr	ICH_AP0R1_EL2, x18
+	msr_s	ICH_AP0R1_EL2, x18
 5:	ldr	w17, [x3, #VGIC_V3_CPU_AP0R]
-	msr	ICH_AP0R0_EL2, x17
+	msr_s	ICH_AP0R0_EL2, x17
 
 	and	w22, w21, #0xf
 	mvn	w22, w21
@@ -220,22 +220,22 @@
 	br	x24
 
 1:
-	msr	ICH_LR15_EL2, x20
-	msr	ICH_LR14_EL2, x19
-	msr	ICH_LR13_EL2, x18
-	msr	ICH_LR12_EL2, x17
-	msr	ICH_LR11_EL2, x16
-	msr	ICH_LR10_EL2, x15
-	msr	ICH_LR9_EL2,  x14
-	msr	ICH_LR8_EL2,  x13
-	msr	ICH_LR7_EL2,  x12
-	msr	ICH_LR6_EL2,  x11
-	msr	ICH_LR5_EL2,  x10
-	msr	ICH_LR4_EL2,   x9
-	msr	ICH_LR3_EL2,   x8
-	msr	ICH_LR2_EL2,   x7
-	msr	ICH_LR1_EL2,   x6
-	msr	ICH_LR0_EL2,   x5
+	msr_s	ICH_LR15_EL2, x20
+	msr_s	ICH_LR14_EL2, x19
+	msr_s	ICH_LR13_EL2, x18
+	msr_s	ICH_LR12_EL2, x17
+	msr_s	ICH_LR11_EL2, x16
+	msr_s	ICH_LR10_EL2, x15
+	msr_s	ICH_LR9_EL2,  x14
+	msr_s	ICH_LR8_EL2,  x13
+	msr_s	ICH_LR7_EL2,  x12
+	msr_s	ICH_LR6_EL2,  x11
+	msr_s	ICH_LR5_EL2,  x10
+	msr_s	ICH_LR4_EL2,   x9
+	msr_s	ICH_LR3_EL2,   x8
+	msr_s	ICH_LR2_EL2,   x7
+	msr_s	ICH_LR1_EL2,   x6
+	msr_s	ICH_LR0_EL2,   x5
 
 	// Ensure that the above will have reached the
 	// (re)distributors. This ensure the guest will read
@@ -244,9 +244,9 @@
 	dsb	sy
 
 	// Prevent the guest from touching the GIC system registers
-	mrs	x5, ICC_SRE_EL2
+	mrs_s	x5, ICC_SRE_EL2
 	and	x5, x5, #~ICC_SRE_EL2_ENABLE
-	msr	ICC_SRE_EL2, x5
+	msr_s	ICC_SRE_EL2, x5
 .endm
 
 ENTRY(__save_vgic_v3_state)
@@ -260,7 +260,7 @@ ENTRY(__restore_vgic_v3_state)
 ENDPROC(__restore_vgic_v3_state)
 
 ENTRY(__vgic_v3_get_ich_vtr_el2)
-	mrs	x0, ICH_VTR_EL2
+	mrs_s	x0, ICH_VTR_EL2
 	ret
 ENDPROC(__vgic_v3_get_ich_vtr_el2)
 

From 099fcd90255453a5976a42c5f9421a6a7ad7ce3d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 31 Jul 2014 11:42:18 +0100
Subject: [PATCH 0667/1185] KVM: arm64: GICv3: mandate page-aligned GICV region

Just like GICv2 was fixed in 63afbe7a0ac1
(kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform),
mandate the GICV region to be both aligned on a page boundary and
its size to be a multiple of page size.

This prevents a guest from being able to poke at regions where we
have no idea what is sitting there.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit fb3ec67942e92e5713e05b7691b277d0a0c0575d)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic-v3.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index f01d44685720..1c2c8eef0599 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -215,6 +215,22 @@ int vgic_v3_probe(struct device_node *vgic_node,
 		ret = -ENXIO;
 		goto out;
 	}
+
+	if (!PAGE_ALIGNED(vcpu_res.start)) {
+		kvm_err("GICV physical address 0x%llx not page aligned\n",
+			(unsigned long long)vcpu_res.start);
+		ret = -ENXIO;
+		goto out;
+	}
+
+	if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+		kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
+			(unsigned long long)resource_size(&vcpu_res),
+			PAGE_SIZE);
+		ret = -ENXIO;
+		goto out;
+	}
+
 	vgic->vcpu_base = vcpu_res.start;
 	vgic->vctrl_base = NULL;
 	vgic->type = VGIC_V3;

From dad44b58644918b98d32ed35f28e919e08b0f896 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 1 Aug 2014 12:00:36 +0100
Subject: [PATCH 0668/1185] arm64: KVM: fix 64bit CP15 VM access for 32bit
 guests

Commit f0a3eaff71b8 (ARM64: KVM: fix big endian issue in
access_vm_reg for 32bit guest) changed the way we handle CP15
VM accesses, so that all 64bit accesses are done via vcpu_sys_reg.

This looks like a good idea as it solves endianness issues in an
elegant way, except for one small detail: the register index
doesn't refer to the same array! We end up corrupting some random
data structure instead.

Fix this by reverting to the original code, except for the introduction
of a vcpu_cp15_64_high macro that deals with the endianness thing.

Tested on Juno with 32bit SMP guests.

Cc: Victor Kamensky <victor.kamensky@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit dedf97e8ff2c7513b1370e36b56e08b6bd0f0290)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h | 6 ++++--
 arch/arm64/kvm/sys_regs.c         | 7 +++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 79812be4f25f..e10c45a578e3 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -149,9 +149,11 @@ struct kvm_vcpu_arch {
 #define vcpu_cp15(v,r)		((v)->arch.ctxt.copro[(r)])
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
-#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 1)])
+#define vcpu_cp15_64_high(v,r)	vcpu_cp15((v),(r))
+#define vcpu_cp15_64_low(v,r)	vcpu_cp15((v),(r) + 1)
 #else
-#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 0)])
+#define vcpu_cp15_64_high(v,r)	vcpu_cp15((v),(r) + 1)
+#define vcpu_cp15_64_low(v,r)	vcpu_cp15((v),(r))
 #endif
 
 struct kvm_vm_stat {
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index a4fd5267c65b..5805e7c4a4dd 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -135,10 +135,13 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
 	BUG_ON(!p->is_write);
 
 	val = *vcpu_reg(vcpu, p->Rt);
-	if (!p->is_aarch32 || !p->is_32bit)
+	if (!p->is_aarch32) {
 		vcpu_sys_reg(vcpu, r->reg) = val;
-	else
+	} else {
+		if (!p->is_32bit)
+			vcpu_cp15_64_high(vcpu, r->reg) = val >> 32;
 		vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL;
+	}
 
 	return true;
 }

From 74afc9fff6cdb9df6cd6ee43ddd59465f48aed6d Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 30 Jun 2014 20:51:09 +1000
Subject: [PATCH 0669/1185] KVM: Don't keep reference to irq routing table in
 irqfd struct

This makes the irqfd code keep a copy of the irq routing table entry
for each irqfd, rather than a reference to the copy in the actual
irq routing table maintained in kvm/virt/irqchip.c.  This will enable
us to change the routing table structure in future, or even not have a
routing table at all on some platforms.

The synchronization that was previously achieved using srcu_dereference
on the read side is now achieved using a seqcount_t structure.  That
ensures that we don't get a halfway-updated copy of the structure if
we read it while another thread is updating it.

We still use srcu_read_lock/unlock around the read side so that when
changing the routing table we can be sure that after calling
synchronize_srcu, nothing will be using the old routing.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Tested-by: Eric Auger <eric.auger@linaro.org>
Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 56f89f3629ffd1a21d38c3d0bea23deac0e284ce)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/eventfd.c | 41 +++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index b51c19ffd8fd..16bf26183225 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -33,6 +33,7 @@
 #include <linux/kernel.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
+#include <linux/seqlock.h>
 
 #include "iodev.h"
 
@@ -75,7 +76,8 @@ struct _irqfd {
 	struct kvm *kvm;
 	wait_queue_t wait;
 	/* Update side is protected by irqfds.lock */
-	struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
+	struct kvm_kernel_irq_routing_entry irq_entry;
+	seqcount_t irq_entry_sc;
 	/* Used for level IRQ fast-path */
 	int gsi;
 	struct work_struct inject;
@@ -223,16 +225,20 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 {
 	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
 	unsigned long flags = (unsigned long)key;
-	struct kvm_kernel_irq_routing_entry *irq;
+	struct kvm_kernel_irq_routing_entry irq;
 	struct kvm *kvm = irqfd->kvm;
+	unsigned seq;
 	int idx;
 
 	if (flags & POLLIN) {
 		idx = srcu_read_lock(&kvm->irq_srcu);
-		irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu);
+		do {
+			seq = read_seqcount_begin(&irqfd->irq_entry_sc);
+			irq = irqfd->irq_entry;
+		} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
 		/* An event has been signaled, inject an interrupt */
-		if (irq)
-			kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
+		if (irq.type == KVM_IRQ_ROUTING_MSI)
+			kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
 					false);
 		else
 			schedule_work(&irqfd->inject);
@@ -277,18 +283,20 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
 {
 	struct kvm_kernel_irq_routing_entry *e;
 
-	if (irqfd->gsi >= irq_rt->nr_rt_entries) {
-		rcu_assign_pointer(irqfd->irq_entry, NULL);
-		return;
-	}
+	write_seqcount_begin(&irqfd->irq_entry_sc);
+
+	irqfd->irq_entry.type = 0;
+	if (irqfd->gsi >= irq_rt->nr_rt_entries)
+		goto out;
 
 	hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) {
 		/* Only fast-path MSI. */
 		if (e->type == KVM_IRQ_ROUTING_MSI)
-			rcu_assign_pointer(irqfd->irq_entry, e);
-		else
-			rcu_assign_pointer(irqfd->irq_entry, NULL);
+			irqfd->irq_entry = *e;
 	}
+
+ out:
+	write_seqcount_end(&irqfd->irq_entry_sc);
 }
 
 static int
@@ -310,6 +318,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	INIT_LIST_HEAD(&irqfd->list);
 	INIT_WORK(&irqfd->inject, irqfd_inject);
 	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
+	seqcount_init(&irqfd->irq_entry_sc);
 
 	file = eventfd_fget(args->fd);
 	if (IS_ERR(file)) {
@@ -466,14 +475,14 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
 	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
 		if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
 			/*
-			 * This rcu_assign_pointer is needed for when
+			 * This clearing of irq_entry.type is needed for when
 			 * another thread calls kvm_irq_routing_update before
 			 * we flush workqueue below (we synchronize with
 			 * kvm_irq_routing_update using irqfds.lock).
-			 * It is paired with synchronize_srcu done by caller
-			 * of that function.
 			 */
-			rcu_assign_pointer(irqfd->irq_entry, NULL);
+			write_seqcount_begin(&irqfd->irq_entry_sc);
+			irqfd->irq_entry.type = 0;
+			write_seqcount_end(&irqfd->irq_entry_sc);
 			irqfd_deactivate(irqfd);
 		}
 	}

From 28bcfc22334c1214d603e5136b627d02f6eadb5e Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 30 Jun 2014 20:51:10 +1000
Subject: [PATCH 0670/1185] KVM: irqchip: Provide and use accessors for irq
 routing table

This provides accessor functions for the KVM interrupt mappings, in
order to reduce the amount of code that accesses the fields of the
kvm_irq_routing_table struct, and restrict that code to one file,
virt/kvm/irqchip.c.  The new functions are kvm_irq_map_gsi(), which
maps from a global interrupt number to a set of IRQ routing entries,
and kvm_irq_map_chip_pin, which maps from IRQ chip and pin numbers to
a global interrupt number.

This also moves the update of kvm_irq_routing_table::chip[][]
into irqchip.c, out of the various kvm_set_routing_entry
implementations.  That means that none of the kvm_set_routing_entry
implementations need the kvm_irq_routing_table argument anymore,
so this removes it.

This does not change any locking or data lifetime rules.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Tested-by: Eric Auger <eric.auger@linaro.org>
Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 8ba918d488caded2c4368b0b922eb905fe3bb101)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/powerpc/kvm/mpic.c  |  4 +---
 include/linux/kvm_host.h |  8 ++++++--
 virt/kvm/eventfd.c       | 10 ++++++----
 virt/kvm/irq_comm.c      | 20 +++++++++----------
 virt/kvm/irqchip.c       | 42 ++++++++++++++++++++++++++++++++--------
 5 files changed, 56 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 2861ae9eaae6..b58d61039015 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -1822,8 +1822,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	return 0;
 }
 
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
-			  struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)
 {
 	int r = -EINVAL;
@@ -1835,7 +1834,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
 		e->irqchip.pin = ue->u.irqchip.pin;
 		if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
 			goto out;
-		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
 		break;
 	case KVM_IRQ_ROUTING_MSI:
 		e->set = kvm_set_msi;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1fb1bd4593a5..f91b5cda1d28 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -709,6 +709,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 			     bool mask);
 
+int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries,
+		    struct kvm_irq_routing_table *irq_rt, int gsi);
+int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt,
+			 unsigned irqchip, unsigned pin);
+
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 		bool line_status);
 int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
@@ -898,8 +903,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
 			const struct kvm_irq_routing_entry *entries,
 			unsigned nr,
 			unsigned flags);
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
-			  struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue);
 void kvm_free_irq_routing(struct kvm *kvm);
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 16bf26183225..a75227f7d487 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -282,20 +282,22 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
 			 struct kvm_irq_routing_table *irq_rt)
 {
 	struct kvm_kernel_irq_routing_entry *e;
+	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
+	int i, n_entries;
+
+	n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi);
 
 	write_seqcount_begin(&irqfd->irq_entry_sc);
 
 	irqfd->irq_entry.type = 0;
-	if (irqfd->gsi >= irq_rt->nr_rt_entries)
-		goto out;
 
-	hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) {
+	e = entries;
+	for (i = 0; i < n_entries; ++i, ++e) {
 		/* Only fast-path MSI. */
 		if (e->type == KVM_IRQ_ROUTING_MSI)
 			irqfd->irq_entry = *e;
 	}
 
- out:
 	write_seqcount_end(&irqfd->irq_entry_sc);
 }
 
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index a228ee82bad2..175844593243 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -160,6 +160,7 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
  */
 int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 {
+	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
 	struct kvm_kernel_irq_routing_entry *e;
 	int ret = -EINVAL;
 	struct kvm_irq_routing_table *irq_rt;
@@ -177,14 +178,13 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	 */
 	idx = srcu_read_lock(&kvm->irq_srcu);
 	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	if (irq < irq_rt->nr_rt_entries)
-		hlist_for_each_entry(e, &irq_rt->map[irq], link) {
-			if (likely(e->type == KVM_IRQ_ROUTING_MSI))
-				ret = kvm_set_msi_inatomic(e, kvm);
-			else
-				ret = -EWOULDBLOCK;
-			break;
-		}
+	if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) {
+		e = &entries[0];
+		if (likely(e->type == KVM_IRQ_ROUTING_MSI))
+			ret = kvm_set_msi_inatomic(e, kvm);
+		else
+			ret = -EWOULDBLOCK;
+	}
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 	return ret;
 }
@@ -272,8 +272,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
-			  struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)
 {
 	int r = -EINVAL;
@@ -304,7 +303,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
 		e->irqchip.pin = ue->u.irqchip.pin + delta;
 		if (e->irqchip.pin >= max_pin)
 			goto out;
-		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
 		break;
 	case KVM_IRQ_ROUTING_MSI:
 		e->set = kvm_set_msi;
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index b43c275775cd..f4648dd94888 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,13 +31,37 @@
 #include <trace/events/kvm.h>
 #include "irq.h"
 
+int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries,
+		    struct kvm_irq_routing_table *irq_rt, int gsi)
+{
+	struct kvm_kernel_irq_routing_entry *e;
+	int n = 0;
+
+	if (gsi < irq_rt->nr_rt_entries) {
+		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
+			entries[n] = *e;
+			++n;
+		}
+	}
+
+	return n;
+}
+
+int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt,
+			 unsigned irqchip, unsigned pin)
+{
+	return irq_rt->chip[irqchip][pin];
+}
+
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
+	struct kvm_irq_routing_table *irq_rt;
 	struct kvm_irq_ack_notifier *kian;
 	int gsi, idx;
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin);
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
@@ -54,13 +78,15 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
 
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
+	struct kvm_irq_routing_table *irq_rt;
 	struct kvm_irq_ack_notifier *kian;
 	int gsi, idx;
 
 	trace_kvm_ack_irq(irqchip, pin);
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin);
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
@@ -115,8 +141,8 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 		bool line_status)
 {
-	struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
-	int ret = -1, i = 0, idx;
+	struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS];
+	int ret = -1, i, idx;
 	struct kvm_irq_routing_table *irq_rt;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
@@ -127,9 +153,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 	 */
 	idx = srcu_read_lock(&kvm->irq_srcu);
 	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	if (irq < irq_rt->nr_rt_entries)
-		hlist_for_each_entry(e, &irq_rt->map[irq], link)
-			irq_set[i++] = *e;
+	i = kvm_irq_map_gsi(irq_set, irq_rt, irq);
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	while(i--) {
@@ -171,9 +195,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 
 	e->gsi = ue->gsi;
 	e->type = ue->type;
-	r = kvm_set_routing_entry(rt, e, ue);
+	r = kvm_set_routing_entry(e, ue);
 	if (r)
 		goto out;
+	if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
+		rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi;
 
 	hlist_add_head(&e->link, &rt->map[e->gsi]);
 	r = 0;

From 7563524573fb8d7e723cec38329edf1b3968593c Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 30 Jun 2014 20:51:11 +1000
Subject: [PATCH 0671/1185] KVM: Move all accesses to kvm::irq_routing into
 irqchip.c

Now that struct _irqfd does not keep a reference to storage pointed
to by the irq_routing field of struct kvm, we can move the statement
that updates it out from under the irqfds.lock and put it in
kvm_set_irq_routing() instead.  That means we then have to take a
srcu_read_lock on kvm->irq_srcu around the irqfd_update call in
kvm_irqfd_assign(), since holding the kvm->irqfds.lock no longer
ensures that the routing can't change.

Combined with changing kvm_irq_map_gsi() and kvm_irq_map_chip_pin()
to take a struct kvm * argument instead of the pointer to the routing
table, this allows us to move all references to kvm->irq_routing
into irqchip.c.  That in turn allows us to move the definition of the
kvm_irq_routing_table struct into irqchip.c as well.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Tested-by: Eric Auger <eric.auger@linaro.org>
Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 9957c86d659a4d5a2bed25ccbd3bfc9c3f25e658)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h | 35 +++++++----------------------------
 virt/kvm/eventfd.c       | 22 +++++++++-------------
 virt/kvm/irq_comm.c      |  6 ++----
 virt/kvm/irqchip.c       | 39 +++++++++++++++++++++++++--------------
 4 files changed, 43 insertions(+), 59 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f91b5cda1d28..65b6b8d793b5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -301,24 +301,7 @@ struct kvm_kernel_irq_routing_entry {
 	struct hlist_node link;
 };
 
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
-
-struct kvm_irq_routing_table {
-	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
-	struct kvm_kernel_irq_routing_entry *rt_entries;
-	u32 nr_rt_entries;
-	/*
-	 * Array indexed by gsi. Each entry contains list of irq chips
-	 * the gsi is connected to.
-	 */
-	struct hlist_head map[0];
-};
-
-#else
-
-struct kvm_irq_routing_table {};
-
-#endif
+struct kvm_irq_routing_table;
 
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
@@ -377,8 +360,7 @@ struct kvm {
 	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	/*
-	 * Update side is protected by irq_lock and,
-	 * if configured, irqfds.lock.
+	 * Update side is protected by irq_lock.
 	 */
 	struct kvm_irq_routing_table __rcu *irq_routing;
 	struct hlist_head mask_notifier_list;
@@ -709,10 +691,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 			     bool mask);
 
-int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries,
-		    struct kvm_irq_routing_table *irq_rt, int gsi);
-int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt,
-			 unsigned irqchip, unsigned pin);
+int kvm_irq_map_gsi(struct kvm *kvm,
+		    struct kvm_kernel_irq_routing_entry *entries, int gsi);
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin);
 
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 		bool line_status);
@@ -923,7 +904,7 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
 void kvm_irqfd_release(struct kvm *kvm);
-void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
+void kvm_irq_routing_update(struct kvm *);
 #else
 static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 {
@@ -945,10 +926,8 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 static inline void kvm_irqfd_release(struct kvm *kvm) {}
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
-static inline void kvm_irq_routing_update(struct kvm *kvm,
-					  struct kvm_irq_routing_table *irq_rt)
+static inline void kvm_irq_routing_update(struct kvm *kvm)
 {
-	rcu_assign_pointer(kvm->irq_routing, irq_rt);
 }
 #endif
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a75227f7d487..71133644f465 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -278,14 +278,13 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 }
 
 /* Must be called under irqfds.lock */
-static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
-			 struct kvm_irq_routing_table *irq_rt)
+static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
 {
 	struct kvm_kernel_irq_routing_entry *e;
 	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
 	int i, n_entries;
 
-	n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi);
+	n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
 
 	write_seqcount_begin(&irqfd->irq_entry_sc);
 
@@ -304,12 +303,12 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
 static int
 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 {
-	struct kvm_irq_routing_table *irq_rt;
 	struct _irqfd *irqfd, *tmp;
 	struct file *file = NULL;
 	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
 	int ret;
 	unsigned int events;
+	int idx;
 
 	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
 	if (!irqfd)
@@ -403,9 +402,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 		goto fail;
 	}
 
-	irq_rt = rcu_dereference_protected(kvm->irq_routing,
-					   lockdep_is_held(&kvm->irqfds.lock));
-	irqfd_update(kvm, irqfd, irq_rt);
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irqfd_update(kvm, irqfd);
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	events = file->f_op->poll(file, &irqfd->pt);
 
@@ -539,20 +538,17 @@ kvm_irqfd_release(struct kvm *kvm)
 }
 
 /*
- * Change irq_routing and irqfd.
+ * Take note of a change in irq routing.
  * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
  */
-void kvm_irq_routing_update(struct kvm *kvm,
-			    struct kvm_irq_routing_table *irq_rt)
+void kvm_irq_routing_update(struct kvm *kvm)
 {
 	struct _irqfd *irqfd;
 
 	spin_lock_irq(&kvm->irqfds.lock);
 
-	rcu_assign_pointer(kvm->irq_routing, irq_rt);
-
 	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
-		irqfd_update(kvm, irqfd, irq_rt);
+		irqfd_update(kvm, irqfd);
 
 	spin_unlock_irq(&kvm->irqfds.lock);
 }
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 175844593243..963b8995a9e8 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -163,7 +163,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
 	struct kvm_kernel_irq_routing_entry *e;
 	int ret = -EINVAL;
-	struct kvm_irq_routing_table *irq_rt;
 	int idx;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
@@ -177,8 +176,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	 * which is limited to 1:1 GSI mapping.
 	 */
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) {
+	if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
 		e = &entries[0];
 		if (likely(e->type == KVM_IRQ_ROUTING_MSI))
 			ret = kvm_set_msi_inatomic(e, kvm);
@@ -264,7 +262,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 	int idx, gsi;
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
 			if (kimn->irq == gsi)
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index f4648dd94888..04faac50cef5 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,12 +31,26 @@
 #include <trace/events/kvm.h>
 #include "irq.h"
 
-int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries,
-		    struct kvm_irq_routing_table *irq_rt, int gsi)
+struct kvm_irq_routing_table {
+	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+	struct kvm_kernel_irq_routing_entry *rt_entries;
+	u32 nr_rt_entries;
+	/*
+	 * Array indexed by gsi. Each entry contains list of irq chips
+	 * the gsi is connected to.
+	 */
+	struct hlist_head map[0];
+};
+
+int kvm_irq_map_gsi(struct kvm *kvm,
+		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
 {
+	struct kvm_irq_routing_table *irq_rt;
 	struct kvm_kernel_irq_routing_entry *e;
 	int n = 0;
 
+	irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
+					lockdep_is_held(&kvm->irq_lock));
 	if (gsi < irq_rt->nr_rt_entries) {
 		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
 			entries[n] = *e;
@@ -47,21 +61,21 @@ int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries,
 	return n;
 }
 
-int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt,
-			 unsigned irqchip, unsigned pin)
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
+	struct kvm_irq_routing_table *irq_rt;
+
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 	return irq_rt->chip[irqchip][pin];
 }
 
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
-	struct kvm_irq_routing_table *irq_rt;
 	struct kvm_irq_ack_notifier *kian;
 	int gsi, idx;
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
@@ -78,15 +92,13 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
 
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
-	struct kvm_irq_routing_table *irq_rt;
 	struct kvm_irq_ack_notifier *kian;
 	int gsi, idx;
 
 	trace_kvm_ack_irq(irqchip, pin);
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
@@ -143,7 +155,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 {
 	struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS];
 	int ret = -1, i, idx;
-	struct kvm_irq_routing_table *irq_rt;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
@@ -152,8 +163,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 	 * writes to the unused one.
 	 */
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	i = kvm_irq_map_gsi(irq_set, irq_rt, irq);
+	i = kvm_irq_map_gsi(kvm, irq_set, irq);
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	while(i--) {
@@ -250,7 +260,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
 
 	mutex_lock(&kvm->irq_lock);
 	old = kvm->irq_routing;
-	kvm_irq_routing_update(kvm, new);
+	rcu_assign_pointer(kvm->irq_routing, new);
+	kvm_irq_routing_update(kvm);
 	mutex_unlock(&kvm->irq_lock);
 
 	synchronize_srcu_expedited(&kvm->irq_srcu);

From c905880e253b8e2f77db26dcb647a1a3bb359b11 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 30 Jun 2014 20:51:12 +1000
Subject: [PATCH 0672/1185] KVM: Move irq notifier implementation into
 eventfd.c

This moves the functions kvm_irq_has_notifier(), kvm_notify_acked_irq(),
kvm_register_irq_ack_notifier() and kvm_unregister_irq_ack_notifier()
from irqchip.c to eventfd.c.  The reason for doing this is that those
functions are used in connection with IRQFDs, which are implemented in
eventfd.c.  In future we will want to use IRQFDs on platforms that
don't implement the GSI routing implemented in irqchip.c, so we won't
be compiling in irqchip.c, but we still need the irq notifiers.  The
implementation is unchanged.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Tested-by: Eric Auger <eric.auger@linaro.org>
Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e4d57e1ee1ab59f0cef0272800ac6c52e0ec814a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/eventfd.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/irqchip.c | 61 --------------------------------------------
 2 files changed, 63 insertions(+), 61 deletions(-)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 71133644f465..d2b1653c97dd 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -34,7 +34,9 @@
 #include <linux/srcu.h>
 #include <linux/slab.h>
 #include <linux/seqlock.h>
+#include <trace/events/kvm.h>
 
+#include "irq.h"
 #include "iodev.h"
 
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
@@ -830,3 +832,64 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 	return kvm_assign_ioeventfd(kvm, args);
 }
+
+bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	int gsi, idx;
+
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi) {
+				srcu_read_unlock(&kvm->irq_srcu, idx);
+				return true;
+			}
+
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	int gsi, idx;
+
+	trace_kvm_ack_irq(irqchip, pin);
+
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi)
+				kian->irq_acked(kian);
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+}
+
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
+#ifdef __KVM_HAVE_IOAPIC
+	kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
+
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				    struct kvm_irq_ack_notifier *kian)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_del_init_rcu(&kian->link);
+	mutex_unlock(&kvm->irq_lock);
+	synchronize_srcu(&kvm->irq_srcu);
+#ifdef __KVM_HAVE_IOAPIC
+	kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 04faac50cef5..7f256f31df10 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -69,67 +69,6 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
 	return irq_rt->chip[irqchip][pin];
 }
 
-bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	struct kvm_irq_ack_notifier *kian;
-	int gsi, idx;
-
-	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
-	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi) {
-				srcu_read_unlock(&kvm->irq_srcu, idx);
-				return true;
-			}
-
-	srcu_read_unlock(&kvm->irq_srcu, idx);
-
-	return false;
-}
-EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
-
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	struct kvm_irq_ack_notifier *kian;
-	int gsi, idx;
-
-	trace_kvm_ack_irq(irqchip, pin);
-
-	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
-	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi)
-				kian->irq_acked(kian);
-	srcu_read_unlock(&kvm->irq_srcu, idx);
-}
-
-void kvm_register_irq_ack_notifier(struct kvm *kvm,
-				   struct kvm_irq_ack_notifier *kian)
-{
-	mutex_lock(&kvm->irq_lock);
-	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
-	mutex_unlock(&kvm->irq_lock);
-#ifdef __KVM_HAVE_IOAPIC
-	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
-}
-
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-				    struct kvm_irq_ack_notifier *kian)
-{
-	mutex_lock(&kvm->irq_lock);
-	hlist_del_init_rcu(&kian->link);
-	mutex_unlock(&kvm->irq_lock);
-	synchronize_srcu(&kvm->irq_srcu);
-#ifdef __KVM_HAVE_IOAPIC
-	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
-}
-
 int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
 {
 	struct kvm_kernel_irq_routing_entry route;

From f65b953a7232ae8bb497b226363629f9e49b4792 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 30 Jun 2014 20:51:13 +1000
Subject: [PATCH 0673/1185] KVM: Give IRQFD its own separate enabling Kconfig
 option

Currently, the IRQFD code is conditional on CONFIG_HAVE_KVM_IRQ_ROUTING.
So that we can have the IRQFD code compiled in without having the
IRQ routing code, this creates a new CONFIG_HAVE_KVM_IRQFD, makes
the IRQFD code conditional on it instead of CONFIG_HAVE_KVM_IRQ_ROUTING,
and makes all the platforms that currently select HAVE_KVM_IRQ_ROUTING
also select HAVE_KVM_IRQFD.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Tested-by: Eric Auger <eric.auger@linaro.org>
Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 297e21053a52f060944e9f0de4c64fad9bcd72fc)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/ia64/kvm/Kconfig    | 1 +
 arch/powerpc/kvm/Kconfig | 1 +
 arch/x86/kvm/Kconfig     | 1 +
 include/linux/kvm_host.h | 8 ++++----
 virt/kvm/Kconfig         | 3 +++
 virt/kvm/eventfd.c       | 6 +++---
 virt/kvm/kvm_main.c      | 2 +-
 7 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 990b86420cc6..3d50ea955c4c 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -25,6 +25,7 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	select KVM_APIC_ARCHITECTURE
 	select KVM_MMIO
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index eb643f862579..60019a6fd6bb 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -155,6 +155,7 @@ config KVM_MPIC
 	bool "KVM in-kernel MPIC emulation"
 	depends on KVM && E500
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_MSI
 	help
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b89c5db2b832..bdccfb62aa0d 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -27,6 +27,7 @@ config KVM
 	select MMU_NOTIFIER
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_EVENTFD
 	select KVM_APIC_ARCHITECTURE
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 65b6b8d793b5..35319fe693f8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -413,7 +413,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
 int __must_check vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
 
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
 int kvm_irqfd_init(void);
 void kvm_irqfd_exit(void);
 #else
@@ -888,20 +888,20 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue);
 void kvm_free_irq_routing(struct kvm *kvm);
 
-int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
-
 #else
 
 static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 
 #endif
 
+int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
+
 #ifdef CONFIG_HAVE_KVM_EVENTFD
 
 void kvm_eventfd_init(struct kvm *kvm);
 int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
 
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
+#ifdef CONFIG_HAVE_KVM_IRQFD
 int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
 void kvm_irqfd_release(struct kvm *kvm);
 void kvm_irq_routing_update(struct kvm *);
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 13f2d19793e3..fc0c5e603eb4 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -6,6 +6,9 @@ config HAVE_KVM
 config HAVE_KVM_IRQCHIP
        bool
 
+config HAVE_KVM_IRQFD
+       bool
+
 config HAVE_KVM_IRQ_ROUTING
        bool
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index d2b1653c97dd..8be5b6545770 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -39,7 +39,7 @@
 #include "irq.h"
 #include "iodev.h"
 
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
 /*
  * --------------------------------------------------------------------
  * irqfd: Allows an fd to be used to inject an interrupt to the guest
@@ -450,7 +450,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 void
 kvm_eventfd_init(struct kvm *kvm)
 {
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
 	spin_lock_init(&kvm->irqfds.lock);
 	INIT_LIST_HEAD(&kvm->irqfds.items);
 	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
@@ -459,7 +459,7 @@ kvm_eventfd_init(struct kvm *kvm)
 	INIT_LIST_HEAD(&kvm->ioeventfds);
 }
 
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
 /*
  * shutdown any irqfd's that match fd+gsi
  */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1edb15da7acf..848a6afab165 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2330,7 +2330,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #ifdef CONFIG_HAVE_KVM_MSI
 	case KVM_CAP_SIGNAL_MSI:
 #endif
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
 	case KVM_CAP_IRQFD_RESAMPLE:
 #endif
 	case KVM_CAP_CHECK_EXTENSION_VM:

From 57f984d2c64c8c1e597d021567a379f4f3920b35 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 6 Aug 2014 14:24:45 +0200
Subject: [PATCH 0674/1185] KVM: Move more code under CONFIG_HAVE_KVM_IRQFD

Commit e4d57e1ee1ab (KVM: Move irq notifier implementation into
eventfd.c, 2014-06-30) included the irq notifier code unconditionally
in eventfd.c, while it was under CONFIG_HAVE_KVM_IRQCHIP before.

Similarly, commit 297e21053a52 (KVM: Give IRQFD its own separate enabling
Kconfig option, 2014-06-30) moved code from CONFIG_HAVE_KVM_IRQ_ROUTING
to CONFIG_HAVE_KVM_IRQFD but forgot to move the pieces that used to be
under CONFIG_HAVE_KVM_IRQCHIP.

Together, this broke compilation without CONFIG_KVM_XICS.  Fix by adding
or changing the #ifdefs so that they point at CONFIG_HAVE_KVM_IRQFD.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c77dcacb397519b6ade8f08201a4a90a7f4f751e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h   |   2 +
 include/trace/events/kvm.h |   8 +--
 virt/kvm/eventfd.c         | 122 ++++++++++++++++++-------------------
 virt/kvm/kvm_main.c        |   2 +
 4 files changed, 69 insertions(+), 65 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 35319fe693f8..75d911ca47bd 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -364,6 +364,8 @@ struct kvm {
 	 */
 	struct kvm_irq_routing_table __rcu *irq_routing;
 	struct hlist_head mask_notifier_list;
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQFD
 	struct hlist_head irq_ack_notifier_list;
 #endif
 
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 131a0bda7aec..908925ace776 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit,
 		  __entry->errno < 0 ? -__entry->errno : __entry->reason)
 );
 
-#if defined(CONFIG_HAVE_KVM_IRQCHIP)
+#if defined(CONFIG_HAVE_KVM_IRQFD)
 TRACE_EVENT(kvm_set_irq,
 	TP_PROTO(unsigned int gsi, int level, int irq_source_id),
 	TP_ARGS(gsi, level, irq_source_id),
@@ -57,7 +57,7 @@ TRACE_EVENT(kvm_set_irq,
 	TP_printk("gsi %u level %d source %d",
 		  __entry->gsi, __entry->level, __entry->irq_source_id)
 );
-#endif
+#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */
 
 #if defined(__KVM_HAVE_IOAPIC)
 #define kvm_deliver_mode		\
@@ -124,7 +124,7 @@ TRACE_EVENT(kvm_msi_set_irq,
 
 #endif /* defined(__KVM_HAVE_IOAPIC) */
 
-#if defined(CONFIG_HAVE_KVM_IRQCHIP)
+#if defined(CONFIG_HAVE_KVM_IRQFD)
 
 TRACE_EVENT(kvm_ack_irq,
 	TP_PROTO(unsigned int irqchip, unsigned int pin),
@@ -149,7 +149,7 @@ TRACE_EVENT(kvm_ack_irq,
 #endif
 );
 
-#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */
+#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */
 
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 8be5b6545770..67563284f7b9 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -445,6 +445,67 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	kfree(irqfd);
 	return ret;
 }
+
+bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	int gsi, idx;
+
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi) {
+				srcu_read_unlock(&kvm->irq_srcu, idx);
+				return true;
+			}
+
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	int gsi, idx;
+
+	trace_kvm_ack_irq(irqchip, pin);
+
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi)
+				kian->irq_acked(kian);
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+}
+
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
+#ifdef __KVM_HAVE_IOAPIC
+	kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
+
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				    struct kvm_irq_ack_notifier *kian)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_del_init_rcu(&kian->link);
+	mutex_unlock(&kvm->irq_lock);
+	synchronize_srcu(&kvm->irq_srcu);
+#ifdef __KVM_HAVE_IOAPIC
+	kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
 #endif
 
 void
@@ -832,64 +893,3 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 	return kvm_assign_ioeventfd(kvm, args);
 }
-
-bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	struct kvm_irq_ack_notifier *kian;
-	int gsi, idx;
-
-	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
-	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi) {
-				srcu_read_unlock(&kvm->irq_srcu, idx);
-				return true;
-			}
-
-	srcu_read_unlock(&kvm->irq_srcu, idx);
-
-	return false;
-}
-EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
-
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	struct kvm_irq_ack_notifier *kian;
-	int gsi, idx;
-
-	trace_kvm_ack_irq(irqchip, pin);
-
-	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
-	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi)
-				kian->irq_acked(kian);
-	srcu_read_unlock(&kvm->irq_srcu, idx);
-}
-
-void kvm_register_irq_ack_notifier(struct kvm *kvm,
-				   struct kvm_irq_ack_notifier *kian)
-{
-	mutex_lock(&kvm->irq_lock);
-	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
-	mutex_unlock(&kvm->irq_lock);
-#ifdef __KVM_HAVE_IOAPIC
-	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
-}
-
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-				    struct kvm_irq_ack_notifier *kian)
-{
-	mutex_lock(&kvm->irq_lock);
-	hlist_del_init_rcu(&kian->link);
-	mutex_unlock(&kvm->irq_lock);
-	synchronize_srcu(&kvm->irq_srcu);
-#ifdef __KVM_HAVE_IOAPIC
-	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
-}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 848a6afab165..50f947301fa7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -464,6 +464,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQFD
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
 

From 7fdbe25673a0bcd4c38968ec2cd048a21f1fbb93 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 19 Aug 2014 16:45:56 +0200
Subject: [PATCH 0675/1185] KVM: avoid unnecessary synchronize_rcu

We don't have to wait for a grace period if there is no oldpid that
we are going to free. put_pid() also checks for NULL, so this patch
only makes the synchronize_rcu() call conditional.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 7103f60de8bed21a0ad5d15d2ad5b7a333dda201)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 50f947301fa7..aa5057e7a1b9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -129,7 +129,8 @@ int vcpu_load(struct kvm_vcpu *vcpu)
 		struct pid *oldpid = vcpu->pid;
 		struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
 		rcu_assign_pointer(vcpu->pid, newpid);
-		synchronize_rcu();
+		if (oldpid)
+			synchronize_rcu();
 		put_pid(oldpid);
 	}
 	cpu = get_cpu();

From ae38cfa961f9605be43060001ccc41d39427cb64 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Thu, 21 Aug 2014 18:08:05 +0200
Subject: [PATCH 0676/1185] KVM: add kvm_arch_sched_in
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce preempt notifiers for architecture-specific code.
The advantage over creating a new notifier in every arch is slightly
simpler code and a guaranteed call order with respect to kvm_sched_in.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e790d9ef6405633b007339d746b709aed43a928d)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c         | 4 ++++
 arch/powerpc/kvm/powerpc.c | 4 ++++
 arch/s390/kvm/kvm-s390.c   | 4 ++++
 arch/x86/kvm/x86.c         | 4 ++++
 include/linux/kvm_host.h   | 2 ++
 virt/kvm/kvm_main.c        | 2 ++
 6 files changed, 20 insertions(+)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 0e3d3dc8eea2..d55786c579bf 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -288,6 +288,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = cpu;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index aacf3e35404f..e4c719d49e16 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -533,6 +533,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	kvmppc_subarch_vcpu_uninit(vcpu);
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 #ifdef CONFIG_BOOKE
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 9133f869b070..46392afd043d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -325,6 +325,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	/* Nothing todo */
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	save_fp_regs(&vcpu->arch.host_fpregs);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 46a8c74fd431..a348daad61fd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6809,6 +6809,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 		static_key_slow_dec(&kvm_no_apic_vcpu);
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
 	if (type)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 75d911ca47bd..4c12f314aab2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -600,6 +600,8 @@ void kvm_arch_exit(void);
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
+
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index aa5057e7a1b9..2659da911d99 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3063,6 +3063,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
 	if (vcpu->preempted)
 		vcpu->preempted = false;
 
+	kvm_arch_sched_in(vcpu, cpu);
+
 	kvm_arch_vcpu_load(vcpu, cpu);
 }
 

From 7ec68cffdc4807a95ae1db300bf857084f0c94da Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 19 Aug 2014 12:15:00 +0200
Subject: [PATCH 0677/1185] KVM: Introduce gfn_to_hva_memslot_prot

To support read-only memory regions on arm and arm64, we need to
resolve a gfn to an hva given a pointer to a memslot.  To avoid looping
through the memslots twice and to reuse the hva error checking of
gfn_to_hva_prot(), add a new gfn_to_hva_memslot_prot() function and
refactor gfn_to_hva_prot() to use this function.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 64d831269ccbca1fc6d739a0f3c8aa24afb43a5e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h |  2 ++
 virt/kvm/kvm_main.c      | 11 +++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4c12f314aab2..59d50379c2e4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -504,6 +504,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
+unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
+				      bool *writable);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
 void kvm_set_page_accessed(struct page *page);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2659da911d99..84cee09da140 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1076,9 +1076,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
  * If writable is set to false, the hva returned by this function is only
  * allowed to be read.
  */
-unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
+				      gfn_t gfn, bool *writable)
 {
-	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
 	unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
 
 	if (!kvm_is_error_hva(hva) && writable)
@@ -1087,6 +1087,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
 	return hva;
 }
 
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+{
+	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+
+	return gfn_to_hva_memslot_prot(slot, gfn, writable);
+}
+
 static int kvm_read_hva(void *data, void __user *hva, int len)
 {
 	return __copy_from_user(data, hva, len);

From 7e7ef8405025cd7af730fce40ace2a9c77ee9970 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 19 Aug 2014 12:18:04 +0200
Subject: [PATCH 0678/1185] arm/arm64: KVM: Support KVM_CAP_READONLY_MEM

When userspace loads code and data in read-only memory regions, KVM
needs to be able to handle this on arm and arm64.  Specifically this is
used when running code directly from a read-only flash device; the
common scenario is a UEFI blob loaded with the -bios option in QEMU.

Note that the MMIO exit on writes to read-only memory is ABI and can
be used to emulate block-erase style flash devices.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 98047888bb9fd57734028c44ec17413ddd623958)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/uapi/asm/kvm.h   |  1 +
 arch/arm/kvm/arm.c                |  1 +
 arch/arm/kvm/mmu.c                | 22 ++++++++--------------
 arch/arm64/include/uapi/asm/kvm.h |  1 +
 4 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index e6ebdd3471e5..51257fda254b 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -25,6 +25,7 @@
 
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
 
 #define KVM_REG_SIZE(id)						\
 	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index d55786c579bf..35dc889df3d2 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -188,6 +188,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_ARM_PSCI:
 	case KVM_CAP_ARM_PSCI_0_2:
+	case KVM_CAP_READONLY_MEM:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 16e7994bf347..62f5642153f9 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -747,14 +747,13 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
 }
 
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-			  struct kvm_memory_slot *memslot,
+			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
 {
 	int ret;
 	bool write_fault, writable, hugetlb = false, force_pte = false;
 	unsigned long mmu_seq;
 	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
-	unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
 	struct vm_area_struct *vma;
@@ -863,7 +862,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	unsigned long fault_status;
 	phys_addr_t fault_ipa;
 	struct kvm_memory_slot *memslot;
-	bool is_iabt;
+	unsigned long hva;
+	bool is_iabt, write_fault, writable;
 	gfn_t gfn;
 	int ret, idx;
 
@@ -884,7 +884,10 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 	gfn = fault_ipa >> PAGE_SHIFT;
-	if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+	memslot = gfn_to_memslot(vcpu->kvm, gfn);
+	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
+	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
 		if (is_iabt) {
 			/* Prefetch Abort on I/O address */
 			kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
@@ -892,13 +895,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 			goto out_unlock;
 		}
 
-		if (fault_status != FSC_FAULT) {
-			kvm_err("Unsupported fault status on io memory: %#lx\n",
-				fault_status);
-			ret = -EFAULT;
-			goto out_unlock;
-		}
-
 		/*
 		 * The IPA is reported as [MAX:12], so we need to
 		 * complement it with the bottom 12 bits from the
@@ -910,9 +906,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		goto out_unlock;
 	}
 
-	memslot = gfn_to_memslot(vcpu->kvm, gfn);
-
-	ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
+	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
 	if (ret == 0)
 		ret = 1;
 out_unlock:
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index e633ff8cdec8..f4ec5a674d05 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -37,6 +37,7 @@
 
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
 
 #define KVM_REG_SIZE(id)						\
 	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))

From 276359920df8032e689497456c0a3c6dcc1b5100 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 26 Aug 2014 15:13:20 +0100
Subject: [PATCH 0679/1185] KVM: ARM/arm64: fix non-const declaration of
 function returning const

Sparse kicks up about a type mismatch for kvm_target_cpu:

arch/arm64/kvm/guest.c:271:25: error: symbol 'kvm_target_cpu' redeclared with different type (originally declared at ./arch/arm64/include/asm/kvm_host.h:45) - different modifiers

so fix this by adding the missing const attribute to the function
declaration.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 6951e48bff0b55d2a8e825a953fc1f8e3a34bf1c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h   | 2 +-
 arch/arm64/include/asm/kvm_host.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 6dfb404f6c46..fcb12a6f7db5 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -42,7 +42,7 @@
 
 struct kvm_vcpu;
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
-int kvm_target_cpu(void);
+int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e10c45a578e3..44094e559848 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -42,7 +42,7 @@
 #define KVM_VCPU_MAX_FEATURES 3
 
 struct kvm_vcpu;
-int kvm_target_cpu(void);
+int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 int kvm_arch_dev_ioctl_check_extension(long ext);
 

From 0251cb8ae768874a988c39a908fbcc8dfd82429a Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 26 Aug 2014 15:13:21 +0100
Subject: [PATCH 0680/1185] KVM: ARM/arm64: fix broken __percpu annotation

Running sparse results in a bunch of noisy address space mismatches
thanks to the broken __percpu annotation on kvm_get_running_vcpus.

This function returns a pcpu pointer to a pointer, not a pointer to a
pcpu pointer. This patch fixes the annotation, which kills the warnings
from sparse.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 4000be423cb01a8d09de878bb8184511c49d4238)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c                | 2 +-
 arch/arm64/include/asm/kvm_host.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 35dc889df3d2..79268a12a49e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -82,7 +82,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
 /**
  * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus.
  */
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void)
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
 {
 	return &kvm_arm_running_vcpu;
 }
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 44094e559848..50431d36732b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -193,7 +193,7 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
 }
 
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
 u64 kvm_call_hyp(void *hypfn, ...);
 

From 563d813862bf2c44e1e9f0d59facf8e0ce303355 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 26 Aug 2014 15:13:22 +0100
Subject: [PATCH 0681/1185] KVM: ARM/arm64: avoid returning negative error code
 as bool

is_valid_cache returns true if the specified cache is valid.
Unfortunately, if the parameter passed is out of range, we return
-ENOENT, which ends up as true leading to potential hilarity.

This patch returns false on the failure path instead.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 18d457661fb9fa69352822ab98d39331c3d0e571)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/coproc.c     | 2 +-
 arch/arm64/kvm/sys_regs.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 37a0fe1bb9bb..7928dbdf2102 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -791,7 +791,7 @@ static bool is_valid_cache(u32 val)
 	u32 level, ctype;
 
 	if (val >= CSSELR_MAX)
-		return -ENOENT;
+		return false;
 
 	/* Bottom bit is Instruction or Data bit.  Next 3 bits are level. */
         level = (val >> 1);
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 5805e7c4a4dd..4cc3b719208e 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1218,7 +1218,7 @@ static bool is_valid_cache(u32 val)
 	u32 level, ctype;
 
 	if (val >= CSSELR_MAX)
-		return -ENOENT;
+		return false;
 
 	/* Bottom bit is Instruction or Data bit.  Next 3 bits are level. */
 	level = (val >> 1);

From 1453b5c1052996b983f6d9ae043dec64e089c0c8 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 26 Aug 2014 15:13:23 +0100
Subject: [PATCH 0682/1185] KVM: ARM/arm64: return -EFAULT if copy_from_user
 fails in set_timer_reg

We currently return the number of bytes not copied if set_timer_reg
fails, which is almost certainly not what userspace would like.

This patch returns -EFAULT instead.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit bd218bce92d3868ba4fe5e9e3eb8199d2aa614af)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/guest.c   | 2 +-
 arch/arm64/kvm/guest.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 813e49258690..cc0b78769bd8 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -163,7 +163,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 
 	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
 	if (ret != 0)
-		return ret;
+		return -EFAULT;
 
 	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
 }
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 8d1ec2887a26..76794692c20b 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -174,7 +174,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 
 	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
 	if (ret != 0)
-		return ret;
+		return -EFAULT;
 
 	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
 }

From 02b1b15d1265067d246eb8017c27c75d2fac5dae Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 26 Aug 2014 15:13:24 +0100
Subject: [PATCH 0683/1185] KVM: vgic: return int instead of bool when checking
 I/O ranges

vgic_ioaddr_overlap claims to return a bool, but in reality it returns
an int. Shut sparse up by fixing the type signature.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 1fa451bcc67fa921a04c5fac8dbcde7844d54512)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 73eba793b17f..d1cfe672b9d7 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1690,7 +1690,7 @@ int kvm_vgic_create(struct kvm *kvm)
 	return ret;
 }
 
-static bool vgic_ioaddr_overlap(struct kvm *kvm)
+static int vgic_ioaddr_overlap(struct kvm *kvm)
 {
 	phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
 	phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;

From 8ec715912ec282c0c560407fe72ae80b14c8d320 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 26 Aug 2014 15:13:25 +0100
Subject: [PATCH 0684/1185] KVM: vgic: declare probe function pointer as const

We extract the vgic probe function from the of_device_id data pointer,
which is const. Kill the sparse warning by ensuring that the local
function pointer is also marked as const.

Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit de56fb1923ca11f428bf557870e0faa99f38762e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index d1cfe672b9d7..efe6eee2e7eb 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1557,8 +1557,8 @@ static const struct of_device_id vgic_ids[] = {
 int kvm_vgic_hyp_init(void)
 {
 	const struct of_device_id *matched_id;
-	int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
-			  const struct vgic_params **);
+	const int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
+				const struct vgic_params **);
 	struct device_node *vgic_node;
 	int ret;
 

From d3b49fbe5764b06372df1dea9c9bea5b8373f49c Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 26 Aug 2014 14:00:37 +0200
Subject: [PATCH 0685/1185] KVM: Unconditionally export KVM_CAP_READONLY_MEM

The idea behind capabilities and the KVM_CHECK_EXTENSION ioctl is that
userspace can, at run-time, determine if a feature is supported or not.
This allows KVM to begin supporting a new feature with a new kernel
version without any need to update user space.  Unfortunately, since the
definition of KVM_CAP_READONLY_MEM was guarded by #ifdef
__KVM_HAVE_READONLY_MEM, such discovery still required a user space
update.

Therefore, unconditionally export KVM_CAP_READONLY_MEM and change the
in-kernel conditional to rely on __KVM_HAVE_READONLY_MEM.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0f8a4de3e088797576ac76200b634b802e5c7781)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/uapi/linux/kvm.h | 2 --
 virt/kvm/kvm_main.c      | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 1af686a82703..023ec6132966 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -662,9 +662,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_GET_SMMU_INFO 78
 #define KVM_CAP_S390_COW 79
 #define KVM_CAP_PPC_ALLOC_HTAB 80
-#ifdef __KVM_HAVE_READONLY_MEM
 #define KVM_CAP_READONLY_MEM 81
-#endif
 #define KVM_CAP_IRQFD_RESAMPLE 82
 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83
 #define KVM_CAP_PPC_HTAB_FD 84
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 84cee09da140..0cb02c749622 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -709,7 +709,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
 {
 	u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
 
-#ifdef KVM_CAP_READONLY_MEM
+#ifdef __KVM_HAVE_READONLY_MEM
 	valid_flags |= KVM_MEM_READONLY;
 #endif
 

From 177e51e89403b6843d068a1dae873650053b32a4 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 26 Aug 2014 14:00:38 +0200
Subject: [PATCH 0686/1185] KVM: Unconditionally export KVM_CAP_USER_NMI

The idea behind capabilities and the KVM_CHECK_EXTENSION ioctl is that
userspace can, at run-time, determine if a feature is supported or not.
This allows KVM to begin supporting a new feature with a new kernel
version without any need to update user space.  Unfortunately, since the
definition of KVM_CAP_USER_NMI was guarded by #ifdef
__KVM_HAVE_USER_NMI, such discovery still required a user space update.

Therefore, unconditionally export KVM_CAP_USER_NMI and fix the
typo in the comment for the IOCTL number definition as well.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 44b5ce73c99c389817be71b9161bceb197d40ecb)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/uapi/linux/kvm.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 023ec6132966..bbc1f8a09eb8 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -578,9 +578,7 @@ struct kvm_ppc_smmu_info {
 #endif
 /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
-#ifdef __KVM_HAVE_USER_NMI
 #define KVM_CAP_USER_NMI 22
-#endif
 #ifdef __KVM_HAVE_GUEST_DEBUG
 #define KVM_CAP_SET_GUEST_DEBUG 23
 #endif
@@ -995,7 +993,7 @@ struct kvm_s390_ucas_mapping {
 #define KVM_S390_INITIAL_RESET    _IO(KVMIO,   0x97)
 #define KVM_GET_MP_STATE          _IOR(KVMIO,  0x98, struct kvm_mp_state)
 #define KVM_SET_MP_STATE          _IOW(KVMIO,  0x99, struct kvm_mp_state)
-/* Available with KVM_CAP_NMI */
+/* Available with KVM_CAP_USER_NMI */
 #define KVM_NMI                   _IO(KVMIO,   0x9a)
 /* Available with KVM_CAP_SET_GUEST_DEBUG */
 #define KVM_SET_GUEST_DEBUG       _IOW(KVMIO,  0x9b, struct kvm_guest_debug)

From 48bc31d532d18a5e64c5ef1f20be161d0b834d11 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 29 Aug 2014 14:01:17 +0200
Subject: [PATCH 0687/1185] KVM: forward declare structs in kvm_types.h

Opaque KVM structs are useful for prototypes in asm/kvm_host.h, to avoid
"'struct foo' declared inside parameter list" warnings (and consequent
breakage due to conflicting types).

Move them from individual files to a generic place in linux/kvm_types.h.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 656473003bc7e056c3bbd4a4d9832dad01e86f76)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h     |  7 ++-----
 arch/arm64/include/asm/kvm_host.h   |  6 ++----
 arch/ia64/include/asm/kvm_host.h    |  3 ---
 arch/mips/include/asm/kvm_host.h    |  5 -----
 arch/powerpc/include/asm/kvm_host.h |  5 -----
 arch/s390/include/asm/kvm_host.h    |  3 +++
 arch/x86/include/asm/kvm_host.h     |  4 ----
 include/linux/kvm_host.h            |  6 ------
 include/linux/kvm_types.h           | 14 ++++++++++++++
 9 files changed, 21 insertions(+), 32 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index fcb12a6f7db5..0bc5295bbfdf 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -19,6 +19,8 @@
 #ifndef __ARM_KVM_HOST_H__
 #define __ARM_KVM_HOST_H__
 
+#include <linux/types.h>
+#include <linux/kvm_types.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
@@ -40,7 +42,6 @@
 
 #include <kvm/arm_vgic.h>
 
-struct kvm_vcpu;
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -149,20 +150,17 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 			const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-struct kvm_one_reg;
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 u64 kvm_call_hyp(void *hypfn, ...);
 void force_vm_exit(const cpumask_t *mask);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end);
@@ -187,7 +185,6 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
 
 int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
 unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
-struct kvm_one_reg;
 int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 50431d36732b..ac99093d004e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -22,6 +22,8 @@
 #ifndef __ARM64_KVM_HOST_H__
 #define __ARM64_KVM_HOST_H__
 
+#include <linux/types.h>
+#include <linux/kvm_types.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
@@ -41,7 +43,6 @@
 
 #define KVM_VCPU_MAX_FEATURES 3
 
-struct kvm_vcpu;
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 int kvm_arch_dev_ioctl_check_extension(long ext);
@@ -164,18 +165,15 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 			const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-struct kvm_one_reg;
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end);
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 989dd3fe8de1..65088aa016c0 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -238,9 +238,6 @@ struct kvm_vm_data {
 #define KVM_NR_PAGE_SIZES	1
 #define KVM_PAGES_PER_HPAGE(x)	1
 
-struct kvm;
-struct kvm_vcpu;
-
 struct kvm_mmio_req {
 	uint64_t addr;          /*  physical address		*/
 	uint64_t size;          /*  size in bytes		*/
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 4d6fa0bf1305..d56ee50d6885 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -71,11 +71,6 @@
 #define CAUSEB_DC       27
 #define CAUSEF_DC       (_ULCAST_(1)   << 27)
 
-struct kvm;
-struct kvm_run;
-struct kvm_vcpu;
-struct kvm_interrupt;
-
 extern atomic_t kvm_mips_instance;
 extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn);
 extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index af326cde7cb6..0a3238026fd7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -53,7 +53,6 @@
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 
-struct kvm;
 extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 extern int kvm_unmap_hva_range(struct kvm *kvm,
 			       unsigned long start, unsigned long end);
@@ -81,10 +80,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 /* Physical Address Mask - allowed range of real mode RAM access */
 #define KVM_PAM			0x0fffffffffffffffULL
 
-struct kvm;
-struct kvm_run;
-struct kvm_vcpu;
-
 struct lppaca;
 struct slb_shadow;
 struct dtl_entry;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 16bd5d169cdb..42913475fc6c 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -13,8 +13,11 @@
 
 #ifndef ASM_KVM_HOST_H
 #define ASM_KVM_HOST_H
+
+#include <linux/types.h>
 #include <linux/hrtimer.h>
 #include <linux/interrupt.h>
+#include <linux/kvm_types.h>
 #include <linux/kvm_host.h>
 #include <asm/debug.h>
 #include <asm/cpu.h>
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 53db582487c9..bd6f3529453d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -103,10 +103,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
 
 #define ASYNC_PF_PER_VCPU 64
 
-struct kvm_vcpu;
-struct kvm;
-struct kvm_async_pf;
-
 enum kvm_reg {
 	VCPU_REGS_RAX = 0,
 	VCPU_REGS_RCX = 1,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 59d50379c2e4..b8cb3c2d893e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -129,8 +129,6 @@ static inline bool is_error_page(struct page *page)
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
 
-struct kvm;
-struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
 extern spinlock_t kvm_lock;
@@ -301,8 +299,6 @@ struct kvm_kernel_irq_routing_entry {
 	struct hlist_node link;
 };
 
-struct kvm_irq_routing_table;
-
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
 #endif
@@ -994,8 +990,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
 
 extern bool kvm_rebooting;
 
-struct kvm_device_ops;
-
 struct kvm_device {
 	struct kvm_device_ops *ops;
 	struct kvm *kvm;
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index b0bcce0ddc95..b606bb689a3e 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -17,6 +17,20 @@
 #ifndef __KVM_TYPES_H__
 #define __KVM_TYPES_H__
 
+struct kvm;
+struct kvm_async_pf;
+struct kvm_device_ops;
+struct kvm_interrupt;
+struct kvm_irq_routing_table;
+struct kvm_memory_slot;
+struct kvm_one_reg;
+struct kvm_run;
+struct kvm_userspace_memory_region;
+struct kvm_vcpu;
+struct kvm_vcpu_init;
+
+enum kvm_mr_change;
+
 #include <asm/types.h>
 
 /*

From 3dcac226202ec154b5c29bcd968e7b894e61c031 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Thu, 28 Aug 2014 15:13:02 +0200
Subject: [PATCH 0688/1185] KVM: static inline empty kvm_arch functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using static inline is going to save a few bytes and cycles.
For example on powerpc, the difference is 700 B after stripping.
(5 kB before)

This patch also deals with two overlooked empty functions:
kvm_arch_flush_shadow was not removed from arch/mips/kvm/mips.c
  2df72e9bc KVM: split kvm_arch_flush_shadow
and kvm_arch_sched_in never made it into arch/ia64/kvm/kvm-ia64.c.
  e790d9ef6 KVM: add kvm_arch_sched_in

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0865e636aef751966e6e0f8950a26bc7391e923c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h     |  6 ++++
 arch/arm/kvm/arm.c                  | 19 -------------
 arch/arm64/include/asm/kvm_host.h   |  6 ++++
 arch/ia64/include/asm/kvm_host.h    | 12 ++++++++
 arch/ia64/kvm/kvm-ia64.c            | 30 --------------------
 arch/mips/include/asm/kvm_host.h    | 11 ++++++++
 arch/powerpc/include/asm/kvm_host.h |  8 ++++++
 arch/powerpc/kvm/powerpc.c          | 28 -------------------
 arch/s390/include/asm/kvm_host.h    | 14 ++++++++++
 arch/s390/kvm/kvm-s390.c            | 43 -----------------------------
 10 files changed, 57 insertions(+), 120 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 0bc5295bbfdf..2b17f6ab9642 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -230,4 +230,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
+static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 79268a12a49e..882f7e856b6b 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -97,27 +97,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
 int kvm_arch_hardware_setup(void)
 {
 	return 0;
 }
 
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
 void kvm_arch_check_processor_compat(void *rtn)
 {
 	*(int *)rtn = 0;
 }
 
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
 
 /**
  * kvm_arch_init_vm - initializes a VM data structure
@@ -285,14 +274,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
-{
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = cpu;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index ac99093d004e..eaf689c48a59 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -242,4 +242,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 	}
 }
 
+static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 65088aa016c0..cf03097176b1 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -596,6 +596,18 @@ void kvm_sal_emul(struct kvm_vcpu *vcpu);
 struct kvm *kvm_arch_alloc_vm(void);
 void kvm_arch_free_vm(struct kvm *kvm);
 
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+		struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_commit_memory_region(struct kvm *kvm,
+		struct kvm_userspace_memory_region *mem,
+		const struct kvm_memory_slot *old,
+		enum kvm_mr_change change) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+
 #endif /* __ASSEMBLY__*/
 
 #endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index b48aa69e35e1..b1e074d299ea 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1363,10 +1363,6 @@ static void kvm_release_vm_pages(struct kvm *kvm)
 	}
 }
 
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	kvm_iommu_unmap_guest(kvm);
@@ -1375,10 +1371,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_release_vm_pages(kvm);
 }
 
-void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
-{
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	if (cpu != vcpu->cpu) {
@@ -1467,7 +1459,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	kfree(vcpu->arch.apic);
 }
 
-
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -1550,21 +1541,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
-			   struct kvm_memory_slot *dont)
-{
-}
-
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			    unsigned long npages)
 {
 	return 0;
 }
 
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		struct kvm_memory_slot *memslot,
 		struct kvm_userspace_memory_region *mem,
@@ -1596,14 +1578,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	return 0;
 }
 
-void kvm_arch_commit_memory_region(struct kvm *kvm,
-		struct kvm_userspace_memory_region *mem,
-		const struct kvm_memory_slot *old,
-		enum kvm_mr_change change)
-{
-	return;
-}
-
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
 	kvm_flush_remote_tlbs(kvm);
@@ -1852,10 +1826,6 @@ int kvm_arch_hardware_setup(void)
 	return 0;
 }
 
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 {
 	return __apic_accept_irq(vcpu, irq->vector);
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index d56ee50d6885..279376e90c7f 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -654,5 +654,16 @@ extern void mips32_SyncICache(unsigned long addr, unsigned long size);
 extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
 extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
 
+static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+		struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+		struct kvm_memory_slot *slot) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 
 #endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 0a3238026fd7..90a9c0e4952c 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -623,4 +623,12 @@ struct kvm_vcpu_arch {
 #define __KVM_HAVE_ARCH_WQP
 #define __KVM_HAVE_CREATE_DEVICE
 
+static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_exit(void) {}
+
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index e4c719d49e16..7a75d6dbf610 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -251,19 +251,11 @@ int kvm_arch_hardware_enable(void *garbage)
 	return 0;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
 int kvm_arch_hardware_setup(void)
 {
 	return 0;
 }
 
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
 void kvm_arch_check_processor_compat(void *rtn)
 {
 	*(int *)rtn = kvmppc_core_check_processor_compat();
@@ -296,10 +288,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	mutex_unlock(&kvm->lock);
 }
 
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
@@ -421,10 +409,6 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 	return kvmppc_core_create_memslot(kvm, slot, npages);
 }
 
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
 				   struct kvm_userspace_memory_region *mem,
@@ -441,10 +425,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	kvmppc_core_commit_memory_region(kvm, mem, old);
 }
 
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 				   struct kvm_memory_slot *slot)
 {
@@ -533,10 +513,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	kvmppc_subarch_vcpu_uninit(vcpu);
 }
 
-void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
-{
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 #ifdef CONFIG_BOOKE
@@ -1134,7 +1110,3 @@ int kvm_arch_init(void *opaque)
 {
 	return 0;
 }
-
-void kvm_arch_exit(void)
-{
-}
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 42913475fc6c..d45e11dac5cf 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -269,4 +269,18 @@ struct kvm_arch{
 };
 
 extern int sie64a(struct kvm_s390_sie_block *, u64 *);
+
+static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_check_processor_compat(void *rtn) {}
+static inline void kvm_arch_exit(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+		struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+		struct kvm_memory_slot *slot) {}
+
 #endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 46392afd043d..1fe3bc10c87f 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -92,10 +92,6 @@ int kvm_arch_hardware_enable(void *garbage)
 	return 0;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
 int kvm_arch_hardware_setup(void)
 {
 	return 0;
@@ -105,19 +101,11 @@ void kvm_arch_hardware_unsetup(void)
 {
 }
 
-void kvm_arch_check_processor_compat(void *rtn)
-{
-}
-
 int kvm_arch_init(void *opaque)
 {
 	return 0;
 }
 
-void kvm_arch_exit(void)
-{
-}
-
 /* Section: device related */
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg)
@@ -289,10 +277,6 @@ static void kvm_free_vcpus(struct kvm *kvm)
 	mutex_unlock(&kvm->lock);
 }
 
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	kvm_free_vcpus(kvm);
@@ -320,15 +304,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-	/* Nothing todo */
-}
-
-void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
-{
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	save_fp_regs(&vcpu->arch.host_fpregs);
@@ -975,21 +950,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
-			   struct kvm_memory_slot *dont)
-{
-}
-
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			    unsigned long npages)
 {
 	return 0;
 }
 
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
 /* Section: memory related */
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
@@ -1035,15 +1001,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	return;
 }
 
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
-				   struct kvm_memory_slot *slot)
-{
-}
-
 static int __init kvm_s390_init(void)
 {
 	int ret;

From df6fef6a404701ff6249330215c31f2a2b041d7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Thu, 28 Aug 2014 15:13:03 +0200
Subject: [PATCH 0689/1185] KVM: remove garbage arg to *hardware_{en,dis}able
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the beginning was on_each_cpu(), which required an unused argument to
kvm_arch_ops.hardware_{en,dis}able, but this was soon forgotten.

Remove unnecessary arguments that stem from this.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 13a34e067eab24fec882e1834fbf2cc31911d474)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h     |  2 +-
 arch/arm/kvm/arm.c                  |  2 +-
 arch/arm64/include/asm/kvm_host.h   |  2 +-
 arch/ia64/kvm/kvm-ia64.c            |  4 ++--
 arch/mips/include/asm/kvm_host.h    |  2 +-
 arch/powerpc/include/asm/kvm_host.h |  2 +-
 arch/powerpc/kvm/powerpc.c          |  2 +-
 arch/s390/include/asm/kvm_host.h    |  2 +-
 arch/s390/kvm/kvm-s390.c            |  2 +-
 arch/x86/include/asm/kvm_host.h     |  4 ++--
 arch/x86/kvm/svm.c                  |  4 ++--
 arch/x86/kvm/vmx.c                  |  4 ++--
 arch/x86/kvm/x86.c                  | 12 ++++++------
 include/linux/kvm_host.h            |  4 ++--
 virt/kvm/kvm_main.c                 |  4 ++--
 15 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 2b17f6ab9642..46e5d4da1989 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -230,7 +230,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 882f7e856b6b..c8ff64b54459 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -87,7 +87,7 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
 	return &kvm_arm_running_vcpu;
 }
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	return 0;
 }
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index eaf689c48a59..bcde41905746 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -242,7 +242,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 	}
 }
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index b1e074d299ea..c9aa236dc29b 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
 
 static  DEFINE_SPINLOCK(vp_lock);
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	long  status;
 	long  tmp_base;
@@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage)
 	return 0;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
 {
 
 	long status;
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 279376e90c7f..5e3f4b0f18c8 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -654,7 +654,7 @@ extern void mips32_SyncICache(unsigned long addr, unsigned long size);
 extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
 extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_free_memslot(struct kvm *kvm,
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 90a9c0e4952c..f391f3fbde8b 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -623,7 +623,7 @@ struct kvm_vcpu_arch {
 #define __KVM_HAVE_ARCH_WQP
 #define __KVM_HAVE_CREATE_DEVICE
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 7a75d6dbf610..ea4cfdc991da 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -246,7 +246,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	return r;
 }
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	return 0;
 }
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index d45e11dac5cf..99971dfc6b9a 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -270,7 +270,7 @@ struct kvm_arch{
 
 extern int sie64a(struct kvm_s390_sie_block *, u64 *);
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_check_processor_compat(void *rtn) {}
 static inline void kvm_arch_exit(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 1fe3bc10c87f..412fbc5dc688 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -86,7 +86,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 static unsigned long long *facilities;
 
 /* Section: not file related */
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	/* every s390 is virtualization enabled ;-) */
 	return 0;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bd6f3529453d..5137dca9e9d6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -634,8 +634,8 @@ struct msr_data {
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);          /* __init */
 	int (*disabled_by_bios)(void);             /* __init */
-	int (*hardware_enable)(void *dummy);
-	void (*hardware_disable)(void *dummy);
+	int (*hardware_enable)(void);
+	void (*hardware_disable)(void);
 	void (*check_processor_compatibility)(void *rtn);
 	int (*hardware_setup)(void);               /* __init */
 	void (*hardware_unsetup)(void);            /* __exit */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a14a6eaf871d..934befea3e36 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -606,7 +606,7 @@ static int has_svm(void)
 	return 1;
 }
 
-static void svm_hardware_disable(void *garbage)
+static void svm_hardware_disable(void)
 {
 	/* Make sure we clean up behind us */
 	if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
@@ -617,7 +617,7 @@ static void svm_hardware_disable(void *garbage)
 	amd_pmu_disable_virt();
 }
 
-static int svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void)
 {
 
 	struct svm_cpu_data *sd;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5402c94ab768..af423252c265 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2566,7 +2566,7 @@ static void kvm_cpu_vmxon(u64 addr)
 			: "memory", "cc");
 }
 
-static int hardware_enable(void *garbage)
+static int hardware_enable(void)
 {
 	int cpu = raw_smp_processor_id();
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
@@ -2630,7 +2630,7 @@ static void kvm_cpu_vmxoff(void)
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
 }
 
-static void hardware_disable(void *garbage)
+static void hardware_disable(void)
 {
 	if (vmm_exclusive) {
 		vmclear_local_loaded_vmcss();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a348daad61fd..37d9503b81ea 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -239,7 +239,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 }
 EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
 
-static void drop_user_return_notifiers(void *ignore)
+static void drop_user_return_notifiers(void)
 {
 	unsigned int cpu = smp_processor_id();
 	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
@@ -6603,7 +6603,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
 	kvm_rip_write(vcpu, 0);
 }
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	struct kvm *kvm;
 	struct kvm_vcpu *vcpu;
@@ -6614,7 +6614,7 @@ int kvm_arch_hardware_enable(void *garbage)
 	bool stable, backwards_tsc = false;
 
 	kvm_shared_msr_cpu_online();
-	ret = kvm_x86_ops->hardware_enable(garbage);
+	ret = kvm_x86_ops->hardware_enable();
 	if (ret != 0)
 		return ret;
 
@@ -6694,10 +6694,10 @@ int kvm_arch_hardware_enable(void *garbage)
 	return 0;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
 {
-	kvm_x86_ops->hardware_disable(garbage);
-	drop_user_return_notifiers(garbage);
+	kvm_x86_ops->hardware_disable();
+	drop_user_return_notifiers();
 }
 
 int kvm_arch_hardware_setup(void)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b8cb3c2d893e..ce4cf2f1c419 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -608,8 +608,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
-int kvm_arch_hardware_enable(void *garbage);
-void kvm_arch_hardware_disable(void *garbage);
+int kvm_arch_hardware_enable(void);
+void kvm_arch_hardware_disable(void);
 int kvm_arch_hardware_setup(void);
 void kvm_arch_hardware_unsetup(void);
 void kvm_arch_check_processor_compat(void *rtn);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0cb02c749622..556049f120b7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2669,7 +2669,7 @@ static void hardware_enable_nolock(void *junk)
 
 	cpumask_set_cpu(cpu, cpus_hardware_enabled);
 
-	r = kvm_arch_hardware_enable(NULL);
+	r = kvm_arch_hardware_enable();
 
 	if (r) {
 		cpumask_clear_cpu(cpu, cpus_hardware_enabled);
@@ -2694,7 +2694,7 @@ static void hardware_disable_nolock(void *junk)
 	if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
 		return;
 	cpumask_clear_cpu(cpu, cpus_hardware_enabled);
-	kvm_arch_hardware_disable(NULL);
+	kvm_arch_hardware_disable();
 }
 
 static void hardware_disable(void)

From 25523c743ec5bf8ebe707ac8a8112eac0a2d42b6 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 4 Sep 2014 21:13:31 +0200
Subject: [PATCH 0690/1185] KVM: remove redundant check of in_spin_loop

The expression `vcpu->spin_loop.in_spin_loop' is always true,
because it is evaluated only when the condition
`!vcpu->spin_loop.in_spin_loop' is false.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 34656113182b704682e23d1363417536addfec97)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 556049f120b7..025f8300b6f0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1774,8 +1774,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 	bool eligible;
 
 	eligible = !vcpu->spin_loop.in_spin_loop ||
-			(vcpu->spin_loop.in_spin_loop &&
-			 vcpu->spin_loop.dy_eligible);
+		    vcpu->spin_loop.dy_eligible;
 
 	if (vcpu->spin_loop.in_spin_loop)
 		kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);

From c4e4c7bf26b78b6cb8fa985c19854c8f6e9abc20 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 4 Sep 2014 21:13:32 +0200
Subject: [PATCH 0691/1185] KVM: remove redundant assignment of return value in
 kvm_dev_ioctl

The first statement of kvm_dev_ioctl is
        long r = -EINVAL;

No need to reassign the same value.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a13f533b2f1d53a7c0baa7490498caeab7bc8ba5)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 025f8300b6f0..d201466921bf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2611,7 +2611,6 @@ static long kvm_dev_ioctl(struct file *filp,
 
 	switch (ioctl) {
 	case KVM_GET_API_VERSION:
-		r = -EINVAL;
 		if (arg)
 			goto out;
 		r = KVM_API_VERSION;
@@ -2623,7 +2622,6 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
 		break;
 	case KVM_GET_VCPU_MMAP_SIZE:
-		r = -EINVAL;
 		if (arg)
 			goto out;
 		r = PAGE_SIZE;     /* struct kvm_run */

From e665e3aae1e8c2e47e467c13c10e5b92c4be61a5 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 4 Sep 2014 21:13:33 +0200
Subject: [PATCH 0692/1185] KVM: remove redundant assignments in
 __kvm_set_memory_region

__kvm_set_memory_region sets r to EINVAL very early.
Doing it again is not necessary. The same is true later on, where
r is assigned -ENOMEM twice.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit f2a25160887e00434ce1361007009120e1fecbda)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d201466921bf..3ed27192a849 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -778,7 +778,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
 	npages = mem->memory_size >> PAGE_SHIFT;
 
-	r = -EINVAL;
 	if (npages > KVM_MEM_MAX_NR_PAGES)
 		goto out;
 
@@ -792,7 +791,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	new.npages = npages;
 	new.flags = mem->flags;
 
-	r = -EINVAL;
 	if (npages) {
 		if (!old.npages)
 			change = KVM_MR_CREATE;
@@ -848,7 +846,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
-		r = -ENOMEM;
 		slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
 				GFP_KERNEL);
 		if (!slots)

From cf0dfca545678c6dc257a75094f13515e0bf6c69 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 9 Sep 2014 11:27:09 +0100
Subject: [PATCH 0693/1185] ARM/arm64: KVM: fix use of WnR bit in
 kvm_is_write_fault()

The ISS encoding for an exception from a Data Abort has a WnR
bit[6] that indicates whether the Data Abort was caused by a
read or a write instruction. While there are several fields
in the encoding that are only valid if the ISV bit[24] is set,
WnR is not one of them, so we can read it unconditionally.

Instead of fixing both implementations of kvm_is_write_fault()
in place, reimplement it just once using kvm_vcpu_dabt_iswrite(),
which already does the right thing with respect to the WnR bit.
Also fix up the callers to pass 'vcpu'

Acked-by: Laszlo Ersek <lersek@redhat.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit a7d079cea2dffb112e26da2566dd84c0ef1fce97)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h   | 11 -----------
 arch/arm/kvm/mmu.c               | 12 ++++++++++--
 arch/arm64/include/asm/kvm_mmu.h | 13 -------------
 3 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 5cc0b0f5f72f..3f688b458143 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -78,17 +78,6 @@ static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
 	flush_pmd_entry(pte);
 }
 
-static inline bool kvm_is_write_fault(unsigned long hsr)
-{
-	unsigned long hsr_ec = hsr >> HSR_EC_SHIFT;
-	if (hsr_ec == HSR_EC_IABT)
-		return false;
-	else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR))
-		return false;
-	else
-		return true;
-}
-
 static inline void kvm_clean_pgd(pgd_t *pgd)
 {
 	clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 62f5642153f9..bb06f76a8f89 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -746,6 +746,14 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
 	return false;
 }
 
+static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+	if (kvm_vcpu_trap_is_iabt(vcpu))
+		return false;
+
+	return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -760,7 +768,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	pfn_t pfn;
 	pgprot_t mem_type = PAGE_S2;
 
-	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+	write_fault = kvm_is_write_fault(vcpu);
 	if (fault_status == FSC_PERM && !write_fault) {
 		kvm_err("Unexpected L2 read permission error\n");
 		return -EFAULT;
@@ -886,7 +894,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	gfn = fault_ipa >> PAGE_SHIFT;
 	memslot = gfn_to_memslot(vcpu->kvm, gfn);
 	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
-	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+	write_fault = kvm_is_write_fault(vcpu);
 	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
 		if (is_iabt) {
 			/* Prefetch Abort on I/O address */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 8e138c7c53ac..737da742b293 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -93,19 +93,6 @@ void kvm_clear_hyp_idmap(void);
 #define	kvm_set_pte(ptep, pte)		set_pte(ptep, pte)
 #define	kvm_set_pmd(pmdp, pmd)		set_pmd(pmdp, pmd)
 
-static inline bool kvm_is_write_fault(unsigned long esr)
-{
-	unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT;
-
-	if (esr_ec == ESR_EL2_EC_IABT)
-		return false;
-
-	if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR))
-		return false;
-
-	return true;
-}
-
 static inline void kvm_clean_pgd(pgd_t *pgd) {}
 static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
 static inline void kvm_clean_pte(pte_t *pte) {}

From cdead48c82db0e501edfce2c85b7834de2960d6f Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@linaro.org>
Date: Mon, 1 Sep 2014 09:36:08 +0100
Subject: [PATCH 0694/1185] KVM: EVENTFD: remove inclusion of irq.h

No longer needed. irq.h would be empty on ARM.

Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 0ba09511ddc3ff0b462f37b4fe4b9c4dccc054ec)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/eventfd.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 67563284f7b9..71ed39941b9c 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -36,7 +36,6 @@
 #include <linux/seqlock.h>
 #include <trace/events/kvm.h>
 
-#include "irq.h"
 #include "iodev.h"
 
 #ifdef CONFIG_HAVE_KVM_IRQFD

From 86318858caaa5a5e5b58f7bd2624c2a953eddf76 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Sep 2014 10:27:33 +0100
Subject: [PATCH 0695/1185] KVM: device: add simple registration mechanism for
 kvm_device_ops

kvm_ioctl_create_device currently has knowledge of all the device types
and their associated ops. This is fairly inflexible when adding support
for new in-kernel device emulations, so move what we currently have out
into a table, which can support dynamic registration of ops by new
drivers for virtual hardware.

Cc: Alex Williamson <Alex.Williamson@redhat.com>
Cc: Alex Graf <agraf@suse.de>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d60eacb07053142bfb9b41582074a89a790a9d46)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h |  1 +
 include/uapi/linux/kvm.h | 21 ++++++++++----
 virt/kvm/kvm_main.c      | 60 ++++++++++++++++++++++++----------------
 3 files changed, 53 insertions(+), 29 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ce4cf2f1c419..fc7b4270bc4a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1022,6 +1022,7 @@ struct kvm_device_ops {
 void kvm_device_get(struct kvm_device *dev);
 void kvm_device_put(struct kvm_device *dev);
 struct kvm_device *kvm_device_from_filp(struct file *filp);
+int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index bbc1f8a09eb8..00d2c69a3cb6 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -848,14 +848,25 @@ struct kvm_device_attr {
 	__u64	addr;		/* userspace address of attr data */
 };
 
-#define KVM_DEV_TYPE_FSL_MPIC_20	1
-#define KVM_DEV_TYPE_FSL_MPIC_42	2
-#define KVM_DEV_TYPE_XICS		3
-#define KVM_DEV_TYPE_VFIO		4
 #define  KVM_DEV_VFIO_GROUP			1
 #define   KVM_DEV_VFIO_GROUP_ADD			1
 #define   KVM_DEV_VFIO_GROUP_DEL			2
-#define KVM_DEV_TYPE_ARM_VGIC_V2	5
+
+enum kvm_device_type {
+	KVM_DEV_TYPE_FSL_MPIC_20	= 1,
+#define KVM_DEV_TYPE_FSL_MPIC_20	KVM_DEV_TYPE_FSL_MPIC_20
+	KVM_DEV_TYPE_FSL_MPIC_42,
+#define KVM_DEV_TYPE_FSL_MPIC_42	KVM_DEV_TYPE_FSL_MPIC_42
+	KVM_DEV_TYPE_XICS,
+#define KVM_DEV_TYPE_XICS		KVM_DEV_TYPE_XICS
+	KVM_DEV_TYPE_VFIO,
+#define KVM_DEV_TYPE_VFIO		KVM_DEV_TYPE_VFIO
+	KVM_DEV_TYPE_ARM_VGIC_V2,
+#define KVM_DEV_TYPE_ARM_VGIC_V2	KVM_DEV_TYPE_ARM_VGIC_V2
+	KVM_DEV_TYPE_FLIC,
+#define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
+	KVM_DEV_TYPE_MAX,
+};
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3ed27192a849..df006d12c14b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2261,6 +2261,37 @@ struct kvm_device *kvm_device_from_filp(struct file *filp)
 	return filp->private_data;
 }
 
+static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
+#ifdef CONFIG_KVM_MPIC
+	[KVM_DEV_TYPE_FSL_MPIC_20]	= &kvm_mpic_ops,
+	[KVM_DEV_TYPE_FSL_MPIC_42]	= &kvm_mpic_ops,
+#endif
+
+#ifdef CONFIG_KVM_XICS
+	[KVM_DEV_TYPE_XICS]		= &kvm_xics_ops,
+#endif
+
+#ifdef CONFIG_KVM_VFIO
+	[KVM_DEV_TYPE_VFIO]		= &kvm_vfio_ops,
+#endif
+
+#ifdef CONFIG_KVM_ARM_VGIC
+	[KVM_DEV_TYPE_ARM_VGIC_V2]	= &kvm_arm_vgic_v2_ops,
+#endif
+};
+
+int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
+{
+	if (type >= ARRAY_SIZE(kvm_device_ops_table))
+		return -ENOSPC;
+
+	if (kvm_device_ops_table[type] != NULL)
+		return -EEXIST;
+
+	kvm_device_ops_table[type] = ops;
+	return 0;
+}
+
 static int kvm_ioctl_create_device(struct kvm *kvm,
 				   struct kvm_create_device *cd)
 {
@@ -2269,31 +2300,12 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
 	int ret;
 
-	switch (cd->type) {
-#ifdef CONFIG_KVM_MPIC
-	case KVM_DEV_TYPE_FSL_MPIC_20:
-	case KVM_DEV_TYPE_FSL_MPIC_42:
-		ops = &kvm_mpic_ops;
-		break;
-#endif
-#ifdef CONFIG_KVM_XICS
-	case KVM_DEV_TYPE_XICS:
-		ops = &kvm_xics_ops;
-		break;
-#endif
-#ifdef CONFIG_KVM_VFIO
-	case KVM_DEV_TYPE_VFIO:
-		ops = &kvm_vfio_ops;
-		break;
-#endif
-#ifdef CONFIG_KVM_ARM_VGIC
-	case KVM_DEV_TYPE_ARM_VGIC_V2:
-		ops = &kvm_arm_vgic_v2_ops;
-		break;
-#endif
-	default:
+	if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
+		return -ENODEV;
+
+	ops = kvm_device_ops_table[cd->type];
+	if (ops == NULL)
 		return -ENODEV;
-	}
 
 	if (test)
 		return 0;

From 9e95eca3b761209d08e97144de3962f355ee2078 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Sep 2014 10:27:34 +0100
Subject: [PATCH 0696/1185] KVM: ARM: vgic: register kvm_device_ops dynamically

Now that we have a dynamic means to register kvm_device_ops, use that
for the ARM VGIC, instead of relying on the static table.

Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c06a841bf36340e9e917ce60d11a6425ac85d0bd)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h |   1 -
 virt/kvm/arm/vgic.c      | 157 ++++++++++++++++++++-------------------
 virt/kvm/kvm_main.c      |   4 -
 3 files changed, 79 insertions(+), 83 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index fc7b4270bc4a..ed20bc0d209b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1027,7 +1027,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_vfio_ops;
-extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index efe6eee2e7eb..8c95b2468cd8 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1522,83 +1522,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static void vgic_init_maintenance_interrupt(void *info)
-{
-	enable_percpu_irq(vgic->maint_irq, 0);
-}
-
-static int vgic_cpu_notify(struct notifier_block *self,
-			   unsigned long action, void *cpu)
-{
-	switch (action) {
-	case CPU_STARTING:
-	case CPU_STARTING_FROZEN:
-		vgic_init_maintenance_interrupt(NULL);
-		break;
-	case CPU_DYING:
-	case CPU_DYING_FROZEN:
-		disable_percpu_irq(vgic->maint_irq);
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block vgic_cpu_nb = {
-	.notifier_call = vgic_cpu_notify,
-};
-
-static const struct of_device_id vgic_ids[] = {
-	{ .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
-	{ .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
-	{},
-};
-
-int kvm_vgic_hyp_init(void)
-{
-	const struct of_device_id *matched_id;
-	const int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
-				const struct vgic_params **);
-	struct device_node *vgic_node;
-	int ret;
-
-	vgic_node = of_find_matching_node_and_match(NULL,
-						    vgic_ids, &matched_id);
-	if (!vgic_node) {
-		kvm_err("error: no compatible GIC node found\n");
-		return -ENODEV;
-	}
-
-	vgic_probe = matched_id->data;
-	ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
-	if (ret)
-		return ret;
-
-	ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
-				 "vgic", kvm_get_running_vcpus());
-	if (ret) {
-		kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
-		return ret;
-	}
-
-	ret = __register_cpu_notifier(&vgic_cpu_nb);
-	if (ret) {
-		kvm_err("Cannot register vgic CPU notifier\n");
-		goto out_free_irq;
-	}
-
-	/* Callback into for arch code for setup */
-	vgic_arch_setup(vgic);
-
-	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
-
-	return 0;
-
-out_free_irq:
-	free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
-	return ret;
-}
-
 /**
  * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
  * @kvm: pointer to the kvm struct
@@ -2062,7 +1985,7 @@ static int vgic_create(struct kvm_device *dev, u32 type)
 	return kvm_vgic_create(dev->kvm);
 }
 
-struct kvm_device_ops kvm_arm_vgic_v2_ops = {
+static struct kvm_device_ops kvm_arm_vgic_v2_ops = {
 	.name = "kvm-arm-vgic",
 	.create = vgic_create,
 	.destroy = vgic_destroy,
@@ -2070,3 +1993,81 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = {
 	.get_attr = vgic_get_attr,
 	.has_attr = vgic_has_attr,
 };
+
+static void vgic_init_maintenance_interrupt(void *info)
+{
+	enable_percpu_irq(vgic->maint_irq, 0);
+}
+
+static int vgic_cpu_notify(struct notifier_block *self,
+			   unsigned long action, void *cpu)
+{
+	switch (action) {
+	case CPU_STARTING:
+	case CPU_STARTING_FROZEN:
+		vgic_init_maintenance_interrupt(NULL);
+		break;
+	case CPU_DYING:
+	case CPU_DYING_FROZEN:
+		disable_percpu_irq(vgic->maint_irq);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block vgic_cpu_nb = {
+	.notifier_call = vgic_cpu_notify,
+};
+
+static const struct of_device_id vgic_ids[] = {
+	{ .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
+	{ .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
+	{},
+};
+
+int kvm_vgic_hyp_init(void)
+{
+	const struct of_device_id *matched_id;
+	int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
+			  const struct vgic_params **);
+	struct device_node *vgic_node;
+	int ret;
+
+	vgic_node = of_find_matching_node_and_match(NULL,
+						    vgic_ids, &matched_id);
+	if (!vgic_node) {
+		kvm_err("error: no compatible GIC node found\n");
+		return -ENODEV;
+	}
+
+	vgic_probe = matched_id->data;
+	ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
+	if (ret)
+		return ret;
+
+	ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
+				 "vgic", kvm_get_running_vcpus());
+	if (ret) {
+		kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
+		return ret;
+	}
+
+	ret = __register_cpu_notifier(&vgic_cpu_nb);
+	if (ret) {
+		kvm_err("Cannot register vgic CPU notifier\n");
+		goto out_free_irq;
+	}
+
+	/* Callback into for arch code for setup */
+	vgic_arch_setup(vgic);
+
+	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
+
+	return kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
+				       KVM_DEV_TYPE_ARM_VGIC_V2);
+
+out_free_irq:
+	free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
+	return ret;
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index df006d12c14b..111d560418f3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2274,10 +2274,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
 #ifdef CONFIG_KVM_VFIO
 	[KVM_DEV_TYPE_VFIO]		= &kvm_vfio_ops,
 #endif
-
-#ifdef CONFIG_KVM_ARM_VGIC
-	[KVM_DEV_TYPE_ARM_VGIC_V2]	= &kvm_arm_vgic_v2_ops,
-#endif
 };
 
 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)

From 7bd2f48101c8f43cd36e9920c7315df8575b61a8 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Sep 2014 10:27:36 +0100
Subject: [PATCH 0697/1185] KVM: VFIO: register kvm_device_ops dynamically

Now that we have a dynamic means to register kvm_device_ops, use that
for the VFIO kvm device, instead of relying on the static table.

This is achieved by a module_init call to register the ops with KVM.

Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Alex Williamson <Alex.Williamson@redhat.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 80ce1639727e9d38729c34f162378508c307ca25)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h |  1 -
 virt/kvm/kvm_main.c      |  4 ----
 virt/kvm/vfio.c          | 22 +++++++++++++++-------
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ed20bc0d209b..f64e941a4213 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1026,7 +1026,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
-extern struct kvm_device_ops kvm_vfio_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 111d560418f3..f019669674a5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2270,10 +2270,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
 #ifdef CONFIG_KVM_XICS
 	[KVM_DEV_TYPE_XICS]		= &kvm_xics_ops,
 #endif
-
-#ifdef CONFIG_KVM_VFIO
-	[KVM_DEV_TYPE_VFIO]		= &kvm_vfio_ops,
-#endif
 };
 
 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 597c258245ea..475487e238e1 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -189,6 +189,16 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
 	kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
 }
 
+static int kvm_vfio_create(struct kvm_device *dev, u32 type);
+
+static struct kvm_device_ops kvm_vfio_ops = {
+	.name = "kvm-vfio",
+	.create = kvm_vfio_create,
+	.destroy = kvm_vfio_destroy,
+	.set_attr = kvm_vfio_set_attr,
+	.has_attr = kvm_vfio_has_attr,
+};
+
 static int kvm_vfio_create(struct kvm_device *dev, u32 type)
 {
 	struct kvm_device *tmp;
@@ -211,10 +221,8 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
 	return 0;
 }
 
-struct kvm_device_ops kvm_vfio_ops = {
-	.name = "kvm-vfio",
-	.create = kvm_vfio_create,
-	.destroy = kvm_vfio_destroy,
-	.set_attr = kvm_vfio_set_attr,
-	.has_attr = kvm_vfio_has_attr,
-};
+static int __init kvm_vfio_ops_init(void)
+{
+	return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
+}
+module_init(kvm_vfio_ops_init);

From fa0603dfed451534fda7856d1301a6f81ae5de82 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 9 Jun 2014 12:27:18 +0200
Subject: [PATCH 0698/1185] arm/arm64: KVM: Rename irq_state to irq_pending

The irq_state field on the distributor struct is ambiguous in its
meaning; the comment says it's the level of the input pin, but that
doesn't make much sense for edge-triggered interrupts.  The code
actually uses this state variable to check if the interrupt is in the
pending state on the distributor so clarify the comment and rename the
actual variable and accessor methods.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 227844f53864077ccaefe01d0960fcccc03445ce)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  4 ++--
 virt/kvm/arm/vgic.c    | 52 +++++++++++++++++++++---------------------
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 35b0c121bb65..388d442eecb5 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -140,8 +140,8 @@ struct vgic_dist {
 	/* Interrupt enabled (one bit per IRQ) */
 	struct vgic_bitmap	irq_enabled;
 
-	/* Interrupt 'pin' level */
-	struct vgic_bitmap	irq_state;
+	/* Interrupt state is pending on the distributor */
+	struct vgic_bitmap	irq_pending;
 
 	/* Level-triggered interrupt in progress */
 	struct vgic_bitmap	irq_active;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 8c95b2468cd8..3d04a6de6f23 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -37,7 +37,7 @@
  *
  * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
  *   something is pending
- * - VGIC pending interrupts are stored on the vgic.irq_state vgic
+ * - VGIC pending interrupts are stored on the vgic.irq_pending vgic
  *   bitmap (this bitmap is updated by both user land ioctls and guest
  *   mmio ops, and other in-kernel peripherals such as the
  *   arch. timers) and indicate the 'wire' state.
@@ -45,8 +45,8 @@
  *   recalculated
  * - To calculate the oracle, we need info for each cpu from
  *   compute_pending_for_cpu, which considers:
- *   - PPI: dist->irq_state & dist->irq_enable
- *   - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target
+ *   - PPI: dist->irq_pending & dist->irq_enable
+ *   - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
  *   - irq_spi_target is a 'formatted' version of the GICD_ICFGR
  *     registers, stored on each vcpu. We only keep one bit of
  *     information per interrupt, making sure that only one vcpu can
@@ -221,21 +221,21 @@ static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq);
+	return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
 }
 
-static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
+static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1);
+	vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
 }
 
-static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq)
+static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0);
+	vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0);
 }
 
 static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
@@ -409,7 +409,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 					struct kvm_exit_mmio *mmio,
 					phys_addr_t offset)
 {
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
+	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending,
 				       vcpu->vcpu_id, offset);
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
@@ -425,7 +425,7 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
 					  struct kvm_exit_mmio *mmio,
 					  phys_addr_t offset)
 {
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
+	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending,
 				       vcpu->vcpu_id, offset);
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
@@ -651,7 +651,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 * is fine, then we are only setting a few bits that were
 		 * already set.
 		 */
-		vgic_dist_irq_set(vcpu, lr.irq);
+		vgic_dist_irq_set_pending(vcpu, lr.irq);
 		if (lr.irq < VGIC_NR_SGIS)
 			dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source;
 		lr.state &= ~LR_STATE_PENDING;
@@ -932,7 +932,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 	kvm_for_each_vcpu(c, vcpu, kvm) {
 		if (target_cpus & 1) {
 			/* Flag the SGI as pending */
-			vgic_dist_irq_set(vcpu, sgi);
+			vgic_dist_irq_set_pending(vcpu, sgi);
 			dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
 			kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
 		}
@@ -952,11 +952,11 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 	pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
 	pend_shared = vcpu->arch.vgic_cpu.pending_shared;
 
-	pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id);
+	pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
 	enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
 	bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
 
-	pending = vgic_bitmap_get_shared_map(&dist->irq_state);
+	pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
 	enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
 	bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
 	bitmap_and(pend_shared, pend_shared,
@@ -1160,7 +1160,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 	 * our emulated gic and can get rid of them.
 	 */
 	if (!sources) {
-		vgic_dist_irq_clear(vcpu, irq);
+		vgic_dist_irq_clear_pending(vcpu, irq);
 		vgic_cpu_irq_clear(vcpu, irq);
 		return true;
 	}
@@ -1175,7 +1175,7 @@ static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
 
 	if (vgic_queue_irq(vcpu, 0, irq)) {
 		if (vgic_irq_is_edge(vcpu, irq)) {
-			vgic_dist_irq_clear(vcpu, irq);
+			vgic_dist_irq_clear_pending(vcpu, irq);
 			vgic_cpu_irq_clear(vcpu, irq);
 		} else {
 			vgic_irq_set_active(vcpu, irq);
@@ -1376,7 +1376,7 @@ static void vgic_kick_vcpus(struct kvm *kvm)
 
 static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
 {
-	int is_edge = vgic_irq_is_edge(vcpu, irq);
+	int edge_triggered = vgic_irq_is_edge(vcpu, irq);
 	int state = vgic_dist_irq_is_pending(vcpu, irq);
 
 	/*
@@ -1384,26 +1384,26 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
 	 * - edge triggered and we have a rising edge
 	 * - level triggered and we change level
 	 */
-	if (is_edge)
+	if (edge_triggered)
 		return level > state;
 	else
 		return level != state;
 }
 
-static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
+static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 				  unsigned int irq_num, bool level)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct kvm_vcpu *vcpu;
-	int is_edge, is_level;
+	int edge_triggered, level_triggered;
 	int enabled;
 	bool ret = true;
 
 	spin_lock(&dist->lock);
 
 	vcpu = kvm_get_vcpu(kvm, cpuid);
-	is_edge = vgic_irq_is_edge(vcpu, irq_num);
-	is_level = !is_edge;
+	edge_triggered = vgic_irq_is_edge(vcpu, irq_num);
+	level_triggered = !edge_triggered;
 
 	if (!vgic_validate_injection(vcpu, irq_num, level)) {
 		ret = false;
@@ -1418,9 +1418,9 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
 	kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
 
 	if (level)
-		vgic_dist_irq_set(vcpu, irq_num);
+		vgic_dist_irq_set_pending(vcpu, irq_num);
 	else
-		vgic_dist_irq_clear(vcpu, irq_num);
+		vgic_dist_irq_clear_pending(vcpu, irq_num);
 
 	enabled = vgic_irq_is_enabled(vcpu, irq_num);
 
@@ -1429,7 +1429,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
 		goto out;
 	}
 
-	if (is_level && vgic_irq_is_active(vcpu, irq_num)) {
+	if (level_triggered && vgic_irq_is_active(vcpu, irq_num)) {
 		/*
 		 * Level interrupt in progress, will be picked up
 		 * when EOId.
@@ -1466,7 +1466,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 			bool level)
 {
-	if (vgic_update_irq_state(kvm, cpuid, irq_num, level))
+	if (vgic_update_irq_pending(kvm, cpuid, irq_num, level))
 		vgic_kick_vcpus(kvm);
 
 	return 0;

From 25b3fb8068b572c2b9f549b17f513092309aa610 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 9 Jun 2014 12:55:13 +0200
Subject: [PATCH 0699/1185] arm/arm64: KVM: Rename irq_active to irq_queued

We have a special bitmap on the distributor struct to keep track of when
level-triggered interrupts are queued on the list registers.  This was
named irq_active, which is confusing, because the active state of an
interrupt as per the GIC spec is a different thing, not specifically
related to edge-triggered/level-triggered configurations but rather
indicates an interrupt which has been ack'ed but not yet eoi'ed.

Rename the bitmap and the corresponding accessor functions to irq_queued
to clarify what this is actually used for.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit dbf20f9d8105cca531614c8bff9a74351e8e67e7)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  4 ++--
 virt/kvm/arm/vgic.c    | 33 +++++++++++++++++++--------------
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 388d442eecb5..7d8e61fa9928 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -143,8 +143,8 @@ struct vgic_dist {
 	/* Interrupt state is pending on the distributor */
 	struct vgic_bitmap	irq_pending;
 
-	/* Level-triggered interrupt in progress */
-	struct vgic_bitmap	irq_active;
+	/* Level-triggered interrupt queued on VCPU interface */
+	struct vgic_bitmap	irq_queued;
 
 	/* Interrupt priority. Not used yet. */
 	struct vgic_bytemap	irq_priority;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 3d04a6de6f23..769cc7177f10 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -60,12 +60,12 @@
  * the 'line' again. This is achieved as such:
  *
  * - When a level interrupt is moved onto a vcpu, the corresponding
- *   bit in irq_active is set. As long as this bit is set, the line
+ *   bit in irq_queued is set. As long as this bit is set, the line
  *   will be ignored for further interrupts. The interrupt is injected
  *   into the vcpu with the GICH_LR_EOI bit set (generate a
  *   maintenance interrupt on EOI).
  * - When the interrupt is EOIed, the maintenance interrupt fires,
- *   and clears the corresponding bit in irq_active. This allow the
+ *   and clears the corresponding bit in irq_queued. This allows the
  *   interrupt line to be sampled again.
  */
 
@@ -196,25 +196,25 @@ static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
 	return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
 }
 
-static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
+static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
+	return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
 }
 
-static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
+static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
+	vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1);
 }
 
-static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
+static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
+	vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
 }
 
 static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
@@ -256,6 +256,11 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
 			  vcpu->arch.vgic_cpu.pending_shared);
 }
 
+static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
+{
+	return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
+}
+
 static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
 {
 	return le32_to_cpu(*((u32 *)mmio->data)) & mask;
@@ -1079,8 +1084,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 
 		if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
 			vgic_retire_lr(lr, vlr.irq, vcpu);
-			if (vgic_irq_is_active(vcpu, vlr.irq))
-				vgic_irq_clear_active(vcpu, vlr.irq);
+			if (vgic_irq_is_queued(vcpu, vlr.irq))
+				vgic_irq_clear_queued(vcpu, vlr.irq);
 		}
 	}
 }
@@ -1170,7 +1175,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 
 static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
 {
-	if (vgic_irq_is_active(vcpu, irq))
+	if (!vgic_can_sample_irq(vcpu, irq))
 		return true; /* level interrupt, already queued */
 
 	if (vgic_queue_irq(vcpu, 0, irq)) {
@@ -1178,7 +1183,7 @@ static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
 			vgic_dist_irq_clear_pending(vcpu, irq);
 			vgic_cpu_irq_clear(vcpu, irq);
 		} else {
-			vgic_irq_set_active(vcpu, irq);
+			vgic_irq_set_queued(vcpu, irq);
 		}
 
 		return true;
@@ -1262,7 +1267,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 		for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
 			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
-			vgic_irq_clear_active(vcpu, vlr.irq);
+			vgic_irq_clear_queued(vcpu, vlr.irq);
 			WARN_ON(vlr.state & LR_STATE_MASK);
 			vlr.state = 0;
 			vgic_set_lr(vcpu, lr, vlr);
@@ -1429,7 +1434,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 		goto out;
 	}
 
-	if (level_triggered && vgic_irq_is_active(vcpu, irq_num)) {
+	if (!vgic_can_sample_irq(vcpu, irq_num)) {
 		/*
 		 * Level interrupt in progress, will be picked up
 		 * when EOId.

From 520445af8ef7b1e8d17b692c0957b3802b90d98a Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sat, 14 Jun 2014 22:37:33 +0200
Subject: [PATCH 0700/1185] arm/arm64: KVM: vgic: Clear queued flags on unqueue

If we unqueue a level-triggered interrupt completely, and the LR does
not stick around in the active state (and will therefore no longer
generate a maintenance interrupt), then we should clear the queued flag
so that the vgic can actually queue this level-triggered interrupt at a
later time and deal with its pending state then.

Note: This should actually be properly fixed to handle the active state
on the distributor.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit cced50c9280ef7ca1af48080707a170efa1adfa0)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 769cc7177f10..c7d068976069 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -667,8 +667,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 * active), then the LR does not hold any useful info and can
 		 * be marked as free for other use.
 		 */
-		if (!(lr.state & LR_STATE_MASK))
+		if (!(lr.state & LR_STATE_MASK)) {
 			vgic_retire_lr(i, lr.irq, vcpu);
+			vgic_irq_clear_queued(vcpu, lr.irq);
+		}
 
 		/* Finally update the VGIC state. */
 		vgic_update_state(vcpu->kvm);

From fe8a7fe10d123135d9f8fee2eb0b0c14b31d609d Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sat, 14 Jun 2014 21:54:51 +0200
Subject: [PATCH 0701/1185] arm/arm64: KVM: vgic: Improve handling of
 GICD_I{CS}PENDRn

Writes to GICD_ISPENDRn and GICD_ICPENDRn are currently not handled
correctly for level-triggered interrupts.  The spec states that for
level-triggered interrupts, writes to the GICD_ISPENDRn activate the
output of a flip-flop which is in turn or'ed with the actual input
interrupt signal.  Correspondingly, writes to GICD_ICPENDRn simply
deactivate the output of that flip-flop, but do not (of course) affect
the external input signal.  Reads from GICC_IAR will also deactivate the
flip-flop output.

This requires us to track the state of the level-input separately from
the state in the flip-flop.  We therefore introduce two new variables on
the distributor struct to track these two states.  Astute readers may
notice that this is introducing more state than required (because an OR
of the two states gives you the pending state), but the remaining vgic
code uses the pending bitmap for optimized operations to figure out, at
the end of the day, if an interrupt is pending or not on the distributor
side.  Refactoring the code to consider the two state variables in all
the places where we currently access the precomputed pending value did
not look pretty.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit faa1b46c3e9f4d40359aee04ff275eea5f4cae3a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  16 +++++-
 virt/kvm/arm/vgic.c    | 119 +++++++++++++++++++++++++++++++++++++----
 2 files changed, 123 insertions(+), 12 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 7d8e61fa9928..f074539c6ac5 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -140,9 +140,23 @@ struct vgic_dist {
 	/* Interrupt enabled (one bit per IRQ) */
 	struct vgic_bitmap	irq_enabled;
 
-	/* Interrupt state is pending on the distributor */
+	/* Level-triggered interrupt external input is asserted */
+	struct vgic_bitmap	irq_level;
+
+	/*
+	 * Interrupt state is pending on the distributor
+	 */
 	struct vgic_bitmap	irq_pending;
 
+	/*
+	 * Tracks writes to GICD_ISPENDRn and GICD_ICPENDRn for level-triggered
+	 * interrupts.  Essentially holds the state of the flip-flop in
+	 * Figure 4-10 on page 4-101 in ARM IHI 0048B.b.
+	 * Once set, it is only cleared for level-triggered interrupts on
+	 * guest ACKs (when we queue it) or writes to GICD_ICPENDRn.
+	 */
+	struct vgic_bitmap	irq_soft_pend;
+
 	/* Level-triggered interrupt queued on VCPU interface */
 	struct vgic_bitmap	irq_queued;
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index c7d068976069..07e97de1851b 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -67,6 +67,11 @@
  * - When the interrupt is EOIed, the maintenance interrupt fires,
  *   and clears the corresponding bit in irq_queued. This allows the
  *   interrupt line to be sampled again.
+ * - Note that level-triggered interrupts can also be set to pending from
+ *   writes to GICD_ISPENDRn and lowering the external input line does not
+ *   cause the interrupt to become inactive in such a situation.
+ *   Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
+ *   inactive as long as the external input line is held high.
  */
 
 #define VGIC_ADDR_UNDEF		(-1)
@@ -217,6 +222,41 @@ static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
 	vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
 }
 
+static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0);
+}
+
+static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
+}
+
 static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -414,11 +454,26 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 					struct kvm_exit_mmio *mmio,
 					phys_addr_t offset)
 {
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending,
-				       vcpu->vcpu_id, offset);
+	u32 *reg;
+	u32 level_mask;
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset);
+	level_mask = (~(*reg));
+
+	/* Mark both level and edge triggered irqs as pending */
+	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+
 	if (mmio->is_write) {
+		/* Set the soft-pending flag only for level-triggered irqs */
+		reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
+					  vcpu->vcpu_id, offset);
+		vgic_reg_access(mmio, reg, offset,
+				ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+		*reg &= level_mask;
+
 		vgic_update_state(vcpu->kvm);
 		return true;
 	}
@@ -430,11 +485,27 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
 					  struct kvm_exit_mmio *mmio,
 					  phys_addr_t offset)
 {
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending,
-				       vcpu->vcpu_id, offset);
+	u32 *level_active;
+	u32 *reg;
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
 	if (mmio->is_write) {
+		/* Re-set level triggered level-active interrupts */
+		level_active = vgic_bitmap_get_reg(&dist->irq_level,
+					  vcpu->vcpu_id, offset);
+		reg = vgic_bitmap_get_reg(&dist->irq_pending,
+					  vcpu->vcpu_id, offset);
+		*reg |= *level_active;
+
+		/* Clear soft-pending flags */
+		reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
+					  vcpu->vcpu_id, offset);
+		vgic_reg_access(mmio, reg, offset,
+				ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+
 		vgic_update_state(vcpu->kvm);
 		return true;
 	}
@@ -1268,17 +1339,32 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 
 		for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
 			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
+			WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
 
 			vgic_irq_clear_queued(vcpu, vlr.irq);
 			WARN_ON(vlr.state & LR_STATE_MASK);
 			vlr.state = 0;
 			vgic_set_lr(vcpu, lr, vlr);
 
+			/*
+			 * If the IRQ was EOIed it was also ACKed and we we
+			 * therefore assume we can clear the soft pending
+			 * state (should it had been set) for this interrupt.
+			 *
+			 * Note: if the IRQ soft pending state was set after
+			 * the IRQ was acked, it actually shouldn't be
+			 * cleared, but we have no way of knowing that unless
+			 * we start trapping ACKs when the soft-pending state
+			 * is set.
+			 */
+			vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
+
 			/* Any additional pending interrupt? */
-			if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) {
+			if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
 				vgic_cpu_irq_set(vcpu, vlr.irq);
 				level_pending = true;
 			} else {
+				vgic_dist_irq_clear_pending(vcpu, vlr.irq);
 				vgic_cpu_irq_clear(vcpu, vlr.irq);
 			}
 
@@ -1384,17 +1470,19 @@ static void vgic_kick_vcpus(struct kvm *kvm)
 static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
 {
 	int edge_triggered = vgic_irq_is_edge(vcpu, irq);
-	int state = vgic_dist_irq_is_pending(vcpu, irq);
 
 	/*
 	 * Only inject an interrupt if:
 	 * - edge triggered and we have a rising edge
 	 * - level triggered and we change level
 	 */
-	if (edge_triggered)
+	if (edge_triggered) {
+		int state = vgic_dist_irq_is_pending(vcpu, irq);
 		return level > state;
-	else
+	} else {
+		int state = vgic_dist_irq_get_level(vcpu, irq);
 		return level != state;
+	}
 }
 
 static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
@@ -1424,10 +1512,19 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 
 	kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
 
-	if (level)
+	if (level) {
+		if (level_triggered)
+			vgic_dist_irq_set_level(vcpu, irq_num);
 		vgic_dist_irq_set_pending(vcpu, irq_num);
-	else
-		vgic_dist_irq_clear_pending(vcpu, irq_num);
+	} else {
+		if (level_triggered) {
+			vgic_dist_irq_clear_level(vcpu, irq_num);
+			if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
+				vgic_dist_irq_clear_pending(vcpu, irq_num);
+		} else {
+			vgic_dist_irq_clear_pending(vcpu, irq_num);
+		}
+	}
 
 	enabled = vgic_irq_is_enabled(vcpu, irq_num);
 

From dd684bca5c33f00bdb31d2fb9819d6581489104f Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sat, 14 Jun 2014 22:30:45 +0200
Subject: [PATCH 0702/1185] arm/arm64: KVM: vgic: Fix SGI writes to
 GICD_I{CS}PENDR0

Writes to GICD_ISPENDR0 and GICD_ICPENDR0 ignore all settings of the
pending state for SGIs.  Make sure the implementation handles this
correctly.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 9da48b5502622f9f0e49df957521ec43a0c9f4c1)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 07e97de1851b..4936a68d4b9b 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -454,7 +454,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 					struct kvm_exit_mmio *mmio,
 					phys_addr_t offset)
 {
-	u32 *reg;
+	u32 *reg, orig;
 	u32 level_mask;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
@@ -463,6 +463,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 
 	/* Mark both level and edge triggered irqs as pending */
 	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+	orig = *reg;
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
 
@@ -474,6 +475,12 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 				ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
 		*reg &= level_mask;
 
+		/* Ignore writes to SGIs */
+		if (offset < 2) {
+			*reg &= ~0xffff;
+			*reg |= orig & 0xffff;
+		}
+
 		vgic_update_state(vcpu->kvm);
 		return true;
 	}
@@ -486,10 +493,11 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
 					  phys_addr_t offset)
 {
 	u32 *level_active;
-	u32 *reg;
+	u32 *reg, orig;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
 	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+	orig = *reg;
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
 	if (mmio->is_write) {
@@ -500,6 +508,12 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
 					  vcpu->vcpu_id, offset);
 		*reg |= *level_active;
 
+		/* Ignore writes to SGIs */
+		if (offset < 2) {
+			*reg &= ~0xffff;
+			*reg |= orig & 0xffff;
+		}
+
 		/* Clear soft-pending flags */
 		reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
 					  vcpu->vcpu_id, offset);

From ce492b1937416dc0f6da42a887b5190b1ca9b4fe Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sat, 14 Jun 2014 22:34:04 +0200
Subject: [PATCH 0703/1185] arm/arm64: KVM: vgic: Clarify and correct vgic
 documentation

The VGIC virtual distributor implementation documentation was written a
very long time ago, before the true nature of the beast had been
partially absorbed into my bloodstream.  Clarify the docs.

Plus, it fixes an actual bug.  ICFRn, pfff.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 7e362919a59e6fc60e08ad1cf0b047291d1ca2e9)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 4936a68d4b9b..ff88dbcacc29 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -36,21 +36,22 @@
  * How the whole thing works (courtesy of Christoffer Dall):
  *
  * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
- *   something is pending
- * - VGIC pending interrupts are stored on the vgic.irq_pending vgic
- *   bitmap (this bitmap is updated by both user land ioctls and guest
- *   mmio ops, and other in-kernel peripherals such as the
- *   arch. timers) and indicate the 'wire' state.
+ *   something is pending on the CPU interface.
+ * - Interrupts that are pending on the distributor are stored on the
+ *   vgic.irq_pending vgic bitmap (this bitmap is updated by both user land
+ *   ioctls and guest mmio ops, and other in-kernel peripherals such as the
+ *   arch. timers).
  * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
  *   recalculated
  * - To calculate the oracle, we need info for each cpu from
  *   compute_pending_for_cpu, which considers:
  *   - PPI: dist->irq_pending & dist->irq_enable
  *   - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
- *   - irq_spi_target is a 'formatted' version of the GICD_ICFGR
+ *   - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
  *     registers, stored on each vcpu. We only keep one bit of
  *     information per interrupt, making sure that only one vcpu can
  *     accept the interrupt.
+ * - If any of the above state changes, we must recalculate the oracle.
  * - The same is true when injecting an interrupt, except that we only
  *   consider a single interrupt at a time. The irq_spi_cpu array
  *   contains the target CPU for each SPI.

From aef14f44d897ca82917d6e5d5ad847c4a9e24c40 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:00 +0100
Subject: [PATCH 0704/1185] KVM: ARM: vgic: plug irq injection race

As it stands, nothing prevents userspace from injecting an interrupt
before the guest's GIC is actually initialized.

This has gone unnoticed so far (as everything is pretty much statically
allocated), but ends up exploding in a spectacular way once we switch
to a more dynamic allocation (the GIC data structure isn't there yet).

The fix is to test for the "ready" flag in the VGIC distributor before
trying to inject the interrupt. Note that in order to avoid breaking
userspace, we have to ignore what is essentially an error.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 71afaba4a2e98bb7bdeba5078370ab43d46e67a1)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index ff88dbcacc29..5744a49d7680 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1585,7 +1585,8 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 			bool level)
 {
-	if (vgic_update_irq_pending(kvm, cpuid, irq_num, level))
+	if (likely(vgic_initialized(kvm)) &&
+	    vgic_update_irq_pending(kvm, cpuid, irq_num, level))
 		vgic_kick_vcpus(kvm);
 
 	return 0;

From 38cb2f7b9a9ea6312429da05575a4bf6e697b573 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:01 +0100
Subject: [PATCH 0705/1185] arm/arm64: KVM: vgic: switch to dynamic allocation

So far, all the VGIC data structures are statically defined by the
*maximum* number of vcpus and interrupts it supports. It means that
we always have to oversize it to cater for the worst case.

Start by changing the data structures to be dynamically sizeable,
and allocate them at runtime.

The sizes are still very static though.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit c1bfb577addd4867a82c4f235824a315d5afb94a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c     |   3 +
 include/kvm/arm_vgic.h |  76 ++++++++++---
 virt/kvm/arm/vgic.c    | 243 +++++++++++++++++++++++++++++++++++------
 3 files changed, 269 insertions(+), 53 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index c8ff64b54459..9e374158363a 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -161,6 +161,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 			kvm->vcpus[i] = NULL;
 		}
 	}
+
+	kvm_vgic_destroy(kvm);
 }
 
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -243,6 +245,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	kvm_mmu_free_memory_caches(vcpu);
 	kvm_timer_vcpu_terminate(vcpu);
+	kvm_vgic_vcpu_destroy(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f074539c6ac5..fd1b8f252da1 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -54,19 +54,33 @@
  * - a bunch of shared interrupts (SPI)
  */
 struct vgic_bitmap {
-	union {
-		u32 reg[VGIC_NR_PRIVATE_IRQS / 32];
-		DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS);
-	} percpu[VGIC_MAX_CPUS];
-	union {
-		u32 reg[VGIC_NR_SHARED_IRQS / 32];
-		DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS);
-	} shared;
+	/*
+	 * - One UL per VCPU for private interrupts (assumes UL is at
+	 *   least 32 bits)
+	 * - As many UL as necessary for shared interrupts.
+	 *
+	 * The private interrupts are accessed via the "private"
+	 * field, one UL per vcpu (the state for vcpu n is in
+	 * private[n]). The shared interrupts are accessed via the
+	 * "shared" pointer (IRQn state is at bit n-32 in the bitmap).
+	 */
+	unsigned long *private;
+	unsigned long *shared;
 };
 
 struct vgic_bytemap {
-	u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4];
-	u32 shared[VGIC_NR_SHARED_IRQS  / 4];
+	/*
+	 * - 8 u32 per VCPU for private interrupts
+	 * - As many u32 as necessary for shared interrupts.
+	 *
+	 * The private interrupts are accessed via the "private"
+	 * field, (the state for vcpu n is in private[n*8] to
+	 * private[n*8 + 7]). The shared interrupts are accessed via
+	 * the "shared" pointer (IRQn state is at byte (n-32)%4 of the
+	 * shared[(n-32)/4] word).
+	 */
+	u32 *private;
+	u32 *shared;
 };
 
 struct kvm_vcpu;
@@ -127,6 +141,9 @@ struct vgic_dist {
 	bool			in_kernel;
 	bool			ready;
 
+	int			nr_cpus;
+	int			nr_irqs;
+
 	/* Virtual control interface mapping */
 	void __iomem		*vctrl_base;
 
@@ -166,15 +183,36 @@ struct vgic_dist {
 	/* Level/edge triggered */
 	struct vgic_bitmap	irq_cfg;
 
-	/* Source CPU per SGI and target CPU */
-	u8			irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS];
+	/*
+	 * Source CPU per SGI and target CPU:
+	 *
+	 * Each byte represent a SGI observable on a VCPU, each bit of
+	 * this byte indicating if the corresponding VCPU has
+	 * generated this interrupt. This is a GICv2 feature only.
+	 *
+	 * For VCPUn (n < 8), irq_sgi_sources[n*16] to [n*16 + 15] are
+	 * the SGIs observable on VCPUn.
+	 */
+	u8			*irq_sgi_sources;
 
-	/* Target CPU for each IRQ */
-	u8			irq_spi_cpu[VGIC_NR_SHARED_IRQS];
-	struct vgic_bitmap	irq_spi_target[VGIC_MAX_CPUS];
+	/*
+	 * Target CPU for each SPI:
+	 *
+	 * Array of available SPI, each byte indicating the target
+	 * VCPU for SPI. IRQn (n >=32) is at irq_spi_cpu[n-32].
+	 */
+	u8			*irq_spi_cpu;
+
+	/*
+	 * Reverse lookup of irq_spi_cpu for faster compute pending:
+	 *
+	 * Array of bitmaps, one per VCPU, describing if IRQn is
+	 * routed to a particular VCPU.
+	 */
+	struct vgic_bitmap	*irq_spi_target;
 
 	/* Bitmap indicating which CPU has something pending */
-	unsigned long		irq_pending_on_cpu;
+	unsigned long		*irq_pending_on_cpu;
 #endif
 };
 
@@ -204,11 +242,11 @@ struct vgic_v3_cpu_if {
 struct vgic_cpu {
 #ifdef CONFIG_KVM_ARM_VGIC
 	/* per IRQ to LR mapping */
-	u8		vgic_irq_lr_map[VGIC_NR_IRQS];
+	u8		*vgic_irq_lr_map;
 
 	/* Pending interrupts on this VCPU */
 	DECLARE_BITMAP(	pending_percpu, VGIC_NR_PRIVATE_IRQS);
-	DECLARE_BITMAP(	pending_shared, VGIC_NR_SHARED_IRQS);
+	unsigned long	*pending_shared;
 
 	/* Bitmap of used/free list registers */
 	DECLARE_BITMAP(	lr_used, VGIC_V2_MAX_LRS);
@@ -239,7 +277,9 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_hyp_init(void);
 int kvm_vgic_init(struct kvm *kvm);
 int kvm_vgic_create(struct kvm *kvm);
+void kvm_vgic_destroy(struct kvm *kvm);
 int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 5744a49d7680..102dde58c549 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -95,6 +95,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
 static void vgic_update_state(struct kvm *kvm);
 static void vgic_kick_vcpus(struct kvm *kvm);
+static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi);
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
@@ -105,10 +106,8 @@ static const struct vgic_ops *vgic_ops;
 static const struct vgic_params *vgic;
 
 /*
- * struct vgic_bitmap contains unions that provide two views of
- * the same data. In one case it is an array of registers of
- * u32's, and in the other case it is a bitmap of unsigned
- * longs.
+ * struct vgic_bitmap contains a bitmap made of unsigned longs, but
+ * extracts u32s out of them.
  *
  * This does not work on 64-bit BE systems, because the bitmap access
  * will store two consecutive 32-bit words with the higher-addressed
@@ -124,23 +123,45 @@ static const struct vgic_params *vgic;
 #define REG_OFFSET_SWIZZLE	0
 #endif
 
+static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs)
+{
+	int nr_longs;
+
+	nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
+
+	b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL);
+	if (!b->private)
+		return -ENOMEM;
+
+	b->shared = b->private + nr_cpus;
+
+	return 0;
+}
+
+static void vgic_free_bitmap(struct vgic_bitmap *b)
+{
+	kfree(b->private);
+	b->private = NULL;
+	b->shared = NULL;
+}
+
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
 				int cpuid, u32 offset)
 {
 	offset >>= 2;
 	if (!offset)
-		return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE);
+		return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE;
 	else
-		return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
+		return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
 }
 
 static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
 				   int cpuid, int irq)
 {
 	if (irq < VGIC_NR_PRIVATE_IRQS)
-		return test_bit(irq, x->percpu[cpuid].reg_ul);
+		return test_bit(irq, x->private + cpuid);
 
-	return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul);
+	return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
 }
 
 static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
@@ -149,9 +170,9 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
 	unsigned long *reg;
 
 	if (irq < VGIC_NR_PRIVATE_IRQS) {
-		reg = x->percpu[cpuid].reg_ul;
+		reg = x->private + cpuid;
 	} else {
-		reg =  x->shared.reg_ul;
+		reg = x->shared;
 		irq -= VGIC_NR_PRIVATE_IRQS;
 	}
 
@@ -163,24 +184,49 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
 
 static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
 {
-	if (unlikely(cpuid >= VGIC_MAX_CPUS))
-		return NULL;
-	return x->percpu[cpuid].reg_ul;
+	return x->private + cpuid;
 }
 
 static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
 {
-	return x->shared.reg_ul;
+	return x->shared;
+}
+
+static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs)
+{
+	int size;
+
+	size  = nr_cpus * VGIC_NR_PRIVATE_IRQS;
+	size += nr_irqs - VGIC_NR_PRIVATE_IRQS;
+
+	x->private = kzalloc(size, GFP_KERNEL);
+	if (!x->private)
+		return -ENOMEM;
+
+	x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32);
+	return 0;
+}
+
+static void vgic_free_bytemap(struct vgic_bytemap *b)
+{
+	kfree(b->private);
+	b->private = NULL;
+	b->shared = NULL;
 }
 
 static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
 {
-	offset >>= 2;
-	BUG_ON(offset > (VGIC_NR_IRQS / 4));
-	if (offset < 8)
-		return x->percpu[cpuid] + offset;
-	else
-		return x->shared + offset - 8;
+	u32 *reg;
+
+	if (offset < VGIC_NR_PRIVATE_IRQS) {
+		reg = x->private;
+		offset += cpuid * VGIC_NR_PRIVATE_IRQS;
+	} else {
+		reg = x->shared;
+		offset -= VGIC_NR_PRIVATE_IRQS;
+	}
+
+	return reg + (offset / sizeof(u32));
 }
 
 #define VGIC_CFG_LEVEL	0
@@ -744,7 +790,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 */
 		vgic_dist_irq_set_pending(vcpu, lr.irq);
 		if (lr.irq < VGIC_NR_SGIS)
-			dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source;
+			*vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source;
 		lr.state &= ~LR_STATE_PENDING;
 		vgic_set_lr(vcpu, i, lr);
 
@@ -778,7 +824,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
 	/* Copy source SGIs from distributor side */
 	for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
 		int shift = 8 * (sgi - min_sgi);
-		reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift;
+		reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift;
 	}
 
 	mmio_data_write(mmio, ~0, reg);
@@ -802,14 +848,15 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
 	/* Clear pending SGIs on the distributor */
 	for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
 		u8 mask = reg >> (8 * (sgi - min_sgi));
+		u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
 		if (set) {
-			if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask)
+			if ((*src & mask) != mask)
 				updated = true;
-			dist->irq_sgi_sources[vcpu_id][sgi] |= mask;
+			*src |= mask;
 		} else {
-			if (dist->irq_sgi_sources[vcpu_id][sgi] & mask)
+			if (*src & mask)
 				updated = true;
-			dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask;
+			*src &= ~mask;
 		}
 	}
 
@@ -993,6 +1040,11 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	return true;
 }
 
+static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
+{
+	return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
+}
+
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 {
 	struct kvm *kvm = vcpu->kvm;
@@ -1026,7 +1078,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 		if (target_cpus & 1) {
 			/* Flag the SGI as pending */
 			vgic_dist_irq_set_pending(vcpu, sgi);
-			dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
+			*vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
 			kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
 		}
 
@@ -1073,14 +1125,14 @@ static void vgic_update_state(struct kvm *kvm)
 	int c;
 
 	if (!dist->enabled) {
-		set_bit(0, &dist->irq_pending_on_cpu);
+		set_bit(0, dist->irq_pending_on_cpu);
 		return;
 	}
 
 	kvm_for_each_vcpu(c, vcpu, kvm) {
 		if (compute_pending_for_cpu(vcpu)) {
 			pr_debug("CPU%d has pending interrupts\n", c);
-			set_bit(c, &dist->irq_pending_on_cpu);
+			set_bit(c, dist->irq_pending_on_cpu);
 		}
 	}
 }
@@ -1237,14 +1289,14 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 	int vcpu_id = vcpu->vcpu_id;
 	int c;
 
-	sources = dist->irq_sgi_sources[vcpu_id][irq];
+	sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
 
 	for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
 		if (vgic_queue_irq(vcpu, c, irq))
 			clear_bit(c, &sources);
 	}
 
-	dist->irq_sgi_sources[vcpu_id][irq] = sources;
+	*vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
 
 	/*
 	 * If the sources bitmap has been cleared it means that we
@@ -1332,7 +1384,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 		 * us. Claim we don't have anything pending. We'll
 		 * adjust that if needed while exiting.
 		 */
-		clear_bit(vcpu_id, &dist->irq_pending_on_cpu);
+		clear_bit(vcpu_id, dist->irq_pending_on_cpu);
 	}
 }
 
@@ -1430,7 +1482,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	/* Check if we still have something up our sleeve... */
 	pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
 	if (level_pending || pending < vgic->nr_lr)
-		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
+		set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
 }
 
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
@@ -1464,7 +1516,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 	if (!irqchip_in_kernel(vcpu->kvm))
 		return 0;
 
-	return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
+	return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
 }
 
 static void vgic_kick_vcpus(struct kvm *kvm)
@@ -1559,7 +1611,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 
 	if (level) {
 		vgic_cpu_irq_set(vcpu, irq_num);
-		set_bit(cpuid, &dist->irq_pending_on_cpu);
+		set_bit(cpuid, dist->irq_pending_on_cpu);
 	}
 
 out:
@@ -1603,6 +1655,32 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+	kfree(vgic_cpu->pending_shared);
+	kfree(vgic_cpu->vgic_irq_lr_map);
+	vgic_cpu->pending_shared = NULL;
+	vgic_cpu->vgic_irq_lr_map = NULL;
+}
+
+static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
+{
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+	int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
+	vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
+	vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL);
+
+	if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
+		kvm_vgic_vcpu_destroy(vcpu);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 /**
  * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
  * @vcpu: pointer to the vcpu struct
@@ -1642,6 +1720,97 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+void kvm_vgic_destroy(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_vgic_vcpu_destroy(vcpu);
+
+	vgic_free_bitmap(&dist->irq_enabled);
+	vgic_free_bitmap(&dist->irq_level);
+	vgic_free_bitmap(&dist->irq_pending);
+	vgic_free_bitmap(&dist->irq_soft_pend);
+	vgic_free_bitmap(&dist->irq_queued);
+	vgic_free_bitmap(&dist->irq_cfg);
+	vgic_free_bytemap(&dist->irq_priority);
+	if (dist->irq_spi_target) {
+		for (i = 0; i < dist->nr_cpus; i++)
+			vgic_free_bitmap(&dist->irq_spi_target[i]);
+	}
+	kfree(dist->irq_sgi_sources);
+	kfree(dist->irq_spi_cpu);
+	kfree(dist->irq_spi_target);
+	kfree(dist->irq_pending_on_cpu);
+	dist->irq_sgi_sources = NULL;
+	dist->irq_spi_cpu = NULL;
+	dist->irq_spi_target = NULL;
+	dist->irq_pending_on_cpu = NULL;
+}
+
+/*
+ * Allocate and initialize the various data structures. Must be called
+ * with kvm->lock held!
+ */
+static int vgic_init_maps(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct kvm_vcpu *vcpu;
+	int nr_cpus, nr_irqs;
+	int ret, i;
+
+	nr_cpus = dist->nr_cpus = VGIC_MAX_CPUS;
+	nr_irqs = dist->nr_irqs = VGIC_NR_IRQS;
+
+	ret  = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
+	ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
+	ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
+	ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
+	ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
+	ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
+	ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
+
+	if (ret)
+		goto out;
+
+	dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL);
+	dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL);
+	dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus,
+				       GFP_KERNEL);
+	dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
+					   GFP_KERNEL);
+	if (!dist->irq_sgi_sources ||
+	    !dist->irq_spi_cpu ||
+	    !dist->irq_spi_target ||
+	    !dist->irq_pending_on_cpu) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < nr_cpus; i++)
+		ret |= vgic_init_bitmap(&dist->irq_spi_target[i],
+					nr_cpus, nr_irqs);
+
+	if (ret)
+		goto out;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
+		if (ret) {
+			kvm_err("VGIC: Failed to allocate vcpu memory\n");
+			break;
+		}
+	}
+
+out:
+	if (ret)
+		kvm_vgic_destroy(kvm);
+
+	return ret;
+}
+
 /**
  * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
  * @kvm: pointer to the kvm struct
@@ -1722,6 +1891,10 @@ int kvm_vgic_create(struct kvm *kvm)
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
 
+	ret = vgic_init_maps(kvm);
+	if (ret)
+		kvm_err("Unable to allocate maps\n");
+
 out_unlock:
 	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
 		vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);

From ac2409584bc7bb3b8cff23884e65cf341860d674 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:02 +0100
Subject: [PATCH 0706/1185] arm/arm64: KVM: vgic: Parametrize
 VGIC_NR_SHARED_IRQS

Having a dynamic number of supported interrupts means that we
cannot rely on VGIC_NR_SHARED_IRQS being fixed anymore.

Instead, make it take the distributor structure as a parameter,
so it can return the right value.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit fb65ab63b8cae510ea1e43e68b5da2f9980aa6d5)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  1 -
 virt/kvm/arm/vgic.c    | 16 +++++++++++-----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index fd1b8f252da1..b2f9936df319 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -29,7 +29,6 @@
 #define VGIC_NR_SGIS		16
 #define VGIC_NR_PPIS		16
 #define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
-#define VGIC_NR_SHARED_IRQS	(VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
 #define VGIC_MAX_CPUS		KVM_MAX_VCPUS
 
 #define VGIC_V2_MAX_LRS		(1 << 6)
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 102dde58c549..a24fdacf5381 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1086,11 +1086,17 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 	}
 }
 
+static int vgic_nr_shared_irqs(struct vgic_dist *dist)
+{
+	return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
+}
+
 static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
 	unsigned long pending_private, pending_shared;
+	int nr_shared = vgic_nr_shared_irqs(dist);
 	int vcpu_id;
 
 	vcpu_id = vcpu->vcpu_id;
@@ -1103,15 +1109,15 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 
 	pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
 	enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
-	bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
+	bitmap_and(pend_shared, pending, enabled, nr_shared);
 	bitmap_and(pend_shared, pend_shared,
 		   vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
-		   VGIC_NR_SHARED_IRQS);
+		   nr_shared);
 
 	pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
-	pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS);
+	pending_shared = find_first_bit(pend_shared, nr_shared);
 	return (pending_private < VGIC_NR_PRIVATE_IRQS ||
-		pending_shared < VGIC_NR_SHARED_IRQS);
+		pending_shared < vgic_nr_shared_irqs(dist));
 }
 
 /*
@@ -1368,7 +1374,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 	}
 
 	/* SPIs */
-	for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) {
+	for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) {
 		if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
 			overflow = 1;
 	}

From 3e0fb55b4af81f2364b1969bdbf31bf67883a4d5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:03 +0100
Subject: [PATCH 0707/1185] arm/arm64: KVM: vgic: kill VGIC_MAX_CPUS

We now have the information about the number of CPU interfaces in
the distributor itself. Let's get rid of VGIC_MAX_CPUS, and just
rely on KVM_MAX_VCPUS where we don't have the choice. Yet.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit fc675e355e705a046df7b635d3f3330c0ad94569)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h | 3 +--
 virt/kvm/arm/vgic.c    | 6 +++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index b2f9936df319..3b73d7845124 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -29,13 +29,12 @@
 #define VGIC_NR_SGIS		16
 #define VGIC_NR_PPIS		16
 #define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
-#define VGIC_MAX_CPUS		KVM_MAX_VCPUS
 
 #define VGIC_V2_MAX_LRS		(1 << 6)
 #define VGIC_V3_MAX_LRS		16
 
 /* Sanity checks... */
-#if (VGIC_MAX_CPUS > 8)
+#if (KVM_MAX_VCPUS > 8)
 #error	Invalid number of CPU interfaces
 #endif
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index a24fdacf5381..df0700bd0f23 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1297,7 +1297,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 
 	sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
 
-	for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
+	for_each_set_bit(c, &sources, dist->nr_cpus) {
 		if (vgic_queue_irq(vcpu, c, irq))
 			clear_bit(c, &sources);
 	}
@@ -1700,7 +1700,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	int i;
 
-	if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
+	if (vcpu->vcpu_id >= dist->nr_cpus)
 		return -EBUSY;
 
 	for (i = 0; i < VGIC_NR_IRQS; i++) {
@@ -1767,7 +1767,7 @@ static int vgic_init_maps(struct kvm *kvm)
 	int nr_cpus, nr_irqs;
 	int ret, i;
 
-	nr_cpus = dist->nr_cpus = VGIC_MAX_CPUS;
+	nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS;
 	nr_irqs = dist->nr_irqs = VGIC_NR_IRQS;
 
 	ret  = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);

From 35a089f06fa6d1f02265be8b1e31e3775003d5c8 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:04 +0100
Subject: [PATCH 0708/1185] arm/arm64: KVM: vgic: handle out-of-range MMIO
 accesses

Now that we can (almost) dynamically size the number of interrupts,
we're facing an interesting issue:

We have to evaluate at runtime whether or not an access hits a valid
register, based on the sizing of this particular instance of the
distributor. Furthermore, the GIC spec says that accessing a reserved
register is RAZ/WI.

For this, add a new field to our range structure, indicating the number
of bits a single interrupt uses. That allows us to find out whether or
not the access is in range.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit c3c918361adcceb816c92b21dd95d2b46fb96a8f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  3 ++-
 virt/kvm/arm/vgic.c    | 56 +++++++++++++++++++++++++++++++++---------
 2 files changed, 47 insertions(+), 12 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 3b73d7845124..2767f939f47c 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -32,6 +32,7 @@
 
 #define VGIC_V2_MAX_LRS		(1 << 6)
 #define VGIC_V3_MAX_LRS		16
+#define VGIC_MAX_IRQS		1024
 
 /* Sanity checks... */
 #if (KVM_MAX_VCPUS > 8)
@@ -42,7 +43,7 @@
 #error "VGIC_NR_IRQS must be a multiple of 32"
 #endif
 
-#if (VGIC_NR_IRQS > 1024)
+#if (VGIC_NR_IRQS > VGIC_MAX_IRQS)
 #error "VGIC_NR_IRQS must be <= 1024"
 #endif
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index df0700bd0f23..de975c908301 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -895,6 +895,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
 struct mmio_range {
 	phys_addr_t base;
 	unsigned long len;
+	int bits_per_irq;
 	bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
 			    phys_addr_t offset);
 };
@@ -903,56 +904,67 @@ static const struct mmio_range vgic_dist_ranges[] = {
 	{
 		.base		= GIC_DIST_CTRL,
 		.len		= 12,
+		.bits_per_irq	= 0,
 		.handle_mmio	= handle_mmio_misc,
 	},
 	{
 		.base		= GIC_DIST_IGROUP,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_raz_wi,
 	},
 	{
 		.base		= GIC_DIST_ENABLE_SET,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_set_enable_reg,
 	},
 	{
 		.base		= GIC_DIST_ENABLE_CLEAR,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_clear_enable_reg,
 	},
 	{
 		.base		= GIC_DIST_PENDING_SET,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_set_pending_reg,
 	},
 	{
 		.base		= GIC_DIST_PENDING_CLEAR,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_clear_pending_reg,
 	},
 	{
 		.base		= GIC_DIST_ACTIVE_SET,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_raz_wi,
 	},
 	{
 		.base		= GIC_DIST_ACTIVE_CLEAR,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_raz_wi,
 	},
 	{
 		.base		= GIC_DIST_PRI,
-		.len		= VGIC_NR_IRQS,
+		.len		= VGIC_MAX_IRQS,
+		.bits_per_irq	= 8,
 		.handle_mmio	= handle_mmio_priority_reg,
 	},
 	{
 		.base		= GIC_DIST_TARGET,
-		.len		= VGIC_NR_IRQS,
+		.len		= VGIC_MAX_IRQS,
+		.bits_per_irq	= 8,
 		.handle_mmio	= handle_mmio_target_reg,
 	},
 	{
 		.base		= GIC_DIST_CONFIG,
-		.len		= VGIC_NR_IRQS / 4,
+		.len		= VGIC_MAX_IRQS / 4,
+		.bits_per_irq	= 2,
 		.handle_mmio	= handle_mmio_cfg_reg,
 	},
 	{
@@ -990,6 +1002,22 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges,
 	return NULL;
 }
 
+static bool vgic_validate_access(const struct vgic_dist *dist,
+				 const struct mmio_range *range,
+				 unsigned long offset)
+{
+	int irq;
+
+	if (!range->bits_per_irq)
+		return true;	/* Not an irq-based access */
+
+	irq = offset * 8 / range->bits_per_irq;
+	if (irq >= dist->nr_irqs)
+		return false;
+
+	return true;
+}
+
 /**
  * vgic_handle_mmio - handle an in-kernel MMIO access
  * @vcpu:	pointer to the vcpu performing the access
@@ -1029,7 +1057,13 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 
 	spin_lock(&vcpu->kvm->arch.vgic.lock);
 	offset = mmio->phys_addr - range->base - base;
-	updated_state = range->handle_mmio(vcpu, mmio, offset);
+	if (vgic_validate_access(dist, range, offset)) {
+		updated_state = range->handle_mmio(vcpu, mmio, offset);
+	} else {
+		vgic_reg_access(mmio, NULL, offset,
+				ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+		updated_state = false;
+	}
 	spin_unlock(&vcpu->kvm->arch.vgic.lock);
 	kvm_prepare_mmio(run, mmio);
 	kvm_handle_mmio_return(vcpu, run);

From ddfab003a2dbff1e226a24860e8f4f91a7d3d131 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:05 +0100
Subject: [PATCH 0709/1185] arm/arm64: KVM: vgic: kill VGIC_NR_IRQS

Nuke VGIC_NR_IRQS entirely, now that the distributor instance
contains the number of IRQs allocated to this GIC.

Also add VGIC_NR_IRQS_LEGACY to preserve the current API.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 5fb66da64064d0cb8dcce4cc8bf4cb1b921b13a0)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  6 +++---
 virt/kvm/arm/vgic.c    | 17 +++++++++++------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 2767f939f47c..aa20d4a7242f 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -25,7 +25,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
-#define VGIC_NR_IRQS		256
+#define VGIC_NR_IRQS_LEGACY	256
 #define VGIC_NR_SGIS		16
 #define VGIC_NR_PPIS		16
 #define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
@@ -39,11 +39,11 @@
 #error	Invalid number of CPU interfaces
 #endif
 
-#if (VGIC_NR_IRQS & 31)
+#if (VGIC_NR_IRQS_LEGACY & 31)
 #error "VGIC_NR_IRQS must be a multiple of 32"
 #endif
 
-#if (VGIC_NR_IRQS > VGIC_MAX_IRQS)
+#if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS)
 #error "VGIC_NR_IRQS must be <= 1024"
 #endif
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index de975c908301..49501bbc2709 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -439,7 +439,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
 
 	case 4:			/* GICD_TYPER */
 		reg  = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
-		reg |= (VGIC_NR_IRQS >> 5) - 1;
+		reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
 		vgic_reg_access(mmio, &reg, word_offset,
 				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
 		break;
@@ -1277,13 +1277,14 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	struct vgic_lr vlr;
 	int lr;
 
 	/* Sanitize the input... */
 	BUG_ON(sgi_source_id & ~7);
 	BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
-	BUG_ON(irq >= VGIC_NR_IRQS);
+	BUG_ON(irq >= dist->nr_irqs);
 
 	kvm_debug("Queue IRQ%d\n", irq);
 
@@ -1515,7 +1516,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 
 		vlr = vgic_get_lr(vcpu, lr);
 
-		BUG_ON(vlr.irq >= VGIC_NR_IRQS);
+		BUG_ON(vlr.irq >= dist->nr_irqs);
 		vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
 	}
 
@@ -1737,7 +1738,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	if (vcpu->vcpu_id >= dist->nr_cpus)
 		return -EBUSY;
 
-	for (i = 0; i < VGIC_NR_IRQS; i++) {
+	for (i = 0; i < dist->nr_irqs; i++) {
 		if (i < VGIC_NR_PPIS)
 			vgic_bitmap_set_irq_val(&dist->irq_enabled,
 						vcpu->vcpu_id, i, 1);
@@ -1802,7 +1803,11 @@ static int vgic_init_maps(struct kvm *kvm)
 	int ret, i;
 
 	nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS;
-	nr_irqs = dist->nr_irqs = VGIC_NR_IRQS;
+
+	if (!dist->nr_irqs)
+		dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
+
+	nr_irqs = dist->nr_irqs;
 
 	ret  = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
 	ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
@@ -1886,7 +1891,7 @@ int kvm_vgic_init(struct kvm *kvm)
 		goto out;
 	}
 
-	for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
+	for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4)
 		vgic_set_target_reg(kvm, 0, i);
 
 	kvm->arch.vgic.ready = true;

From b9ca28a414c2e5dfb707bcba625eb76f334c787f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:06 +0100
Subject: [PATCH 0710/1185] arm/arm64: KVM: vgic: delay vgic allocation until
 init time

It is now quite easy to delay the allocation of the vgic tables
until we actually require it to be up and running (when the first
vcpu is kicking around, or someone tries to access the GIC registers).

This allows us to allocate memory for the exact number of CPUs we
have. As nobody configures the number of interrupts just yet,
use a fallback to VGIC_NR_IRQS_LEGACY.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 4956f2bc1fdee4bc336532f3f34635a8534cedfd)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c     |  7 -------
 include/kvm/arm_vgic.h |  1 -
 virt/kvm/arm/vgic.c    | 42 +++++++++++++++++++++++++++++-------------
 3 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9e374158363a..072a2084005c 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -261,16 +261,9 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
-	int ret;
-
 	/* Force users to call KVM_ARM_VCPU_INIT */
 	vcpu->arch.target = -1;
 
-	/* Set up VGIC */
-	ret = kvm_vgic_vcpu_init(vcpu);
-	if (ret)
-		return ret;
-
 	/* Set up the timer */
 	kvm_timer_vcpu_init(vcpu);
 
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index aa20d4a7242f..2f2aac8448a4 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -277,7 +277,6 @@ int kvm_vgic_hyp_init(void);
 int kvm_vgic_init(struct kvm *kvm);
 int kvm_vgic_create(struct kvm *kvm);
 void kvm_vgic_destroy(struct kvm *kvm);
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 49501bbc2709..e7bca4bb7fd1 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1729,15 +1729,12 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
  * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
  * this vcpu and enable the VGIC for this VCPU
  */
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	int i;
 
-	if (vcpu->vcpu_id >= dist->nr_cpus)
-		return -EBUSY;
-
 	for (i = 0; i < dist->nr_irqs; i++) {
 		if (i < VGIC_NR_PPIS)
 			vgic_bitmap_set_irq_val(&dist->irq_enabled,
@@ -1757,8 +1754,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	vgic_cpu->nr_lr = vgic->nr_lr;
 
 	vgic_enable(vcpu);
-
-	return 0;
 }
 
 void kvm_vgic_destroy(struct kvm *kvm)
@@ -1802,8 +1797,17 @@ static int vgic_init_maps(struct kvm *kvm)
 	int nr_cpus, nr_irqs;
 	int ret, i;
 
-	nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS;
+	if (dist->nr_cpus)	/* Already allocated */
+		return 0;
 
+	nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
+	if (!nr_cpus)		/* No vcpus? Can't be good... */
+		return -EINVAL;
+
+	/*
+	 * If nobody configured the number of interrupts, use the
+	 * legacy one.
+	 */
 	if (!dist->nr_irqs)
 		dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
 
@@ -1849,6 +1853,9 @@ static int vgic_init_maps(struct kvm *kvm)
 		}
 	}
 
+	for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
+		vgic_set_target_reg(kvm, 0, i);
+
 out:
 	if (ret)
 		kvm_vgic_destroy(kvm);
@@ -1867,6 +1874,7 @@ static int vgic_init_maps(struct kvm *kvm)
  */
 int kvm_vgic_init(struct kvm *kvm)
 {
+	struct kvm_vcpu *vcpu;
 	int ret = 0, i;
 
 	if (!irqchip_in_kernel(kvm))
@@ -1884,6 +1892,12 @@ int kvm_vgic_init(struct kvm *kvm)
 		goto out;
 	}
 
+	ret = vgic_init_maps(kvm);
+	if (ret) {
+		kvm_err("Unable to allocate maps\n");
+		goto out;
+	}
+
 	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
 				    vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE);
 	if (ret) {
@@ -1891,11 +1905,13 @@ int kvm_vgic_init(struct kvm *kvm)
 		goto out;
 	}
 
-	for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4)
-		vgic_set_target_reg(kvm, 0, i);
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_vgic_vcpu_init(vcpu);
 
 	kvm->arch.vgic.ready = true;
 out:
+	if (ret)
+		kvm_vgic_destroy(kvm);
 	mutex_unlock(&kvm->lock);
 	return ret;
 }
@@ -1936,10 +1952,6 @@ int kvm_vgic_create(struct kvm *kvm)
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
 
-	ret = vgic_init_maps(kvm);
-	if (ret)
-		kvm_err("Unable to allocate maps\n");
-
 out_unlock:
 	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
 		vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
@@ -2140,6 +2152,10 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
 
 	mutex_lock(&dev->kvm->lock);
 
+	ret = vgic_init_maps(dev->kvm);
+	if (ret)
+		goto out;
+
 	if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
 		ret = -EINVAL;
 		goto out;

From e1fde0a1e746c70d026c1388c9d1c0bce278ae01 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:07 +0100
Subject: [PATCH 0711/1185] arm/arm64: KVM: vgic: make number of irqs a
 configurable attribute

In order to make the number of interrupts configurable, use the new
fancy device management API to add KVM_DEV_ARM_VGIC_GRP_NR_IRQS as
a VGIC configurable attribute.

Userspace can now specify the exact size of the GIC (by increments
of 32 interrupts).

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit a98f26f183801685ef57333de4bafd4bbc692c7c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 .../virtual/kvm/devices/arm-vgic.txt          | 10 +++++
 arch/arm/include/uapi/asm/kvm.h               |  1 +
 arch/arm64/include/uapi/asm/kvm.h             |  1 +
 virt/kvm/arm/vgic.c                           | 37 +++++++++++++++++++
 4 files changed, 49 insertions(+)

diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index 7f4e91b1316b..df8b0c7540b6 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -71,3 +71,13 @@ Groups:
   Errors:
     -ENODEV: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
+
+  KVM_DEV_ARM_VGIC_GRP_NR_IRQS
+  Attributes:
+    A value describing the number of interrupts (SGI, PPI and SPI) for
+    this GIC instance, ranging from 64 to 1024, in increments of 32.
+
+  Errors:
+    -EINVAL: Value set is out of the expected range
+    -EBUSY: Value has already be set, or GIC has already been initialized
+            with default values.
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 51257fda254b..09ee408c1a67 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -174,6 +174,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index f4ec5a674d05..8e38878c87c6 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -160,6 +160,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index e7bca4bb7fd1..43b56c696752 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -2253,6 +2253,36 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 
 		return vgic_attr_regs_access(dev, attr, &reg, true);
 	}
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+		u32 val;
+		int ret = 0;
+
+		if (get_user(val, uaddr))
+			return -EFAULT;
+
+		/*
+		 * We require:
+		 * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
+		 * - at most 1024 interrupts
+		 * - a multiple of 32 interrupts
+		 */
+		if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
+		    val > VGIC_MAX_IRQS ||
+		    (val & 31))
+			return -EINVAL;
+
+		mutex_lock(&dev->kvm->lock);
+
+		if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
+			ret = -EBUSY;
+		else
+			dev->kvm->arch.vgic.nr_irqs = val;
+
+		mutex_unlock(&dev->kvm->lock);
+
+		return ret;
+	}
 
 	}
 
@@ -2289,6 +2319,11 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		r = put_user(reg, uaddr);
 		break;
 	}
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+		r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
+		break;
+	}
 
 	}
 
@@ -2325,6 +2360,8 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
 		offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
 		return vgic_has_attr_regs(vgic_cpu_ranges, offset);
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+		return 0;
 	}
 	return -ENXIO;
 }

From b6c20297368fc6c782c0e5709f0d261bfdee2f6e Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 25 Sep 2014 18:41:07 +0200
Subject: [PATCH 0712/1185] arm/arm64: KVM: Fix set_clear_sgi_pend_reg offset

The sgi values calculated in read_set_clear_sgi_pend_reg() and
write_set_clear_sgi_pend_reg() were horribly incorrectly multiplied by 4
with catastrophic results in that subfunctions ended up overwriting
memory not allocated for the expected purpose.

This showed up as bugs in kfree() and the kernel complaining a lot if
you turn on memory debugging.

This addresses: http://marc.info/?l=kvm&m=141164910007868&w=2

Reported-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 0fea6d7628ed6e25a9ee1b67edf7c859718d39e8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 43b56c696752..506693152e47 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -816,7 +816,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	int sgi;
-	int min_sgi = (offset & ~0x3) * 4;
+	int min_sgi = (offset & ~0x3);
 	int max_sgi = min_sgi + 3;
 	int vcpu_id = vcpu->vcpu_id;
 	u32 reg = 0;
@@ -837,7 +837,7 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	int sgi;
-	int min_sgi = (offset & ~0x3) * 4;
+	int min_sgi = (offset & ~0x3);
 	int max_sgi = min_sgi + 3;
 	int vcpu_id = vcpu->vcpu_id;
 	u32 reg;

From 17434ac66579bb31d6a129edb7dec7989b0a7a37 Mon Sep 17 00:00:00 2001
From: Joel Schopp <joel.schopp@amd.com>
Date: Wed, 9 Jul 2014 11:17:04 -0500
Subject: [PATCH 0713/1185] arm/arm64: KVM: Fix VTTBR_BADDR_MASK and pgd alloc

The current aarch64 calculation for VTTBR_BADDR_MASK masks only 39 bits
and not all the bits in the PA range. This is clearly a bug that
manifests itself on systems that allocate memory in the higher address
space range.

 [ Modified from Joel's original patch to be based on PHYS_MASK_SHIFT
   instead of a hard-coded value and to move the alignment check of the
   allocation to mmu.c.  Also added a comment explaining why we hardcode
   the IPA range and changed the stage-2 pgd allocation to be based on
   the 40 bit IPA range instead of the maximum possible 48 bit PA range.
   - Christoffer ]

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Joel Schopp <joel.schopp@amd.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit dbff124e29fa24aff9705b354b5f4648cd96e0bb)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c               |  4 ++--
 arch/arm64/include/asm/kvm_arm.h | 13 ++++++++++++-
 arch/arm64/include/asm/kvm_mmu.h |  5 ++---
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 072a2084005c..15111ca6fe30 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -410,9 +410,9 @@ static void update_vttbr(struct kvm *kvm)
 
 	/* update vttbr to be used with the new vmid */
 	pgd_phys = virt_to_phys(kvm->arch.pgd);
+	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
-	kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
-	kvm->arch.vttbr |= vmid;
+	kvm->arch.vttbr = pgd_phys | vmid;
 
 	spin_unlock(&kvm_vmid_lock);
 }
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index cc83520459ed..7fd3e27e3ccc 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -122,6 +122,17 @@
 #define VTCR_EL2_T0SZ_MASK	0x3f
 #define VTCR_EL2_T0SZ_40B	24
 
+/*
+ * We configure the Stage-2 page tables to always restrict the IPA space to be
+ * 40 bits wide (T0SZ = 24).  Systems with a PARange smaller than 40 bits are
+ * not known to exist and will break with this configuration.
+ *
+ * Note that when using 4K pages, we concatenate two first level page tables
+ * together.
+ *
+ * The magic numbers used for VTTBR_X in this patch can be found in Tables
+ * D4-23 and D4-25 in ARM DDI 0487A.b.
+ */
 #ifdef CONFIG_ARM64_64K_PAGES
 /*
  * Stage2 translation configuration:
@@ -149,7 +160,7 @@
 #endif
 
 #define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK  (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
 #define VTTBR_VMID_SHIFT  (48LLU)
 #define VTTBR_VMID_MASK	  (0xffLLU << VTTBR_VMID_SHIFT)
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 737da742b293..a030d163840b 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -59,10 +59,9 @@
 #define KERN_TO_HYP(kva)	((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET)
 
 /*
- * Align KVM with the kernel's view of physical memory. Should be
- * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration.
+ * We currently only support a 40bit IPA.
  */
-#define KVM_PHYS_SHIFT	PHYS_MASK_SHIFT
+#define KVM_PHYS_SHIFT	(40)
 #define KVM_PHYS_SIZE	(1UL << KVM_PHYS_SHIFT)
 #define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1UL)
 

From 7f79ef21e90a97b535ca0f181929754cd72bda10 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 26 Sep 2014 12:29:34 +0200
Subject: [PATCH 0714/1185] arm/arm64: KVM: Report correct FSC for unsupported
 fault types

When we catch something that's not a permission fault or a translation
fault, we log the unsupported FSC in the kernel log, but we were masking
off the bottom bits of the FSC which was not very helpful.

Also correctly report the FSC for data and instruction faults rather
than telling people it was a DFCS, which doesn't exist in the ARM ARM.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 0496daa5cf99741ce8db82686b4c7446a37feabb)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_emulate.h   | 5 +++++
 arch/arm/kvm/mmu.c                   | 8 +++++---
 arch/arm64/include/asm/kvm_emulate.h | 5 +++++
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 69b746955fca..b9db269c6e61 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -148,6 +148,11 @@ static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu)
 }
 
 static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & HSR_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE;
 }
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index bb06f76a8f89..eea03069161b 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -882,10 +882,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 			      kvm_vcpu_get_hfar(vcpu), fault_ipa);
 
 	/* Check the stage-2 fault is trans. fault or write fault */
-	fault_status = kvm_vcpu_trap_get_fault(vcpu);
+	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
 	if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
-		kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
-			kvm_vcpu_trap_get_class(vcpu), fault_status);
+		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
+			kvm_vcpu_trap_get_class(vcpu),
+			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
+			(unsigned long)kvm_vcpu_get_hsr(vcpu));
 		return -EFAULT;
 	}
 
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index fdc3e21abd8d..5674a55b5518 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -173,6 +173,11 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
 }
 
 static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE;
 }

From 2dc5e2cf90915059fbc39ef380605aef6adaef47 Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Mon, 22 Sep 2014 15:52:48 +0100
Subject: [PATCH 0715/1185] arm: kvm: fix CPU hotplug

On some platforms with no power management capabilities, the hotplug
implementation is allowed to return from a smp_ops.cpu_die() call as a
function return. Upon a CPU onlining event, the KVM CPU notifier tries
to reinstall the hyp stub, which fails on platforms where no reset took
place following a hotplug event, with the message:

CPU1: smp_ops.cpu_die() returned, trying to resuscitate
CPU1: Booted secondary processor
Kernel panic - not syncing: unexpected prefetch abort in Hyp mode at: 0x80409540
unexpected data abort in Hyp mode at: 0x80401fe8
unexpected HVC/SVC trap in Hyp mode at: 0x805c6170

since KVM code is trying to reinstall the stub on a system where it is
already configured.

To prevent this issue, this patch adds a check in the KVM hotplug
notifier that detects if the HYP stub really needs re-installing when a
CPU is onlined and skips the installation call if the stub is already in
place, which means that the CPU has not been reset.

Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 37a34ac1d4775aafbc73b9db53c7daebbbc67e6a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 15111ca6fe30..d0c8ee654bbf 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -808,7 +808,8 @@ static int hyp_init_cpu_notify(struct notifier_block *self,
 	switch (action) {
 	case CPU_STARTING:
 	case CPU_STARTING_FROZEN:
-		cpu_init_hyp_mode(NULL);
+		if (__hyp_get_vectors() == hyp_default_vectors)
+			cpu_init_hyp_mode(NULL);
 		break;
 	}
 

From ad4ef3e73e40166a585d71f36a521f9f6d5db3c4 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 2 Oct 2014 11:53:28 +0200
Subject: [PATCH 0716/1185] Revert "arm, kvm: fix double lock on
 cpu_add_remove_lock"

This reverts commit d77503eadd2f16f2900b9be79a1dc6f37e8cd579.  The whole
register cpu hotplug fix series has not been applied, so LSK is released
without this fix.  If we ever include that series in LSK later, then
this can be fixed later too.

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 553f809e23f00976caea7a1ebdabaa58a6383e7d)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/arch_timer.c | 2 +-
 virt/kvm/arm/vgic.c       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 22fa819a9b6a..5081e809821f 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -277,7 +277,7 @@ int kvm_timer_hyp_init(void)
 
 	host_vtimer_irq = ppi;
 
-	err = __register_cpu_notifier(&kvm_timer_cpu_nb);
+	err = register_cpu_notifier(&kvm_timer_cpu_nb);
 	if (err) {
 		kvm_err("Cannot register timer CPU notifier\n");
 		goto out_free;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 506693152e47..8e1dc03342c3 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -2444,7 +2444,7 @@ int kvm_vgic_hyp_init(void)
 		return ret;
 	}
 
-	ret = __register_cpu_notifier(&vgic_cpu_nb);
+	ret = register_cpu_notifier(&vgic_cpu_nb);
 	if (ret) {
 		kvm_err("Cannot register vgic CPU notifier\n");
 		goto out_free_irq;

From 7ebda0194ff1872ef36d93cf309cc4043ffba4ee Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 3 Jul 2013 15:02:11 -0700
Subject: [PATCH 0717/1185] include/linux/mm.h: add PAGE_ALIGNED() helper

To test whether an address is aligned to PAGE_SIZE.

Cc: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>,
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
(cherry picked from commit 0fa73b86ef0797ca4fde5334117ca0b330f08030)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/mm.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index e0c8528a41a4..f42c5baa47cc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -52,6 +52,9 @@ extern unsigned long sysctl_admin_reserve_kbytes;
 /* to align the pointer to the (next) page boundary */
 #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
 
+/* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */
+#define PAGE_ALIGNED(addr)	IS_ALIGNED((unsigned long)addr, PAGE_SIZE)
+
 /*
  * Linux kernel virtual memory manager primitives.
  * The idea being to have a "virtual" mm in the same way

From 8032ebeefc762abdcc8f14f08e9e1d3839996063 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Tue, 25 Feb 2014 08:41:09 +0100
Subject: [PATCH 0718/1185] ARM: 7990/1: asm: rename logical shift macros push
 pull into lspush lspull

Rename the logical shift macros 'push' and 'pull', defined in
arch/arm/include/asm/assembler.h, to 'lspush' and 'lspull'.
This eliminates the name conflict between the 'push' logical shift
macro and the 'push' instruction mnemonic, which allows assembler.h
to be included in .S files that use the 'push' instruction.

Suggested-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
(cherry picked from commit d98b90ea22b0a28d9d787769704a9cf1ea5a513a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/assembler.h      |   8 +-
 arch/arm/lib/copy_template.S          |  36 ++---
 arch/arm/lib/csumpartialcopygeneric.S |  96 ++++++-------
 arch/arm/lib/io-readsl.S              |  12 +-
 arch/arm/lib/io-writesl.S             |  12 +-
 arch/arm/lib/memmove.S                |  36 ++---
 arch/arm/lib/uaccess.S                | 192 +++++++++++++-------------
 7 files changed, 196 insertions(+), 196 deletions(-)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 05ee9eebad6b..9271457adac8 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -30,8 +30,8 @@
  * Endian independent macros for shifting bytes within registers.
  */
 #ifndef __ARMEB__
-#define pull            lsr
-#define push            lsl
+#define lspull          lsr
+#define lspush          lsl
 #define get_byte_0      lsl #0
 #define get_byte_1	lsr #8
 #define get_byte_2	lsr #16
@@ -41,8 +41,8 @@
 #define put_byte_2	lsl #16
 #define put_byte_3	lsl #24
 #else
-#define pull            lsl
-#define push            lsr
+#define lspull          lsl
+#define lspush          lsr
 #define get_byte_0	lsr #24
 #define get_byte_1	lsr #16
 #define get_byte_2	lsr #8
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 805e3f8fb007..3bc8eb811a73 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -197,24 +197,24 @@
 
 12:	PLD(	pld	[r1, #124]		)
 13:		ldr4w	r1, r4, r5, r6, r7, abort=19f
-		mov	r3, lr, pull #\pull
+		mov	r3, lr, lspull #\pull
 		subs	r2, r2, #32
 		ldr4w	r1, r8, r9, ip, lr, abort=19f
-		orr	r3, r3, r4, push #\push
-		mov	r4, r4, pull #\pull
-		orr	r4, r4, r5, push #\push
-		mov	r5, r5, pull #\pull
-		orr	r5, r5, r6, push #\push
-		mov	r6, r6, pull #\pull
-		orr	r6, r6, r7, push #\push
-		mov	r7, r7, pull #\pull
-		orr	r7, r7, r8, push #\push
-		mov	r8, r8, pull #\pull
-		orr	r8, r8, r9, push #\push
-		mov	r9, r9, pull #\pull
-		orr	r9, r9, ip, push #\push
-		mov	ip, ip, pull #\pull
-		orr	ip, ip, lr, push #\push
+		orr	r3, r3, r4, lspush #\push
+		mov	r4, r4, lspull #\pull
+		orr	r4, r4, r5, lspush #\push
+		mov	r5, r5, lspull #\pull
+		orr	r5, r5, r6, lspush #\push
+		mov	r6, r6, lspull #\pull
+		orr	r6, r6, r7, lspush #\push
+		mov	r7, r7, lspull #\pull
+		orr	r7, r7, r8, lspush #\push
+		mov	r8, r8, lspull #\pull
+		orr	r8, r8, r9, lspush #\push
+		mov	r9, r9, lspull #\pull
+		orr	r9, r9, ip, lspush #\push
+		mov	ip, ip, lspull #\pull
+		orr	ip, ip, lr, lspush #\push
 		str8w	r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
 		bge	12b
 	PLD(	cmn	r2, #96			)
@@ -225,10 +225,10 @@
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov	r3, lr, pull #\pull
+15:		mov	r3, lr, lspull #\pull
 		ldr1w	r1, lr, abort=21f
 		subs	ip, ip, #4
-		orr	r3, r3, lr, push #\push
+		orr	r3, r3, lr, lspush #\push
 		str1w	r0, r3, abort=21f
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d620a5f22a09..d6e742d24007 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -141,7 +141,7 @@ FN_ENTRY
 		tst	len, #2
 		mov	r5, r4, get_byte_0
 		beq	.Lexit
-		adcs	sum, sum, r4, push #16
+		adcs	sum, sum, r4, lspush #16
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_1
 		strb	r5, [dst], #1
@@ -171,23 +171,23 @@ FN_ENTRY
 		cmp	ip, #2
 		beq	.Lsrc2_aligned
 		bhi	.Lsrc3_aligned
-		mov	r4, r5, pull #8		@ C = 0
+		mov	r4, r5, lspull #8		@ C = 0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
-		mov	r7, r7, pull #8
-		orr	r7, r7, r8, push #24
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
+		mov	r6, r6, lspull #8
+		orr	r6, r6, r7, lspush #24
+		mov	r7, r7, lspull #8
+		orr	r7, r7, r8, lspush #24
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #8
+		mov	r4, r8, lspull #8
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -196,50 +196,50 @@ FN_ENTRY
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #8
+		mov	r4, r6, lspull #8
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #24
+		orr	r4, r4, r5, lspush #24
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #8
+		mov	r4, r5, lspull #8
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
 		tst	len, #2
 		beq	.Lexit
-		adcs	sum, sum, r4, push #16
+		adcs	sum, sum, r4, lspush #16
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_1
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_2
 		b	.Lexit
 
-.Lsrc2_aligned:	mov	r4, r5, pull #16
+.Lsrc2_aligned:	mov	r4, r5, lspull #16
 		adds	sum, sum, #0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
-		mov	r7, r7, pull #16
-		orr	r7, r7, r8, push #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
+		mov	r6, r6, lspull #16
+		orr	r6, r6, r7, lspush #16
+		mov	r7, r7, lspull #16
+		orr	r7, r7, r8, lspush #16
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #16
+		mov	r4, r8, lspull #16
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -248,20 +248,20 @@ FN_ENTRY
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #16
+		mov	r4, r6, lspull #16
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #16
+		orr	r4, r4, r5, lspush #16
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #16
+		mov	r4, r5, lspull #16
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
@@ -276,24 +276,24 @@ FN_ENTRY
 		load1b	r5
 		b	.Lexit
 
-.Lsrc3_aligned:	mov	r4, r5, pull #24
+.Lsrc3_aligned:	mov	r4, r5, lspull #24
 		adds	sum, sum, #0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
-		mov	r7, r7, pull #24
-		orr	r7, r7, r8, push #8
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
+		mov	r6, r6, lspull #24
+		orr	r6, r6, r7, lspush #8
+		mov	r7, r7, lspull #24
+		orr	r7, r7, r8, lspush #8
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #24
+		mov	r4, r8, lspull #24
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -302,20 +302,20 @@ FN_ENTRY
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #24
+		mov	r4, r6, lspull #24
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #8
+		orr	r4, r4, r5, lspush #8
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #24
+		mov	r4, r5, lspull #24
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
@@ -326,7 +326,7 @@ FN_ENTRY
 		load1l	r4
 		mov	r5, r4, get_byte_0
 		strb	r5, [dst], #1
-		adcs	sum, sum, r4, push #24
+		adcs	sum, sum, r4, lspush #24
 		mov	r5, r4, get_byte_1
 		b	.Lexit
 FN_EXIT
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 5fb97e7f9f4b..7a7430950c79 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -47,25 +47,25 @@ ENTRY(__raw_readsl)
 		strb	ip, [r1], #1
 
 4:		subs	r2, r2, #1
-		mov	ip, r3, pull #24
+		mov	ip, r3, lspull #24
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #8
+		orrne	ip, ip, r3, lspush #8
 		strne	ip, [r1], #4
 		bne	4b
 		b	8f
 
 5:		subs	r2, r2, #1
-		mov	ip, r3, pull #16
+		mov	ip, r3, lspull #16
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #16
+		orrne	ip, ip, r3, lspush #16
 		strne	ip, [r1], #4
 		bne	5b
 		b	7f
 
 6:		subs	r2, r2, #1
-		mov	ip, r3, pull #8
+		mov	ip, r3, lspull #8
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #24
+		orrne	ip, ip, r3, lspush #24
 		strne	ip, [r1], #4
 		bne	6b
 
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index 8d3b7813725c..d0d104a0dd11 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -41,26 +41,26 @@ ENTRY(__raw_writesl)
 		blt	5f
 		bgt	6f
 
-4:		mov	ip, r3, pull #16
+4:		mov	ip, r3, lspull #16
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #16
+		orr	ip, ip, r3, lspush #16
 		str	ip, [r0]
 		bne	4b
 		mov	pc, lr
 
-5:		mov	ip, r3, pull #8
+5:		mov	ip, r3, lspull #8
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #24
+		orr	ip, ip, r3, lspush #24
 		str	ip, [r0]
 		bne	5b
 		mov	pc, lr
 
-6:		mov	ip, r3, pull #24
+6:		mov	ip, r3, lspull #24
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #8
+		orr	ip, ip, r3, lspush #8
 		str	ip, [r0]
 		bne	6b
 		mov	pc, lr
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index 938fc14f962d..d1fc0c0c342c 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -147,24 +147,24 @@ ENTRY(memmove)
 
 12:	PLD(	pld	[r1, #-128]		)
 13:		ldmdb   r1!, {r7, r8, r9, ip}
-		mov     lr, r3, push #\push
+		mov     lr, r3, lspush #\push
 		subs    r2, r2, #32
 		ldmdb   r1!, {r3, r4, r5, r6}
-		orr     lr, lr, ip, pull #\pull
-		mov     ip, ip, push #\push
-		orr     ip, ip, r9, pull #\pull
-		mov     r9, r9, push #\push
-		orr     r9, r9, r8, pull #\pull
-		mov     r8, r8, push #\push
-		orr     r8, r8, r7, pull #\pull
-		mov     r7, r7, push #\push
-		orr     r7, r7, r6, pull #\pull
-		mov     r6, r6, push #\push
-		orr     r6, r6, r5, pull #\pull
-		mov     r5, r5, push #\push
-		orr     r5, r5, r4, pull #\pull
-		mov     r4, r4, push #\push
-		orr     r4, r4, r3, pull #\pull
+		orr     lr, lr, ip, lspull #\pull
+		mov     ip, ip, lspush #\push
+		orr     ip, ip, r9, lspull #\pull
+		mov     r9, r9, lspush #\push
+		orr     r9, r9, r8, lspull #\pull
+		mov     r8, r8, lspush #\push
+		orr     r8, r8, r7, lspull #\pull
+		mov     r7, r7, lspush #\push
+		orr     r7, r7, r6, lspull #\pull
+		mov     r6, r6, lspush #\push
+		orr     r6, r6, r5, lspull #\pull
+		mov     r5, r5, lspush #\push
+		orr     r5, r5, r4, lspull #\pull
+		mov     r4, r4, lspush #\push
+		orr     r4, r4, r3, lspull #\pull
 		stmdb   r0!, {r4 - r9, ip, lr}
 		bge	12b
 	PLD(	cmn	r2, #96			)
@@ -175,10 +175,10 @@ ENTRY(memmove)
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov     lr, r3, push #\push
+15:		mov     lr, r3, lspush #\push
 		ldr	r3, [r1, #-4]!
 		subs	ip, ip, #4
-		orr	lr, lr, r3, pull #\pull
+		orr	lr, lr, r3, lspull #\pull
 		str	lr, [r0, #-4]!
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S
index 5c908b1cb8ed..e50520904b76 100644
--- a/arch/arm/lib/uaccess.S
+++ b/arch/arm/lib/uaccess.S
@@ -117,9 +117,9 @@ USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
 .Lc2u_1fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lc2u_1nowords
-		mov	r3, r7, pull #8
+		mov	r3, r7, lspull #8
 		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #24
+		orr	r3, r3, r7, lspush #24
 USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -131,30 +131,30 @@ USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lc2u_1rem8lp
 
-.Lc2u_1cpy8lp:	mov	r3, r7, pull #8
+.Lc2u_1cpy8lp:	mov	r3, r7, lspull #8
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #24
-		mov	r4, r4, pull #8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
+		orr	r3, r3, r4, lspush #24
+		mov	r4, r4, lspull #8
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
+		mov	r6, r6, lspull #8
+		orr	r6, r6, r7, lspush #24
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.Lc2u_1cpy8lp
 
 .Lc2u_1rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #24
-		movne	r4, r4, pull #8
-		orrne	r4, r4, r7, push #24
+		orrne	r3, r3, r4, lspush #24
+		movne	r4, r4, lspull #8
+		orrne	r4, r4, r7, lspush #24
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #24
+		orrne	r3, r3, r7, lspush #24
 	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_1fupi
@@ -172,9 +172,9 @@ USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
 .Lc2u_2fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lc2u_2nowords
-		mov	r3, r7, pull #16
+		mov	r3, r7, lspull #16
 		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #16
+		orr	r3, r3, r7, lspush #16
 USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -186,30 +186,30 @@ USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lc2u_2rem8lp
 
-.Lc2u_2cpy8lp:	mov	r3, r7, pull #16
+.Lc2u_2cpy8lp:	mov	r3, r7, lspull #16
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #16
-		mov	r4, r4, pull #16
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
+		orr	r3, r3, r4, lspush #16
+		mov	r4, r4, lspull #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
+		mov	r6, r6, lspull #16
+		orr	r6, r6, r7, lspush #16
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.Lc2u_2cpy8lp
 
 .Lc2u_2rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #16
-		movne	r4, r4, pull #16
-		orrne	r4, r4, r7, push #16
+		orrne	r3, r3, r4, lspush #16
+		movne	r4, r4, lspull #16
+		orrne	r4, r4, r7, lspush #16
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #16
+		orrne	r3, r3, r7, lspush #16
 	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_2fupi
@@ -227,9 +227,9 @@ USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
 .Lc2u_3fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lc2u_3nowords
-		mov	r3, r7, pull #24
+		mov	r3, r7, lspull #24
 		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #8
+		orr	r3, r3, r7, lspush #8
 USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -241,30 +241,30 @@ USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lc2u_3rem8lp
 
-.Lc2u_3cpy8lp:	mov	r3, r7, pull #24
+.Lc2u_3cpy8lp:	mov	r3, r7, lspull #24
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #8
-		mov	r4, r4, pull #24
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
+		orr	r3, r3, r4, lspush #8
+		mov	r4, r4, lspull #24
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
+		mov	r6, r6, lspull #24
+		orr	r6, r6, r7, lspush #8
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.Lc2u_3cpy8lp
 
 .Lc2u_3rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #8
-		movne	r4, r4, pull #24
-		orrne	r4, r4, r7, push #8
+		orrne	r3, r3, r4, lspush #8
+		movne	r4, r4, lspull #24
+		orrne	r4, r4, r7, lspush #8
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #8
+		orrne	r3, r3, r7, lspush #8
 	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_3fupi
@@ -382,9 +382,9 @@ USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
 .Lcfu_1fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_1nowords
-		mov	r3, r7, pull #8
+		mov	r3, r7, lspull #8
 USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #24
+		orr	r3, r3, r7, lspush #24
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -396,30 +396,30 @@ USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lcfu_1rem8lp
 
-.Lcfu_1cpy8lp:	mov	r3, r7, pull #8
+.Lcfu_1cpy8lp:	mov	r3, r7, lspull #8
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #24
-		mov	r4, r4, pull #8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
+		orr	r3, r3, r4, lspush #24
+		mov	r4, r4, lspull #8
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
+		mov	r6, r6, lspull #8
+		orr	r6, r6, r7, lspush #24
 		stmia	r0!, {r3 - r6}
 		bpl	.Lcfu_1cpy8lp
 
 .Lcfu_1rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #24
-		movne	r4, r4, pull #8
-		orrne	r4, r4, r7, push #24
+		orrne	r3, r3, r4, lspush #24
+		movne	r4, r4, lspull #8
+		orrne	r4, r4, r7, lspush #24
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #24
+		orrne	r3, r3, r7, lspush #24
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_1fupi
@@ -437,9 +437,9 @@ USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
 .Lcfu_2fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_2nowords
-		mov	r3, r7, pull #16
+		mov	r3, r7, lspull #16
 USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #16
+		orr	r3, r3, r7, lspush #16
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -452,30 +452,30 @@ USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
 		blt	.Lcfu_2rem8lp
 
 
-.Lcfu_2cpy8lp:	mov	r3, r7, pull #16
+.Lcfu_2cpy8lp:	mov	r3, r7, lspull #16
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #16
-		mov	r4, r4, pull #16
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
+		orr	r3, r3, r4, lspush #16
+		mov	r4, r4, lspull #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
+		mov	r6, r6, lspull #16
+		orr	r6, r6, r7, lspush #16
 		stmia	r0!, {r3 - r6}
 		bpl	.Lcfu_2cpy8lp
 
 .Lcfu_2rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #16
-		movne	r4, r4, pull #16
-		orrne	r4, r4, r7, push #16
+		orrne	r3, r3, r4, lspush #16
+		movne	r4, r4, lspull #16
+		orrne	r4, r4, r7, lspush #16
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #16
+		orrne	r3, r3, r7, lspush #16
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_2fupi
@@ -493,9 +493,9 @@ USER(	TUSER(	ldrgtb) r3, [r1], #0)			@ May fault
 .Lcfu_3fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_3nowords
-		mov	r3, r7, pull #24
+		mov	r3, r7, lspull #24
 USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #8
+		orr	r3, r3, r7, lspush #8
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -507,30 +507,30 @@ USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lcfu_3rem8lp
 
-.Lcfu_3cpy8lp:	mov	r3, r7, pull #24
+.Lcfu_3cpy8lp:	mov	r3, r7, lspull #24
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
-		orr	r3, r3, r4, push #8
-		mov	r4, r4, pull #24
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
+		orr	r3, r3, r4, lspush #8
+		mov	r4, r4, lspull #24
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
+		mov	r6, r6, lspull #24
+		orr	r6, r6, r7, lspush #8
 		stmia	r0!, {r3 - r6}
 		subs	ip, ip, #16
 		bpl	.Lcfu_3cpy8lp
 
 .Lcfu_3rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #8
-		movne	r4, r4, pull #24
-		orrne	r4, r4, r7, push #8
+		orrne	r3, r3, r4, lspush #8
+		movne	r4, r4, lspull #24
+		orrne	r4, r4, r7, lspush #8
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #8
+		orrne	r3, r3, r7, lspush #8
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_3fupi

From 0db5306859a69c38c695400c34bde7b7df5f1806 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 10 May 2013 18:07:19 +0100
Subject: [PATCH 0719/1185] ARM: barrier: allow options to be passed to memory
 barrier instructions

On ARMv7, the memory barrier instructions take an optional `option'
field which can be used to constrain the effects of a memory barrier
based on shareability and access type.

This patch allows the caller to pass these options if required, and
updates the smp_*() barriers to request inner-shareable barriers,
affecting only stores for the _wmb variant. wmb() is also changed to
use the -st version of dsb.

Reported-by: Albin Tonnerre <albin.tonnerre@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 3ea128065ed20d33bd02ff6dab689f88e38000be)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/assembler.h |  4 ++--
 arch/arm/include/asm/barrier.h   | 32 ++++++++++++++++----------------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 9271457adac8..cab788045029 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -212,9 +212,9 @@
 #ifdef CONFIG_SMP
 #if __LINUX_ARM_ARCH__ >= 7
 	.ifeqs "\mode","arm"
-	ALT_SMP(dmb)
+	ALT_SMP(dmb	ish)
 	.else
-	ALT_SMP(W(dmb))
+	ALT_SMP(W(dmb)	ish)
 	.endif
 #elif __LINUX_ARM_ARCH__ == 6
 	ALT_SMP(mcr	p15, 0, r0, c7, c10, 5)	@ dmb
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 8dcd9c702d90..60f15e274e6d 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -14,27 +14,27 @@
 #endif
 
 #if __LINUX_ARM_ARCH__ >= 7
-#define isb() __asm__ __volatile__ ("isb" : : : "memory")
-#define dsb() __asm__ __volatile__ ("dsb" : : : "memory")
-#define dmb() __asm__ __volatile__ ("dmb" : : : "memory")
+#define isb(option) __asm__ __volatile__ ("isb " #option : : : "memory")
+#define dsb(option) __asm__ __volatile__ ("dsb " #option : : : "memory")
+#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
 #elif defined(CONFIG_CPU_XSC3) || __LINUX_ARM_ARCH__ == 6
-#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
+#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
 				    : : "r" (0) : "memory")
-#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
 				    : : "r" (0) : "memory")
-#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
+#define dmb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
 				    : : "r" (0) : "memory")
 #elif defined(CONFIG_CPU_FA526)
-#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
+#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
 				    : : "r" (0) : "memory")
-#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
 				    : : "r" (0) : "memory")
-#define dmb() __asm__ __volatile__ ("" : : : "memory")
+#define dmb(x) __asm__ __volatile__ ("" : : : "memory")
 #else
-#define isb() __asm__ __volatile__ ("" : : : "memory")
-#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+#define isb(x) __asm__ __volatile__ ("" : : : "memory")
+#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
 				    : : "r" (0) : "memory")
-#define dmb() __asm__ __volatile__ ("" : : : "memory")
+#define dmb(x) __asm__ __volatile__ ("" : : : "memory")
 #endif
 
 #ifdef CONFIG_ARCH_HAS_BARRIERS
@@ -42,7 +42,7 @@
 #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP)
 #define mb()		do { dsb(); outer_sync(); } while (0)
 #define rmb()		dsb()
-#define wmb()		mb()
+#define wmb()		do { dsb(st); outer_sync(); } while (0)
 #else
 #define mb()		barrier()
 #define rmb()		barrier()
@@ -54,9 +54,9 @@
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
 #else
-#define smp_mb()	dmb()
-#define smp_rmb()	dmb()
-#define smp_wmb()	dmb()
+#define smp_mb()	dmb(ish)
+#define smp_rmb()	smp_mb()
+#define smp_wmb()	dmb(ishst)
 #endif
 
 #define read_barrier_depends()		do { } while(0)

From 63d832d826b75fec2ed062e5a5dc107c5a5db158 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Wed, 31 Jul 2013 12:44:42 -0400
Subject: [PATCH 0720/1185] ARM: mm: Introduce virt_to_idmap() with an arch
 hook

On some PAE systems (e.g. TI Keystone), memory is above the
32-bit addressable limit, and the interconnect provides an
aliased view of parts of physical memory in the 32-bit addressable
space.  This alias is strictly for boot time usage, and is not
otherwise usable because of coherency limitations. On such systems,
the idmap mechanism needs to take this aliased mapping into account.

This patch introduces virt_to_idmap() and an arch function pointer which
can be populated by any platform that needs it. It also converts the
necessary idmap spots to the now-available virt_to_idmap(). The #ifdef
approach is avoided to stay compatible with multi-platform builds.

Most architectures won't touch it, and in that case virt_to_idmap()
falls back to the existing virt_to_phys() macro.

Cc: Russell King <linux@arm.linux.org.uk>

Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
(cherry picked from commit 4dc9a81715973cb137a14399420bb35b0ed7d6ef)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/memory.h | 16 ++++++++++++++++
 arch/arm/kernel/smp.c         |  4 ++--
 arch/arm/mm/idmap.c           |  5 +++--
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 57870ab313c5..21b458e6b0b8 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -157,6 +157,7 @@
  */
 #define __PV_BITS_31_24	0x81000000
 
+extern phys_addr_t (*arch_virt_to_idmap) (unsigned long x);
 extern unsigned long __pv_phys_offset;
 #define PHYS_OFFSET __pv_phys_offset
 
@@ -232,6 +233,21 @@ static inline void *phys_to_virt(phys_addr_t x)
 #define __va(x)			((void *)__phys_to_virt((unsigned long)(x)))
 #define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
 
+/*
+ * These are for systems that have a hardware interconnect supported alias of
+ * physical memory for idmap purposes.  Most cases should leave these
+ * untouched.
+ */
+static inline phys_addr_t __virt_to_idmap(unsigned long x)
+{
+	if (arch_virt_to_idmap)
+		return arch_virt_to_idmap(x);
+	else
+		return __virt_to_phys(x);
+}
+
+#define virt_to_idmap(x)	__virt_to_idmap((unsigned long)(x))
+
 /*
  * Virtual <-> DMA view memory address translations
  * Again, these are *only* valid on the kernel direct mapped RAM
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 5919eb451bb9..5a8ad2c8eda0 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -87,8 +87,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 	 * its stack and the page tables.
 	 */
 	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
-	secondary_data.pgdir = virt_to_phys(idmap_pgd);
-	secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir);
+	secondary_data.pgdir = virt_to_idmap(idmap_pgd);
+	secondary_data.swapper_pg_dir = virt_to_idmap(swapper_pg_dir);
 	__cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data));
 	outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1));
 
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index 83cb3ac27095..c0a1e48f6733 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -10,6 +10,7 @@
 #include <asm/system_info.h>
 
 pgd_t *idmap_pgd;
+phys_addr_t (*arch_virt_to_idmap) (unsigned long x);
 
 #ifdef CONFIG_ARM_LPAE
 static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,
@@ -67,8 +68,8 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start,
 	unsigned long addr, end;
 	unsigned long next;
 
-	addr = virt_to_phys(text_start);
-	end = virt_to_phys(text_end);
+	addr = virt_to_idmap(text_start);
+	end = virt_to_idmap(text_end);
 
 	prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF;
 

From 6ea0cfc5fd25e82d2bdb101cff47c86c30200927 Mon Sep 17 00:00:00 2001
From: Steve Capper <steve.capper@linaro.org>
Date: Fri, 17 May 2013 12:32:55 +0100
Subject: [PATCH 0721/1185] ARM: mm: correct pte_same behaviour for LPAE.

For 3 levels of paging the PTE_EXT_NG bit will be set for user
address ptes that are written to a page table but not for ptes
created with mk_pte.

This can cause some comparison tests made by pte_same to fail
spuriously and lead to other problems.

To correct this behaviour, we mask off PTE_EXT_NG for any pte that
is present before running the comparison.

Signed-off-by: Steve Capper <steve.capper@linaro.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit dde1b65110353517816bcbc58539463396202244)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/pgtable-3level.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 86b8fe398b95..70f041cb50d1 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -166,6 +166,23 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 		clean_pmd_entry(pmdp);	\
 	} while (0)
 
+/*
+ * For 3 levels of paging the PTE_EXT_NG bit will be set for user address ptes
+ * that are written to a page table but not for ptes created with mk_pte.
+ *
+ * In hugetlb_no_page, a new huge pte (new_pte) is generated and passed to
+ * hugetlb_cow, where it is compared with an entry in a page table.
+ * This comparison test fails erroneously leading ultimately to a memory leak.
+ *
+ * To correct this behaviour, we mask off PTE_EXT_NG for any pte that is
+ * present before running the comparison.
+ */
+#define __HAVE_ARCH_PTE_SAME
+#define pte_same(pte_a,pte_b)	((pte_present(pte_a) ? pte_val(pte_a) & ~PTE_EXT_NG	\
+					: pte_val(pte_a))				\
+				== (pte_present(pte_b) ? pte_val(pte_b) & ~PTE_EXT_NG	\
+					: pte_val(pte_b)))
+
 #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,__pte(pte_val(pte)|(ext)))
 
 #endif /* __ASSEMBLY__ */

From 6b7d79d0df097d917ab0cd4f78c86938d8652cc7 Mon Sep 17 00:00:00 2001
From: Steve Capper <steve.capper@linaro.org>
Date: Fri, 17 May 2013 12:33:28 +0100
Subject: [PATCH 0722/1185] ARM: mm: Add support for flushing HugeTLB pages.

On ARM we use the __flush_dcache_page function to flush the dcache
of pages when needed; usually when the PG_dcache_clean bit is unset
and we are setting a PTE.

A HugeTLB page is represented as a compound page consisting of an
array of pages. Thus to flush the dcache of a HugeTLB page, one must
flush more than a single page.

This patch modifies __flush_dcache_page such that all constituent
pages of a HugeTLB page are flushed.

Signed-off-by: Steve Capper <steve.capper@linaro.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 0b19f93351dd68cb68a1a5b2d74e13d2ddfcfc64)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/mm/flush.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 32aa5861119f..c9e37aac450b 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -17,6 +17,7 @@
 #include <asm/highmem.h>
 #include <asm/smp_plat.h>
 #include <asm/tlbflush.h>
+#include <linux/hugetlb.h>
 
 #include "mm.h"
 
@@ -168,19 +169,23 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
 	 * coherent with the kernels mapping.
 	 */
 	if (!PageHighMem(page)) {
-		__cpuc_flush_dcache_area(page_address(page), PAGE_SIZE);
+		size_t page_size = PAGE_SIZE << compound_order(page);
+		__cpuc_flush_dcache_area(page_address(page), page_size);
 	} else {
-		void *addr;
-
+		unsigned long i;
 		if (cache_is_vipt_nonaliasing()) {
-			addr = kmap_atomic(page);
-			__cpuc_flush_dcache_area(addr, PAGE_SIZE);
-			kunmap_atomic(addr);
-		} else {
-			addr = kmap_high_get(page);
-			if (addr) {
+			for (i = 0; i < (1 << compound_order(page)); i++) {
+				void *addr = kmap_atomic(page + i);
 				__cpuc_flush_dcache_area(addr, PAGE_SIZE);
-				kunmap_high(page);
+				kunmap_atomic(addr);
+			}
+		} else {
+			for (i = 0; i < (1 << compound_order(page)); i++) {
+				void *addr = kmap_high_get(page + i);
+				if (addr) {
+					__cpuc_flush_dcache_area(addr, PAGE_SIZE);
+					kunmap_high(page + i);
+				}
 			}
 		}
 	}

From b76e94dbf7d662625f45dc874e4aa60f3dd1ef70 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 25 Jul 2012 14:32:38 +0100
Subject: [PATCH 0723/1185] ARM: mm: HugeTLB support for LPAE systems.

This patch adds support for hugetlbfs based on the x86 implementation.
It allows mapping of 2MB sections (see Documentation/vm/hugetlbpage.txt
for usage). The 64K pages configuration is not supported (section size
is 512MB in this case).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
[steve.capper@linaro.org: symbolic constants replace numbers in places;
split up into multiple files to simplify future non-LPAE support;
removed huge_pmd_share code, as this is very rarely executed;
added PROT_NONE support.]
Signed-off-by: Steve Capper <steve.capper@linaro.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>

(cherry picked from commit 1355e2a6eb88f04d76125c057dc5fca64d4b6a9e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/Kconfig                            |   4 +
 arch/arm/include/asm/hugetlb-3level.h       |  71 ++++++++++++++
 arch/arm/include/asm/hugetlb.h              |  84 ++++++++++++++++
 arch/arm/include/asm/pgtable-3level-hwdef.h |   2 +
 arch/arm/include/asm/pgtable-3level.h       |  11 +++
 arch/arm/mm/Makefile                        |   1 +
 arch/arm/mm/dma-mapping.c                   |   2 +-
 arch/arm/mm/fsr-3level.c                    |   2 +-
 arch/arm/mm/hugetlbpage.c                   | 101 ++++++++++++++++++++
 9 files changed, 276 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm/include/asm/hugetlb-3level.h
 create mode 100644 arch/arm/include/asm/hugetlb.h
 create mode 100644 arch/arm/mm/hugetlbpage.c

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 136f263ed47b..3d232df071c2 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1731,6 +1731,10 @@ config HW_PERF_EVENTS
 	  Enable hardware performance counter support for perf events. If
 	  disabled, perf events will use software events only.
 
+config SYS_SUPPORTS_HUGETLBFS
+       def_bool y
+       depends on ARM_LPAE
+
 source "mm/Kconfig"
 
 config FORCE_MAX_ZONEORDER
diff --git a/arch/arm/include/asm/hugetlb-3level.h b/arch/arm/include/asm/hugetlb-3level.h
new file mode 100644
index 000000000000..d4014fbe5ea3
--- /dev/null
+++ b/arch/arm/include/asm/hugetlb-3level.h
@@ -0,0 +1,71 @@
+/*
+ * arch/arm/include/asm/hugetlb-3level.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _ASM_ARM_HUGETLB_3LEVEL_H
+#define _ASM_ARM_HUGETLB_3LEVEL_H
+
+
+/*
+ * If our huge pte is non-zero then mark the valid bit.
+ * This allows pte_present(huge_ptep_get(ptep)) to return true for non-zero
+ * ptes.
+ * (The valid bit is automatically cleared by set_pte_at for PROT_NONE ptes).
+ */
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	pte_t retval = *ptep;
+	if (pte_val(retval))
+		pte_val(retval) |= L_PTE_VALID;
+	return retval;
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pte)
+{
+	set_pte_at(mm, addr, ptep, pte);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep)
+{
+	ptep_clear_flush(vma, addr, ptep);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return ptep_get_and_clear(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
+{
+	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+#endif /* _ASM_ARM_HUGETLB_3LEVEL_H */
diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h
new file mode 100644
index 000000000000..1f1b1cd112f3
--- /dev/null
+++ b/arch/arm/include/asm/hugetlb.h
@@ -0,0 +1,84 @@
+/*
+ * arch/arm/include/asm/hugetlb.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _ASM_ARM_HUGETLB_H
+#define _ASM_ARM_HUGETLB_H
+
+#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
+
+#include <asm/hugetlb-3level.h>
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+					  unsigned long addr, unsigned long end,
+					  unsigned long floor,
+					  unsigned long ceiling)
+{
+	free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr, unsigned long len)
+{
+	return 0;
+}
+
+static inline int prepare_hugepage_range(struct file *file,
+					 unsigned long addr, unsigned long len)
+{
+	struct hstate *h = hstate_file(file);
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (addr & ~huge_page_mask(h))
+		return -EINVAL;
+	return 0;
+}
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+	return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+	clear_bit(PG_dcache_clean, &page->flags);
+}
+
+#endif /* _ASM_ARM_HUGETLB_H */
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index 18f5cef82ad5..42df407ee3e3 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -30,6 +30,7 @@
 #define PMD_TYPE_FAULT		(_AT(pmdval_t, 0) << 0)
 #define PMD_TYPE_TABLE		(_AT(pmdval_t, 3) << 0)
 #define PMD_TYPE_SECT		(_AT(pmdval_t, 1) << 0)
+#define PMD_TABLE_BIT		(_AT(pmdval_t, 1) << 1)
 #define PMD_BIT4		(_AT(pmdval_t, 0))
 #define PMD_DOMAIN(x)		(_AT(pmdval_t, 0))
 #define PMD_APTABLE_SHIFT	(61)
@@ -66,6 +67,7 @@
 #define PTE_TYPE_MASK		(_AT(pteval_t, 3) << 0)
 #define PTE_TYPE_FAULT		(_AT(pteval_t, 0) << 0)
 #define PTE_TYPE_PAGE		(_AT(pteval_t, 3) << 0)
+#define PTE_TABLE_BIT		(_AT(pteval_t, 1) << 1)
 #define PTE_BUFFERABLE		(_AT(pteval_t, 1) << 2)		/* AttrIndx[0] */
 #define PTE_CACHEABLE		(_AT(pteval_t, 1) << 3)		/* AttrIndx[1] */
 #define PTE_EXT_SHARED		(_AT(pteval_t, 3) << 8)		/* SH[1:0], inner shareable */
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 70f041cb50d1..d1bcd8226cb1 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -61,6 +61,14 @@
 
 #define USER_PTRS_PER_PGD	(PAGE_OFFSET / PGDIR_SIZE)
 
+/*
+ * Hugetlb definitions.
+ */
+#define HPAGE_SHIFT		PMD_SHIFT
+#define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
+#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+
 /*
  * "Linux" PTE definitions for LPAE.
  *
@@ -185,6 +193,9 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 
 #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,__pte(pte_val(pte)|(ext)))
 
+#define pte_huge(pte)		(pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT))
+#define pte_mkhuge(pte)		(__pte(pte_val(pte) & ~PTE_TABLE_BIT))
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_PGTABLE_3LEVEL_H */
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 9e51be96f635..224a9cc09877 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_MODULES)		+= proc-syms.o
 
 obj-$(CONFIG_ALIGNMENT_TRAP)	+= alignment.o
 obj-$(CONFIG_HIGHMEM)		+= highmem.o
+obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 
 obj-$(CONFIG_CPU_ABRT_NOMMU)	+= abort-nommu.o
 obj-$(CONFIG_CPU_ABRT_EV4)	+= abort-ev4.o
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ef3e0f3aac96..9674476a75dc 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -250,7 +250,7 @@ static void __dma_free_buffer(struct page *page, size_t size)
 
 #ifdef CONFIG_MMU
 #ifdef CONFIG_HUGETLB_PAGE
-#error ARM Coherent DMA allocator does not (yet) support huge TLB
+#warning ARM Coherent DMA allocator does not (yet) support huge TLB
 #endif
 
 static void *__alloc_from_contiguous(struct device *dev, size_t size,
diff --git a/arch/arm/mm/fsr-3level.c b/arch/arm/mm/fsr-3level.c
index 05a4e9431836..e115fc7a69bd 100644
--- a/arch/arm/mm/fsr-3level.c
+++ b/arch/arm/mm/fsr-3level.c
@@ -13,7 +13,7 @@ static struct fsr_info fsr_info[] = {
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 access flag fault"	},
 	{ do_bad,		SIGBUS,  0,		"reserved permission fault"	},
 	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 1 permission fault"	},
-	{ do_sect_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 permission fault"	},
 	{ do_bad,		SIGBUS,  0,		"synchronous external abort"	},
 	{ do_bad,		SIGBUS,  0,		"asynchronous external abort"	},
diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c
new file mode 100644
index 000000000000..3d1e4a205b0b
--- /dev/null
+++ b/arch/arm/mm/hugetlbpage.c
@@ -0,0 +1,101 @@
+/*
+ * arch/arm/mm/hugetlbpage.c
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h and Bill Carson's patches
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/err.h>
+#include <linux/sysctl.h>
+#include <asm/mman.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+
+/*
+ * On ARM, huge pages are backed by pmd's rather than pte's, so we do a lot
+ * of type casting from pmd_t * to pte_t *.
+ */
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_present(*pgd)) {
+		pud = pud_offset(pgd, addr);
+		if (pud_present(*pud))
+			pmd = pmd_offset(pud, addr);
+	}
+
+	return (pte_t *)pmd;
+}
+
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+			      int write)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (pud)
+		pte = (pte_t *)pmd_alloc(mm, pud, addr);
+
+	return pte;
+}
+
+struct page *
+follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+		pmd_t *pmd, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pmd);
+	if (page)
+		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
+	return page;
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
+}

From 3b132a8c46e7d0a95ceb75098aeeb06958fa7670 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 25 Jul 2012 14:39:26 +0100
Subject: [PATCH 0724/1185] ARM: mm: Transparent huge page support for LPAE
 systems.

The patch adds support for THP (transparent huge pages) to LPAE
systems. When this feature is enabled, the kernel tries to map
anonymous pages as 2MB sections where possible.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
[steve.capper@linaro.org: symbolic constants used, value of
PMD_SECT_SPLITTING adjusted, tlbflush.h included in pgtable.h,
added PROT_NONE support.]
Signed-off-by: Steve Capper <steve.capper@linaro.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>

(cherry picked from commit 8d962507007357d6fbbcbdd1647faa389a9aed6d)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/Kconfig                            |  4 ++
 arch/arm/include/asm/pgtable-3level-hwdef.h |  2 +
 arch/arm/include/asm/pgtable-3level.h       | 60 +++++++++++++++++++++
 arch/arm/include/asm/pgtable.h              |  3 ++
 arch/arm/include/asm/tlb.h                  |  6 +++
 arch/arm/include/asm/tlbflush.h             |  2 +
 arch/arm/mm/fsr-3level.c                    |  2 +-
 7 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 3d232df071c2..f6c0bc429513 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1735,6 +1735,10 @@ config SYS_SUPPORTS_HUGETLBFS
        def_bool y
        depends on ARM_LPAE
 
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE
+       def_bool y
+       depends on ARM_LPAE
+
 source "mm/Kconfig"
 
 config FORCE_MAX_ZONEORDER
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index 42df407ee3e3..f088c864c992 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -42,6 +42,8 @@
  */
 #define PMD_SECT_BUFFERABLE	(_AT(pmdval_t, 1) << 2)
 #define PMD_SECT_CACHEABLE	(_AT(pmdval_t, 1) << 3)
+#define PMD_SECT_USER		(_AT(pmdval_t, 1) << 6)		/* AP[1] */
+#define PMD_SECT_RDONLY		(_AT(pmdval_t, 1) << 7)		/* AP[2] */
 #define PMD_SECT_S		(_AT(pmdval_t, 3) << 8)
 #define PMD_SECT_AF		(_AT(pmdval_t, 1) << 10)
 #define PMD_SECT_nG		(_AT(pmdval_t, 1) << 11)
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index d1bcd8226cb1..54733e5ef7a1 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -87,6 +87,11 @@
 #define L_PTE_SPECIAL		(_AT(pteval_t, 1) << 56)	/* unused */
 #define L_PTE_NONE		(_AT(pteval_t, 1) << 57)	/* PROT_NONE */
 
+#define PMD_SECT_VALID		(_AT(pmdval_t, 1) << 0)
+#define PMD_SECT_DIRTY		(_AT(pmdval_t, 1) << 55)
+#define PMD_SECT_SPLITTING	(_AT(pmdval_t, 1) << 56)
+#define PMD_SECT_NONE		(_AT(pmdval_t, 1) << 57)
+
 /*
  * To be used in assembly code with the upper page attributes.
  */
@@ -196,6 +201,61 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 #define pte_huge(pte)		(pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT))
 #define pte_mkhuge(pte)		(__pte(pte_val(pte) & ~PTE_TABLE_BIT))
 
+#define pmd_young(pmd)		(pmd_val(pmd) & PMD_SECT_AF)
+
+#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write(pmd)		(!(pmd_val(pmd) & PMD_SECT_RDONLY))
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
+#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
+#endif
+
+#define PMD_BIT_FUNC(fn,op) \
+static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
+
+PMD_BIT_FUNC(wrprotect,	|= PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkold,	&= ~PMD_SECT_AF);
+PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
+PMD_BIT_FUNC(mkwrite,   &= ~PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkdirty,   |= PMD_SECT_DIRTY);
+PMD_BIT_FUNC(mkyoung,   |= PMD_SECT_AF);
+
+#define pmd_mkhuge(pmd)		(__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
+
+#define pmd_pfn(pmd)		(((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
+#define pfn_pmd(pfn,prot)	(__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define mk_pmd(page,prot)	pfn_pmd(page_to_pfn(page),prot)
+
+/* represent a notpresent pmd by zero, this is used by pmdp_invalidate */
+#define pmd_mknotpresent(pmd)	(__pmd(0))
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | PMD_SECT_RDONLY |
+				PMD_SECT_VALID | PMD_SECT_NONE;
+	pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask);
+	return pmd;
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+			      pmd_t *pmdp, pmd_t pmd)
+{
+	BUG_ON(addr >= TASK_SIZE);
+
+	/* create a faulting entry if PROT_NONE protected */
+	if (pmd_val(pmd) & PMD_SECT_NONE)
+		pmd_val(pmd) &= ~PMD_SECT_VALID;
+
+	*pmdp = __pmd(pmd_val(pmd) | PMD_SECT_nG);
+	flush_pmd_entry(pmdp);
+}
+
+static inline int has_transparent_hugepage(void)
+{
+	return 1;
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_PGTABLE_3LEVEL_H */
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 9bcd262a9008..eaedce7b7e3a 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -24,6 +24,9 @@
 #include <asm/memory.h>
 #include <asm/pgtable-hwdef.h>
 
+
+#include <asm/tlbflush.h>
+
 #ifdef CONFIG_ARM_LPAE
 #include <asm/pgtable-3level.h>
 #else
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index bdf2b8458ec1..46e7cfb3e721 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -204,6 +204,12 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 #endif
 }
 
+static inline void
+tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
+{
+	tlb_add_flush(tlb, addr);
+}
+
 #define pte_free_tlb(tlb, ptep, addr)	__pte_free_tlb(tlb, ptep, addr)
 #define pmd_free_tlb(tlb, pmdp, addr)	__pmd_free_tlb(tlb, pmdp, addr)
 #define pud_free_tlb(tlb, pudp, addr)	pud_free((tlb)->mm, pudp)
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index a3625d141c1d..c37459299fc9 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -535,6 +535,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 }
 #endif
 
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
+
 #endif
 
 #endif /* CONFIG_MMU */
diff --git a/arch/arm/mm/fsr-3level.c b/arch/arm/mm/fsr-3level.c
index e115fc7a69bd..ab4409a2307e 100644
--- a/arch/arm/mm/fsr-3level.c
+++ b/arch/arm/mm/fsr-3level.c
@@ -9,7 +9,7 @@ static struct fsr_info fsr_info[] = {
 	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
 	{ do_bad,		SIGBUS,  0,		"reserved access flag fault"	},
 	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},
-	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 access flag fault"	},
 	{ do_bad,		SIGBUS,  0,		"reserved permission fault"	},
 	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 1 permission fault"	},

From aacf6b4861e74ecee74f6641a7ee2bc879f6f769 Mon Sep 17 00:00:00 2001
From: Ronald Wahl <ronald.wahl@raritan.com>
Date: Thu, 7 Aug 2014 14:15:50 +0200
Subject: [PATCH 0725/1185] carl9170: fix sending URBs with wrong type when
 using full-speed

commit 671796dd96b6cd85b75fba9d3007bcf7e5f7c309 upstream.

The driver assumes that endpoint 4 is always an interrupt endpoint.
Unfortunately the type differs between high-speed and full-speed
configurations: in the former case it is indeed an interrupt
endpoint, but in the latter case it is a bulk endpoint.
When sending URBs with the wrong type the kernel will
generate a warning message including backtrace. In this specific
case there will be a huge amount of warnings which can bring the system
to freeze.

To fix this we are now sending URBs to endpoint 4 using the type
found in the endpoint descriptor.

A side note: The carl9170 firmware currently specifies endpoint 4 as
interrupt endpoint even in the full-speed configuration but this has
no relevance because before this firmware is loaded the endpoint type
is as described above and after the firmware is running the stick is not
reenumerated and so the old descriptor is used.

Signed-off-by: Ronald Wahl <ronald.wahl@raritan.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/carl9170/carl9170.h |  1 +
 drivers/net/wireless/ath/carl9170/usb.c      | 31 +++++++++++++++++---
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/carl9170/carl9170.h b/drivers/net/wireless/ath/carl9170/carl9170.h
index 9dce106cd6d4..95a334f0719c 100644
--- a/drivers/net/wireless/ath/carl9170/carl9170.h
+++ b/drivers/net/wireless/ath/carl9170/carl9170.h
@@ -253,6 +253,7 @@ struct ar9170 {
 	atomic_t rx_work_urbs;
 	atomic_t rx_pool_urbs;
 	kernel_ulong_t features;
+	bool usb_ep_cmd_is_bulk;
 
 	/* firmware settings */
 	struct completion fw_load_wait;
diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c
index 307bc0ddff99..83d20c8b2ad7 100644
--- a/drivers/net/wireless/ath/carl9170/usb.c
+++ b/drivers/net/wireless/ath/carl9170/usb.c
@@ -621,9 +621,16 @@ int __carl9170_exec_cmd(struct ar9170 *ar, struct carl9170_cmd *cmd,
 		goto err_free;
 	}
 
-	usb_fill_int_urb(urb, ar->udev, usb_sndintpipe(ar->udev,
-		AR9170_USB_EP_CMD), cmd, cmd->hdr.len + 4,
-		carl9170_usb_cmd_complete, ar, 1);
+	if (ar->usb_ep_cmd_is_bulk)
+		usb_fill_bulk_urb(urb, ar->udev,
+				  usb_sndbulkpipe(ar->udev, AR9170_USB_EP_CMD),
+				  cmd, cmd->hdr.len + 4,
+				  carl9170_usb_cmd_complete, ar);
+	else
+		usb_fill_int_urb(urb, ar->udev,
+				 usb_sndintpipe(ar->udev, AR9170_USB_EP_CMD),
+				 cmd, cmd->hdr.len + 4,
+				 carl9170_usb_cmd_complete, ar, 1);
 
 	if (free_buf)
 		urb->transfer_flags |= URB_FREE_BUFFER;
@@ -1032,9 +1039,10 @@ static void carl9170_usb_firmware_step2(const struct firmware *fw,
 static int carl9170_usb_probe(struct usb_interface *intf,
 			      const struct usb_device_id *id)
 {
+	struct usb_endpoint_descriptor *ep;
 	struct ar9170 *ar;
 	struct usb_device *udev;
-	int err;
+	int i, err;
 
 	err = usb_reset_device(interface_to_usbdev(intf));
 	if (err)
@@ -1050,6 +1058,21 @@ static int carl9170_usb_probe(struct usb_interface *intf,
 	ar->intf = intf;
 	ar->features = id->driver_info;
 
+	/* We need to remember the type of endpoint 4 because it differs
+	 * between high- and full-speed configuration. The high-speed
+	 * configuration specifies it as interrupt and the full-speed
+	 * configuration as bulk endpoint. This information is required
+	 * later when sending urbs to that endpoint.
+	 */
+	for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; ++i) {
+		ep = &intf->cur_altsetting->endpoint[i].desc;
+
+		if (usb_endpoint_num(ep) == AR9170_USB_EP_CMD &&
+		    usb_endpoint_dir_out(ep) &&
+		    usb_endpoint_type(ep) == USB_ENDPOINT_XFER_BULK)
+			ar->usb_ep_cmd_is_bulk = true;
+	}
+
 	usb_set_intfdata(intf, ar);
 	SET_IEEE80211_DEV(ar->hw, &intf->dev);
 

From b9733d3b8aafde65eb3ef5fbdd6ba21250680c02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guido=20Mart=C3=ADnez?= <guido@vanguardiasur.com.ar>
Date: Tue, 17 Jun 2014 11:17:04 -0300
Subject: [PATCH 0726/1185] drm/tilcdc: panel: fix dangling sysfs connector
 node
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit e396900e649b0af31161634d87fe37076f46c12b upstream.

Add a drm_sysfs_connector_remove call when we destroy the panel to make
sure the connector node in sysfs gets deleted.

This is required for proper unload and re-load of this driver as a
module. Without this, we would get a warning at re-load time like so:

   ------------[ cut here ]------------
   WARNING: CPU: 0 PID: 824 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x54/0x74()
   sysfs: cannot create duplicate filename '/class/drm/card0-LVDS-1'
   Modules linked in: [...]
   CPU: 0 PID: 824 Comm: modprobe Not tainted 3.15.0-rc4-00027-g6484f96-dirty #81
   [<c0013bb8>] (unwind_backtrace) from [<c0011824>] (show_stack+0x10/0x14)
   [<c0011824>] (show_stack) from [<c0034e8c>] (warn_slowpath_common+0x68/0x88)
   [<c0034e8c>] (warn_slowpath_common) from [<c0034edc>] (warn_slowpath_fmt+0x30/0x40)
   [<c0034edc>] (warn_slowpath_fmt) from [<c01243f4>] (sysfs_warn_dup+0x54/0x74)
   [<c01243f4>] (sysfs_warn_dup) from [<c0124708>] (sysfs_do_create_link_sd.isra.2+0xb0/0xb8)
   [<c0124708>] (sysfs_do_create_link_sd.isra.2) from [<c02ae37c>] (device_add+0x338/0x520)
   [<c02ae37c>] (device_add) from [<c02ae6e8>] (device_create_groups_vargs+0xa0/0xc4)
   [<c02ae6e8>] (device_create_groups_vargs) from [<c02ae758>] (device_create+0x24/0x2c)
   [<c02ae758>] (device_create) from [<c029b4ec>] (drm_sysfs_connector_add+0x64/0x204)
   [<c029b4ec>] (drm_sysfs_connector_add) from [<bf0b1fec>] (panel_modeset_init+0xb8/0x134 [tilcdc])
   [<bf0b1fec>] (panel_modeset_init [tilcdc]) from [<bf0b2bf0>] (tilcdc_load+0x214/0x4c0 [tilcdc])
   [<bf0b2bf0>] (tilcdc_load [tilcdc]) from [<c029955c>] (drm_dev_register+0xa4/0x104)
      [ .. snip .. ]
   ---[ end trace b2d09cd9578b0497 ]---
   [drm:drm_sysfs_connector_add] *ERROR* failed to register connector device: -17

Signed-off-by: Guido Martínez <guido@vanguardiasur.com.ar>
Tested-by: Darren Etheridge <detheridge@ti.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/tilcdc/tilcdc_panel.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/tilcdc/tilcdc_panel.c b/drivers/gpu/drm/tilcdc/tilcdc_panel.c
index 09176654fddb..2a93ea290c3e 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_panel.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_panel.c
@@ -151,6 +151,7 @@ struct panel_connector {
 static void panel_connector_destroy(struct drm_connector *connector)
 {
 	struct panel_connector *panel_connector = to_panel_connector(connector);
+	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
 	kfree(panel_connector);
 }

From 94ff92a34e0c666daa6a3711a07ec65f8067dcd7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guido=20Mart=C3=ADnez?= <guido@vanguardiasur.com.ar>
Date: Tue, 17 Jun 2014 11:17:05 -0300
Subject: [PATCH 0727/1185] drm/tilcdc: slave: fix dangling sysfs connector
 node
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit daa15b4cd1eee58eb1322062a3320b1dbe5dc96e upstream.

Add a drm_sysfs_connector_remove call when we destroy the slave connector
to make sure the connector node in sysfs gets deleted.

This is required for proper unload and re-load of this driver as a
module. Without this, we would get a warning at re-load time like so:

   tda998x 0-0070: found TDA19988
   ------------[ cut here ]------------
   WARNING: CPU: 0 PID: 825 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x54/0x74()
   sysfs: cannot create duplicate filename '/class/drm/card0-HDMI-A-1'
   Modules linked in: [..]
   CPU: 0 PID: 825 Comm: modprobe Not tainted 3.15.0-rc4-00027-g9dcdef4 #82
   [<c0013bb8>] (unwind_backtrace) from [<c0011824>] (show_stack+0x10/0x14)
   [<c0011824>] (show_stack) from [<c0034e8c>] (warn_slowpath_common+0x68/0x88)
   [<c0034e8c>] (warn_slowpath_common) from [<c0034edc>] (warn_slowpath_fmt+0x30/0x40)
   [<c0034edc>] (warn_slowpath_fmt) from [<c01243f4>] (sysfs_warn_dup+0x54/0x74)
   [<c01243f4>] (sysfs_warn_dup) from [<c0124708>] (sysfs_do_create_link_sd.isra.2+0xb0/0xb8)
   [<c0124708>] (sysfs_do_create_link_sd.isra.2) from [<c02ae37c>] (device_add+0x338/0x520)
   [<c02ae37c>] (device_add) from [<c02ae6e8>] (device_create_groups_vargs+0xa0/0xc4)
   [<c02ae6e8>] (device_create_groups_vargs) from [<c02ae758>] (device_create+0x24/0x2c)
   [<c02ae758>] (device_create) from [<c029b4ec>] (drm_sysfs_connector_add+0x64/0x204)
   [<c029b4ec>] (drm_sysfs_connector_add) from [<bf0b1b40>] (slave_modeset_init+0x120/0x1bc [tilcdc])
   [<bf0b1b40>] (slave_modeset_init [tilcdc]) from [<bf0b2be8>] (tilcdc_load+0x214/0x4c0 [tilcdc])
   [<bf0b2be8>] (tilcdc_load [tilcdc]) from [<c029955c>] (drm_dev_register+0xa4/0x104)
      [..snip..]
   ---[ end trace 4df8d614936ebdee ]---
   [drm:drm_sysfs_connector_add] *ERROR* failed to register connector device: -17

Signed-off-by: Guido Martínez <guido@vanguardiasur.com.ar>
Tested-by: Darren Etheridge <detheridge@ti.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/tilcdc/tilcdc_slave.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/tilcdc/tilcdc_slave.c b/drivers/gpu/drm/tilcdc/tilcdc_slave.c
index db1d2fc9dfb5..5d6c597a5d69 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_slave.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_slave.c
@@ -142,6 +142,7 @@ struct slave_connector {
 static void slave_connector_destroy(struct drm_connector *connector)
 {
 	struct slave_connector *slave_connector = to_slave_connector(connector);
+	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
 	kfree(slave_connector);
 }

From a06de0430256d72e3deb00bbd9426b0c4183c11c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guido=20Mart=C3=ADnez?= <guido@vanguardiasur.com.ar>
Date: Tue, 17 Jun 2014 11:17:06 -0300
Subject: [PATCH 0728/1185] drm/tilcdc: tfp410: fix dangling sysfs connector
 node
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 16dcbdef404f4e87dab985494381939fe0a2d456 upstream.

Add a drm_sysfs_connector_remove call when we destroy the tfp410 connector
to make sure the connector node in sysfs gets deleted.

This is required for proper unload and re-load of this driver, otherwise
we will get a warning about a duplicate filename in sysfs.

Signed-off-by: Guido Martínez <guido@vanguardiasur.com.ar>
Tested-by: Darren Etheridge <detheridge@ti.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/tilcdc/tilcdc_tfp410.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c
index a36788fbcd98..986131dd9f47 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c
@@ -168,6 +168,7 @@ struct tfp410_connector {
 static void tfp410_connector_destroy(struct drm_connector *connector)
 {
 	struct tfp410_connector *tfp410_connector = to_tfp410_connector(connector);
+	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
 	kfree(tfp410_connector);
 }

From ed64381eb3e1fe2f04af1ca06e942f3602abf123 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guido=20Mart=C3=ADnez?= <guido@vanguardiasur.com.ar>
Date: Tue, 17 Jun 2014 11:17:07 -0300
Subject: [PATCH 0729/1185] drm/tilcdc: panel: fix leak when unloading the
 module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 3a49012224ca9016658a831a327ff6a7fe5bb4f9 upstream.

The driver did not unregister the allocated framebuffer, which caused
memory leaks (and memory manager WARNs) when unloading. Also, the
framebuffer device under /dev still existed after unloading.

Add a call to drm_fbdev_cma_fini when unloading the module to prevent
both issues.

Signed-off-by: Guido Martínez <guido@vanguardiasur.com.ar>
Tested-by: Darren Etheridge <detheridge@ti.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/tilcdc/tilcdc_drv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
index 2b5461bcd9fb..668e34c4cc5d 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
@@ -116,6 +116,7 @@ static int tilcdc_unload(struct drm_device *dev)
 	struct tilcdc_drm_private *priv = dev->dev_private;
 	struct tilcdc_module *mod, *cur;
 
+	drm_fbdev_cma_fini(priv->fbdev);
 	drm_kms_helper_poll_fini(dev);
 	drm_mode_config_cleanup(dev);
 	drm_vblank_cleanup(dev);

From aaee1af7b74ecbb1d16b3303aa63ef62a37d20c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guido=20Mart=C3=ADnez?= <guido@vanguardiasur.com.ar>
Date: Tue, 17 Jun 2014 11:17:08 -0300
Subject: [PATCH 0730/1185] drm/tilcdc: fix release order on exit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit eb565a2bbadc6a5030a6dbe58db1aa52453e7edf upstream.

Unregister resources in the correct order on tilcdc_drm_fini, which is
the reverse order they were registered during tilcdc_drm_init.

This also means unregistering the driver before releasing its resources.

Signed-off-by: Guido Martínez <guido@vanguardiasur.com.ar>
Tested-by: Darren Etheridge <detheridge@ti.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/tilcdc/tilcdc_drv.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
index 668e34c4cc5d..f060b7487c34 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
@@ -597,10 +597,10 @@ static int __init tilcdc_drm_init(void)
 static void __exit tilcdc_drm_fini(void)
 {
 	DBG("fini");
-	tilcdc_tfp410_fini();
-	tilcdc_slave_fini();
-	tilcdc_panel_fini();
 	platform_driver_unregister(&tilcdc_platform_driver);
+	tilcdc_panel_fini();
+	tilcdc_slave_fini();
+	tilcdc_tfp410_fini();
 }
 
 late_initcall(tilcdc_drm_init);

From ddb3b2c0ed7d2f9b0f429824c096566c38dfda66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guido=20Mart=C3=ADnez?= <guido@vanguardiasur.com.ar>
Date: Tue, 17 Jun 2014 11:17:09 -0300
Subject: [PATCH 0731/1185] drm/tilcdc: fix double kfree
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit c9a3ad25eddfdb898114a9d73cdb4c3472d9dfca upstream.

display_timings_release calls kfree on the display_timings object passed
to it. Calling kfree after it is wrong. SLUB debug showed the following
warning:

    =============================================================================
    BUG kmalloc-64 (Tainted: G        W    ): Object already free
    -----------------------------------------------------------------------------

    Disabling lock debugging due to kernel taint
    INFO: Allocated in of_get_display_timings+0x2c/0x214 age=601 cpu=0
    pid=884
     __slab_alloc.constprop.79+0x2e0/0x33c
     kmem_cache_alloc+0xac/0xdc
     of_get_display_timings+0x2c/0x214
     panel_probe+0x7c/0x314 [tilcdc]
     platform_drv_probe+0x18/0x48
     [..snip..]
    INFO: Freed in panel_destroy+0x18/0x3c [tilcdc] age=0 cpu=0 pid=907
     __slab_free+0x34/0x330
     panel_destroy+0x18/0x3c [tilcdc]
     tilcdc_unload+0xd0/0x118 [tilcdc]
     drm_dev_unregister+0x24/0x98
     [..snip..]

Signed-off-by: Guido Martínez <guido@vanguardiasur.com.ar>
Tested-by: Darren Etheridge <detheridge@ti.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/tilcdc/tilcdc_panel.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/tilcdc/tilcdc_panel.c b/drivers/gpu/drm/tilcdc/tilcdc_panel.c
index 2a93ea290c3e..779d508616d3 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_panel.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_panel.c
@@ -286,10 +286,8 @@ static void panel_destroy(struct tilcdc_module *mod)
 {
 	struct panel_module *panel_mod = to_panel_module(mod);
 
-	if (panel_mod->timings) {
+	if (panel_mod->timings)
 		display_timings_release(panel_mod->timings);
-		kfree(panel_mod->timings);
-	}
 
 	tilcdc_module_cleanup(mod);
 	kfree(panel_mod->info);

From 0f90c9c088df9196dbb322175413a79f41f9975f Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sun, 3 Aug 2014 19:59:35 +0900
Subject: [PATCH 0732/1185] drm/ttm: Fix possible division by 0 in
 ttm_dma_pool_shrink_scan().

commit 11e504cc705e8ccb06ac93a276e11b5e8fee4d40 upstream.

list_empty(&_manager->pools) being false before taking _manager->lock
does not guarantee that _manager->npools != 0 after taking _manager->lock
because _manager->npools is updated under _manager->lock.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index b8b394319b45..e9f65ad2a0eb 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -1016,6 +1016,8 @@ static int ttm_dma_pool_mm_shrink(struct shrinker *shrink,
 		return 0;
 
 	mutex_lock(&_manager->lock);
+	if (!_manager->npools)
+		goto out;
 	pool_offset = pool_offset % _manager->npools;
 	list_for_each_entry(p, &_manager->pools, pools) {
 		unsigned nr_free;
@@ -1033,6 +1035,7 @@ static int ttm_dma_pool_mm_shrink(struct shrinker *shrink,
 			 p->pool->dev_name, p->pool->name, current->pid,
 			 nr_free, shrink_pages);
 	}
+out:
 	mutex_unlock(&_manager->lock);
 	/* return estimated number of unused pages in pool */
 	return ttm_dma_pool_get_num_unused_pages();

From a0fe26d09596ccb84f68603f02c4c78a6394e9f9 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sun, 3 Aug 2014 20:00:40 +0900
Subject: [PATCH 0733/1185] drm/ttm: Choose a pool to shrink correctly in
 ttm_dma_pool_shrink_scan().

commit 46c2df68f03a236b30808bba361f10900c88d95e upstream.

We can use "unsigned int" instead of "atomic_t" by updating start_pool
variable under _manager->lock. This patch will make it possible to avoid
skipping when choosing a pool to shrink in round-robin style, after next
patch changes mutex_lock(_manager->lock) to !mutex_trylock(_manager->lock).

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index e9f65ad2a0eb..de1a753b1d56 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -1006,9 +1006,9 @@ EXPORT_SYMBOL_GPL(ttm_dma_unpopulate);
 static int ttm_dma_pool_mm_shrink(struct shrinker *shrink,
 				  struct shrink_control *sc)
 {
-	static atomic_t start_pool = ATOMIC_INIT(0);
+	static unsigned start_pool;
 	unsigned idx = 0;
-	unsigned pool_offset = atomic_add_return(1, &start_pool);
+	unsigned pool_offset;
 	unsigned shrink_pages = sc->nr_to_scan;
 	struct device_pools *p;
 
@@ -1018,7 +1018,7 @@ static int ttm_dma_pool_mm_shrink(struct shrinker *shrink,
 	mutex_lock(&_manager->lock);
 	if (!_manager->npools)
 		goto out;
-	pool_offset = pool_offset % _manager->npools;
+	pool_offset = ++start_pool % _manager->npools;
 	list_for_each_entry(p, &_manager->pools, pools) {
 		unsigned nr_free;
 

From 1fc3a6ed0203b772f02253be3aea606b7192d429 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Sun, 27 Jul 2014 23:21:50 -0400
Subject: [PATCH 0734/1185] drm/radeon: load the lm63 driver for an lm64
 thermal chip.

commit 5dc355325b648dc9b4cf3bea4d968de46fd59215 upstream.

Looks like the lm63 driver supports the lm64 as well.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/radeon_atombios.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index efb06e34aed7..43a3303995f8 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -1908,7 +1908,7 @@ static const char *thermal_controller_names[] = {
 	"adm1032",
 	"adm1030",
 	"max6649",
-	"lm64",
+	"lm63", /* lm64 */
 	"f75375",
 	"asc7xxx",
 };
@@ -1919,7 +1919,7 @@ static const char *pp_lib_thermal_controller_names[] = {
 	"adm1032",
 	"adm1030",
 	"max6649",
-	"lm64",
+	"lm63", /* lm64 */
 	"f75375",
 	"RV6xx",
 	"RV770",

From 335b05e627c4ed007f2743f78d1cac60a265e74e Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 7 Aug 2014 16:29:53 +0200
Subject: [PATCH 0735/1185] drm/i915: read HEAD register back in
 init_ring_common() to enforce ordering

commit ece4a17d237a79f63fbfaf3f724a12b6d500555c upstream.

Without this, ring initialization fails reliably during resume with

	[drm:init_ring_common] *ERROR* render ring initialization failed ctl 0001f001 head ffffff8804 tail 00000000 start 000e4000

This is not a complete fix, but it is verified to make the ring
initialization failures during resume much less likely.

We were not able to root-cause this bug (likely HW-specific to Gen4 chips)
yet. This is therefore used as duct tape until the problem is fully
understood and a proper fix is created, so that people don't suffer from
completely unusable systems in the meantime.

The discussion and debugging is happening at

	https://bugs.freedesktop.org/show_bug.cgi?id=76554

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 629527d205de..4605c3877c95 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -396,6 +396,9 @@ static int init_ring_common(struct intel_ring_buffer *ring)
 		}
 	}
 
+	/* Enforce ordering by reading HEAD register back */
+	I915_READ_HEAD(ring);
+
 	/* Initialize the ring. This must happen _after_ we've cleared the ring
 	 * registers with the above sequence (the readback of the HEAD registers
 	 * also enforces ordering), otherwise the hw might lose the new ring

From 4f09b0e0285f3caaf1584c7cd989ef2012d87b22 Mon Sep 17 00:00:00 2001
From: Arjun Sreedharan <arjun024@gmail.com>
Date: Sun, 17 Aug 2014 20:00:09 +0530
Subject: [PATCH 0736/1185] pata_scc: propagate return value of
 scc_wait_after_reset

commit 4dc7c76cd500fa78c64adfda4b070b870a2b993c upstream.

scc_bus_softreset should not unconditionally return zero.
Propagate the error code.

Signed-off-by: Arjun Sreedharan <arjun024@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/pata_scc.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c
index f35f15f4d83e..f7badaa39eb6 100644
--- a/drivers/ata/pata_scc.c
+++ b/drivers/ata/pata_scc.c
@@ -586,7 +586,7 @@ static int scc_wait_after_reset(struct ata_link *link, unsigned int devmask,
  *	Note: Original code is ata_bus_softreset().
  */
 
-static unsigned int scc_bus_softreset(struct ata_port *ap, unsigned int devmask,
+static int scc_bus_softreset(struct ata_port *ap, unsigned int devmask,
                                       unsigned long deadline)
 {
 	struct ata_ioports *ioaddr = &ap->ioaddr;
@@ -600,9 +600,7 @@ static unsigned int scc_bus_softreset(struct ata_port *ap, unsigned int devmask,
 	udelay(20);
 	out_be32(ioaddr->ctl_addr, ap->ctl);
 
-	scc_wait_after_reset(&ap->link, devmask, deadline);
-
-	return 0;
+	return scc_wait_after_reset(&ap->link, devmask, deadline);
 }
 
 /**
@@ -619,7 +617,8 @@ static int scc_softreset(struct ata_link *link, unsigned int *classes,
 {
 	struct ata_port *ap = link->ap;
 	unsigned int slave_possible = ap->flags & ATA_FLAG_SLAVE_POSS;
-	unsigned int devmask = 0, err_mask;
+	unsigned int devmask = 0;
+	int rc;
 	u8 err;
 
 	DPRINTK("ENTER\n");
@@ -635,9 +634,9 @@ static int scc_softreset(struct ata_link *link, unsigned int *classes,
 
 	/* issue bus reset */
 	DPRINTK("about to softreset, devmask=%x\n", devmask);
-	err_mask = scc_bus_softreset(ap, devmask, deadline);
-	if (err_mask) {
-		ata_port_err(ap, "SRST failed (err_mask=0x%x)\n", err_mask);
+	rc = scc_bus_softreset(ap, devmask, deadline);
+	if (rc) {
+		ata_port_err(ap, "SRST failed (err_mask=0x%x)\n", rc);
 		return -EIO;
 	}
 

From f714be2d745eabf529d50da2cac465956e487a63 Mon Sep 17 00:00:00 2001
From: James Ralston <james.d.ralston@intel.com>
Date: Wed, 27 Aug 2014 14:29:07 -0700
Subject: [PATCH 0737/1185] ahci: Add Device IDs for Intel 9 Series PCH

commit 1b071a0947dbce5c184c12262e02540fbc493457 upstream.

This patch adds the AHCI mode SATA Device IDs for the Intel 9 Series PCH.

Signed-off-by: James Ralston <james.d.ralston@intel.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/ahci.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 3b39687c6336..6949bdba7cb6 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -304,6 +304,14 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, 0x9c85), board_ahci }, /* Wildcat Point-LP RAID */
 	{ PCI_VDEVICE(INTEL, 0x9c87), board_ahci }, /* Wildcat Point-LP RAID */
 	{ PCI_VDEVICE(INTEL, 0x9c8f), board_ahci }, /* Wildcat Point-LP RAID */
+	{ PCI_VDEVICE(INTEL, 0x8c82), board_ahci }, /* 9 Series AHCI */
+	{ PCI_VDEVICE(INTEL, 0x8c83), board_ahci }, /* 9 Series AHCI */
+	{ PCI_VDEVICE(INTEL, 0x8c84), board_ahci }, /* 9 Series RAID */
+	{ PCI_VDEVICE(INTEL, 0x8c85), board_ahci }, /* 9 Series RAID */
+	{ PCI_VDEVICE(INTEL, 0x8c86), board_ahci }, /* 9 Series RAID */
+	{ PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series RAID */
+	{ PCI_VDEVICE(INTEL, 0x8c8e), board_ahci }, /* 9 Series RAID */
+	{ PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series RAID */
 
 	/* JMicron 360/1/3/5/6, match class to avoid IDE function */
 	{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,

From 4e2bd06d10715dea684e882d5731300ad4eeab88 Mon Sep 17 00:00:00 2001
From: Murali Karicheri <m-karicheri2@ti.com>
Date: Fri, 5 Sep 2014 13:21:00 -0400
Subject: [PATCH 0738/1185] ahci: add pcid for Marvel 0x9182 controller

commit c5edfff9db6f4d2c35c802acb4abe0df178becee upstream.

Keystone K2E EVM uses the Marvell 0x9182 controller. This requires support
for the ID in the ahci driver.

Signed-off-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/ahci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 6949bdba7cb6..c3f09505f795 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -449,6 +449,8 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x917a),
 	  .driver_data = board_ahci_yes_fbs },			/* 88se9172 */
 	{ PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x9172),
+	  .driver_data = board_ahci_yes_fbs },			/* 88se9182 */
+	{ PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x9182),
 	  .driver_data = board_ahci_yes_fbs },			/* 88se9172 */
 	{ PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x9192),
 	  .driver_data = board_ahci_yes_fbs },			/* 88se9172 on some Gigabyte */

From 0d334226ee0ab486c884e9c130e8a7939146cf23 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Fri, 22 Aug 2014 11:36:52 +1000
Subject: [PATCH 0739/1185] ibmveth: Fix endian issues with rx_no_buffer
 statistic

commit cbd5228199d8be45d895d9d0cc2b8ce53835fc21 upstream.

Hidden away in the last 8 bytes of the buffer_list page is a solitary
statistic. It needs to be byte swapped or else ethtool -S will
produce numbers that terrify the user.

Since we do this in multiple places, create a helper function with a
comment explaining what is going on.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/ibm/ibmveth.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 70fd55968844..040ecf2027cd 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -293,6 +293,18 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter,
 	atomic_add(buffers_added, &(pool->available));
 }
 
+/*
+ * The final 8 bytes of the buffer list is a counter of frames dropped
+ * because there was not a buffer in the buffer list capable of holding
+ * the frame.
+ */
+static void ibmveth_update_rx_no_buffer(struct ibmveth_adapter *adapter)
+{
+	__be64 *p = adapter->buffer_list_addr + 4096 - 8;
+
+	adapter->rx_no_buffer = be64_to_cpup(p);
+}
+
 /* replenish routine */
 static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
 {
@@ -308,8 +320,7 @@ static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
 			ibmveth_replenish_buffer_pool(adapter, pool);
 	}
 
-	adapter->rx_no_buffer = *(u64 *)(((char*)adapter->buffer_list_addr) +
-						4096 - 8);
+	ibmveth_update_rx_no_buffer(adapter);
 }
 
 /* empty and free ana buffer pool - also used to do cleanup in error paths */
@@ -689,8 +700,7 @@ static int ibmveth_close(struct net_device *netdev)
 
 	free_irq(netdev->irq, netdev);
 
-	adapter->rx_no_buffer = *(u64 *)(((char *)adapter->buffer_list_addr) +
-						4096 - 8);
+	ibmveth_update_rx_no_buffer(adapter);
 
 	ibmveth_cleanup(adapter);
 

From bb565121ec53badf5e27972abfcfab2934cc9071 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 11 Sep 2014 14:38:16 +0100
Subject: [PATCH 0740/1185] arm64: flush TLS registers during exec

commit eb35bdd7bca29a13c8ecd44e6fd747a84ce675db upstream.

Nathan reports that we leak TLS information from the parent context
during an exec, as we don't clear the TLS registers when flushing the
thread state.

This patch updates the flushing code so that we:

  (1) Unconditionally zero the tpidr_el0 register (since this is fully
      context switched for native tasks and zeroed for compat tasks)

  (2) Zero the tp_value state in thread_info before clearing the
      tpidrro_el0 register for compat tasks (since this is only writable
      by the set_tls compat syscall and therefore not fully switched).

A missing compiler barrier is also added to the compat set_tls syscall.

Acked-by: Nathan Lynch <Nathan_Lynch@mentor.com>
Reported-by: Nathan Lynch <Nathan_Lynch@mentor.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/kernel/process.c    | 18 ++++++++++++++++++
 arch/arm64/kernel/sys_compat.c |  6 ++++++
 2 files changed, 24 insertions(+)

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 46f02c3b5015..0860fc3077fc 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -172,9 +172,27 @@ void exit_thread(void)
 {
 }
 
+static void tls_thread_flush(void)
+{
+	asm ("msr tpidr_el0, xzr");
+
+	if (is_compat_task()) {
+		current->thread.tp_value = 0;
+
+		/*
+		 * We need to ensure ordering between the shadow state and the
+		 * hardware state, so that we don't corrupt the hardware state
+		 * with a stale shadow state during context switch.
+		 */
+		barrier();
+		asm ("msr tpidrro_el0, xzr");
+	}
+}
+
 void flush_thread(void)
 {
 	fpsimd_flush_thread();
+	tls_thread_flush();
 	flush_ptrace_hw_breakpoint(current);
 }
 
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 26e9c4eeaba8..78039927c807 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -79,6 +79,12 @@ long compat_arm_syscall(struct pt_regs *regs)
 
 	case __ARM_NR_compat_set_tls:
 		current->thread.tp_value = regs->regs[0];
+
+		/*
+		 * Protect against register corruption from context switch.
+		 * See comment in tls_thread_flush.
+		 */
+		barrier();
 		asm ("msr tpidrro_el0, %0" : : "r" (regs->regs[0]));
 		return 0;
 

From 6410b64defae58d7add75e0d2e5abf9e8b0b58ab Mon Sep 17 00:00:00 2001
From: Marek Roszko <mark.roszko@gmail.com>
Date: Wed, 20 Aug 2014 21:39:41 -0400
Subject: [PATCH 0741/1185] i2c: at91: add bound checking on SMBus block length
 bytes

commit 75b81f339c6af43f6f4a1b3eabe0603321dade65 upstream.

The driver was not bounds-checking the received length byte to ensure it was within
the buffer size that is allocated for SMBus blocks. This resulted in buffer overflows
whenever an invalid length byte was received.
It also failed to ensure the length byte was not zero. If it received zero, it would end up
in an infinite loop as the at91_twi_read_next_byte function returned immediately without
allowing RHR to be read to clear the RXRDY interrupt.

Tested against an SMBus-compliant battery.

Signed-off-by: Marek Roszko <mark.roszko@gmail.com>
Acked-by: Ludovic Desroches <ludovic.desroches@atmel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-at91.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c
index f35ea236497b..c3e9b4714e52 100644
--- a/drivers/i2c/busses/i2c-at91.c
+++ b/drivers/i2c/busses/i2c-at91.c
@@ -102,6 +102,7 @@ struct at91_twi_dev {
 	unsigned twi_cwgr_reg;
 	struct at91_twi_pdata *pdata;
 	bool use_dma;
+	bool recv_len_abort;
 	struct at91_twi_dma dma;
 };
 
@@ -268,12 +269,24 @@ static void at91_twi_read_next_byte(struct at91_twi_dev *dev)
 	*dev->buf = at91_twi_read(dev, AT91_TWI_RHR) & 0xff;
 	--dev->buf_len;
 
+	/* return if aborting, we only needed to read RHR to clear RXRDY*/
+	if (dev->recv_len_abort)
+		return;
+
 	/* handle I2C_SMBUS_BLOCK_DATA */
 	if (unlikely(dev->msg->flags & I2C_M_RECV_LEN)) {
-		dev->msg->flags &= ~I2C_M_RECV_LEN;
-		dev->buf_len += *dev->buf;
-		dev->msg->len = dev->buf_len + 1;
-		dev_dbg(dev->dev, "received block length %d\n", dev->buf_len);
+		/* ensure length byte is a valid value */
+		if (*dev->buf <= I2C_SMBUS_BLOCK_MAX && *dev->buf > 0) {
+			dev->msg->flags &= ~I2C_M_RECV_LEN;
+			dev->buf_len += *dev->buf;
+			dev->msg->len = dev->buf_len + 1;
+			dev_dbg(dev->dev, "received block length %d\n",
+					 dev->buf_len);
+		} else {
+			/* abort and send the stop by reading one more byte */
+			dev->recv_len_abort = true;
+			dev->buf_len = 1;
+		}
 	}
 
 	/* send stop if second but last byte has been read */
@@ -445,6 +458,12 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev)
 		ret = -EIO;
 		goto error;
 	}
+	if (dev->recv_len_abort) {
+		dev_err(dev->dev, "invalid smbus block length recvd\n");
+		ret = -EPROTO;
+		goto error;
+	}
+
 	dev_dbg(dev->dev, "transfer complete\n");
 
 	return 0;
@@ -501,6 +520,7 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num)
 	dev->buf_len = m_start->len;
 	dev->buf = m_start->buf;
 	dev->msg = m_start;
+	dev->recv_len_abort = false;
 
 	ret = at91_do_twi_transfer(dev);
 

From d5a6d0a4da8d97790df21cf96c1879856aedc651 Mon Sep 17 00:00:00 2001
From: Simon Lindgren <simon@aqwary.com>
Date: Tue, 26 Aug 2014 21:13:24 +0200
Subject: [PATCH 0742/1185] i2c: at91: Fix a race condition during signal
 handling in at91_do_twi_xfer.

commit 6721f28a26efd6368497abbdef5dcfc59608d899 upstream.

There is a race condition in at91_do_twi_xfer when signals arrive.
If a signal is received while waiting for a transfer to complete,
wait_for_completion_interruptible_timeout() will return -ERESTARTSYS.
This is not handled correctly resulting in interrupts still being
enabled and a transfer being in flight when we return.

Symptoms include a range of oopses and bus lockups. Oopses can happen
when the transfer completes because the interrupt handler will corrupt
the stack. If a new transfer is started before the interrupt fires
the controller will start a new transfer in the middle of the old one,
resulting in confused slaves and a locked bus.

To avoid this, use wait_for_completion_io_timeout instead so that we
don't have to deal with gracefully shutting down the transfer and
disabling the interrupts.

Signed-off-by: Simon Lindgren <simon@aqwary.com>
Acked-by: Ludovic Desroches <ludovic.desroches@atmel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-at91.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c
index c3e9b4714e52..b1240a250149 100644
--- a/drivers/i2c/busses/i2c-at91.c
+++ b/drivers/i2c/busses/i2c-at91.c
@@ -435,8 +435,8 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev)
 		}
 	}
 
-	ret = wait_for_completion_interruptible_timeout(&dev->cmd_complete,
-							dev->adapter.timeout);
+	ret = wait_for_completion_io_timeout(&dev->cmd_complete,
+					     dev->adapter.timeout);
 	if (ret == 0) {
 		dev_err(dev->dev, "controller timed out\n");
 		at91_init_twi_bus(dev);

From 8c1bf5dcfe2ccdb5e514eec86607dbccdfa83096 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Mon, 25 Aug 2014 13:59:41 -0400
Subject: [PATCH 0743/1185] trace: Fix epoll hang when we race with new entries

commit 4ce97dbf50245227add17c83d87dc838e7ca79d0 upstream.

Epoll on trace_pipe can sometimes hang in a weird case.  If the ring buffer is
empty when we set waiters_pending but an event shows up exactly at that moment
we can miss being woken up by the ring buffer's irq work.  Since
ring_buffer_empty() is inherently racy we will sometimes think that the buffer
is not empty.  So we don't get woken up and we don't think there are any events
even though there were some ready when we added the watch, which makes us hang.
This patch fixes this by making sure that we are actually on the wait list
before we set waiters_pending, and add a memory barrier to make sure
ring_buffer_empty() is going to be correct.

Link: http://lkml.kernel.org/p/1408989581-23727-1-git-send-email-jbacik@fb.com

Cc: Martin Lau <kafai@fb.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/ring_buffer.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 5efbc122e5ce..0bc181b0524c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -626,8 +626,22 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
 		work = &cpu_buffer->irq_work;
 	}
 
-	work->waiters_pending = true;
 	poll_wait(filp, &work->waiters, poll_table);
+	work->waiters_pending = true;
+	/*
+	 * There's a tight race between setting the waiters_pending and
+	 * checking if the ring buffer is empty.  Once the waiters_pending bit
+	 * is set, the next event will wake the task up, but we can get stuck
+	 * if there's only a single event in.
+	 *
+	 * FIXME: Ideally, we need a memory barrier on the writer side as well,
+	 * but adding a memory barrier to all events will cause too much of a
+	 * performance hit in the fast path.  We only need a memory barrier when
+	 * the buffer goes from empty to having content.  But as this race is
+	 * extremely small, and it's not a problem if another event comes in, we
+	 * will fix it later.
+	 */
+	smp_mb();
 
 	if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
 	    (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))

From 73ab9689158dd27735252037e036cd908a0db65a Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 22 Aug 2014 14:13:24 +0100
Subject: [PATCH 0744/1185] arm64: ptrace: fix compat hardware watchpoint
 reporting

commit 27d7ff273c2aad37b28f6ff0cab2cfa35b51e648 upstream.

I'm not sure what I was on when I wrote this, but when iterating over
the hardware watchpoint array (hbp_watch_array), our index is off by
ARM_MAX_BRP, so we walk off the end of our thread_struct...

... except, a dodgy condition in the loop means that it never executes
at all (bp cannot be NULL).

This patch fixes the code so that we remove the bp check and use the
correct index for accessing the watchpoint structures.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/hw_breakpoint.h | 1 -
 arch/arm64/kernel/ptrace.c             | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index d064047612b1..52b484b6aa1a 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -79,7 +79,6 @@ static inline void decode_ctrl_reg(u32 reg,
  */
 #define ARM_MAX_BRP		16
 #define ARM_MAX_WRP		16
-#define ARM_MAX_HBP_SLOTS	(ARM_MAX_BRP + ARM_MAX_WRP)
 
 /* Virtual debug register bases. */
 #define AARCH64_DBG_REG_BVR	0
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 85536688f753..33a74fc45959 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -103,7 +103,8 @@ static void ptrace_hbptriggered(struct perf_event *bp,
 			break;
 		}
 	}
-	for (i = ARM_MAX_BRP; i < ARM_MAX_HBP_SLOTS && !bp; ++i) {
+
+	for (i = 0; i < ARM_MAX_WRP; ++i) {
 		if (current->thread.debug.hbp_watch[i] == bp) {
 			info.si_errno = -((i << 1) + 1);
 			break;

From b2a014bc3d20aab7789c92c7549b98b9e0c34383 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Thu, 21 Aug 2014 20:55:21 +0200
Subject: [PATCH 0745/1185] ALSA: core: fix buffer overflow in
 snd_info_get_line()

commit ddc64b278a4dda052390b3de1b551e59acdff105 upstream.

snd_info_get_line() documents that its last parameter must be one
less than the buffer size, but this API design guarantees that
(literally) every caller gets it wrong.

Just change this parameter to have its obvious meaning.

Reported-by: Tommi Rantala <tt.rantala@gmail.com>
Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/core/info.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/core/info.c b/sound/core/info.c
index e79baa11b60e..08070e1eefeb 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -679,7 +679,7 @@ int snd_info_card_free(struct snd_card *card)
  * snd_info_get_line - read one line from the procfs buffer
  * @buffer: the procfs buffer
  * @line: the buffer to store
- * @len: the max. buffer size - 1
+ * @len: the max. buffer size
  *
  * Reads one line from the buffer and stores the string.
  *
@@ -699,7 +699,7 @@ int snd_info_get_line(struct snd_info_buffer *buffer, char *line, int len)
 			buffer->stop = 1;
 		if (c == '\n')
 			break;
-		if (len) {
+		if (len > 1) {
 			len--;
 			*line++ = c;
 		}

From 37a86af862cde5caf76ac189479268966b47c9ad Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 2 Sep 2014 07:21:56 +0200
Subject: [PATCH 0746/1185] ALSA: hda - Fix COEF setups for ALC1150 codec

commit acf08081adb5e8fe0519eb97bb49797ef52614d6 upstream.

ALC1150 codec seems to need the COEF- and PLL-setups just like its
compatible ALC882 codec.  Some machines (e.g. SunMicro X10SAT) show
the problem like too low output volumes unless the COEF setup is
applied.

Reported-and-tested-by: Dana Goyette <danagoyette@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 593090e5bd77..4008034b6ebe 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -318,6 +318,7 @@ static void alc_auto_init_amp(struct hda_codec *codec, int type)
 		case 0x10ec0885:
 		case 0x10ec0887:
 		/*case 0x10ec0889:*/ /* this causes an SPDIF problem */
+		case 0x10ec0900:
 			alc889_coef_init(codec);
 			break;
 		case 0x10ec0888:
@@ -2252,6 +2253,7 @@ static int patch_alc882(struct hda_codec *codec)
 	switch (codec->vendor_id) {
 	case 0x10ec0882:
 	case 0x10ec0885:
+	case 0x10ec0900:
 		break;
 	default:
 		/* ALC883 and variants */

From c0532c3ba5fc01742c50d01a1997f823dc8ac1ea Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 11 Sep 2014 12:59:21 +0200
Subject: [PATCH 0747/1185] ALSA: hda - Fix invalid pin powermap without jack
 detection

commit 7a9744cb455e6faa287e148394b4b422a6f3c5c4 upstream.

When a driver is set up without the jack detection explicitly (either
by passing a model option or via a specific fixup), the pin powermap
of IDT/STAC codecs is set up wrongly, resulting in the silence
output.  It's because of a logic failure in stac_init_power_map().
It tries to avoid creating a callback for the pins that have other
auto-hp and auto-mic callbacks, but the check is done in a wrong way
at a wrong time.  The stac_init_power_map() should be called after
creating other jack detection ctls, and the jack callback should be
created only for jack-detectable widgets.

This patch fixes the check in stac_init_power_map() and its callee
at the right place, after snd_hda_gen_build_controls().

Reported-by: Adam Richter <adam_richter2004@yahoo.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_sigmatel.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 132b4c802a47..5dd4c4af9c9f 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -539,8 +539,8 @@ static void stac_init_power_map(struct hda_codec *codec)
 		if (snd_hda_jack_tbl_get(codec, nid))
 			continue;
 		if (def_conf == AC_JACK_PORT_COMPLEX &&
-		    !(spec->vref_mute_led_nid == nid ||
-		      is_jack_detectable(codec, nid))) {
+		    spec->vref_mute_led_nid != nid &&
+		    is_jack_detectable(codec, nid)) {
 			snd_hda_jack_detect_enable_callback(codec, nid,
 							    STAC_PWR_EVENT,
 							    jack_update_power);
@@ -3647,11 +3647,18 @@ static int stac_parse_auto_config(struct hda_codec *codec)
 			return err;
 	}
 
-	stac_init_power_map(codec);
-
 	return 0;
 }
 
+static int stac_build_controls(struct hda_codec *codec)
+{
+	int err = snd_hda_gen_build_controls(codec);
+
+	if (err < 0)
+		return err;
+	stac_init_power_map(codec);
+	return 0;
+}
 
 static int stac_init(struct hda_codec *codec)
 {
@@ -3794,7 +3801,7 @@ static void stac_set_power_state(struct hda_codec *codec, hda_nid_t fg,
 #endif /* CONFIG_PM */
 
 static const struct hda_codec_ops stac_patch_ops = {
-	.build_controls = snd_hda_gen_build_controls,
+	.build_controls = stac_build_controls,
 	.build_pcms = snd_hda_gen_build_pcms,
 	.init = stac_init,
 	.free = stac_free,

From 74e0b74c5abf82acef5c8d95ef30c376ca3d158d Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Sun, 21 Sep 2014 22:50:57 +0200
Subject: [PATCH 0748/1185] ALSA: pcm: fix fifo_size frame calculation

commit a9960e6a293e6fc3ed414643bb4e4106272e4d0a upstream.

The calculated frame size was wrong because snd_pcm_format_physical_width()
actually returns the number of bits, not bytes.

Use snd_pcm_format_size() instead, which not only returns bytes, but also
simplifies the calculation.

Fixes: 8bea869c5e56 ("ALSA: PCM midlevel: improve fifo_size handling")
Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/core/pcm_lib.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index 3284940a4af2..8eddece217bb 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -1782,14 +1782,16 @@ static int snd_pcm_lib_ioctl_fifo_size(struct snd_pcm_substream *substream,
 {
 	struct snd_pcm_hw_params *params = arg;
 	snd_pcm_format_t format;
-	int channels, width;
+	int channels;
+	ssize_t frame_size;
 
 	params->fifo_size = substream->runtime->hw.fifo_size;
 	if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_FIFO_IN_FRAMES)) {
 		format = params_format(params);
 		channels = params_channels(params);
-		width = snd_pcm_format_physical_width(format);
-		params->fifo_size /= width * channels;
+		frame_size = snd_pcm_format_size(format, channels);
+		if (frame_size > 0)
+			params->fifo_size /= (unsigned)frame_size;
 	}
 	return 0;
 }

From fe63ce5175e58b55d9fa5805d24faffcad214760 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Tue, 26 Aug 2014 20:56:36 +0900
Subject: [PATCH 0749/1185] cfq-iosched: Fix wrong children_weight calculation

commit e15693ef18e13e3e6bffe891fe140f18b8ff6d07 upstream.

cfq_group_service_tree_add() is applying new_weight at the beginning of
the function via cfq_update_group_weight().
This actually allows weight to change between adding it to and subtracting
it from children_weight, and triggers WARN_ON_ONCE() in
cfq_group_service_tree_del(), or even causes oops by divide error during
vfr calculation in cfq_group_service_tree_add().

The detailed scenario is as follows:
1. Create blkio cgroups X and Y as a child of X.
   Set X's weight to 500 and perform some I/O to apply new_weight.
   This X's I/O completes before starting Y's I/O.
2. Y starts I/O and cfq_group_service_tree_add() is called with Y.
3. cfq_group_service_tree_add() walks up the tree during children_weight
   calculation and adds parent X's weight (500) to children_weight of root.
   children_weight becomes 500.
4. Set X's weight to 1000.
5. X starts I/O and cfq_group_service_tree_add() is called with X.
6. cfq_group_service_tree_add() applies its new_weight (1000).
7. I/O of Y completes and cfq_group_service_tree_del() is called with Y.
8. I/O of X completes and cfq_group_service_tree_del() is called with X.
9. cfq_group_service_tree_del() subtracts X's weight (1000) from
   children_weight of root. children_weight becomes -500.
   This triggers WARN_ON_ONCE().
10. Set X's weight to 500.
11. X starts I/O and cfq_group_service_tree_add() is called with X.
12. cfq_group_service_tree_add() applies its new_weight (500) and adds it
    to children_weight of root. children_weight becomes 0. Calculation of
    vfr triggers oops by divide error.

weight should be updated right before adding it to children_weight.

Reported-by: Ruki Sekiya <sekiya.ruki@lab.ntt.co.jp>
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 block/cfq-iosched.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index c410752c5c65..c981097dd634 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1275,12 +1275,16 @@ __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
 static void
 cfq_update_group_weight(struct cfq_group *cfqg)
 {
-	BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
-
 	if (cfqg->new_weight) {
 		cfqg->weight = cfqg->new_weight;
 		cfqg->new_weight = 0;
 	}
+}
+
+static void
+cfq_update_group_leaf_weight(struct cfq_group *cfqg)
+{
+	BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
 
 	if (cfqg->new_leaf_weight) {
 		cfqg->leaf_weight = cfqg->new_leaf_weight;
@@ -1299,7 +1303,7 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
 	/* add to the service tree */
 	BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
 
-	cfq_update_group_weight(cfqg);
+	cfq_update_group_leaf_weight(cfqg);
 	__cfq_group_service_tree_add(st, cfqg);
 
 	/*
@@ -1323,6 +1327,7 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
 	 */
 	while ((parent = cfqg_parent(pos))) {
 		if (propagate) {
+			cfq_update_group_weight(pos);
 			propagate = !parent->nr_active++;
 			parent->children_weight += pos->weight;
 		}

From 8ee6be5563a5bdb9a5df5f88f8bbbab8930fe6da Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Wed, 27 Aug 2014 09:13:15 +0200
Subject: [PATCH 0750/1185] HID: picolcd: sanity check report size in
 raw_event() callback

commit 844817e47eef14141cf59b8d5ac08dd11c0a9189 upstream.

The report passed to us from transport driver could potentially be
arbitrarily large, therefore we better sanity-check it so that raw_data
that we hold in picolcd_pending structure are always kept within proper
bounds.

Reported-by: Steven Vittitoe <scvitti@google.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/hid-picolcd_core.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/hid/hid-picolcd_core.c b/drivers/hid/hid-picolcd_core.c
index acbb021065ec..020df3c2e8b4 100644
--- a/drivers/hid/hid-picolcd_core.c
+++ b/drivers/hid/hid-picolcd_core.c
@@ -350,6 +350,12 @@ static int picolcd_raw_event(struct hid_device *hdev,
 	if (!data)
 		return 1;
 
+	if (size > 64) {
+		hid_warn(hdev, "invalid size value (%d) for picolcd raw event\n",
+				size);
+		return 0;
+	}
+
 	if (report->id == REPORT_KEY_STATE) {
 		if (data->input_keys)
 			ret = picolcd_raw_keypad(data, report, raw_data+1, size-1);

From 11677bd1cc851c9dda154095f62d93b2c34afc9c Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Wed, 27 Aug 2014 09:12:24 +0200
Subject: [PATCH 0751/1185] HID: magicmouse: sanity check report size in
 raw_event() callback

commit c54def7bd64d7c0b6993336abcffb8444795bf38 upstream.

The report passed to us from transport driver could potentially be
arbitrarily large, therefore we better sanity-check it so that
magicmouse_emit_touch() gets only valid values of raw_id.

Reported-by: Steven Vittitoe <scvitti@google.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/hid-magicmouse.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index 5bc37343eb22..c24f3dfd9367 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -290,6 +290,11 @@ static int magicmouse_raw_event(struct hid_device *hdev,
 		if (size < 4 || ((size - 4) % 9) != 0)
 			return 0;
 		npoints = (size - 4) / 9;
+		if (npoints > 15) {
+			hid_warn(hdev, "invalid size value (%d) for TRACKPAD_REPORT_ID\n",
+					size);
+			return 0;
+		}
 		msc->ntouches = 0;
 		for (ii = 0; ii < npoints; ii++)
 			magicmouse_emit_touch(msc, ii, data + ii * 9 + 4);
@@ -307,6 +312,11 @@ static int magicmouse_raw_event(struct hid_device *hdev,
 		if (size < 6 || ((size - 6) % 8) != 0)
 			return 0;
 		npoints = (size - 6) / 8;
+		if (npoints > 15) {
+			hid_warn(hdev, "invalid size value (%d) for MOUSE_REPORT_ID\n",
+					size);
+			return 0;
+		}
 		msc->ntouches = 0;
 		for (ii = 0; ii < npoints; ii++)
 			magicmouse_emit_touch(msc, ii, data + ii * 8 + 6);

From 0fbcd4c2187ebb4fdba5f7a6a15992458c634cda Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Fri, 22 Aug 2014 16:16:05 -0400
Subject: [PATCH 0752/1185] HID: logitech-dj: prevent false errors to be shown

commit 5abfe85c1d4694d5d4bbd13ecc166262b937adf0 upstream.

Commit "HID: logitech: perform bounds checking on device_id early
enough" unfortunately leaks some errors to dmesg which are not real
ones:
- if the report is not a DJ one, then there is no point in checking
  the device_id
- the receiver (index 0) can also receive some notifications which
  can be safely ignored given the current implementation

Move the report_id test out in front of the device index check, and do
not print an error when the receiver itself is notified.

Fixes: ad3e14d7c5268c2e24477c6ef54bbdf88add5d36

Reported-and-tested-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/hid-logitech-dj.c | 43 ++++++++++++++++++++---------------
 drivers/hid/hid-logitech-dj.h |  1 +
 2 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index 51b1a5088c0d..d4c6d9f85ca5 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -679,7 +679,6 @@ static int logi_dj_raw_event(struct hid_device *hdev,
 	struct dj_receiver_dev *djrcv_dev = hid_get_drvdata(hdev);
 	struct dj_report *dj_report = (struct dj_report *) data;
 	unsigned long flags;
-	bool report_processed = false;
 
 	dbg_hid("%s, size:%d\n", __func__, size);
 
@@ -706,34 +705,42 @@ static int logi_dj_raw_event(struct hid_device *hdev,
 	 * device (via hid_input_report() ) and return 1 so hid-core does not do
 	 * anything else with it.
 	 */
+
+	/* case 1) */
+	if (data[0] != REPORT_ID_DJ_SHORT)
+		return false;
+
 	if ((dj_report->device_index < DJ_DEVICE_INDEX_MIN) ||
 	    (dj_report->device_index > DJ_DEVICE_INDEX_MAX)) {
-		dev_err(&hdev->dev, "%s: invalid device index:%d\n",
+		/*
+		 * Device index is wrong, bail out.
+		 * This driver can ignore safely the receiver notifications,
+		 * so ignore those reports too.
+		 */
+		if (dj_report->device_index != DJ_RECEIVER_INDEX)
+			dev_err(&hdev->dev, "%s: invalid device index:%d\n",
 				__func__, dj_report->device_index);
 		return false;
 	}
 
 	spin_lock_irqsave(&djrcv_dev->lock, flags);
-	if (dj_report->report_id == REPORT_ID_DJ_SHORT) {
-		switch (dj_report->report_type) {
-		case REPORT_TYPE_NOTIF_DEVICE_PAIRED:
-		case REPORT_TYPE_NOTIF_DEVICE_UNPAIRED:
-			logi_dj_recv_queue_notification(djrcv_dev, dj_report);
-			break;
-		case REPORT_TYPE_NOTIF_CONNECTION_STATUS:
-			if (dj_report->report_params[CONNECTION_STATUS_PARAM_STATUS] ==
-			    STATUS_LINKLOSS) {
-				logi_dj_recv_forward_null_report(djrcv_dev, dj_report);
-			}
-			break;
-		default:
-			logi_dj_recv_forward_report(djrcv_dev, dj_report);
+	switch (dj_report->report_type) {
+	case REPORT_TYPE_NOTIF_DEVICE_PAIRED:
+	case REPORT_TYPE_NOTIF_DEVICE_UNPAIRED:
+		logi_dj_recv_queue_notification(djrcv_dev, dj_report);
+		break;
+	case REPORT_TYPE_NOTIF_CONNECTION_STATUS:
+		if (dj_report->report_params[CONNECTION_STATUS_PARAM_STATUS] ==
+		    STATUS_LINKLOSS) {
+			logi_dj_recv_forward_null_report(djrcv_dev, dj_report);
 		}
-		report_processed = true;
+		break;
+	default:
+		logi_dj_recv_forward_report(djrcv_dev, dj_report);
 	}
 	spin_unlock_irqrestore(&djrcv_dev->lock, flags);
 
-	return report_processed;
+	return true;
 }
 
 static int logi_dj_probe(struct hid_device *hdev,
diff --git a/drivers/hid/hid-logitech-dj.h b/drivers/hid/hid-logitech-dj.h
index 4a4000340ce1..daeb0aa4bee9 100644
--- a/drivers/hid/hid-logitech-dj.h
+++ b/drivers/hid/hid-logitech-dj.h
@@ -27,6 +27,7 @@
 
 #define DJ_MAX_PAIRED_DEVICES			6
 #define DJ_MAX_NUMBER_NOTIFICATIONS		8
+#define DJ_RECEIVER_INDEX			0
 #define DJ_DEVICE_INDEX_MIN 			1
 #define DJ_DEVICE_INDEX_MAX 			6
 

From da40a1656503bf7883d41ff02760475810ec794f Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 27 Aug 2014 18:41:19 +0200
Subject: [PATCH 0753/1185] drm/i915: Remove bogus __init annotation from DMI
 callbacks

commit bbe1c2740d3a25aa1dbe5d842d2ff09cddcdde0a upstream.

The __init annotations for the DMI callback functions are wrong as this
code can be called even after the module has been initialized, e.g. like
this:

  # echo 1 > /sys/bus/pci/devices/0000:00:02.0/remove
  # modprobe i915
  # echo 1 > /sys/bus/pci/rescan

The first command will remove the PCI device from the kernel's device
list so the second command won't see it right away. But as it registers
a PCI driver it'll see it on the third command. If the system happens to
match one of the DMI table entries we'll try to call a function in long
released memory and generate an Oops, at best.

Fix this by removing the bogus annotation.

Modpost should have caught that one but it ignores section reference
mismatches from the .rodata section. :/

Fixes: 25e341cfc33d ("drm/i915: quirk away broken OpRegion VBT")
Fixes: 8ca4013d702d ("CHROMIUM: i915: Add DMI override to skip CRT...")
Fixes: 425d244c8670 ("drm/i915: ignore LVDS on intel graphics systems...")
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Duncan Laurie <dlaurie@chromium.org>
Cc: Jarod Wilson <jarod@redhat.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>	# Can modpost be fixed?
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_bios.c | 2 +-
 drivers/gpu/drm/i915/intel_crt.c  | 2 +-
 drivers/gpu/drm/i915/intel_lvds.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index 95070b2124c6..49acec155046 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -657,7 +657,7 @@ init_vbt_defaults(struct drm_i915_private *dev_priv)
 	DRM_DEBUG_KMS("Set default to SSC at %dMHz\n", dev_priv->lvds_ssc_freq);
 }
 
-static int __init intel_no_opregion_vbt_callback(const struct dmi_system_id *id)
+static int intel_no_opregion_vbt_callback(const struct dmi_system_id *id)
 {
 	DRM_DEBUG_KMS("Falling back to manually reading VBT from "
 		      "VBIOS ROM for %s\n",
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 4a809969c5ac..53435a9d847e 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -702,7 +702,7 @@ static const struct drm_encoder_funcs intel_crt_enc_funcs = {
 	.destroy = intel_encoder_destroy,
 };
 
-static int __init intel_no_crt_dmi_callback(const struct dmi_system_id *id)
+static int intel_no_crt_dmi_callback(const struct dmi_system_id *id)
 {
 	DRM_INFO("Skipping CRT initialization for %s\n", id->ident);
 	return 1;
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index f77d42f74427..08e8e18b3f85 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -694,7 +694,7 @@ static const struct drm_encoder_funcs intel_lvds_enc_funcs = {
 	.destroy = intel_encoder_destroy,
 };
 
-static int __init intel_no_lvds_dmi_callback(const struct dmi_system_id *id)
+static int intel_no_lvds_dmi_callback(const struct dmi_system_id *id)
 {
 	DRM_INFO("Skipping LVDS initialization for %s\n", id->ident);
 	return 1;

From b840e3f1733cf7e19895c95ffcc14eeb5a41bc79 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 8 Sep 2014 17:43:01 +0300
Subject: [PATCH 0754/1185] drm/i915: Wait for vblank before enabling the TV
 encoder
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 7a98948f3b536ca9a077e84966ddc0e9f53726df upstream.

The vblank waits in intel_tv_detect_type() are timing out for some
reason. This is a regression caused by removing seemingly useless vblank
waits from the modeset sequence in:

 commit 56ef52cad5e37fca89638e4bad598a994ecc3d9f
 Author: Ville Syrjälä <ville.syrjala@linux.intel.com>
 Date:   Thu May 8 19:23:15 2014 +0300

    drm/i915: Kill vblank waits after pipe enable on gmch platforms

So it turns out they weren't all entirely useless. Apparently the pipe
has to go through one full frame before we enable the TV port. Add a
vblank wait to intel_enable_tv() to make sure that happens.

Another approach was attempted by placing the vblank wait just after
enabling the port. The theory behind that attempt was that we need to
let the port stay enabled for one full frame before disabling it again
during load detection. But that didn't work, and we definitely must
have the vblank wait before enabling the port.

Cc: Alan Bartlett <ajb@elrepo.org>
Tested-by: Alan Bartlett <ajb@elrepo.org>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=79311
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_tv.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index a202d8d08c56..7c4e3126df27 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -856,6 +856,10 @@ intel_enable_tv(struct intel_encoder *encoder)
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	/* Prevents vblank waits from timing out in intel_tv_detect_type() */
+	intel_wait_for_vblank(encoder->base.dev,
+			      to_intel_crtc(encoder->base.crtc)->pipe);
+
 	I915_WRITE(TV_CTL, I915_READ(TV_CTL) | TV_ENC_ENABLE);
 }
 

From d287fccb8d5d5ece2b085ac4119e4a4cb7ffbdaa Mon Sep 17 00:00:00 2001
From: "Y.C. Chen" <yc_chen@aspeedtech.com>
Date: Wed, 10 Sep 2014 12:07:54 +0800
Subject: [PATCH 0755/1185] drm/ast: AST2000 cannot be detected correctly

commit 83502a5d34386f7c6973bc70e1c423f55f5a2e3a upstream.

A typo (the literal 2000 was assigned instead of the AST2000 constant)
prevents the AST2000 from being detected correctly.

Signed-off-by: Y.C. Chen <yc_chen@aspeedtech.com>
Reviewed-by: Egbert Eich <eich@suse.de>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/ast/ast_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index f60fd7bd1183..96f874a508e2 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -100,7 +100,7 @@ static int ast_detect_chip(struct drm_device *dev)
 			}
 			ast->vga2_clone = false;
 		} else {
-			ast->chip = 2000;
+			ast->chip = AST2000;
 			DRM_INFO("AST 2000 detected\n");
 		}
 	}

From 93ba9fc346f809e3fd32883219bffbdc04fd9a3c Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Thu, 28 Aug 2014 11:53:23 +0200
Subject: [PATCH 0756/1185] drm/vmwgfx: Fix a potential infinite spin waiting
 for fifo idle

commit f01ea0c3d9db536c64d47922716d8b3b8f21d850 upstream.

The code waiting for fifo idle was incorrect and could possibly spin
forever under certain circumstances.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reported-by: Mark Sheldon <markshel@vmware.com>
Reviewed-by: Jakob Bornecrantz <jakob@vmware.com>
Reviewed-by: Mark Sheldon <markshel@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 3eb148667d63..89664933861f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -163,8 +163,9 @@ void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 
 	mutex_lock(&dev_priv->hw_mutex);
 
+	vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC);
 	while (vmw_read(dev_priv, SVGA_REG_BUSY) != 0)
-		vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC);
+		;
 
 	dev_priv->last_read_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
 

From 2d144294d75ce6a0715e58608eff6d96c6e6ee0c Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 8 Sep 2014 13:55:51 -0400
Subject: [PATCH 0757/1185] drm/radeon: add connector quirk for fujitsu board

commit 1952f24d0fa6292d65f886887af87ba8ac79b3ba upstream.

Vbios connector table lists non-existent VGA port.

Bug:
https://bugs.freedesktop.org/show_bug.cgi?id=83184

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/radeon_atombios.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 43a3303995f8..ba2ab9a9b988 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -463,6 +463,13 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev,
 		}
 	}
 
+	/* Fujitsu D3003-S2 board lists DVI-I as DVI-I and VGA */
+	if ((dev->pdev->device == 0x9805) &&
+	    (dev->pdev->subsystem_vendor == 0x1734) &&
+	    (dev->pdev->subsystem_device == 0x11bd)) {
+		if (*connector_type == DRM_MODE_CONNECTOR_VGA)
+			return false;
+	}
 
 	return true;
 }

From 9f9e0bc250e49796aac5e50abcc55b73590a57ec Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Sun, 20 Jul 2014 03:38:53 +0400
Subject: [PATCH 0758/1185] xtensa: replace IOCTL code definitions with
 constants

commit f61bf8e7d19e0a3456a7a9ed97c399e4353698dc upstream.

This fixes userspace code that builds on other architectures but fails
on xtensa due to references to structures that other architectures don't
refer to. E.g. this fixes the following issue with python-2.7.8:

  python-2.7.8/Modules/termios.c:861:25: error: invalid application
     of 'sizeof' to incomplete type 'struct serial_multiport_struct'
     {"TIOCSERGETMULTI", TIOCSERGETMULTI},
  python-2.7.8/Modules/termios.c:870:25: error: invalid application
     of 'sizeof' to incomplete type 'struct serial_multiport_struct'
     {"TIOCSERSETMULTI", TIOCSERSETMULTI},
  python-2.7.8/Modules/termios.c:900:24: error: invalid application
     of 'sizeof' to incomplete type 'struct tty_struct'
     {"TIOCTTYGSTRUCT", TIOCTTYGSTRUCT},

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/xtensa/include/uapi/asm/ioctls.h | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/xtensa/include/uapi/asm/ioctls.h b/arch/xtensa/include/uapi/asm/ioctls.h
index b4cb1100c0fb..a47909f0c34b 100644
--- a/arch/xtensa/include/uapi/asm/ioctls.h
+++ b/arch/xtensa/include/uapi/asm/ioctls.h
@@ -28,17 +28,17 @@
 #define TCSETSW		0x5403
 #define TCSETSF		0x5404
 
-#define TCGETA		_IOR('t', 23, struct termio)
-#define TCSETA		_IOW('t', 24, struct termio)
-#define TCSETAW		_IOW('t', 25, struct termio)
-#define TCSETAF		_IOW('t', 28, struct termio)
+#define TCGETA		0x80127417	/* _IOR('t', 23, struct termio) */
+#define TCSETA		0x40127418	/* _IOW('t', 24, struct termio) */
+#define TCSETAW		0x40127419	/* _IOW('t', 25, struct termio) */
+#define TCSETAF		0x4012741C	/* _IOW('t', 28, struct termio) */
 
 #define TCSBRK		_IO('t', 29)
 #define TCXONC		_IO('t', 30)
 #define TCFLSH		_IO('t', 31)
 
-#define TIOCSWINSZ	_IOW('t', 103, struct winsize)
-#define TIOCGWINSZ	_IOR('t', 104, struct winsize)
+#define TIOCSWINSZ	0x40087467	/* _IOW('t', 103, struct winsize) */
+#define TIOCGWINSZ	0x80087468	/* _IOR('t', 104, struct winsize) */
 #define	TIOCSTART	_IO('t', 110)		/* start output, like ^Q */
 #define	TIOCSTOP	_IO('t', 111)		/* stop output, like ^S */
 #define TIOCOUTQ        _IOR('t', 115, int)     /* output queue size */
@@ -88,7 +88,6 @@
 #define TIOCSETD	_IOW('T', 35, int)
 #define TIOCGETD	_IOR('T', 36, int)
 #define TCSBRKP		_IOW('T', 37, int)   /* Needed for POSIX tcsendbreak()*/
-#define TIOCTTYGSTRUCT	_IOR('T', 38, struct tty_struct) /* For debugging only*/
 #define TIOCSBRK	_IO('T', 39) 	     /* BSD compatibility */
 #define TIOCCBRK	_IO('T', 40)	     /* BSD compatibility */
 #define TIOCGSID	_IOR('T', 41, pid_t) /* Return the session ID of FD*/
@@ -114,8 +113,10 @@
 #define TIOCSERGETLSR   _IOR('T', 89, unsigned int) /* Get line status reg. */
   /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
 # define TIOCSER_TEMT    0x01		     /* Transmitter physically empty */
-#define TIOCSERGETMULTI _IOR('T', 90, struct serial_multiport_struct) /* Get multiport config  */
-#define TIOCSERSETMULTI _IOW('T', 91, struct serial_multiport_struct) /* Set multiport config */
+#define TIOCSERGETMULTI 0x80a8545a /* Get multiport config  */
+			/* _IOR('T', 90, struct serial_multiport_struct) */
+#define TIOCSERSETMULTI 0x40a8545b /* Set multiport config */
+			/* _IOW('T', 91, struct serial_multiport_struct) */
 
 #define TIOCMIWAIT	_IO('T', 92) /* wait for a change on serial input line(s) */
 #define TIOCGICOUNT	0x545D	/* read serial port inline interrupt counts */

From 3659bb266d2b84f868e31fd6f94ee338322aa5ff Mon Sep 17 00:00:00 2001
From: Alan Douglas <adouglas@cadence.com>
Date: Wed, 23 Jul 2014 14:06:40 +0400
Subject: [PATCH 0759/1185] xtensa: fix address checks in
 dma_{alloc,free}_coherent

commit 1ca49463c44c970b1ab1d71b0f268bfdf8427a7e upstream.

Virtual address is translated to the XCHAL_KSEG_CACHED region in the
dma_free_coherent, but is checked to be in the 0...XCHAL_KSEG_SIZE
range.

Change check for end of the range from 'addr >= X' to 'addr > X - 1' to
handle the case of X == 0.

Replace 'if (C) BUG();' construct with 'BUG_ON(C);'.

Signed-off-by: Alan Douglas <adouglas@cadence.com>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/xtensa/kernel/pci-dma.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index 2d9cc6dbfd78..e8b76b8e4b29 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -49,9 +49,8 @@ dma_alloc_coherent(struct device *dev,size_t size,dma_addr_t *handle,gfp_t flag)
 
 	/* We currently don't support coherent memory outside KSEG */
 
-	if (ret < XCHAL_KSEG_CACHED_VADDR
-	    || ret >= XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE)
-		BUG();
+	BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR ||
+	       ret > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);
 
 
 	if (ret != 0) {
@@ -68,10 +67,11 @@ EXPORT_SYMBOL(dma_alloc_coherent);
 void dma_free_coherent(struct device *hwdev, size_t size,
 			 void *vaddr, dma_addr_t dma_handle)
 {
-	long addr=(long)vaddr+XCHAL_KSEG_CACHED_VADDR-XCHAL_KSEG_BYPASS_VADDR;
+	unsigned long addr = (unsigned long)vaddr +
+		XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
 
-	if (addr < 0 || addr >= XCHAL_KSEG_SIZE)
-		BUG();
+	BUG_ON(addr < XCHAL_KSEG_CACHED_VADDR ||
+	       addr > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);
 
 	free_pages(addr, get_order(size));
 }

From 86403882943f3da80f6eac728c13b853e3cace5a Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Sun, 27 Jul 2014 07:23:41 +0400
Subject: [PATCH 0760/1185] xtensa: fix access to THREAD_RA/THREAD_SP/THREAD_DS

commit 52247123749cc3cbc30168b33ad8c69515c96d23 upstream.

With SMP and a lot of debug options enabled task_struct::thread gets out
of reach of s32i/l32i instructions with base pointing at task_struct,
breaking build with the following messages:

  arch/xtensa/kernel/entry.S: Assembler messages:
  arch/xtensa/kernel/entry.S:1002: Error: operand 3 of 'l32i.n' has invalid value '1048'
  arch/xtensa/kernel/entry.S:1831: Error: operand 3 of 's32i.n' has invalid value '1040'
  arch/xtensa/kernel/entry.S:1832: Error: operand 3 of 's32i.n' has invalid value '1044'

Change base to point to task_struct::thread in such cases.
Don't use a10 in _switch_to to save/restore prev pointer as a2 is not
clobbered.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/xtensa/include/asm/uaccess.h |  5 +++++
 arch/xtensa/kernel/entry.S        | 12 ++++++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h
index fd686dc45d1a..c7211e7e182d 100644
--- a/arch/xtensa/include/asm/uaccess.h
+++ b/arch/xtensa/include/asm/uaccess.h
@@ -52,7 +52,12 @@
  */
 	.macro	get_fs	ad, sp
 	GET_CURRENT(\ad,\sp)
+#if THREAD_CURRENT_DS > 1020
+	addi	\ad, \ad, TASK_THREAD
+	l32i	\ad, \ad, THREAD_CURRENT_DS - TASK_THREAD
+#else
 	l32i	\ad, \ad, THREAD_CURRENT_DS
+#endif
 	.endm
 
 /*
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index aa7f9add7d77..9fd69b2a22b4 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1961,7 +1961,6 @@ ENTRY(_switch_to)
 
 	entry	a1, 16
 
-	mov	a10, a2			# preserve 'prev' (a2)
 	mov	a11, a3			# and 'next' (a3)
 
 	l32i	a4, a2, TASK_THREAD_INFO
@@ -1969,8 +1968,14 @@ ENTRY(_switch_to)
 
 	save_xtregs_user a4 a6 a8 a9 a12 a13 THREAD_XTREGS_USER
 
-	s32i	a0, a10, THREAD_RA	# save return address
-	s32i	a1, a10, THREAD_SP	# save stack pointer
+#if THREAD_RA > 1020 || THREAD_SP > 1020
+	addi	a10, a2, TASK_THREAD
+	s32i	a0, a10, THREAD_RA - TASK_THREAD	# save return address
+	s32i	a1, a10, THREAD_SP - TASK_THREAD	# save stack pointer
+#else
+	s32i	a0, a2, THREAD_RA	# save return address
+	s32i	a1, a2, THREAD_SP	# save stack pointer
+#endif
 
 	/* Disable ints while we manipulate the stack pointer. */
 
@@ -2011,7 +2016,6 @@ ENTRY(_switch_to)
 	load_xtregs_user a5 a6 a8 a9 a12 a13 THREAD_XTREGS_USER
 
 	wsr	a14, ps
-	mov	a2, a10			# return 'prev'
 	rsync
 
 	retw

From af3a54ffc0af4726bf8898a16b339f596fddd89a Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Mon, 21 Jul 2014 22:01:51 +0400
Subject: [PATCH 0761/1185] xtensa: fix TLBTEMP_BASE_2 region handling in
 fast_second_level_miss

commit 7128039fe2dd3d59da9e4ffa036f3aaa3ba87b9f upstream.

Current definition of TLBTEMP_BASE_2 is always 32K above the
TLBTEMP_BASE_1, whereas fast_second_level_miss handler for the TLBTEMP
region analyzes virtual address bit (PAGE_SHIFT + DCACHE_ALIAS_ORDER)
to determine TLBTEMP region where the fault happened. The size of the
TLBTEMP region is also checked incorrectly: not 64K, but twice data
cache way size (which may as well be less than the instruction cache
way size).

Fix TLBTEMP_BASE_2 to be TLBTEMP_BASE_1 + data cache way size.
Provide TLBTEMP_SIZE that is the greater of doubled data cache way size or
the instruction cache way size, and use it to determine if the second
level TLB miss occurred in the TLBTEMP region.

Practical occurrence of page faults in the TLBTEMP area is extremely
rare, this code can be tested by deletion of all w[di]tlb instructions
in the tlbtemp_mapping region.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/xtensa/include/asm/pgtable.h | 7 ++++++-
 arch/xtensa/kernel/entry.S        | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index d7546c94da52..385efb23ddce 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -68,7 +68,12 @@
 #define VMALLOC_START		0xC0000000
 #define VMALLOC_END		0xC7FEFFFF
 #define TLBTEMP_BASE_1		0xC7FF0000
-#define TLBTEMP_BASE_2		0xC7FF8000
+#define TLBTEMP_BASE_2		(TLBTEMP_BASE_1 + DCACHE_WAY_SIZE)
+#if 2 * DCACHE_WAY_SIZE > ICACHE_WAY_SIZE
+#define TLBTEMP_SIZE		(2 * DCACHE_WAY_SIZE)
+#else
+#define TLBTEMP_SIZE		ICACHE_WAY_SIZE
+#endif
 
 /*
  * Xtensa Linux config PTE layout (when present):
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 9fd69b2a22b4..60dece2776ef 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1703,7 +1703,7 @@ ENTRY(fast_second_level_miss)
 	rsr	a0, excvaddr
 	bltu	a0, a3, 2f
 
-	addi	a1, a0, -(2 << (DCACHE_ALIAS_ORDER + PAGE_SHIFT))
+	addi	a1, a0, -TLBTEMP_SIZE
 	bgeu	a1, a3, 2f
 
 	/* Check if we have to restore an ITLB mapping. */

From 3b4b5a08bdba78dac8d1cbc1505d82b0c8abff2b Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Thu, 31 Jul 2014 22:40:57 +0400
Subject: [PATCH 0762/1185] xtensa: fix a6 and a7 handling in
 fast_syscall_xtensa

commit d1b6ba82a50cecf94be540a3a153aa89d97511a0 upstream.

Remove restoring a6 on some return paths and instead modify and restore
it in a single place, using symbolic name.
Correctly restore a7 from PT_AREG7 in case of illegal a6 value.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/xtensa/kernel/entry.S | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 60dece2776ef..6e53174f8556 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1121,9 +1121,8 @@ ENTRY(fast_syscall_xtensa)
 	movi	a7, 4			# sizeof(unsigned int)
 	access_ok a3, a7, a0, a2, .Leac	# a0: scratch reg, a2: sp
 
-	addi	a6, a6, -1		# assuming SYS_XTENSA_ATOMIC_SET = 1
-	_bgeui	a6, SYS_XTENSA_COUNT - 1, .Lill
-	_bnei	a6, SYS_XTENSA_ATOMIC_CMP_SWP - 1, .Lnswp
+	_bgeui	a6, SYS_XTENSA_COUNT, .Lill
+	_bnei	a6, SYS_XTENSA_ATOMIC_CMP_SWP, .Lnswp
 
 	/* Fall through for ATOMIC_CMP_SWP. */
 
@@ -1135,27 +1134,26 @@ TRY	s32i	a5, a3, 0		# different, modify value
 	l32i	a7, a2, PT_AREG7	# restore a7
 	l32i	a0, a2, PT_AREG0	# restore a0
 	movi	a2, 1			# and return 1
-	addi	a6, a6, 1		# restore a6 (really necessary?)
 	rfe
 
 1:	l32i	a7, a2, PT_AREG7	# restore a7
 	l32i	a0, a2, PT_AREG0	# restore a0
 	movi	a2, 0			# return 0 (note that we cannot set
-	addi	a6, a6, 1		# restore a6 (really necessary?)
 	rfe
 
 .Lnswp:	/* Atomic set, add, and exg_add. */
 
 TRY	l32i	a7, a3, 0		# orig
+	addi	a6, a6, -SYS_XTENSA_ATOMIC_SET
 	add	a0, a4, a7		# + arg
 	moveqz	a0, a4, a6		# set
+	addi	a6, a6, SYS_XTENSA_ATOMIC_SET
 TRY	s32i	a0, a3, 0		# write new value
 
 	mov	a0, a2
 	mov	a2, a7
 	l32i	a7, a0, PT_AREG7	# restore a7
 	l32i	a0, a0, PT_AREG0	# restore a0
-	addi	a6, a6, 1		# restore a6 (really necessary?)
 	rfe
 
 CATCH
@@ -1164,7 +1162,7 @@ CATCH
 	movi	a2, -EFAULT
 	rfe
 
-.Lill:	l32i	a7, a2, PT_AREG0	# restore a7
+.Lill:	l32i	a7, a2, PT_AREG7	# restore a7
 	l32i	a0, a2, PT_AREG0	# restore a0
 	movi	a2, -EINVAL
 	rfe

From c2360c11fc2805fe8cd66ce41b51654e7159afcf Mon Sep 17 00:00:00 2001
From: Greg KH <gregkh@linuxfoundation.org>
Date: Fri, 15 Aug 2014 15:22:21 +0800
Subject: [PATCH 0763/1185] USB: serial: pl2303: add device id for ztek device

commit 91fcb1ce420e0a5f8d92d556d7008a78bc6ce1eb upstream.

This adds a new device id to the pl2303 driver for the ZTEK device.

Reported-by: Mike Chu <Mike-Chu@prolific.com.tw>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/pl2303.c | 1 +
 drivers/usb/serial/pl2303.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index a0b58e252073..de3e15d8eb10 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -47,6 +47,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_GPRS) },
 	{ USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_HCR331) },
 	{ USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_MOTOROLA) },
+	{ USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_ZTEK) },
 	{ USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID) },
 	{ USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) },
 	{ USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID) },
diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h
index 42bc082896ac..71fd9da1d6e7 100644
--- a/drivers/usb/serial/pl2303.h
+++ b/drivers/usb/serial/pl2303.h
@@ -22,6 +22,7 @@
 #define PL2303_PRODUCT_ID_GPRS		0x0609
 #define PL2303_PRODUCT_ID_HCR331	0x331a
 #define PL2303_PRODUCT_ID_MOTOROLA	0x0307
+#define PL2303_PRODUCT_ID_ZTEK		0xe1f1
 
 #define ATEN_VENDOR_ID		0x0557
 #define ATEN_VENDOR_ID2		0x0547

From ecb80e64a68ffe4d7b2278895bb23af43a58a222 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 27 Aug 2014 11:55:18 +0200
Subject: [PATCH 0764/1185] USB: serial: fix potential stack buffer overflow

commit d979e9f9ecab04c1ecca741370e30a8a498893f5 upstream.

Make sure to verify the maximum number of endpoints per type to avoid
writing beyond the end of a stack-allocated array.

The current usb-serial implementation is limited to eight ports per
interface but failed to verify that the number of endpoints of a certain
type reported by a device did not exceed this limit.

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/usb-serial.c | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index c1032d42b9d5..a78f01571ef0 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -778,29 +778,39 @@ static int usb_serial_probe(struct usb_interface *interface,
 		if (usb_endpoint_is_bulk_in(endpoint)) {
 			/* we found a bulk in endpoint */
 			dev_dbg(ddev, "found bulk in on endpoint %d\n", i);
-			bulk_in_endpoint[num_bulk_in] = endpoint;
-			++num_bulk_in;
+			if (num_bulk_in < MAX_NUM_PORTS) {
+				bulk_in_endpoint[num_bulk_in] = endpoint;
+				++num_bulk_in;
+			}
 		}
 
 		if (usb_endpoint_is_bulk_out(endpoint)) {
 			/* we found a bulk out endpoint */
 			dev_dbg(ddev, "found bulk out on endpoint %d\n", i);
-			bulk_out_endpoint[num_bulk_out] = endpoint;
-			++num_bulk_out;
+			if (num_bulk_out < MAX_NUM_PORTS) {
+				bulk_out_endpoint[num_bulk_out] = endpoint;
+				++num_bulk_out;
+			}
 		}
 
 		if (usb_endpoint_is_int_in(endpoint)) {
 			/* we found a interrupt in endpoint */
 			dev_dbg(ddev, "found interrupt in on endpoint %d\n", i);
-			interrupt_in_endpoint[num_interrupt_in] = endpoint;
-			++num_interrupt_in;
+			if (num_interrupt_in < MAX_NUM_PORTS) {
+				interrupt_in_endpoint[num_interrupt_in] =
+						endpoint;
+				++num_interrupt_in;
+			}
 		}
 
 		if (usb_endpoint_is_int_out(endpoint)) {
 			/* we found an interrupt out endpoint */
 			dev_dbg(ddev, "found interrupt out on endpoint %d\n", i);
-			interrupt_out_endpoint[num_interrupt_out] = endpoint;
-			++num_interrupt_out;
+			if (num_interrupt_out < MAX_NUM_PORTS) {
+				interrupt_out_endpoint[num_interrupt_out] =
+						endpoint;
+				++num_interrupt_out;
+			}
 		}
 	}
 
@@ -823,8 +833,10 @@ static int usb_serial_probe(struct usb_interface *interface,
 				if (usb_endpoint_is_int_in(endpoint)) {
 					/* we found a interrupt in endpoint */
 					dev_dbg(ddev, "found interrupt in for Prolific device on separate interface\n");
-					interrupt_in_endpoint[num_interrupt_in] = endpoint;
-					++num_interrupt_in;
+					if (num_interrupt_in < MAX_NUM_PORTS) {
+						interrupt_in_endpoint[num_interrupt_in] = endpoint;
+						++num_interrupt_in;
+					}
 				}
 			}
 		}

From cf185a9503439b8b031951ebfe4fa7489f789346 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Mon, 25 Aug 2014 21:07:47 -0700
Subject: [PATCH 0765/1185] USB: sisusb: add device id for Magic Control USB
 video

commit 5b6b80aeb21091ed3030b9b6aae597d81326f1aa upstream.

I have a j5 create (JUA210) USB 2 video device and adding its device id
to SIS USB video gets it to work.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/sisusbvga/sisusb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c
index de98906f786d..0aef801edbc1 100644
--- a/drivers/usb/misc/sisusbvga/sisusb.c
+++ b/drivers/usb/misc/sisusbvga/sisusb.c
@@ -3248,6 +3248,7 @@ static const struct usb_device_id sisusb_table[] = {
 	{ USB_DEVICE(0x0711, 0x0918) },
 	{ USB_DEVICE(0x0711, 0x0920) },
 	{ USB_DEVICE(0x0711, 0x0950) },
+	{ USB_DEVICE(0x0711, 0x5200) },
 	{ USB_DEVICE(0x182d, 0x021c) },
 	{ USB_DEVICE(0x182d, 0x0269) },
 	{ }

From faff9fc04c5795b9979061b731297fae52fa2280 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 27 Aug 2014 11:55:19 +0200
Subject: [PATCH 0766/1185] USB: serial: fix potential heap buffer overflow

commit 5654699fb38512bdbfc0f892ce54fce75bdc2bab upstream.

Make sure to verify the number of ports requested by subdriver to avoid
writing beyond the end of fixed-size array in interface data.

The current usb-serial implementation is limited to eight ports per
interface but failed to verify that the number of ports requested by a
subdriver (which could have been determined from device descriptors) did
not exceed this limit.

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/usb-serial.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index a78f01571ef0..80d689f0fda9 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -876,6 +876,11 @@ static int usb_serial_probe(struct usb_interface *interface,
 			num_ports = type->num_ports;
 	}
 
+	if (num_ports > MAX_NUM_PORTS) {
+		dev_warn(ddev, "too many ports requested: %d\n", num_ports);
+		num_ports = MAX_NUM_PORTS;
+	}
+
 	serial->num_ports = num_ports;
 	serial->num_bulk_in = num_bulk_in;
 	serial->num_bulk_out = num_bulk_out;

From 0ebb8960c83270d178ab80cec2550688f6414282 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 29 Jul 2014 14:14:55 +0200
Subject: [PATCH 0767/1185] USB: option: reduce interrupt-urb logging verbosity

commit f0e4cba2534cd88476dff920727c81350130f3c5 upstream.

Do not log normal interrupt-urb shutdowns as errors.

The option driver has always been logging any nonzero interrupt-urb
status as an error, including when the urb is killed during normal
operation.

Commit 9096f1fbba91 ("USB: usb_wwan: fix potential NULL-deref at
resume") moved the interrupt urb submission from port probe and release
to open and close, thus potentially increasing the number of these
false-positive error messages dramatically.

Reported-by: Ed Butler <ressy66@ausics.net>
Tested-by: Ed Butler <ressy66@ausics.net>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 9da566a3f5c8..240c0739eefb 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1917,6 +1917,8 @@ static void option_instat_callback(struct urb *urb)
 			dev_dbg(dev, "%s: type %x req %x\n", __func__,
 				req_pkt->bRequestType, req_pkt->bRequest);
 		}
+	} else if (status == -ENOENT || status == -ESHUTDOWN) {
+		dev_dbg(dev, "%s: urb stopped: %d\n", __func__, status);
 	} else
 		dev_err(dev, "%s: error %d\n", __func__, status);
 

From 6d39643334ca8e3cd38d656546fc148b9ba4588b Mon Sep 17 00:00:00 2001
From: Brennan Ashton <bashton@brennanashton.com>
Date: Wed, 6 Aug 2014 08:46:44 -0700
Subject: [PATCH 0768/1185] USB: option: add VIA Telecom CDS7 chipset device id

commit d77302739d900bbca5e901a3b7ac48c907ee6c93 upstream.

This VIA Telecom baseband processor is used by u-blox in both the
FW2770 and FW2760 products and may be used in others as well.

This patch has been tested on both of these modem versions.

Signed-off-by: Brennan Ashton <bashton@brennanashton.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 240c0739eefb..bd5606b64613 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -494,6 +494,10 @@ static void option_instat_callback(struct urb *urb);
 #define INOVIA_VENDOR_ID			0x20a6
 #define INOVIA_SEW858				0x1105
 
+/* VIA Telecom */
+#define VIATELECOM_VENDOR_ID			0x15eb
+#define VIATELECOM_PRODUCT_CDS7			0x0001
+
 /* some devices interfaces need special handling due to a number of reasons */
 enum option_blacklist_reason {
 		OPTION_BLACKLIST_NONE = 0,
@@ -1724,6 +1728,7 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */
 	{ USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) },
+	{ USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) },
 	{ } /* Terminating entry */
 };
 MODULE_DEVICE_TABLE(usb, option_ids);

From 905836d85c4dab0f27b617eacaa13e5f2b9e2cc5 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 7 Aug 2014 16:00:13 +0200
Subject: [PATCH 0769/1185] Revert "USB: option,zte_ev: move most ZTE CDMA
 devices to zte_ev"

commit 63a901c06e3c2c45bd601916fe04e870e9ccae1e upstream.

This reverts commit 73228a0538a7 ("USB: option,zte_ev: move most ZTE
CDMA devices to zte_ev").

Move the IDs of the devices that were previously driven by the option
driver back to that driver.

As several users have reported, the zte_ev driver is causing random
disconnects as well as reconnect failures.

A closer analysis of the zte_ev setup code reveals that it consists of
standard CDC requests (SET/GET_LINE_CODING and SET_CONTROL_LINE_STATE)
but unfortunately fails to get some of those right. In particular, as
reported by Liu Lei, it fails to lower DTR/RTS on close. It also appears
that the control requests lack the interface argument.

Note that the zte_ev driver is based on code (once) distributed by ZTE
that still appears to originally have been reverse-engineered and bolted
onto the generic driver.

Since line control is already handled properly by the option driver, and
the SET/GET_LINE_CODING requests appear to be redundant (amounts to a
SET 9600 8N1), this is a first step in ultimately removing the redundant
zte_ev driver.

Note that AC2726 had already been moved back to option, and that some
IDs were in the device table of both drivers prior to the commit being
reverted.

Reported-by: Lei Liu <liu.lei78@zte.com.cn>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 24 +++++++++++++++++++++---
 drivers/usb/serial/zte_ev.c | 18 ------------------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index bd5606b64613..e47aabe0c760 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -275,8 +275,12 @@ static void option_instat_callback(struct urb *urb);
 #define ZTE_PRODUCT_MF622			0x0001
 #define ZTE_PRODUCT_MF628			0x0015
 #define ZTE_PRODUCT_MF626			0x0031
-#define ZTE_PRODUCT_MC2718			0xffe8
 #define ZTE_PRODUCT_AC2726			0xfff1
+#define ZTE_PRODUCT_CDMA_TECH			0xfffe
+#define ZTE_PRODUCT_AC8710T			0xffff
+#define ZTE_PRODUCT_MC2718			0xffe8
+#define ZTE_PRODUCT_AD3812			0xffeb
+#define ZTE_PRODUCT_MC2716			0xffed
 
 #define BENQ_VENDOR_ID				0x04a5
 #define BENQ_PRODUCT_H10			0x4068
@@ -531,10 +535,18 @@ static const struct option_blacklist_info zte_k3765_z_blacklist = {
 	.reserved = BIT(4),
 };
 
+static const struct option_blacklist_info zte_ad3812_z_blacklist = {
+	.sendsetup = BIT(0) | BIT(1) | BIT(2),
+};
+
 static const struct option_blacklist_info zte_mc2718_z_blacklist = {
 	.sendsetup = BIT(1) | BIT(2) | BIT(3) | BIT(4),
 };
 
+static const struct option_blacklist_info zte_mc2716_z_blacklist = {
+	.sendsetup = BIT(1) | BIT(2) | BIT(3),
+};
+
 static const struct option_blacklist_info huawei_cdc12_blacklist = {
 	.reserved = BIT(1) | BIT(2),
 };
@@ -1074,6 +1086,7 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_1012, 0xff) },
 	{ USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) },
 	{ USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) },
+	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6000)}, /* ZTE AC8700 */
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */
@@ -1548,13 +1561,18 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff93, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff94, 0xff, 0xff, 0xff) },
 
-	/* NOTE: most ZTE CDMA devices should be driven by zte_ev, not option */
+	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_CDMA_TECH, 0xff, 0xff, 0xff) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AC2726, 0xff, 0xff, 0xff) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AC8710T, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MC2718, 0xff, 0xff, 0xff),
 	 .driver_info = (kernel_ulong_t)&zte_mc2718_z_blacklist },
+	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AD3812, 0xff, 0xff, 0xff),
+	 .driver_info = (kernel_ulong_t)&zte_ad3812_z_blacklist },
+	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MC2716, 0xff, 0xff, 0xff),
+	 .driver_info = (kernel_ulong_t)&zte_mc2716_z_blacklist },
 	{ USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x02, 0x01) },
 	{ USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x02, 0x05) },
 	{ USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x86, 0x10) },
-	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AC2726, 0xff, 0xff, 0xff) },
 
 	{ USB_DEVICE(BENQ_VENDOR_ID, BENQ_PRODUCT_H10) },
 	{ USB_DEVICE(DLINK_VENDOR_ID, DLINK_PRODUCT_DWM_652) },
diff --git a/drivers/usb/serial/zte_ev.c b/drivers/usb/serial/zte_ev.c
index eae2c873b39f..5dbc859400c0 100644
--- a/drivers/usb/serial/zte_ev.c
+++ b/drivers/usb/serial/zte_ev.c
@@ -273,27 +273,9 @@ static void zte_ev_usb_serial_close(struct usb_serial_port *port)
 }
 
 static const struct usb_device_id id_table[] = {
-	/* AC8710, AC8710T */
-	{ USB_DEVICE_AND_INTERFACE_INFO(0x19d2, 0xffff, 0xff, 0xff, 0xff) },
-	 /* AC8700 */
-	{ USB_DEVICE_AND_INTERFACE_INFO(0x19d2, 0xfffe, 0xff, 0xff, 0xff) },
 	/* MG880 */
 	{ USB_DEVICE(0x19d2, 0xfffd) },
-	{ USB_DEVICE(0x19d2, 0xfffc) },
-	{ USB_DEVICE(0x19d2, 0xfffb) },
-	/* AC8710_V3 */
-	{ USB_DEVICE(0x19d2, 0xfff6) },
-	{ USB_DEVICE(0x19d2, 0xfff7) },
-	{ USB_DEVICE(0x19d2, 0xfff8) },
-	{ USB_DEVICE(0x19d2, 0xfff9) },
-	{ USB_DEVICE(0x19d2, 0xffee) },
-	/* AC2716, MC2716 */
-	{ USB_DEVICE_AND_INTERFACE_INFO(0x19d2, 0xffed, 0xff, 0xff, 0xff) },
-	/* AD3812 */
-	{ USB_DEVICE_AND_INTERFACE_INFO(0x19d2, 0xffeb, 0xff, 0xff, 0xff) },
-	{ USB_DEVICE(0x19d2, 0xffec) },
 	{ USB_DEVICE(0x05C6, 0x3197) },
-	{ USB_DEVICE(0x05C6, 0x6000) },
 	{ USB_DEVICE(0x05C6, 0x9008) },
 	{ },
 };

From 25cbca9af6e70d963a93150793c254d4c0040ee0 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 7 Aug 2014 16:00:14 +0200
Subject: [PATCH 0770/1185] USB: zte_ev: remove duplicate Gobi PID

commit 95be5739588c56a9327e477aa0ba3c81c5cf8631 upstream.

Remove duplicate Gobi PID 0x9008 which is already handled by the
qcserial driver since commit f05932c0caf4 ("USB: qcserial: Add extra
device IDs").

Fixes: 799ee9243d89 ("USB: serial: add zte_ev.c driver")
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/zte_ev.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/usb/serial/zte_ev.c b/drivers/usb/serial/zte_ev.c
index 5dbc859400c0..78ff536ed58e 100644
--- a/drivers/usb/serial/zte_ev.c
+++ b/drivers/usb/serial/zte_ev.c
@@ -276,7 +276,6 @@ static const struct usb_device_id id_table[] = {
 	/* MG880 */
 	{ USB_DEVICE(0x19d2, 0xfffd) },
 	{ USB_DEVICE(0x05C6, 0x3197) },
-	{ USB_DEVICE(0x05C6, 0x9008) },
 	{ },
 };
 MODULE_DEVICE_TABLE(usb, id_table);

From bcb8e88e0a769d711174a3322eede58467361642 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 7 Aug 2014 16:00:15 +0200
Subject: [PATCH 0771/1185] USB: zte_ev: remove duplicate Qualcom PID

commit 754eb21c0bbbbc4b8830a9a864b286323b84225f upstream.

Remove duplicate Qualcomm PID 0x3197 which is already handled by the
moto-modem driver since commit 6986a978eec7 ("USB: add new moto_modem
driver for some Morotola phones").

Fixes: 799ee9243d89 ("USB: serial: add zte_ev.c driver")
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/zte_ev.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/usb/serial/zte_ev.c b/drivers/usb/serial/zte_ev.c
index 78ff536ed58e..88dd32ce5224 100644
--- a/drivers/usb/serial/zte_ev.c
+++ b/drivers/usb/serial/zte_ev.c
@@ -275,7 +275,6 @@ static void zte_ev_usb_serial_close(struct usb_serial_port *port)
 static const struct usb_device_id id_table[] = {
 	/* MG880 */
 	{ USB_DEVICE(0x19d2, 0xfffd) },
-	{ USB_DEVICE(0x05C6, 0x3197) },
 	{ },
 };
 MODULE_DEVICE_TABLE(usb, id_table);

From 8256662acca1bb71dc947bf0ea6d4ff41841e685 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Thu, 28 Aug 2014 14:11:23 +0200
Subject: [PATCH 0772/1185] USB: sierra: avoid CDC class functions on "68A3"
 devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 049255f51644c1105775af228396d187402a5934 upstream.

Sierra Wireless Direct IP devices using the 68A3 product ID
can be configured for modes including a CDC ECM class function.
The known example uses interface numbers 12 and 13 for the ECM
control and data interfaces respectively, consistent with CDC
MBIM function interface numbering on other Sierra devices.

It seems cleaner to restrict this driver to the ff/ff/ff
vendor specific interfaces rather than increasing the already
long interface number blacklist.  This should be more future
proof if Sierra adds more class functions using interface
numbers not yet in the blacklist.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/sierra.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 4e4590854123..0553152bae1c 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -282,14 +282,16 @@ static const struct usb_device_id id_table[] = {
 	/* Sierra Wireless HSPA Non-Composite Device */
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x6892, 0xFF, 0xFF, 0xFF)},
 	{ USB_DEVICE(0x1199, 0x6893) },	/* Sierra Wireless Device */
-	{ USB_DEVICE(0x1199, 0x68A3), 	/* Sierra Wireless Direct IP modems */
+	/* Sierra Wireless Direct IP modems */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x68A3, 0xFF, 0xFF, 0xFF),
 	  .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
 	},
 	/* AT&T Direct IP LTE modems */
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x0F3D, 0x68AA, 0xFF, 0xFF, 0xFF),
 	  .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
 	},
-	{ USB_DEVICE(0x0f3d, 0x68A3), 	/* Airprime/Sierra Wireless Direct IP modems */
+	/* Airprime/Sierra Wireless Direct IP modems */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x0F3D, 0x68A3, 0xFF, 0xFF, 0xFF),
 	  .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
 	},
 

From 42c85f01b87932a4bf080b3c483688fa01596d34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Thu, 28 Aug 2014 15:08:16 +0200
Subject: [PATCH 0773/1185] USB: sierra: add 1199:68AA device ID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 5b3da69285c143b7ea76b3b9f73099ff1093ab73 upstream.

This VID:PID is used for some Direct IP devices behaving
identical to the already supported 0F3D:68AA devices.

Reported-by: Lars Melin <larsm17@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/sierra.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 0553152bae1c..5aaa2b675116 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -286,6 +286,9 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x68A3, 0xFF, 0xFF, 0xFF),
 	  .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
 	},
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x68AA, 0xFF, 0xFF, 0xFF),
+	  .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
+	},
 	/* AT&T Direct IP LTE modems */
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x0F3D, 0x68AA, 0xFF, 0xFF, 0xFF),
 	  .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist

From 3652104a3720a9c1d7f5c805c3391f923d57ae2c Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 18 Aug 2014 18:33:11 +0200
Subject: [PATCH 0774/1185] USB: ftdi_sio: add support for NOVITUS Bono E
 thermal printer

commit ee444609dbae8afee420c3243ce4c5f442efb622 upstream.

Add device id for NOVITUS Bono E thermal printer.

Reported-by: Emanuel Koczwara <poczta@emanuelkoczwara.pl>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/ftdi_sio.c     | 1 +
 drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 120fff399c10..4235693ba2f7 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -744,6 +744,7 @@ static struct usb_device_id id_table_combined [] = {
 	{ USB_DEVICE(FTDI_VID, FTDI_NDI_AURORA_SCU_PID),
 		.driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk },
 	{ USB_DEVICE(TELLDUS_VID, TELLDUS_TELLSTICK_PID) },
+	{ USB_DEVICE(NOVITUS_VID, NOVITUS_BONO_E_PID) },
 	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_S03_PID) },
 	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_59_PID) },
 	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_57A_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 70b0b1d88ae9..8927a5c39b00 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -836,6 +836,12 @@
 #define TELLDUS_VID			0x1781	/* Vendor ID */
 #define TELLDUS_TELLSTICK_PID		0x0C30	/* RF control dongle 433 MHz using FT232RL */
 
+/*
+ * NOVITUS printers
+ */
+#define NOVITUS_VID			0x1a28
+#define NOVITUS_BONO_E_PID		0x6010
+
 /*
  * RT Systems programming cables for various ham radios
  */

From 84ef02bb771786142db40e12239cd9fa90d3888c Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 28 Aug 2014 12:46:54 +0200
Subject: [PATCH 0775/1185] USB: zte_ev: fix removed PIDs

commit 3096691011d01cef56b243a5e65431405c07d574 upstream.

Add back some PIDs that were mistakenly removed when reverting commit
73228a0538a7 ("USB: option,zte_ev: move most ZTE CDMA devices to
zte_ev"), which apparently did more than its commit message claimed in
that it not only moved some PIDs from option to zte_ev but also added
some new ones.

Fixes: 63a901c06e3c ("Revert "USB: option,zte_ev: move most ZTE CDMA
devices to zte_ev"")

Reported-by: Lei Liu <lei35151@163.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/zte_ev.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/usb/serial/zte_ev.c b/drivers/usb/serial/zte_ev.c
index 88dd32ce5224..d6a3fbd029be 100644
--- a/drivers/usb/serial/zte_ev.c
+++ b/drivers/usb/serial/zte_ev.c
@@ -273,6 +273,14 @@ static void zte_ev_usb_serial_close(struct usb_serial_port *port)
 }
 
 static const struct usb_device_id id_table[] = {
+	{ USB_DEVICE(0x19d2, 0xffec) },
+	{ USB_DEVICE(0x19d2, 0xffee) },
+	{ USB_DEVICE(0x19d2, 0xfff6) },
+	{ USB_DEVICE(0x19d2, 0xfff7) },
+	{ USB_DEVICE(0x19d2, 0xfff8) },
+	{ USB_DEVICE(0x19d2, 0xfff9) },
+	{ USB_DEVICE(0x19d2, 0xfffb) },
+	{ USB_DEVICE(0x19d2, 0xfffc) },
 	/* MG880 */
 	{ USB_DEVICE(0x19d2, 0xfffd) },
 	{ },

From 7aeede98bdbcbca2956822ce7a871e875663c6b6 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Thu, 11 Sep 2014 13:55:48 +0300
Subject: [PATCH 0776/1185] xhci: Fix null pointer dereference if xhci
 initialization fails

commit c207e7c50f31113c24a9f536fcab1e8a256985d7 upstream.

If xhci initialization fails before the roothub bandwidth
domains (xhci->rh_bw[i]) are allocated it will oops when
trying to access rh_bw members in xhci_mem_cleanup().

Reported-by: Manuel Reimer <manuel.reimer@gmx.de>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-mem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index d007f0920126..677f032482f7 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1795,7 +1795,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci)
 	}
 
 	num_ports = HCS_MAX_PORTS(xhci->hcs_params1);
-	for (i = 0; i < num_ports; i++) {
+	for (i = 0; i < num_ports && xhci->rh_bw; i++) {
 		struct xhci_interval_bw_table *bwt = &xhci->rh_bw[i].bw_table;
 		for (j = 0; j < XHCI_MAX_INTERVAL; j++) {
 			struct list_head *ep = &bwt->interval_bw[j].endpoints;

From 1fdbb939b8b3fb23fd03ba617baca64458449966 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Thu, 11 Sep 2014 13:55:50 +0300
Subject: [PATCH 0777/1185] xhci: fix oops when xhci resumes from hibernate
 with hw lpm capable devices

commit 96044694b8511bc2b04df0776b4ba295cfe005c0 upstream.

Resuming from hibernate (S4) will restart and re-initialize xHC.
The device contexts are freed and will be re-allocated later during device reset.

Usb core will disable link pm in device resume before device reset, which will
try to change the max exit latency, accessing the device contexts before they are re-allocated.

There is no need to zero (disable) the max exit latency when disabling hw lpm
for a freshly re-initialized xHC. So check that device context exists before
doing anything. The max exit latency will be set again after device reset when usb core
enables the link pm.

Reported-by: Imre Deak <imre.deak@intel.com>
Tested-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 9a7088bc634d..10223f2b18d2 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -4407,13 +4407,21 @@ static int xhci_change_max_exit_latency(struct xhci_hcd *xhci,
 	int ret;
 
 	spin_lock_irqsave(&xhci->lock, flags);
-	if (max_exit_latency == xhci->devs[udev->slot_id]->current_mel) {
+
+	virt_dev = xhci->devs[udev->slot_id];
+
+	/*
+	 * virt_dev might not exists yet if xHC resumed from hibernate (S4) and
+	 * xHC was re-initialized. Exit latency will be set later after
+	 * hub_port_finish_reset() is done and xhci->devs[] are re-allocated
+	 */
+
+	if (!virt_dev || max_exit_latency == virt_dev->current_mel) {
 		spin_unlock_irqrestore(&xhci->lock, flags);
 		return 0;
 	}
 
 	/* Attempt to issue an Evaluate Context command to change the MEL. */
-	virt_dev = xhci->devs[udev->slot_id];
 	command = xhci->lpm_command;
 	xhci_slot_copy(xhci, command->in_ctx, virt_dev->out_ctx);
 	spin_unlock_irqrestore(&xhci->lock, flags);

From 14200eade5653f9e8bbcb1137e6ce15ae365efa7 Mon Sep 17 00:00:00 2001
From: Joe Lawrence <joe.lawrence@stratus.com>
Date: Wed, 10 Sep 2014 15:07:50 -0400
Subject: [PATCH 0778/1185] usb: hub: take hub->hdev reference when processing
 from eventlist

commit c605f3cdff53a743f6d875b76956b239deca1272 upstream.

During surprise device hotplug removal tests, it was observed that
hub_events may try to call usb_lock_device on a device that has already
been freed. Protect the usb_device by taking out a reference (under the
hub_event_lock) when hub_events pulls it off the list, returning the
reference after hub_events is finished using it.

Signed-off-by: Joe Lawrence <joe.lawrence@stratus.com>
Suggested-by: David Bulkow <david.bulkow@stratus.com> for using kref
Suggested-by: Alan Stern <stern@rowland.harvard.edu> for placement
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index a5631849017f..5ff0abc34535 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -4702,9 +4702,10 @@ static void hub_events(void)
 
 		hub = list_entry(tmp, struct usb_hub, event_list);
 		kref_get(&hub->kref);
+		hdev = hub->hdev;
+		usb_get_dev(hdev);
 		spin_unlock_irq(&hub_event_lock);
 
-		hdev = hub->hdev;
 		hub_dev = hub->intfdev;
 		intf = to_usb_interface(hub_dev);
 		dev_dbg(hub_dev, "state %d ports %d chg %04x evt %04x\n",
@@ -4919,6 +4920,7 @@ static void hub_events(void)
 		usb_autopm_put_interface(intf);
  loop_disconnected:
 		usb_unlock_device(hdev);
+		usb_put_dev(hdev);
 		kref_put(&hub->kref, hub_release);
 
         } /* end while (1) */

From 2526df8a65f984ba8da0e83011e3524fcbee10e9 Mon Sep 17 00:00:00 2001
From: Mark <markk@clara.co.uk>
Date: Thu, 11 Sep 2014 13:15:45 +0100
Subject: [PATCH 0779/1185] storage: Add single-LUN quirk for Jaz USB Adapter

commit c66f1c62e85927357e7b3f4c701614dcb5c498a2 upstream.

The Iomega Jaz USB Adapter is a SCSI-USB converter cable. The hardware
seems to be identical to e.g. the Microtech XpressSCSI, using a Shuttle/
SCM chip set. However its firmware restricts it to only work with Jaz
drives.

On connecting the cable a message like this appears four times in the log:
 reset full speed USB device number 4 using uhci_hcd

That's non-fatal but the US_FL_SINGLE_LUN quirk fixes it.

Signed-off-by: Mark Knibbs <markk@clara.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/unusual_devs.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 042c83b01046..6e3196d8fa62 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -741,6 +741,12 @@ UNUSUAL_DEV(  0x059b, 0x0001, 0x0100, 0x0100,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_SINGLE_LUN ),
 
+UNUSUAL_DEV(  0x059b, 0x0040, 0x0100, 0x0100,
+		"Iomega",
+		"Jaz USB Adapter",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_SINGLE_LUN ),
+
 /* Reported by <Hendryk.Pfeiffer@gmx.de> */
 UNUSUAL_DEV(  0x059f, 0x0643, 0x0000, 0x0000,
 		"LaCie",

From e349dba628832538041356f883f414722888492d Mon Sep 17 00:00:00 2001
From: Mark <markk@clara.co.uk>
Date: Tue, 16 Sep 2014 16:22:50 +0100
Subject: [PATCH 0780/1185] USB: storage: Add quirk for Adaptec USBConnect 2000
 USB-to-SCSI Adapter

commit 67d365a57a51fb9dece6a5ceb504aa381cae1e5b upstream.

The Adaptec USBConnect 2000 is another SCSI-USB converter which uses
Shuttle Technology/SCM Microsystems chips. The US_FL_SCM_MULT_TARG quirk is
required to use SCSI devices with ID other than 0.

I don't have a USBConnect 2000, but based on the other entries for Shuttle/
SCM-based converters this patch is very likely correct. I used 0x0000 and
0x9999 for bcdDeviceMin and bcdDeviceMax because I'm not sure which
bcdDevice value the product uses.

Signed-off-by: Mark Knibbs <markk@clara.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/unusual_devs.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 6e3196d8fa62..a9f3dee75fc1 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -101,6 +101,12 @@ UNUSUAL_DEV(  0x03f0, 0x4002, 0x0001, 0x0001,
 		"PhotoSmart R707",
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_FIX_CAPACITY),
 
+UNUSUAL_DEV(  0x03f3, 0x0001, 0x0000, 0x9999,
+		"Adaptec",
+		"USBConnect 2000",
+		USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init,
+		US_FL_SCM_MULT_TARG ),
+
 /* Reported by Sebastian Kapfer <sebastian_kapfer@gmx.net>
  * and Olaf Hering <olh@suse.de> (different bcd's, same vendor/product)
  * for USB floppies that need the SINGLE_LUN enforcement.

From cffb70c538bec3bfd64cb73a1334329da3f7fd93 Mon Sep 17 00:00:00 2001
From: Mark <markk@clara.co.uk>
Date: Tue, 16 Sep 2014 16:51:41 +0100
Subject: [PATCH 0781/1185] USB: storage: Add quirk for Ariston Technologies
 iConnect USB to SCSI adapter

commit b6a3ed677991558ce09046397a7c4d70530d15b3 upstream.

Hi,

The Ariston Technologies iConnect 025 and iConnect 050 (also known as e.g.
iSCSI-50) are SCSI-USB converters which use Shuttle Technology/SCM
Microsystems chips. Only the connectors differ; both have the same USB ID.
The US_FL_SCM_MULT_TARG quirk is required to use SCSI devices with ID other
than 0.

I don't have one of these, but based on the other entries for Shuttle/
SCM-based converters this patch is very likely correct. I used 0x0000 and
0x9999 for bcdDeviceMin and bcdDeviceMax because I'm not sure which
bcdDevice value the products use.

Signed-off-by: Mark Knibbs <markk@clara.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/unusual_devs.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index a9f3dee75fc1..b3bfc99d1eb2 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -1979,6 +1979,12 @@ UNUSUAL_DEV(  0x177f, 0x0400, 0x0000, 0x0000,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_BULK_IGNORE_TAG | US_FL_MAX_SECTORS_64 ),
 
+UNUSUAL_DEV(  0x1822, 0x0001, 0x0000, 0x9999,
+		"Ariston Technologies",
+		"iConnect USB to SCSI adapter",
+		USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init,
+		US_FL_SCM_MULT_TARG ),
+
 /* Reported by Hans de Goede <hdegoede@redhat.com>
  * These Appotech controllers are found in Picture Frames, they provide a
  * (buggy) emulation of a cdrom drive which contains the windows software

From 95ca7124d09640594ebb207fccf36d2701a6c7de Mon Sep 17 00:00:00 2001
From: Mark <markk@clara.co.uk>
Date: Wed, 17 Sep 2014 19:15:43 +0100
Subject: [PATCH 0782/1185] USB: storage: Add quirks for Entrega/Xircom USB to
 SCSI converters

commit c80b4495c61636edc58fe1ce300f09f24db28e10 upstream.

This patch adds quirks for Entrega Technologies (later Xircom PortGear) USB-
SCSI converters. They use Shuttle Technology EUSB-01/EUSB-S1 chips. The
US_FL_SCM_MULT_TARG quirk is needed to allow multiple devices on the SCSI
chain to be accessed. Without it only the (single) device with SCSI ID 0
can be used.

The standalone converter sold by Entrega had model number U1-SC25. Xircom
acquired Entrega and re-branded the product line PortGear. The PortGear USB
to SCSI Converter (model PGSCSI) is internally identical to the Entrega
product, but later models may use a different USB ID. The Entrega-branded
units have USB ID 1645:0007, as does my Xircom PGSCSI, but the Windows and
Macintosh drivers also support 085A:0028.

Entrega also sold the "Mac USB Dock", which provides two USB ports, a Mac
(8-pin mini-DIN) serial port and a SCSI port. It appears to the computer as
a four-port hub, USB-serial, and USB-SCSI converters. The USB-SCSI part may
have initially used the same ID as the standalone U1-SC25 (1645:0007), but
later production used 085A:0026.

My Xircom PortGear PGSCSI has bcdDevice=0x0100. Units with bcdDevice=0x0133
probably also exist.

This patch adds quirks for 1645:0007, 085A:0026 and 085A:0028. The Windows
driver INF file also mentions 085A:0032 "PortStation SCSI Module", but I
couldn't find any mention of that actually existing in the wild; perhaps it
was cancelled before release?

Signed-off-by: Mark Knibbs <markk@clara.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/unusual_devs.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index b3bfc99d1eb2..7f625306ea80 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -1125,6 +1125,18 @@ UNUSUAL_DEV(  0x0851, 0x1543, 0x0200, 0x0200,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_NOT_LOCKABLE),
 
+UNUSUAL_DEV(  0x085a, 0x0026, 0x0100, 0x0133,
+		"Xircom",
+		"PortGear USB-SCSI (Mac USB Dock)",
+		USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init,
+		US_FL_SCM_MULT_TARG ),
+
+UNUSUAL_DEV(  0x085a, 0x0028, 0x0100, 0x0133,
+		"Xircom",
+		"PortGear USB to SCSI Converter",
+		USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init,
+		US_FL_SCM_MULT_TARG ),
+
 /* Submitted by Jan De Luyck <lkml@kcore.org> */
 UNUSUAL_DEV(  0x08bd, 0x1100, 0x0000, 0x0000,
 		"CITIZEN",
@@ -1957,6 +1969,14 @@ UNUSUAL_DEV(  0x152d, 0x2329, 0x0100, 0x0100,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_IGNORE_RESIDUE | US_FL_SANE_SENSE ),
 
+/* Entrega Technologies U1-SC25 (later Xircom PortGear PGSCSI)
+ * and Mac USB Dock USB-SCSI */
+UNUSUAL_DEV(  0x1645, 0x0007, 0x0100, 0x0133,
+		"Entrega Technologies",
+		"USB to SCSI Converter",
+		USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init,
+		US_FL_SCM_MULT_TARG ),
+
 /* Reported by Robert Schedel <r.schedel@yahoo.de>
  * Note: this is a 'super top' device like the above 14cd/6600 device */
 UNUSUAL_DEV(  0x1652, 0x6600, 0x0201, 0x0201,

From d72f379cb2981560354de1b1e8e9c553e197d762 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 17 Sep 2014 11:23:54 -0400
Subject: [PATCH 0783/1185] USB: EHCI: unlink QHs even after the controller has
 stopped

commit 7312b5ddd47fee2356baa78c5516ef8e04eed452 upstream.

Old code in ehci-hcd tries to expedite disabling endpoints after the
controller has stopped, by destroying the endpoint's associated QH
without first unlinking the QH.  This was necessary back when the
driver wasn't so careful about keeping track of the controller's
state.

But now we are careful about it, and the driver knows that when the
controller isn't running, no unlinking delay is needed.  Furthermore,
skipping the unlink step will trigger a BUG() in qh_destroy() when the
preceding QH is released, because the link pointer will be non-NULL.

Removing the lines that skip the unlinking step and go directly to
QH_STATE_IDLE fixes the problem.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reported-by: Joe Lawrence <joe.lawrence@stratus.com>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ehci-hcd.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 4518b8189a9e..5a160063176d 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -972,8 +972,6 @@ ehci_endpoint_disable (struct usb_hcd *hcd, struct usb_host_endpoint *ep)
 	}
 
 	qh->exception = 1;
-	if (ehci->rh_state < EHCI_RH_RUNNING)
-		qh->qh_state = QH_STATE_IDLE;
 	switch (qh->qh_state) {
 	case QH_STATE_LINKED:
 	case QH_STATE_COMPLETING:

From e5330465e2dd7cf08d6129389324b32c09e16028 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Wed, 3 Sep 2014 16:42:57 -0500
Subject: [PATCH 0784/1185] usb: dwc3: omap: fix ordering for runtime pm calls

commit 81a60b7f5c143ab3cdcd9943c9b4b7c63c32fc31 upstream.

we don't want to gate clocks until our children are
done with their remove path.

Fixes: af310e9 (usb: dwc3: omap: use runtime API's to enable clocks)
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-omap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c
index 34638b92500d..cb5f8c44eb3a 100644
--- a/drivers/usb/dwc3/dwc3-omap.c
+++ b/drivers/usb/dwc3/dwc3-omap.c
@@ -395,9 +395,9 @@ static int dwc3_omap_remove(struct platform_device *pdev)
 	struct dwc3_omap	*omap = platform_get_drvdata(pdev);
 
 	dwc3_omap_disable_irqs(omap);
+	device_for_each_child(&pdev->dev, NULL, dwc3_omap_remove_core);
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
-	device_for_each_child(&pdev->dev, NULL, dwc3_omap_remove_core);
 
 	return 0;
 }

From 47415d5452b8945bfbe7e2adb3fa15658b3c3d21 Mon Sep 17 00:00:00 2001
From: Shen Guang <shenguang10@gmail.com>
Date: Wed, 8 Jan 2014 14:45:42 +0800
Subject: [PATCH 0785/1185] usb:hub set hub->change_bits when over-current
 happens

commit 08d1dec6f4054e3613f32051d9b149d4203ce0d2 upstream.

When we are doing compliance test with xHCI, we found that if we
enable CONFIG_USB_SUSPEND and plug in a bad device which causes
over-current condition to the root port, software will not be notified.
The reason is that current code doesn't set hub->change_bits in
hub_activate() when over-current happens, and then hub_events() will
not check the port status because it thinks nothing changed.
If CONFIG_USB_SUSPEND is disabled, the interrupt pipe of the hub will
report the change and set hub->event_bits, and then hub_events() will
check what events happened. In this case over-current can be detected.

Signed-off-by: Shen Guang <shenguang10@gmail.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: Frans Klaver <fransklaver@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 5ff0abc34535..b5d42fee8a84 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1165,7 +1165,8 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
 			/* Tell khubd to disconnect the device or
 			 * check for a new connection
 			 */
-			if (udev || (portstatus & USB_PORT_STAT_CONNECTION))
+			if (udev || (portstatus & USB_PORT_STAT_CONNECTION) ||
+			    (portstatus & USB_PORT_STAT_OVERCURRENT))
 				set_bit(port1, hub->change_bits);
 
 		} else if (portstatus & USB_PORT_STAT_ENABLE) {

From 648d333d7d845d4364d0fab9d31ebb48e4641bfb Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Thu, 18 Sep 2014 09:13:17 -0400
Subject: [PATCH 0786/1185] NFSv4: nfs4_state_manager() vs.
 nfs_server_remove_lists()

commit 080af20cc945d110f9912d01cf6b66f94a375b8d upstream.

There is a race between nfs4_state_manager() and
nfs_server_remove_lists() that happens during a nfsv3 mount.

The v3 mount notices there is already a super block so
nfs_server_remove_lists() is called, which uses the nfs_client_lock
spin lock to synchronize access to the client list.

At the same time nfs4_state_manager() is running through
the client list looking for work to do, using the same
lock. When nfs4_state_manager() wins the race to the
list, a v3 client pointer is found and not ignored
properly which causes the panic.

Moving some protocol checks before the state checking
avoids the panic.

Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/nfs4client.c | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 02773aab43c5..cc143ee7a56e 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -311,6 +311,16 @@ int nfs40_walk_client_list(struct nfs_client *new,
 
 	spin_lock(&nn->nfs_client_lock);
 	list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
+
+		if (pos->rpc_ops != new->rpc_ops)
+			continue;
+
+		if (pos->cl_proto != new->cl_proto)
+			continue;
+
+		if (pos->cl_minorversion != new->cl_minorversion)
+			continue;
+
 		/* If "pos" isn't marked ready, we can't trust the
 		 * remaining fields in "pos" */
 		if (pos->cl_cons_state > NFS_CS_READY) {
@@ -330,15 +340,6 @@ int nfs40_walk_client_list(struct nfs_client *new,
 		if (pos->cl_cons_state != NFS_CS_READY)
 			continue;
 
-		if (pos->rpc_ops != new->rpc_ops)
-			continue;
-
-		if (pos->cl_proto != new->cl_proto)
-			continue;
-
-		if (pos->cl_minorversion != new->cl_minorversion)
-			continue;
-
 		if (pos->cl_clientid != new->cl_clientid)
 			continue;
 
@@ -444,6 +445,16 @@ int nfs41_walk_client_list(struct nfs_client *new,
 
 	spin_lock(&nn->nfs_client_lock);
 	list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
+
+		if (pos->rpc_ops != new->rpc_ops)
+			continue;
+
+		if (pos->cl_proto != new->cl_proto)
+			continue;
+
+		if (pos->cl_minorversion != new->cl_minorversion)
+			continue;
+
 		/* If "pos" isn't marked ready, we can't trust the
 		 * remaining fields in "pos", especially the client
 		 * ID and serverowner fields.  Wait for CREATE_SESSION
@@ -469,15 +480,6 @@ int nfs41_walk_client_list(struct nfs_client *new,
 		if (pos->cl_cons_state != NFS_CS_READY)
 			continue;
 
-		if (pos->rpc_ops != new->rpc_ops)
-			continue;
-
-		if (pos->cl_proto != new->cl_proto)
-			continue;
-
-		if (pos->cl_minorversion != new->cl_minorversion)
-			continue;
-
 		if (!nfs4_match_clientids(pos, new))
 			continue;
 

From a6ac6009711ee484135051ee8d7f7bc58438f2bb Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 18 Sep 2014 11:51:32 -0400
Subject: [PATCH 0787/1185] NFSv4: Fix another bug in the close/open_downgrade
 code

commit cd9288ffaea4359d5cfe2b8d264911506aed26a4 upstream.

James Drews reports another bug whereby the NFS client is now sending
an OPEN_DOWNGRADE in a situation where it should really have sent a
CLOSE: the client is opening the file for O_RDWR, but then trying to
do a downgrade to O_RDONLY, which is not allowed by the NFSv4 spec.

Reported-by: James Drews <drews@engr.wisc.edu>
Link: http://lkml.kernel.org/r/541AD7E5.8020409@engr.wisc.edu
Fixes: aee7af356e15 (NFSv4: Fix problems with close in the presence...)
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/nfs4proc.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 25559821771b..3fc87b6f9def 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2299,23 +2299,23 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 	is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
 	is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
 	is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
-	/* Calculate the current open share mode */
-	calldata->arg.fmode = 0;
-	if (is_rdonly || is_rdwr)
-		calldata->arg.fmode |= FMODE_READ;
-	if (is_wronly || is_rdwr)
-		calldata->arg.fmode |= FMODE_WRITE;
 	/* Calculate the change in open mode */
+	calldata->arg.fmode = 0;
 	if (state->n_rdwr == 0) {
-		if (state->n_rdonly == 0) {
-			call_close |= is_rdonly || is_rdwr;
-			calldata->arg.fmode &= ~FMODE_READ;
-		}
-		if (state->n_wronly == 0) {
-			call_close |= is_wronly || is_rdwr;
-			calldata->arg.fmode &= ~FMODE_WRITE;
-		}
-	}
+		if (state->n_rdonly == 0)
+			call_close |= is_rdonly;
+		else if (is_rdonly)
+			calldata->arg.fmode |= FMODE_READ;
+		if (state->n_wronly == 0)
+			call_close |= is_wronly;
+		else if (is_wronly)
+			calldata->arg.fmode |= FMODE_WRITE;
+	} else if (is_rdwr)
+		calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
+
+	if (calldata->arg.fmode == 0)
+		call_close |= is_rdwr;
+
 	if (!nfs4_valid_open_stateid(state))
 		call_close = 0;
 	spin_unlock(&state->owner->so_lock);

From 92e40eb084aa0f91f60c1a52e0d3f75201f014b2 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 15 Aug 2014 12:11:49 +0100
Subject: [PATCH 0788/1185] ARM: 8128/1: abort: don't clear the exclusive
 monitors

commit 85868313177700d20644263a782351262d2aff84 upstream.

The ARMv6 and ARMv7 early abort handlers clear the exclusive monitors
upon entry to the kernel, but this is redundant:

  - We clear the monitors on every exception return since commit
    200b812d0084 ("Clear the exclusive monitor when returning from an
    exception"), so this is not necessary to ensure the monitors are
    cleared before returning from a fault handler.

  - Any dummy STREX will target a temporary scratch area in memory, and
    may succeed or fail without corrupting useful data. Its status value
    will not be used.

  - Any other STREX in the kernel must be preceded by an LDREX, which
    will initialise the monitors consistently and will not depend on the
    earlier state of the monitors.

Therefore we have no reason to care about the initial state of the
exclusive monitors when a data abort is taken, and clearing the monitors
prior to exception return (as we already do) is sufficient.

This patch removes the redundant clearing of the exclusive monitors from
the early abort handlers.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mm/abort-ev6.S | 6 ------
 arch/arm/mm/abort-ev7.S | 6 ------
 2 files changed, 12 deletions(-)

diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S
index 80741992a9fc..5d777a567c35 100644
--- a/arch/arm/mm/abort-ev6.S
+++ b/arch/arm/mm/abort-ev6.S
@@ -17,12 +17,6 @@
  */
 	.align	5
 ENTRY(v6_early_abort)
-#ifdef CONFIG_CPU_V6
-	sub	r1, sp, #4			@ Get unused stack location
-	strex	r0, r1, [r1]			@ Clear the exclusive monitor
-#elif defined(CONFIG_CPU_32v6K)
-	clrex
-#endif
 	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
 	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
 /*
diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S
index 703375277ba6..4812ad054214 100644
--- a/arch/arm/mm/abort-ev7.S
+++ b/arch/arm/mm/abort-ev7.S
@@ -13,12 +13,6 @@
  */
 	.align	5
 ENTRY(v7_early_abort)
-	/*
-	 * The effect of data aborts on on the exclusive access monitor are
-	 * UNPREDICTABLE. Do a CLREX to clear the state
-	 */
-	clrex
-
 	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
 	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
 

From 824a5f2628d62a196b67d6bde4a7fd43bd3f1abe Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Mon, 1 Sep 2014 17:14:29 +0100
Subject: [PATCH 0789/1185] ARM: 8133/1: use irq_set_affinity with force=false
 when migrating irqs

commit a040803a9d6b8c1876d3487a5cb69602ebcbb82c upstream.

Since commit 1dbfa187dad ("ARM: irq migration: force migration off CPU
going down") the ARM interrupt migration code on cpu offline calls
irqchip.irq_set_affinity() with the argument force=true. At the point
of this change the argument had no effect because it was not used by
any interrupt chip driver and there was no semantics defined.

This changed with commit 01f8fa4f01d8 ("genirq: Allow forcing cpu
affinity of interrupts") which made the force argument useful to route
interrupts to not yet online cpus without checking the target cpu
against the cpu online mask. The following commit ffde1de64012
("irqchip: gic: Support forced affinity setting") implemented this for
the GIC interrupt controller.

As a consequence the ARM cpu offline irq migration fails if CPU0 is
offlined, because CPU0 is still set in the affinity mask and the
validation against cpu online mask is skipped due to the force argument
being true. The following first_cpu(mask) selection always selects
CPU0 as the target.

Solve the issue by calling irq_set_affinity() with force=false from
the CPU offline irq migration code so the GIC driver validates the
affinity mask against CPU online mask and therefore removes CPU0 from
the possible target candidates.

Tested on TC2 hotplugging CPU0 in and out. Without this patch the system
locks up as the IRQs are not migrated away from CPU0.

Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/kernel/irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 9723d17b8f38..1e782bdeee49 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -163,7 +163,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
 	c = irq_data_get_irq_chip(d);
 	if (!c->irq_set_affinity)
 		pr_debug("IRQ%u: unable to set affinity\n", d->irq);
-	else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret)
+	else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret)
 		cpumask_copy(d->affinity, affinity);
 
 	return ret;

From 9b21d37838fca95c85b10abf48f3059236ad5d6c Mon Sep 17 00:00:00 2001
From: Dave Martin <dave.martin@linaro.org>
Date: Mon, 25 Nov 2013 14:54:47 +0100
Subject: [PATCH 0790/1185] ARM: 7897/1: kexec: Use the right ISA for
 relocate_new_kernel

commit e2ccba49085ab5d71b092de2a5176eb9b19cc876 upstream.

Copying a function with memcpy() and then trying to execute the
result isn't trivially portable to Thumb.

This patch modifies the kexec soft restart code to copy its
assembler trampoline relocate_new_kernel() using fncpy() instead,
so that relocate_new_kernel can be in the same ISA as the rest of
the kernel without problems.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Reported-by: Taras Kondratiuk <taras.kondratiuk@linaro.org>
Tested-by: Taras Kondratiuk <taras.kondratiuk@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Integrated-by: Liu Hua <sdu.liu@huawei.com>
Signed-off-by: Liu Hua <sdu.liu@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/kernel/machine_kexec.c   | 17 ++++++++++-------
 arch/arm/kernel/relocate_kernel.S |  8 ++++++--
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index c3ef920823b6..70ae735dec53 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -14,10 +14,11 @@
 #include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
+#include <asm/fncpy.h>
 #include <asm/mach-types.h>
 #include <asm/system_misc.h>
 
-extern const unsigned char relocate_new_kernel[];
+extern void relocate_new_kernel(void);
 extern const unsigned int relocate_new_kernel_size;
 
 extern unsigned long kexec_start_address;
@@ -133,6 +134,8 @@ void machine_kexec(struct kimage *image)
 {
 	unsigned long page_list;
 	unsigned long reboot_code_buffer_phys;
+	unsigned long reboot_entry = (unsigned long)relocate_new_kernel;
+	unsigned long reboot_entry_phys;
 	void *reboot_code_buffer;
 
 	if (num_online_cpus() > 1) {
@@ -156,18 +159,18 @@ void machine_kexec(struct kimage *image)
 
 
 	/* copy our kernel relocation code to the control code page */
-	memcpy(reboot_code_buffer,
-	       relocate_new_kernel, relocate_new_kernel_size);
+	reboot_entry = fncpy(reboot_code_buffer,
+			     reboot_entry,
+			     relocate_new_kernel_size);
+	reboot_entry_phys = (unsigned long)reboot_entry +
+		(reboot_code_buffer_phys - (unsigned long)reboot_code_buffer);
 
-
-	flush_icache_range((unsigned long) reboot_code_buffer,
-			   (unsigned long) reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
 	printk(KERN_INFO "Bye!\n");
 
 	if (kexec_reinit)
 		kexec_reinit();
 
-	soft_restart(reboot_code_buffer_phys);
+	soft_restart(reboot_entry_phys);
 }
 
 void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S
index d0cdedf4864d..95858966d84e 100644
--- a/arch/arm/kernel/relocate_kernel.S
+++ b/arch/arm/kernel/relocate_kernel.S
@@ -2,10 +2,12 @@
  * relocate_kernel.S - put the kernel image in place to boot
  */
 
+#include <linux/linkage.h>
 #include <asm/kexec.h>
 
-	.globl relocate_new_kernel
-relocate_new_kernel:
+	.align	3	/* not needed for this code, but keeps fncpy() happy */
+
+ENTRY(relocate_new_kernel)
 
 	ldr	r0,kexec_indirection_page
 	ldr	r1,kexec_start_address
@@ -79,6 +81,8 @@ kexec_mach_type:
 kexec_boot_atags:
 	.long	0x0
 
+ENDPROC(relocate_new_kernel)
+
 relocate_new_kernel_end:
 
 	.globl relocate_new_kernel_size

From fc7e5dd63936d19bbe2bb002780c06dc1cdd20a1 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 25 Sep 2014 11:56:19 +0100
Subject: [PATCH 0791/1185] ARM: 8165/1: alignment: don't break misaligned NEON
 load/store

commit 5ca918e5e3f9df4634077c06585c42bc6a8d699a upstream.

The alignment fixup incorrectly decodes faulting ARM VLDn/VSTn
instructions (where the optional alignment hint is given but incorrect)
as LDR/STR, leading to register corruption. Detect these and correctly
treat them as unhandled, so that userspace gets the fault it expects.

Reported-by: Simon Hosie <simon.hosie@arm.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mm/alignment.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 6f4585b89078..1fe0bf5c7375 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -39,6 +39,7 @@
  * This code is not portable to processors with late data abort handling.
  */
 #define CODING_BITS(i)	(i & 0x0e000000)
+#define COND_BITS(i)	(i & 0xf0000000)
 
 #define LDST_I_BIT(i)	(i & (1 << 26))		/* Immediate constant	*/
 #define LDST_P_BIT(i)	(i & (1 << 24))		/* Preindex		*/
@@ -812,6 +813,8 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 		break;
 
 	case 0x04000000:	/* ldr or str immediate */
+		if (COND_BITS(instr) == 0xf0000000) /* NEON VLDn, VSTn */
+			goto bad;
 		offset.un = OFFSET_BITS(instr);
 		handler = do_alignment_ldrstr;
 		break;

From eee78f74dc248ccdcd0e5bc83240a88243407539 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno <aurelien@aurel32.net>
Date: Sun, 20 Jul 2014 19:58:23 +0200
Subject: [PATCH 0792/1185] MIPS: ZBOOT: add missing <linux/string.h> include

commit 29593fd5a8149462ed6fad0d522234facdaee6c8 upstream.

Commit dc4d7b37 (MIPS: ZBOOT: gather string functions into string.c)
moved the string related functions into a separate file, which might
cause the following build error, depending on the configuration:

| CC      arch/mips/boot/compressed/decompress.o
| In file included from linux/arch/mips/boot/compressed/../../../../lib/decompress_unxz.c:234:0,
|                  from linux/arch/mips/boot/compressed/decompress.c:67:
| linux/arch/mips/boot/compressed/../../../../lib/xz/xz_dec_stream.c: In function 'fill_temp':
| linux/arch/mips/boot/compressed/../../../../lib/xz/xz_dec_stream.c:162:2: error: implicit declaration of function 'memcpy' [-Werror=implicit-function-declaration]
| cc1: some warnings being treated as errors
| linux/scripts/Makefile.build:308: recipe for target 'arch/mips/boot/compressed/decompress.o' failed
| make[6]: *** [arch/mips/boot/compressed/decompress.o] Error 1
| linux/arch/mips/Makefile:308: recipe for target 'vmlinuz' failed

It does not fail with the standard configuration, as when
CONFIG_DYNAMIC_DEBUG is not enabled <linux/string.h> gets included in
include/linux/dynamic_debug.h. There might be other ways for it to
get indirectly included.

We can't add the include directly in xz_dec_stream.c as some
architectures might want to use a different version for the boot/
directory (see for example arch/x86/boot/string.h).

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7420/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/boot/compressed/decompress.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c
index 2c9573098c0d..d498a1f9bccf 100644
--- a/arch/mips/boot/compressed/decompress.c
+++ b/arch/mips/boot/compressed/decompress.c
@@ -13,6 +13,7 @@
 
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/string.h>
 
 #include <asm/addrspace.h>
 

From f7feecd70f44ecddb14232de49003a4600482667 Mon Sep 17 00:00:00 2001
From: Markos Chandras <markos.chandras@imgtec.com>
Date: Tue, 16 Sep 2014 15:55:12 +0100
Subject: [PATCH 0793/1185] MIPS: mcount: Adjust stack pointer for static trace
 in MIPS32

commit 8a574cfa2652545eb95595d38ac2a0bb501af0ae upstream.

Every mcount() call in the MIPS 32-bit kernel is done as follows:

[...]
move at, ra
jal _mcount
addiu sp, sp, -8
[...]

but upon returning from the mcount() function, the stack pointer
is not adjusted properly. This is explained in detail in 58b69401c797
("MIPS: Function tracer: Fix broken function tracing").

Commit ad8c396936e3 ("MIPS: Unbreak function tracer for 64-bit kernel.")
fixed the stack manipulation for 64-bit but it didn't fix it completely
for MIPS32.

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/7792/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kernel/mcount.S | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S
index 33d067148e61..3efbf0b29c1b 100644
--- a/arch/mips/kernel/mcount.S
+++ b/arch/mips/kernel/mcount.S
@@ -123,7 +123,11 @@ NESTED(_mcount, PT_SIZE, ra)
 	 nop
 #endif
 	b	ftrace_stub
+#ifdef CONFIG_32BIT
+	 addiu sp, sp, 8
+#else
 	 nop
+#endif
 
 static_trace:
 	MCOUNT_SAVE_REGS
@@ -133,6 +137,9 @@ static_trace:
 	 move	a1, AT		/* arg2: parent's return address */
 
 	MCOUNT_RESTORE_REGS
+#ifdef CONFIG_32BIT
+	addiu sp, sp, 8
+#endif
 	.globl ftrace_stub
 ftrace_stub:
 	RETURN_BACK
@@ -181,6 +188,11 @@ NESTED(ftrace_graph_caller, PT_SIZE, ra)
 	jal	prepare_ftrace_return
 	 nop
 	MCOUNT_RESTORE_REGS
+#ifndef CONFIG_DYNAMIC_FTRACE
+#ifdef CONFIG_32BIT
+	addiu sp, sp, 8
+#endif
+#endif
 	RETURN_BACK
 	END(ftrace_graph_caller)
 

From a8c91d3c611d3ae5d3b8fbc585c4ed293d0e5519 Mon Sep 17 00:00:00 2001
From: Bob Moore <Robert.Moore@intel.com>
Date: Tue, 23 Sep 2014 10:35:47 +0800
Subject: [PATCH 0794/1185] ACPICA: Update to GPIO region handler interface.

commit 75ec6e55f1384548311a13ce4fcb39c516053314 upstream.

Changes to correct several GPIO issues:

1) The update_rule in a GPIO field definition is now ignored;
a read-modify-write operation is never performed for GPIO fields.
(Internally, this means that the field assembly/disassembly
code is completely bypassed for GPIO.)

2) The Address parameter passed to a GPIO region handler is
now the bit offset of the field from a previous Connection()
operator. Thus, it becomes a "Pin Number Index" into the
Connection() resource descriptor.

3) The bit_width parameter passed to a GPIO region handler is
now the exact bit width of the GPIO field. Thus, it can be
interpreted as "number of pins".

Overall, we can now say that the region handler interface
to GPIO handlers is a raw "bit/pin" addressed interface, not
a byte-addressed interface like the system_memory handler interface.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/acpica/aclocal.h  |  1 +
 drivers/acpi/acpica/acobject.h |  1 +
 drivers/acpi/acpica/dsfield.c  |  2 +
 drivers/acpi/acpica/evregion.c | 47 ++++++++++++++++--------
 drivers/acpi/acpica/exfield.c  | 67 ++++++++++++++++++++++++++++++++++
 drivers/acpi/acpica/exprep.c   |  2 +
 6 files changed, 104 insertions(+), 16 deletions(-)

diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index d5bfbd331bfd..95896886fc5a 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -254,6 +254,7 @@ struct acpi_create_field_info {
 	u32 field_bit_position;
 	u32 field_bit_length;
 	u16 resource_length;
+	u16 pin_number_index;
 	u8 field_flags;
 	u8 attribute;
 	u8 field_type;
diff --git a/drivers/acpi/acpica/acobject.h b/drivers/acpi/acpica/acobject.h
index cc7ab6dd724e..a47cc78ffd4f 100644
--- a/drivers/acpi/acpica/acobject.h
+++ b/drivers/acpi/acpica/acobject.h
@@ -263,6 +263,7 @@ struct acpi_object_region_field {
 	ACPI_OBJECT_COMMON_HEADER ACPI_COMMON_FIELD_INFO u16 resource_length;
 	union acpi_operand_object *region_obj;	/* Containing op_region object */
 	u8 *resource_buffer;	/* resource_template for serial regions/fields */
+	u16 pin_number_index;	/* Index relative to previous Connection/Template */
 };
 
 struct acpi_object_bank_field {
diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c
index feadeed1012d..e651d4ec7c4c 100644
--- a/drivers/acpi/acpica/dsfield.c
+++ b/drivers/acpi/acpica/dsfield.c
@@ -360,6 +360,7 @@ acpi_ds_get_field_names(struct acpi_create_field_info *info,
 			 */
 			info->resource_buffer = NULL;
 			info->connection_node = NULL;
+			info->pin_number_index = 0;
 
 			/*
 			 * A Connection() is either an actual resource descriptor (buffer)
@@ -437,6 +438,7 @@ acpi_ds_get_field_names(struct acpi_create_field_info *info,
 			}
 
 			info->field_bit_position += info->field_bit_length;
+			info->pin_number_index++;	/* Index relative to previous Connection() */
 			break;
 
 		default:
diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c
index 6555e350fc1f..8fab9262d98a 100644
--- a/drivers/acpi/acpica/evregion.c
+++ b/drivers/acpi/acpica/evregion.c
@@ -141,6 +141,7 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj,
 	union acpi_operand_object *region_obj2;
 	void *region_context = NULL;
 	struct acpi_connection_info *context;
+	acpi_physical_address address;
 
 	ACPI_FUNCTION_TRACE(ev_address_space_dispatch);
 
@@ -235,25 +236,23 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj,
 	/* We have everything we need, we can invoke the address space handler */
 
 	handler = handler_desc->address_space.handler;
-
-	ACPI_DEBUG_PRINT((ACPI_DB_OPREGION,
-			  "Handler %p (@%p) Address %8.8X%8.8X [%s]\n",
-			  &region_obj->region.handler->address_space, handler,
-			  ACPI_FORMAT_NATIVE_UINT(region_obj->region.address +
-						  region_offset),
-			  acpi_ut_get_region_name(region_obj->region.
-						  space_id)));
+	address = (region_obj->region.address + region_offset);
 
 	/*
 	 * Special handling for generic_serial_bus and general_purpose_io:
 	 * There are three extra parameters that must be passed to the
 	 * handler via the context:
-	 *   1) Connection buffer, a resource template from Connection() op.
-	 *   2) Length of the above buffer.
-	 *   3) Actual access length from the access_as() op.
+	 *   1) Connection buffer, a resource template from Connection() op
+	 *   2) Length of the above buffer
+	 *   3) Actual access length from the access_as() op
+	 *
+	 * In addition, for general_purpose_io, the Address and bit_width fields
+	 * are defined as follows:
+	 *   1) Address is the pin number index of the field (bit offset from
+	 *      the previous Connection)
+	 *   2) bit_width is the actual bit length of the field (number of pins)
 	 */
-	if (((region_obj->region.space_id == ACPI_ADR_SPACE_GSBUS) ||
-	     (region_obj->region.space_id == ACPI_ADR_SPACE_GPIO)) &&
+	if ((region_obj->region.space_id == ACPI_ADR_SPACE_GSBUS) &&
 	    context && field_obj) {
 
 		/* Get the Connection (resource_template) buffer */
@@ -262,6 +261,24 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj,
 		context->length = field_obj->field.resource_length;
 		context->access_length = field_obj->field.access_length;
 	}
+	if ((region_obj->region.space_id == ACPI_ADR_SPACE_GPIO) &&
+	    context && field_obj) {
+
+		/* Get the Connection (resource_template) buffer */
+
+		context->connection = field_obj->field.resource_buffer;
+		context->length = field_obj->field.resource_length;
+		context->access_length = field_obj->field.access_length;
+		address = field_obj->field.pin_number_index;
+		bit_width = field_obj->field.bit_length;
+	}
+
+	ACPI_DEBUG_PRINT((ACPI_DB_OPREGION,
+			  "Handler %p (@%p) Address %8.8X%8.8X [%s]\n",
+			  &region_obj->region.handler->address_space, handler,
+			  ACPI_FORMAT_NATIVE_UINT(address),
+			  acpi_ut_get_region_name(region_obj->region.
+						  space_id)));
 
 	if (!(handler_desc->address_space.handler_flags &
 	      ACPI_ADDR_HANDLER_DEFAULT_INSTALLED)) {
@@ -275,9 +292,7 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj,
 
 	/* Call the handler */
 
-	status = handler(function,
-			 (region_obj->region.address + region_offset),
-			 bit_width, value, context,
+	status = handler(function, address, bit_width, value, context,
 			 region_obj2->extra.region_context);
 
 	if (ACPI_FAILURE(status)) {
diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c
index 7d4bae71e8c6..0108d59665ab 100644
--- a/drivers/acpi/acpica/exfield.c
+++ b/drivers/acpi/acpica/exfield.c
@@ -178,6 +178,37 @@ acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state,
 		buffer = &buffer_desc->integer.value;
 	}
 
+	if ((obj_desc->common.type == ACPI_TYPE_LOCAL_REGION_FIELD) &&
+	    (obj_desc->field.region_obj->region.space_id ==
+	     ACPI_ADR_SPACE_GPIO)) {
+		/*
+		 * For GPIO (general_purpose_io), the Address will be the bit offset
+		 * from the previous Connection() operator, making it effectively a
+		 * pin number index. The bit_length is the length of the field, which
+		 * is thus the number of pins.
+		 */
+		ACPI_DEBUG_PRINT((ACPI_DB_BFIELD,
+				  "GPIO FieldRead [FROM]:  Pin %u Bits %u\n",
+				  obj_desc->field.pin_number_index,
+				  obj_desc->field.bit_length));
+
+		/* Lock entire transaction if requested */
+
+		acpi_ex_acquire_global_lock(obj_desc->common_field.field_flags);
+
+		/* Perform the read */
+
+		status = acpi_ex_access_region(obj_desc, 0,
+					       (u64 *)buffer, ACPI_READ);
+		acpi_ex_release_global_lock(obj_desc->common_field.field_flags);
+		if (ACPI_FAILURE(status)) {
+			acpi_ut_remove_reference(buffer_desc);
+		} else {
+			*ret_buffer_desc = buffer_desc;
+		}
+		return_ACPI_STATUS(status);
+	}
+
 	ACPI_DEBUG_PRINT((ACPI_DB_BFIELD,
 			  "FieldRead [TO]:   Obj %p, Type %X, Buf %p, ByteLen %X\n",
 			  obj_desc, obj_desc->common.type, buffer,
@@ -325,6 +356,42 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc,
 
 		*result_desc = buffer_desc;
 		return_ACPI_STATUS(status);
+	} else if ((obj_desc->common.type == ACPI_TYPE_LOCAL_REGION_FIELD) &&
+		   (obj_desc->field.region_obj->region.space_id ==
+		    ACPI_ADR_SPACE_GPIO)) {
+		/*
+		 * For GPIO (general_purpose_io), we will bypass the entire field
+		 * mechanism and handoff the bit address and bit width directly to
+		 * the handler. The Address will be the bit offset
+		 * from the previous Connection() operator, making it effectively a
+		 * pin number index. The bit_length is the length of the field, which
+		 * is thus the number of pins.
+		 */
+		if (source_desc->common.type != ACPI_TYPE_INTEGER) {
+			return_ACPI_STATUS(AE_AML_OPERAND_TYPE);
+		}
+
+		ACPI_DEBUG_PRINT((ACPI_DB_BFIELD,
+				  "GPIO FieldWrite [FROM]: (%s:%X), Val %.8X  [TO]:  Pin %u Bits %u\n",
+				  acpi_ut_get_type_name(source_desc->common.
+							type),
+				  source_desc->common.type,
+				  (u32)source_desc->integer.value,
+				  obj_desc->field.pin_number_index,
+				  obj_desc->field.bit_length));
+
+		buffer = &source_desc->integer.value;
+
+		/* Lock entire transaction if requested */
+
+		acpi_ex_acquire_global_lock(obj_desc->common_field.field_flags);
+
+		/* Perform the write */
+
+		status = acpi_ex_access_region(obj_desc, 0,
+					       (u64 *)buffer, ACPI_WRITE);
+		acpi_ex_release_global_lock(obj_desc->common_field.field_flags);
+		return_ACPI_STATUS(status);
 	}
 
 	/* Get a pointer to the data to be written */
diff --git a/drivers/acpi/acpica/exprep.c b/drivers/acpi/acpica/exprep.c
index 6b728aef2dca..df212fe4cf6c 100644
--- a/drivers/acpi/acpica/exprep.c
+++ b/drivers/acpi/acpica/exprep.c
@@ -479,6 +479,8 @@ acpi_status acpi_ex_prep_field_value(struct acpi_create_field_info *info)
 			obj_desc->field.resource_length = info->resource_length;
 		}
 
+		obj_desc->field.pin_number_index = info->pin_number_index;
+
 		/* Allow full data read from EC address space */
 
 		if ((obj_desc->field.region_obj->region.space_id ==

From 070fdd7aac3d8cf53f130f3fc6d02e2cc00549b0 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Tue, 26 Aug 2014 12:12:17 +0100
Subject: [PATCH 0795/1185] regmap: Fix handling of volatile registers for
 format_write() chips

commit 5844a8b9d98ec11ce1d77610daacf3f0a0e14715 upstream.

A previous over-zealous factorisation of code means that we only treat
registers as volatile if they are readable. For most devices this is fine
since normally most registers can be read and volatility implies
readability but for format_write() devices where there is no readback from
the hardware and we use volatility to mean simply uncacheability this means
that we end up treating all registers as cacheable.

A bigger refactoring of the code to clarify this is in order but as a fix
make a minimal change and only check readability when checking volatility
if there is no format_write() operation defined for the device.

Signed-off-by: Mark Brown <broonie@linaro.org>
Tested-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/regmap/regmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index d0c81d1f409c..4b5cf2e34e9a 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -114,7 +114,7 @@ bool regmap_readable(struct regmap *map, unsigned int reg)
 
 bool regmap_volatile(struct regmap *map, unsigned int reg)
 {
-	if (!regmap_readable(map, reg))
+	if (!map->format.format_write && !regmap_readable(map, reg))
 		return false;
 
 	if (map->volatile_reg)

From bdbdc4076c10ae9d3a83b37c79fc74827e776bf9 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 11 Jun 2013 23:31:12 -0300
Subject: [PATCH 0796/1185] KVM: x86: handle idiv overflow at kvm_write_tsc

commit 8915aa27d5efbb9185357175b0acf884325565f9 upstream.

It's possible for idivl to overflow (due to a large delta stored in usdiff,
which is a valid scenario).

Create an exception handler to catch the overflow exception (division by zero
is protected by vcpu->arch.virtual_tsc_khz check), and interpret it accordingly
(delta is larger than USEC_PER_SEC).

Fixes https://bugzilla.redhat.com/show_bug.cgi?id=969644

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Philipp Hahn <hahn@univention.de>
Tested-by: Philipp Hahn <hahn@univention.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/x86.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1be0a9e75d1f..e8753555f144 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1196,20 +1196,37 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	elapsed = ns - kvm->arch.last_tsc_nsec;
 
 	if (vcpu->arch.virtual_tsc_khz) {
+		int faulted = 0;
+
 		/* n.b - signed multiplication and division required */
 		usdiff = data - kvm->arch.last_tsc_write;
 #ifdef CONFIG_X86_64
 		usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
 #else
 		/* do_div() only does unsigned */
-		asm("idivl %2; xor %%edx, %%edx"
-		: "=A"(usdiff)
-		: "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz));
+		asm("1: idivl %[divisor]\n"
+		    "2: xor %%edx, %%edx\n"
+		    "   movl $0, %[faulted]\n"
+		    "3:\n"
+		    ".section .fixup,\"ax\"\n"
+		    "4: movl $1, %[faulted]\n"
+		    "   jmp  3b\n"
+		    ".previous\n"
+
+		_ASM_EXTABLE(1b, 4b)
+
+		: "=A"(usdiff), [faulted] "=r" (faulted)
+		: "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));
+
 #endif
 		do_div(elapsed, 1000);
 		usdiff -= elapsed;
 		if (usdiff < 0)
 			usdiff = -usdiff;
+
+		/* idivl overflow => difference is larger than USEC_PER_SEC */
+		if (faulted)
+			usdiff = USEC_PER_SEC;
 	} else
 		usdiff = USEC_PER_SEC; /* disable TSC match window below */
 

From 09338bb96a7ce2a8da60a0230e7b01b707eec778 Mon Sep 17 00:00:00 2001
From: Dave Young <dyoung@redhat.com>
Date: Tue, 26 Aug 2014 17:06:41 +0800
Subject: [PATCH 0797/1185] x86 early_ioremap: Increase FIX_BTMAPS_SLOTS to 8

commit 3eddc69ffeba092d288c386646bfa5ec0fce25fd upstream.

The 3.16 kernel fails to boot with earlyprintk=efi; it keeps scrolling at the
bottom line of the screen.

Bisected, the first bad commit is below:
commit 86dfc6f339886559d80ee0d4bd20fe5ee90450f0
Author: Lv Zheng <lv.zheng@intel.com>
Date:   Fri Apr 4 12:38:57 2014 +0800

    ACPICA: Tables: Fix table checksums verification before installation.

I did some debugging by enabling both serial and efi earlyprintk, below is
some debug dmesg, seems early_ioremap fails in scroll up function due to
no free slot, see below dmesg output:

  WARNING: CPU: 0 PID: 0 at mm/early_ioremap.c:116 __early_ioremap+0x90/0x1c4()
  __early_ioremap(ed00c800, 00000c80) not found slot
  Modules linked in:
  CPU: 0 PID: 0 Comm: swapper Not tainted 3.17.0-rc1+ #204
  Hardware name: Hewlett-Packard HP Z420 Workstation/1589, BIOS J61 v03.15 05/09/2013
  Call Trace:
    dump_stack+0x4e/0x7a
    warn_slowpath_common+0x75/0x8e
    ? __early_ioremap+0x90/0x1c4
    warn_slowpath_fmt+0x47/0x49
    __early_ioremap+0x90/0x1c4
    ? sprintf+0x46/0x48
    early_ioremap+0x13/0x15
    early_efi_map+0x24/0x26
    early_efi_scroll_up+0x6d/0xc0
    early_efi_write+0x1b0/0x214
    call_console_drivers.constprop.21+0x73/0x7e
    console_unlock+0x151/0x3b2
    ? vprintk_emit+0x49f/0x532
    vprintk_emit+0x521/0x532
    ? console_unlock+0x383/0x3b2
    printk+0x4f/0x51
    acpi_os_vprintf+0x2b/0x2d
    acpi_os_printf+0x43/0x45
    acpi_info+0x5c/0x63
    ? __acpi_map_table+0x13/0x18
    ? acpi_os_map_iomem+0x21/0x147
    acpi_tb_print_table_header+0x177/0x186
    acpi_tb_install_table_with_override+0x4b/0x62
    acpi_tb_install_standard_table+0xd9/0x215
    ? early_ioremap+0x13/0x15
    ? __acpi_map_table+0x13/0x18
    acpi_tb_parse_root_table+0x16e/0x1b4
    acpi_initialize_tables+0x57/0x59
    acpi_table_init+0x50/0xce
    acpi_boot_table_init+0x1e/0x85
    setup_arch+0x9b7/0xcc4
    start_kernel+0x94/0x42d
    ? early_idt_handlers+0x120/0x120
    x86_64_start_reservations+0x2a/0x2c
    x86_64_start_kernel+0xf3/0x100

Quote reply from Lv.zheng about the early ioremap slot usage in this case:

"""
In early_efi_scroll_up(), 2 mapping entries will be used for the src/dst screen buffer.
In drivers/acpi/acpica/tbutils.c, we've improved the early table loading code in acpi_tb_parse_root_table().
We now need 2 mapping entries:
1. One mapping entry is used for RSDT table mapping. Each RSDT entry contains an address for another ACPI table.
2. For each entry in RSDP, we need another mapping entry to map the table to perform necessary check/override before installing it.

When acpi_tb_parse_root_table() prints something through EFI earlyprintk console, we'll have 4 mapping entries used.
The current 4 slots setting of early_ioremap() seems to be too small for such a use case.
"""

Thus increase the number of slots to 8 in this patch to fix this issue.
The boot-time mappings become 512 pages with this patch.

Signed-off-by: Dave Young <dyoung@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/fixmap.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 0dc7d9e21c34..9d7d36c82fc2 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -123,14 +123,14 @@ enum fixed_addresses {
 	__end_of_permanent_fixed_addresses,
 
 	/*
-	 * 256 temporary boot-time mappings, used by early_ioremap(),
+	 * 512 temporary boot-time mappings, used by early_ioremap(),
 	 * before ioremap() is functional.
 	 *
-	 * If necessary we round it up to the next 256 pages boundary so
+	 * If necessary we round it up to the next 512 pages boundary so
 	 * that we can have a single pgd entry and a single pte table:
 	 */
 #define NR_FIX_BTMAPS		64
-#define FIX_BTMAPS_SLOTS	4
+#define FIX_BTMAPS_SLOTS	8
 #define TOTAL_FIX_BTMAPS	(NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
 	FIX_BTMAP_END =
 	 (__end_of_permanent_fixed_addresses ^

From 474740b9e629cad9479324efd3926a8e88b04761 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 24 Sep 2014 17:56:17 +0200
Subject: [PATCH 0798/1185] shmem: fix nlink for rename overwrite directory

commit b928095b0a7cff7fb9fcf4c706348ceb8ab2c295 upstream.

When rename overwrites an empty directory, we need to drop the extra
nlink of the overwritten directory.

Test prog:

#include <stdio.h>
#include <fcntl.h>
#include <err.h>
#include <sys/stat.h>

/*
 * Reproducer: rename one empty directory over another while holding an
 * open descriptor on the target, then check the link count reported for
 * the overwritten directory through that descriptor.
 *
 * Returns 0 when st_nlink is 0, 1 when it is not; exits via err() if any
 * of the setup calls fails.
 * NOTE(review): presumably meant to be run with the current directory on
 * a shmem/tmpfs mount, since that is the filesystem being fixed - confirm.
 */
int main(void)
{
	const char *test_dir1 = "test-dir1";
	const char *test_dir2 = "test-dir2";
	int res;
	int fd;
	struct stat statbuf;

	/* Create the rename source. */
	res = mkdir(test_dir1, 0777);
	if (res == -1)
		err(1, "mkdir(\"%s\")", test_dir1);

	/* Create the (empty) rename target. */
	res = mkdir(test_dir2, 0777);
	if (res == -1)
		err(1, "mkdir(\"%s\")", test_dir2);

	/*
	 * Keep a descriptor on the target so it can still be inspected
	 * with fstat() after the rename has replaced it.
	 */
	fd = open(test_dir2, O_RDONLY);
	if (fd == -1)
		err(1, "open(\"%s\")", test_dir2);

	/* Overwrite the empty target directory with the source. */
	res = rename(test_dir1, test_dir2);
	if (res == -1)
		err(1, "rename(\"%s\", \"%s\")", test_dir1, test_dir2);

	/* Query the overwritten directory through the descriptor. */
	res = fstat(fd, &statbuf);
	if (res == -1)
		err(1, "fstat(%i)", fd);

	/* The overwritten directory is expected to have no links left. */
	if (statbuf.st_nlink != 0) {
		fprintf(stderr, "nlink is %lu, should be 0\n", statbuf.st_nlink);
		return 1;
	}

	return 0;
}

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/shmem.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 16cc1d77f70a..4e4a7349c5cd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2128,8 +2128,10 @@ static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct
 
 	if (new_dentry->d_inode) {
 		(void) shmem_unlink(new_dir, new_dentry);
-		if (they_are_dirs)
+		if (they_are_dirs) {
+			drop_nlink(new_dentry->d_inode);
 			drop_nlink(old_dir);
+		}
 	} else if (they_are_dirs) {
 		drop_nlink(old_dir);
 		inc_nlink(new_dir);

From 8e485734eb4e96853cd14e4d30ea40605b0e8e83 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Thu, 4 Sep 2014 10:52:53 +0300
Subject: [PATCH 0799/1185] ASoC: davinci-mcasp: Correct rx format unit
 configuration

commit fe0a29e163a5d045c73faab682a8dac71c2f8012 upstream.

In case of capture we should not use rotation. The reverse and mask is
enough to get the data align correctly from the bus to MCU:
Format	  data from bus    after reverse (XRBUF)
S16_LE:  |LSB|MSB|xxx|xxx|  |xxx|xxx|MSB|LSB|
S24_3LE: |LSB|DAT|MSB|xxx|  |xxx|MSB|DAT|LSB|
S24_LE:  |LSB|DAT|MSB|xxx|  |xxx|MSB|DAT|LSB|
S32_LE:  |LSB|DAT|DAT|MSB|  |MSB|DAT|DAT|LSB|

With this patch all supported formats will work for playback and capture.

Reported-by: Jyri Sarha <jsarha@ti.com> (broken S24_3LE capture)
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/davinci/davinci-mcasp.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c
index 81490febac6d..ade9d6379c1b 100644
--- a/sound/soc/davinci/davinci-mcasp.c
+++ b/sound/soc/davinci/davinci-mcasp.c
@@ -632,8 +632,17 @@ static int davinci_config_channel_size(struct davinci_audio_dev *dev,
 {
 	u32 fmt;
 	u32 tx_rotate = (word_length / 4) & 0x7;
-	u32 rx_rotate = (32 - word_length) / 4;
 	u32 mask = (1ULL << word_length) - 1;
+	/*
+	 * For captured data we should not rotate; inversion and masking are
+	 * enough to get the data to the right position:
+	 * Format	  data from bus		after reverse (XRBUF)
+	 * S16_LE:	|LSB|MSB|xxx|xxx|	|xxx|xxx|MSB|LSB|
+	 * S24_3LE:	|LSB|DAT|MSB|xxx|	|xxx|MSB|DAT|LSB|
+	 * S24_LE:	|LSB|DAT|MSB|xxx|	|xxx|MSB|DAT|LSB|
+	 * S32_LE:	|LSB|DAT|DAT|MSB|	|MSB|DAT|DAT|LSB|
+	 */
+	u32 rx_rotate = 0;
 
 	/*
 	 * if s BCLK-to-LRCLK ratio has been configured via the set_clkdiv()

From c8d26061c949d8acb7a69b862f2d1a37f36dac44 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Fri, 22 Aug 2014 13:32:09 +0400
Subject: [PATCH 0800/1185] CIFS: Fix directory rename error

commit a07d322059db66b84c9eb4f98959df468e88b34b upstream.

CIFS servers process nlink counts differently for files and directories.
In cifs_rename(), if the request fails on the existing target, we
try to remove it through cifs_unlink() but this is not what we want
to do for directories. As the result the following sequence of commands

mkdir {1,2}; mv -T 1 2; rmdir {1,2}; mkdir {1,2}; echo foo > 2/bar

and XFS test generic/023 fails with an -ENOENT error. That's because the second
mkdir reuses the existing inode (target inode of the mv -T command) with
the S_DEAD flag.

Fix this by checking whether the target is directory or not and
calling cifs_rmdir() rather than cifs_unlink() for directories.

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/inode.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index c9bce9b43855..0dee93706c98 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1640,7 +1640,10 @@ cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
 unlink_target:
 	/* Try unlinking the target dentry if it's not negative */
 	if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) {
-		tmprc = cifs_unlink(target_dir, target_dentry);
+		if (S_ISDIR(target_dentry->d_inode->i_mode))
+			tmprc = cifs_rmdir(target_dir, target_dentry);
+		else
+			tmprc = cifs_unlink(target_dir, target_dentry);
 		if (tmprc)
 			goto cifs_rename_exit;
 		rc = cifs_do_rename(xid, source_dentry, from_name,

From 922798d573981436c85bea14e4757b51fa3f2cc2 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Mon, 18 Aug 2014 20:49:57 +0400
Subject: [PATCH 0801/1185] CIFS: Fix SMB2 readdir error handling

commit 52755808d4525f4d5b86d112d36ffc7a46f3fb48 upstream.

SMB2 servers indicate the end of a directory search with the
STATUS_NO_MORE_FILES error code, which is not processed now.
This causes the generic/257 xfstest to fail. Fix this by treating
this error code as the end of search in SMB2_query_directory.

Also, when negotiating the CIFS protocol we tell the server to close
the search automatically at the end, so there is no need to do
it ourselves. In the case of the SMB2 protocol, we need to close it
explicitly - so separate the close directory checks for the different
protocols.

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/cifsglob.h     | 2 ++
 fs/cifs/file.c         | 2 +-
 fs/cifs/readdir.c      | 2 +-
 fs/cifs/smb1ops.c      | 7 +++++++
 fs/cifs/smb2maperror.c | 2 +-
 fs/cifs/smb2ops.c      | 8 ++++++++
 fs/cifs/smb2pdu.c      | 9 ++++-----
 7 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 52480240168e..f74dfa89c4c4 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -375,6 +375,8 @@ struct smb_version_operations {
 			const char *, u32 *);
 	int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *,
 			int);
+	/* check if we need to issue closedir */
+	bool (*dir_needs_close)(struct cifsFileInfo *);
 };
 
 struct smb_version_values {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 97b03895ac8c..5fcc10fa62bd 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -735,7 +735,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
 
 	cifs_dbg(FYI, "Freeing private data in close dir\n");
 	spin_lock(&cifs_file_list_lock);
-	if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
+	if (server->ops->dir_needs_close(cfile)) {
 		cfile->invalidHandle = true;
 		spin_unlock(&cifs_file_list_lock);
 		if (server->ops->close_dir)
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 87d125f682cd..85ebdaa21015 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -582,7 +582,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon,
 		/* close and restart search */
 		cifs_dbg(FYI, "search backing up - close and restart search\n");
 		spin_lock(&cifs_file_list_lock);
-		if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
+		if (server->ops->dir_needs_close(cfile)) {
 			cfile->invalidHandle = true;
 			spin_unlock(&cifs_file_list_lock);
 			if (server->ops->close_dir)
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 4885a40f3210..610c6c24d41d 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -885,6 +885,12 @@ cifs_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
 			   (__u8)type, wait, 0);
 }
 
+static bool
+cifs_dir_needs_close(struct cifsFileInfo *cfile)
+{
+	return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle;
+}
+
 struct smb_version_operations smb1_operations = {
 	.send_cancel = send_nt_cancel,
 	.compare_fids = cifs_compare_fids,
@@ -948,6 +954,7 @@ struct smb_version_operations smb1_operations = {
 	.mand_lock = cifs_mand_lock,
 	.mand_unlock_range = cifs_unlock_range,
 	.push_mand_locks = cifs_push_mandatory_locks,
+	.dir_needs_close = cifs_dir_needs_close,
 #ifdef CONFIG_CIFS_XATTR
 	.query_all_EAs = CIFSSMBQAllEAs,
 	.set_EA = CIFSSMBSetEA,
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 824696fb24db..4768cf8be6e2 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -214,7 +214,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
 	{STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"},
 	{STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"},
 	{STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"},
-	{STATUS_NO_MORE_FILES, -EIO, "STATUS_NO_MORE_FILES"},
+	{STATUS_NO_MORE_FILES, -ENODATA, "STATUS_NO_MORE_FILES"},
 	{STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"},
 	{STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"},
 	{STATUS_NO_INHERITANCE, -EIO, "STATUS_NO_INHERITANCE"},
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index fe7ac989c6c4..e12f258a5ffa 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -554,6 +554,12 @@ smb2_new_lease_key(struct cifs_fid *fid)
 	get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE);
 }
 
+static bool
+smb2_dir_needs_close(struct cifsFileInfo *cfile)
+{
+	return !cfile->invalidHandle;
+}
+
 struct smb_version_operations smb21_operations = {
 	.compare_fids = smb2_compare_fids,
 	.setup_request = smb2_setup_request,
@@ -618,6 +624,7 @@ struct smb_version_operations smb21_operations = {
 	.set_lease_key = smb2_set_lease_key,
 	.new_lease_key = smb2_new_lease_key,
 	.calc_signature = smb2_calc_signature,
+	.dir_needs_close = smb2_dir_needs_close,
 };
 
 
@@ -685,6 +692,7 @@ struct smb_version_operations smb30_operations = {
 	.set_lease_key = smb2_set_lease_key,
 	.new_lease_key = smb2_new_lease_key,
 	.calc_signature = smb3_calc_signature,
+	.dir_needs_close = smb2_dir_needs_close,
 };
 
 struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e37790841446..eb0de4c3ca76 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1800,6 +1800,10 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
 	rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base;
 
 	if (rc) {
+		if (rc == -ENODATA && rsp->hdr.Status == STATUS_NO_MORE_FILES) {
+			srch_inf->endOfSearch = true;
+			rc = 0;
+		}
 		cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
 		goto qdir_exit;
 	}
@@ -1837,11 +1841,6 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
 	else
 		cifs_dbg(VFS, "illegal search buffer type\n");
 
-	if (rsp->hdr.Status == STATUS_NO_MORE_FILES)
-		srch_inf->endOfSearch = 1;
-	else
-		srch_inf->endOfSearch = 0;
-
 	return rc;
 
 qdir_exit:

From 1241d7f2306311538b4283df64093644479810f6 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 22 Aug 2014 21:48:00 +0100
Subject: [PATCH 0802/1185] iio:trigger: modify return value for
 iio_trigger_get

commit f153566570fb9e32c2f59182883f4f66048788fb upstream.

Instead of a void function, return the trigger pointer.

Whilst not in and of itself a fix, this makes the following set of
7 fixes cleaner than they would otherwise be.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/iio/trigger.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h
index 3869c525b052..545deb149655 100644
--- a/include/linux/iio/trigger.h
+++ b/include/linux/iio/trigger.h
@@ -83,10 +83,12 @@ static inline void iio_trigger_put(struct iio_trigger *trig)
 	put_device(&trig->dev);
 }
 
-static inline void iio_trigger_get(struct iio_trigger *trig)
+static inline struct iio_trigger *iio_trigger_get(struct iio_trigger *trig)
 {
 	get_device(&trig->dev);
 	__module_get(trig->ops->owner);
+
+	return trig;
 }
 
 /**

From 4fcf3add79df4540e4c18b389e8ee653bfe39cba Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 22 Aug 2014 21:48:00 +0100
Subject: [PATCH 0803/1185] iio: gyro: itg3200: Fix indio_dev->trig assignment

commit 0b4dce2ee694a991ef38203ec5ff91a738518cb3 upstream.

Assigning indio_dev->trig directly can result in a wrong reference count
for the trigger device; call iio_trigger_get to increment the reference.
Refer to http://www.spinics.net/lists/linux-iio/msg13669.html for discussion
with Jonathan.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/gyro/itg3200_buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/gyro/itg3200_buffer.c b/drivers/iio/gyro/itg3200_buffer.c
index 6c43af9bb0a4..14917fae2d9d 100644
--- a/drivers/iio/gyro/itg3200_buffer.c
+++ b/drivers/iio/gyro/itg3200_buffer.c
@@ -135,7 +135,7 @@ int itg3200_probe_trigger(struct iio_dev *indio_dev)
 		goto error_free_irq;
 
 	/* select default trigger */
-	indio_dev->trig = st->trig;
+	indio_dev->trig = iio_trigger_get(st->trig);
 
 	return 0;
 

From cde9b0d1ec4f85fa3602a158c2e3ee5e6d6b925d Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 22 Aug 2014 21:48:00 +0100
Subject: [PATCH 0804/1185] iio: inv_mpu6050: Fix indio_dev->trig assignment

commit b07e3b3850b2e1f09c19f54d3ed7210d9f529e2c upstream.

Assigning indio_dev->trig directly can result in a wrong reference count
for the trigger device; call iio_trigger_get to increment the reference.
Refer to http://www.spinics.net/lists/linux-iio/msg13669.html for discussion
with Jonathan.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c
index 03b9372c1212..926fccea8de0 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c
@@ -135,7 +135,7 @@ int inv_mpu6050_probe_trigger(struct iio_dev *indio_dev)
 	ret = iio_trigger_register(st->trig);
 	if (ret)
 		goto error_free_irq;
-	indio_dev->trig = st->trig;
+	indio_dev->trig = iio_trigger_get(st->trig);
 
 	return 0;
 

From e3675a4bd5f72e2551a4f4dfae7330c8b6c854dd Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 22 Aug 2014 21:48:00 +0100
Subject: [PATCH 0805/1185] iio: meter: ade7758: Fix indio_dev->trig assignment

commit 0495081179212b758775df752e657ea71dcae020 upstream.

Assigning indio_dev->trig directly can result in a wrong reference count
for the trigger device; call iio_trigger_get to increment the reference.
Refer to http://www.spinics.net/lists/linux-iio/msg13669.html for discussion
with Jonathan.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Acked-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/iio/meter/ade7758_trigger.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/iio/meter/ade7758_trigger.c b/drivers/staging/iio/meter/ade7758_trigger.c
index 7a94ddd42f59..8c4f2896cd0d 100644
--- a/drivers/staging/iio/meter/ade7758_trigger.c
+++ b/drivers/staging/iio/meter/ade7758_trigger.c
@@ -85,7 +85,7 @@ int ade7758_probe_trigger(struct iio_dev *indio_dev)
 	ret = iio_trigger_register(st->trig);
 
 	/* select default trigger */
-	indio_dev->trig = st->trig;
+	indio_dev->trig = iio_trigger_get(st->trig);
 	if (ret)
 		goto error_free_irq;
 

From 911575a54ca30553af3b07a85e3d992a1804221f Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 22 Aug 2014 21:48:00 +0100
Subject: [PATCH 0806/1185] iio: st_sensors: Fix indio_dev->trig assignment

commit f0e84acd7056e6d7ade551c6439531606ae30a46 upstream.

Assigning indio_dev->trig directly can result in a wrong reference count
for the trigger device; call iio_trigger_get to increment the reference.
Refer to http://www.spinics.net/lists/linux-iio/msg13669.html for discussion
with Jonathan.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/common/st_sensors/st_sensors_trigger.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c
index 8fc3a97eb266..8d8ca6f1e16a 100644
--- a/drivers/iio/common/st_sensors/st_sensors_trigger.c
+++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c
@@ -49,7 +49,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 		dev_err(&indio_dev->dev, "failed to register iio trigger.\n");
 		goto iio_trigger_register_error;
 	}
-	indio_dev->trig = sdata->trig;
+	indio_dev->trig = iio_trigger_get(sdata->trig);
 
 	return 0;
 

From b81f480569c02f391e6bb28c76e98ab64ce0fc83 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 22 Aug 2014 21:48:00 +0100
Subject: [PATCH 0807/1185] iio: adc: ad_sigma_delta: Fix indio_dev->trig
 assignment

commit 9e5846be33277802c0c76e5c12825d0e4d27f639 upstream.

Assigning indio_dev->trig directly can result in a wrong reference count
for the trigger device; call iio_trigger_get to increment the reference.
Refer to http://www.spinics.net/lists/linux-iio/msg13669.html for discussion
with Jonathan.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Acked-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/adc/ad_sigma_delta.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c
index f0d6335ae087..05d2733ef48c 100644
--- a/drivers/iio/adc/ad_sigma_delta.c
+++ b/drivers/iio/adc/ad_sigma_delta.c
@@ -477,7 +477,7 @@ static int ad_sd_probe_trigger(struct iio_dev *indio_dev)
 		goto error_free_irq;
 
 	/* select default trigger */
-	indio_dev->trig = sigma_delta->trig;
+	indio_dev->trig = iio_trigger_get(sigma_delta->trig);
 
 	return 0;
 

From 90c5b0fbd466cdff666077aae8626c3ff0a2f2ca Mon Sep 17 00:00:00 2001
From: Denis CIOCCA <denis.ciocca@st.com>
Date: Thu, 9 Oct 2014 13:55:00 +0100
Subject: [PATCH 0808/1185] iio:magnetometer: bugfix magnetometers gain values

commit a31d0928999fbf33b3a6042e8bcb7b7f7e07d094 upstream.

This patch fixes the gain values. The first driver was designed using
engineering samples; in mass production the values have changed.

Signed-off-by: Denis Ciocca <denis.ciocca@st.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/magnetometer/st_magn_core.c | 52 ++++++++++++++-----------
 1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c
index 16f0d6df239f..3ce3769c0823 100644
--- a/drivers/iio/magnetometer/st_magn_core.c
+++ b/drivers/iio/magnetometer/st_magn_core.c
@@ -40,7 +40,8 @@
 #define ST_MAGN_FS_AVL_5600MG			5600
 #define ST_MAGN_FS_AVL_8000MG			8000
 #define ST_MAGN_FS_AVL_8100MG			8100
-#define ST_MAGN_FS_AVL_10000MG			10000
+#define ST_MAGN_FS_AVL_12000MG			12000
+#define ST_MAGN_FS_AVL_16000MG			16000
 
 /* CUSTOM VALUES FOR SENSOR 1 */
 #define ST_MAGN_1_WAI_EXP			0x3c
@@ -67,20 +68,20 @@
 #define ST_MAGN_1_FS_AVL_4700_VAL		0x05
 #define ST_MAGN_1_FS_AVL_5600_VAL		0x06
 #define ST_MAGN_1_FS_AVL_8100_VAL		0x07
-#define ST_MAGN_1_FS_AVL_1300_GAIN_XY		1100
-#define ST_MAGN_1_FS_AVL_1900_GAIN_XY		855
-#define ST_MAGN_1_FS_AVL_2500_GAIN_XY		670
-#define ST_MAGN_1_FS_AVL_4000_GAIN_XY		450
-#define ST_MAGN_1_FS_AVL_4700_GAIN_XY		400
-#define ST_MAGN_1_FS_AVL_5600_GAIN_XY		330
-#define ST_MAGN_1_FS_AVL_8100_GAIN_XY		230
-#define ST_MAGN_1_FS_AVL_1300_GAIN_Z		980
-#define ST_MAGN_1_FS_AVL_1900_GAIN_Z		760
-#define ST_MAGN_1_FS_AVL_2500_GAIN_Z		600
-#define ST_MAGN_1_FS_AVL_4000_GAIN_Z		400
-#define ST_MAGN_1_FS_AVL_4700_GAIN_Z		355
-#define ST_MAGN_1_FS_AVL_5600_GAIN_Z		295
-#define ST_MAGN_1_FS_AVL_8100_GAIN_Z		205
+#define ST_MAGN_1_FS_AVL_1300_GAIN_XY		909
+#define ST_MAGN_1_FS_AVL_1900_GAIN_XY		1169
+#define ST_MAGN_1_FS_AVL_2500_GAIN_XY		1492
+#define ST_MAGN_1_FS_AVL_4000_GAIN_XY		2222
+#define ST_MAGN_1_FS_AVL_4700_GAIN_XY		2500
+#define ST_MAGN_1_FS_AVL_5600_GAIN_XY		3030
+#define ST_MAGN_1_FS_AVL_8100_GAIN_XY		4347
+#define ST_MAGN_1_FS_AVL_1300_GAIN_Z		1020
+#define ST_MAGN_1_FS_AVL_1900_GAIN_Z		1315
+#define ST_MAGN_1_FS_AVL_2500_GAIN_Z		1666
+#define ST_MAGN_1_FS_AVL_4000_GAIN_Z		2500
+#define ST_MAGN_1_FS_AVL_4700_GAIN_Z		2816
+#define ST_MAGN_1_FS_AVL_5600_GAIN_Z		3389
+#define ST_MAGN_1_FS_AVL_8100_GAIN_Z		4878
 #define ST_MAGN_1_MULTIREAD_BIT			false
 
 /* CUSTOM VALUES FOR SENSOR 2 */
@@ -103,10 +104,12 @@
 #define ST_MAGN_2_FS_MASK			0x60
 #define ST_MAGN_2_FS_AVL_4000_VAL		0x00
 #define ST_MAGN_2_FS_AVL_8000_VAL		0x01
-#define ST_MAGN_2_FS_AVL_10000_VAL		0x02
-#define ST_MAGN_2_FS_AVL_4000_GAIN		430
-#define ST_MAGN_2_FS_AVL_8000_GAIN		230
-#define ST_MAGN_2_FS_AVL_10000_GAIN		230
+#define ST_MAGN_2_FS_AVL_12000_VAL		0x02
+#define ST_MAGN_2_FS_AVL_16000_VAL		0x03
+#define ST_MAGN_2_FS_AVL_4000_GAIN		146
+#define ST_MAGN_2_FS_AVL_8000_GAIN		292
+#define ST_MAGN_2_FS_AVL_12000_GAIN		438
+#define ST_MAGN_2_FS_AVL_16000_GAIN		584
 #define ST_MAGN_2_MULTIREAD_BIT			false
 #define ST_MAGN_2_OUT_X_L_ADDR			0x28
 #define ST_MAGN_2_OUT_Y_L_ADDR			0x2a
@@ -252,9 +255,14 @@ static const struct st_sensors st_magn_sensors[] = {
 					.gain = ST_MAGN_2_FS_AVL_8000_GAIN,
 				},
 				[2] = {
-					.num = ST_MAGN_FS_AVL_10000MG,
-					.value = ST_MAGN_2_FS_AVL_10000_VAL,
-					.gain = ST_MAGN_2_FS_AVL_10000_GAIN,
+					.num = ST_MAGN_FS_AVL_12000MG,
+					.value = ST_MAGN_2_FS_AVL_12000_VAL,
+					.gain = ST_MAGN_2_FS_AVL_12000_GAIN,
+				},
+				[3] = {
+					.num = ST_MAGN_FS_AVL_16000MG,
+					.value = ST_MAGN_2_FS_AVL_16000_VAL,
+					.gain = ST_MAGN_2_FS_AVL_16000_GAIN,
 				},
 			},
 		},

From db875b8f548c270ddbd8da1dcaa188723027ade3 Mon Sep 17 00:00:00 2001
From: Johannes Pointner <johannes.pointner@gmail.com>
Date: Mon, 25 Aug 2014 09:04:00 +0100
Subject: [PATCH 0809/1185] iio:inkern: fix overwritten -EPROBE_DEFER in
 of_iio_channel_get_by_name

commit 872687f626e033b4ddfaec1e410057cfc6636d77 upstream.

Fixes: a2c12493ed7e ('iio: of_iio_channel_get_by_name() returns non-null pointers for error legs')

which improperly assumes that of_iio_channel_get_by_name must always
return NULL and thus now hides -EPROBE_DEFER.

Signed-off-by: Johannes Pointner <johannes.pointner@br-automation.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/inkern.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c
index 1e8e94d4db7d..4fc88e617acf 100644
--- a/drivers/iio/inkern.c
+++ b/drivers/iio/inkern.c
@@ -178,7 +178,7 @@ static struct iio_channel *of_iio_channel_get_by_name(struct device_node *np,
 			index = of_property_match_string(np, "io-channel-names",
 							 name);
 		chan = of_iio_channel_get(np, index);
-		if (!IS_ERR(chan))
+		if (!IS_ERR(chan) || PTR_ERR(chan) == -EPROBE_DEFER)
 			break;
 		else if (name && index >= 0) {
 			pr_err("ERROR: could not get IIO channel %s:%s(%i)\n",

From 058ab45435fdd847e6ba1968708d5d5e9a1f121e Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Wed, 2 Jul 2014 16:19:24 +0300
Subject: [PATCH 0810/1185] Target/iser: Get isert_conn reference once got to
 connected_handler

commit c2f88b17a1d97ca4ecd96cc22333a7a4f1407d39 upstream.

In case the connection didn't reach connected state, disconnected
handler will never be invoked thus the second kref_put on
isert_conn will be missing.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/ulp/isert/ib_isert.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 0e93152384f0..9908c9927a96 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -404,7 +404,6 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 	init_completion(&isert_conn->conn_wait);
 	init_completion(&isert_conn->conn_wait_comp_err);
 	kref_init(&isert_conn->conn_kref);
-	kref_get(&isert_conn->conn_kref);
 	mutex_init(&isert_conn->conn_mutex);
 
 	cma_id->context = isert_conn;
@@ -530,7 +529,9 @@ isert_connect_release(struct isert_conn *isert_conn)
 static void
 isert_connected_handler(struct rdma_cm_id *cma_id)
 {
-	return;
+	struct isert_conn *isert_conn = cma_id->context;
+
+	kref_get(&isert_conn->conn_kref);
 }
 
 static void

From b997982f680da4bcb63895a9d31874af9c93f4a8 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Wed, 2 Jul 2014 16:19:25 +0300
Subject: [PATCH 0811/1185] Target/iser: Don't put isert_conn inside
 disconnected handler

commit 0fc4ea701fcf5bc51ace4e288af5be741465f776 upstream.

disconnected_handler is invoked on several CM events (such
as DISCONNECTED, DEVICE_REMOVAL, TIMEWAIT_EXIT...). Since
multiple events can occur before isert_free_conn is
invoked, we might put all isert_conn references and free
the connection too early.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/ulp/isert/ib_isert.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 9908c9927a96..acb3865710c2 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -583,7 +583,6 @@ isert_disconnect_work(struct work_struct *work)
 
 wake_up:
 	complete(&isert_conn->conn_wait);
-	isert_put_conn(isert_conn);
 }
 
 static void
@@ -2266,6 +2265,7 @@ static void isert_wait_conn(struct iscsi_conn *conn)
 	wait_for_completion(&isert_conn->conn_wait_comp_err);
 
 	wait_for_completion(&isert_conn->conn_wait);
+	isert_put_conn(isert_conn);
 }
 
 static void isert_free_conn(struct iscsi_conn *conn)

From c9049bb0aa11aceb507f688a5491257ae9ab1e01 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Tue, 2 Sep 2014 17:49:54 -0400
Subject: [PATCH 0812/1185] iscsi-target: avoid NULL pointer in
 iscsi_copy_param_list failure

commit 8ae757d09c45102b347a1bc2867f54ffc1ab8fda upstream.

In iscsi_copy_param_list() a failed iscsi_param_list memory allocation
currently invokes iscsi_release_param_list() to cleanup, and will promptly
trigger a NULL pointer dereference.

Instead, go ahead and return for the first iscsi_copy_param_list()
failure case.

Found by coverity.

Signed-off-by: Joern Engel <joern@logfs.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/target/iscsi/iscsi_target_parameters.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
index e38222191a33..30be6c9bdbc6 100644
--- a/drivers/target/iscsi/iscsi_target_parameters.c
+++ b/drivers/target/iscsi/iscsi_target_parameters.c
@@ -603,7 +603,7 @@ int iscsi_copy_param_list(
 	param_list = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL);
 	if (!param_list) {
 		pr_err("Unable to allocate memory for struct iscsi_param_list.\n");
-		goto err_out;
+		return -1;
 	}
 	INIT_LIST_HEAD(&param_list->param_list);
 	INIT_LIST_HEAD(&param_list->extra_response_list);

From b04cb59bad40aa369a87c000dbb9e45cd2f70354 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Wed, 17 Sep 2014 11:45:17 -0700
Subject: [PATCH 0813/1185] iscsi-target: Fix memory corruption in
 iscsit_logout_post_handler_diffcid

commit b53b0d99d6fbf7d44330395349a895521cfdbc96 upstream.

This patch fixes a bug in iscsit_logout_post_handler_diffcid() where
a pointer used as storage for list_for_each_entry() was incorrectly
being used to determine if no matching entry had been found.

This patch changes iscsit_logout_post_handler_diffcid() to key off
bool conn_found to determine if the function needs to exit early.

Reported-by: Joern Engel <joern@logfs.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/target/iscsi/iscsi_target.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 799f84e686b5..651b5768862f 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -4453,6 +4453,7 @@ static void iscsit_logout_post_handler_diffcid(
 {
 	struct iscsi_conn *l_conn;
 	struct iscsi_session *sess = conn->sess;
+	bool conn_found = false;
 
 	if (!sess)
 		return;
@@ -4461,12 +4462,13 @@ static void iscsit_logout_post_handler_diffcid(
 	list_for_each_entry(l_conn, &sess->sess_conn_list, conn_list) {
 		if (l_conn->cid == cid) {
 			iscsit_inc_conn_usage_count(l_conn);
+			conn_found = true;
 			break;
 		}
 	}
 	spin_unlock_bh(&sess->conn_lock);
 
-	if (!l_conn)
+	if (!conn_found)
 		return;
 
 	if (l_conn->sock)

From ff7510205896efaf9c5f58d656b2ce2e2b5c1135 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 1 Sep 2014 20:27:29 +0300
Subject: [PATCH 0814/1185] NFC: microread: Potential overflows in
 microread_target_discovered()

commit d07f1e8600ccb885c8f4143402b8912f7d827bcb upstream.

Smatch says that skb->data is untrusted so we need to check to make sure
that the memcpy() doesn't overflow.

Fixes: cfad1ba87150 ('NFC: Initial support for Inside Secure microread')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nfc/microread/microread.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/nfc/microread/microread.c b/drivers/nfc/microread/microread.c
index 3420d833db17..384ab8ca4b37 100644
--- a/drivers/nfc/microread/microread.c
+++ b/drivers/nfc/microread/microread.c
@@ -501,9 +501,13 @@ static void microread_target_discovered(struct nfc_hci_dev *hdev, u8 gate,
 		targets->sens_res =
 			 be16_to_cpu(*(u16 *)&skb->data[MICROREAD_EMCF_A_ATQA]);
 		targets->sel_res = skb->data[MICROREAD_EMCF_A_SAK];
-		memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_A_UID],
-		       skb->data[MICROREAD_EMCF_A_LEN]);
 		targets->nfcid1_len = skb->data[MICROREAD_EMCF_A_LEN];
+		if (targets->nfcid1_len > sizeof(targets->nfcid1)) {
+			r = -EINVAL;
+			goto exit_free;
+		}
+		memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_A_UID],
+		       targets->nfcid1_len);
 		break;
 	case MICROREAD_GATE_ID_MREAD_ISO_A_3:
 		targets->supported_protocols =
@@ -511,9 +515,13 @@ static void microread_target_discovered(struct nfc_hci_dev *hdev, u8 gate,
 		targets->sens_res =
 			 be16_to_cpu(*(u16 *)&skb->data[MICROREAD_EMCF_A3_ATQA]);
 		targets->sel_res = skb->data[MICROREAD_EMCF_A3_SAK];
-		memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_A3_UID],
-		       skb->data[MICROREAD_EMCF_A3_LEN]);
 		targets->nfcid1_len = skb->data[MICROREAD_EMCF_A3_LEN];
+		if (targets->nfcid1_len > sizeof(targets->nfcid1)) {
+			r = -EINVAL;
+			goto exit_free;
+		}
+		memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_A3_UID],
+		       targets->nfcid1_len);
 		break;
 	case MICROREAD_GATE_ID_MREAD_ISO_B:
 		targets->supported_protocols = NFC_PROTO_ISO14443_B_MASK;

From fe3ed8722e7f850cee43dfa18dd2aebd8857ed38 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 3 Sep 2014 00:00:39 -0500
Subject: [PATCH 0815/1185] SCSI: libiscsi: fix potential buffer overrun in
 __iscsi_conn_send_pdu

commit db9bfd64b14a3a8f1868d2164518fdeab1b26ad1 upstream.

This patch fixes a potential buffer overrun in __iscsi_conn_send_pdu.
This function is used by iscsi drivers and userspace to send iscsi PDUs/
commands. For login commands, we have a set buffer size. For all other
commands we do not support data buffers.

This was reported by Dan Carpenter here:
http://www.spinics.net/lists/linux-scsi/msg66838.html

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/libiscsi.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 5de946984500..f91d41788ce4 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -717,11 +717,21 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 			return NULL;
 		}
 
+		if (data_size > ISCSI_DEF_MAX_RECV_SEG_LEN) {
+			iscsi_conn_printk(KERN_ERR, conn, "Invalid buffer len of %u for login task. Max len is %u\n", data_size, ISCSI_DEF_MAX_RECV_SEG_LEN);
+			return NULL;
+		}
+
 		task = conn->login_task;
 	} else {
 		if (session->state != ISCSI_STATE_LOGGED_IN)
 			return NULL;
 
+		if (data_size != 0) {
+			iscsi_conn_printk(KERN_ERR, conn, "Can not send data buffer of len %u for op 0x%x\n", data_size, opcode);
+			return NULL;
+		}
+
 		BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
 		BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
 

From dc19e20cf4ebd23a11fabc48ad2f297d894ec857 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Sun, 31 Aug 2014 22:11:11 +0300
Subject: [PATCH 0816/1185] Revert "iwlwifi: dvm: don't enable CTS to self"

commit f47f46d7b09cf1d09e4b44b6cc4dd7d68a08028c upstream.

This reverts commit 43d826ca5979927131685cc2092c7ce862cb91cd.

This commit caused packet loss.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/iwlwifi/dvm/rxon.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/wireless/iwlwifi/dvm/rxon.c b/drivers/net/wireless/iwlwifi/dvm/rxon.c
index ca17e4c9eca2..cd1ad0019185 100644
--- a/drivers/net/wireless/iwlwifi/dvm/rxon.c
+++ b/drivers/net/wireless/iwlwifi/dvm/rxon.c
@@ -1072,6 +1072,13 @@ int iwlagn_commit_rxon(struct iwl_priv *priv, struct iwl_rxon_context *ctx)
 	/* recalculate basic rates */
 	iwl_calc_basic_rates(priv, ctx);
 
+	/*
+	 * force CTS-to-self frames protection if RTS-CTS is not preferred
+	 * one aggregation protection method
+	 */
+	if (!priv->hw_params.use_rts_for_aggregation)
+		ctx->staging.flags |= RXON_FLG_SELF_CTS_EN;
+
 	if ((ctx->vif && ctx->vif->bss_conf.use_short_slot) ||
 	    !(ctx->staging.flags & RXON_FLG_BAND_24G_MSK))
 		ctx->staging.flags |= RXON_FLG_SHORT_SLOT_MSK;
@@ -1477,6 +1484,11 @@ void iwlagn_bss_info_changed(struct ieee80211_hw *hw,
 	else
 		ctx->staging.flags &= ~RXON_FLG_TGG_PROTECT_MSK;
 
+	if (bss_conf->use_cts_prot)
+		ctx->staging.flags |= RXON_FLG_SELF_CTS_EN;
+	else
+		ctx->staging.flags &= ~RXON_FLG_SELF_CTS_EN;
+
 	memcpy(ctx->staging.bssid_addr, bss_conf->bssid, ETH_ALEN);
 
 	if (vif->type == NL80211_IFTYPE_AP ||

From 545608863572bfc5836d15635e336410d9c952f8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sat, 13 Sep 2014 04:14:30 +0900
Subject: [PATCH 0817/1185] workqueue: apply __WQ_ORDERED to
 create_singlethread_workqueue()

commit e09c2c295468476a239d13324ce9042ec4de05eb upstream.

create_singlethread_workqueue() is a compat interface for single
threaded workqueue which maps to ordered workqueue w/ rescuer in the
current implementation.  create_singlethread_workqueue() is currently
implemented by invoking alloc_workqueue() w/ appropriate parameters.

8719dceae2f9 ("workqueue: reject adjusting max_active or applying
attrs to ordered workqueues") introduced __WQ_ORDERED to protect
ordered workqueues against dynamic attribute changes which can break
ordering guarantees but forgot to apply it to
create_singlethread_workqueue().  This in itself is okay as nobody
currently uses dynamic attribute change on workqueues created with
create_singlethread_workqueue().

However, 4c16bd327c ("workqueue: implement NUMA affinity for unbound
workqueues") broke singlethreaded guarantee for ordered workqueues
through allocating a separate pool_workqueue on each NUMA node by
default.  A later change 8a2b75384444 ("workqueue: fix ordered
workqueues in NUMA setups") fixed it by allocating only one global
pool_workqueue if __WQ_ORDERED is set.

Combined, the __WQ_ORDERED omission in create_singlethread_workqueue()
became critical breaking its single threadedness and ordering
guarantee.

Let's make create_singlethread_workqueue() wrap
alloc_ordered_workqueue() instead so that it inherits __WQ_ORDERED and
can implicitly track future ordered_workqueue changes.

v2: I missed that __WQ_ORDERED now protects against pwq splitting
    across NUMA nodes and incorrectly described the patch as a
    nice-to-have fix to protect against future dynamic attribute
    usages.  Oleg pointed out that this is actually a critical
    breakage due to 8a2b75384444 ("workqueue: fix ordered workqueues
    in NUMA setups").

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Mike Anderson <mike.anderson@us.ibm.com>
Cc: Oleg Nesterov <onestero@redhat.com>
Cc: Gustavo Luiz Duarte <gduarte@redhat.com>
Cc: Tomas Henzl <thenzl@redhat.com>
Fixes: 4c16bd327c ("workqueue: implement NUMA affinity for unbound workqueues")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/workqueue.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 623488fdc1f5..ff28cf578d01 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -414,7 +414,7 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 #define create_freezable_workqueue(name)				\
 	alloc_workqueue((name), WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, 1)
 #define create_singlethread_workqueue(name)				\
-	alloc_workqueue((name), WQ_UNBOUND | WQ_MEM_RECLAIM, 1)
+	alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 

From 3710e26e8c32de6f68ec8810191b3b6c3217dad8 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Tue, 26 Aug 2014 09:05:36 -0600
Subject: [PATCH 0818/1185] block: Fix dev_t minor allocation lifetime

commit 2da78092dda13f1efd26edbbf99a567776913750 upstream.

Releases the dev_t minor when all references are closed to prevent
another device from acquiring the same major/minor.

Since the partition's release may be invoked from call_rcu's soft-irq
context, the ext_devt_idr's mutex had to be replaced with a spinlock so
as not to sleep.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 block/genhd.c             | 24 ++++++++++++++----------
 block/partition-generic.c |  2 +-
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index cdeb5277dfd4..6190449e54e3 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -28,10 +28,10 @@ struct kobject *block_depr;
 /* for extended dynamic devt allocation, currently only one major is used */
 #define NR_EXT_DEVT		(1 << MINORBITS)
 
-/* For extended devt allocation.  ext_devt_mutex prevents look up
+/* For extended devt allocation.  ext_devt_lock prevents look up
  * results from going away underneath its user.
  */
-static DEFINE_MUTEX(ext_devt_mutex);
+static DEFINE_SPINLOCK(ext_devt_lock);
 static DEFINE_IDR(ext_devt_idr);
 
 static struct device_type disk_type;
@@ -420,9 +420,13 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
 	}
 
 	/* allocate ext devt */
-	mutex_lock(&ext_devt_mutex);
-	idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_KERNEL);
-	mutex_unlock(&ext_devt_mutex);
+	idr_preload(GFP_KERNEL);
+
+	spin_lock(&ext_devt_lock);
+	idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT);
+	spin_unlock(&ext_devt_lock);
+
+	idr_preload_end();
 	if (idx < 0)
 		return idx == -ENOSPC ? -EBUSY : idx;
 
@@ -447,9 +451,9 @@ void blk_free_devt(dev_t devt)
 		return;
 
 	if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
-		mutex_lock(&ext_devt_mutex);
+		spin_lock(&ext_devt_lock);
 		idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
-		mutex_unlock(&ext_devt_mutex);
+		spin_unlock(&ext_devt_lock);
 	}
 }
 
@@ -665,7 +669,6 @@ void del_gendisk(struct gendisk *disk)
 		sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
 	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
 	device_del(disk_to_dev(disk));
-	blk_free_devt(disk_to_dev(disk)->devt);
 }
 EXPORT_SYMBOL(del_gendisk);
 
@@ -690,13 +693,13 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
 	} else {
 		struct hd_struct *part;
 
-		mutex_lock(&ext_devt_mutex);
+		spin_lock(&ext_devt_lock);
 		part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
 		if (part && get_disk(part_to_disk(part))) {
 			*partno = part->partno;
 			disk = part_to_disk(part);
 		}
-		mutex_unlock(&ext_devt_mutex);
+		spin_unlock(&ext_devt_lock);
 	}
 
 	return disk;
@@ -1098,6 +1101,7 @@ static void disk_release(struct device *dev)
 {
 	struct gendisk *disk = dev_to_disk(dev);
 
+	blk_free_devt(dev->devt);
 	disk_release_events(disk);
 	kfree(disk->random);
 	disk_replace_part_tbl(disk, NULL);
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 789cdea05893..0d9e5f97f0a8 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -211,6 +211,7 @@ static const struct attribute_group *part_attr_groups[] = {
 static void part_release(struct device *dev)
 {
 	struct hd_struct *p = dev_to_part(dev);
+	blk_free_devt(dev->devt);
 	free_part_stats(p);
 	free_part_info(p);
 	kfree(p);
@@ -253,7 +254,6 @@ void delete_partition(struct gendisk *disk, int partno)
 	rcu_assign_pointer(ptbl->last_lookup, NULL);
 	kobject_put(part->holder_dir);
 	device_del(part_to_dev(part));
-	blk_free_devt(part_devt(part));
 
 	hd_struct_put(part);
 }

From d283ad1a60a89680fe42534428d2baf29cc9f24a Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Thu, 28 Aug 2014 11:09:31 -0400
Subject: [PATCH 0819/1185] dm crypt: fix access beyond the end of allocated
 space

commit d49ec52ff6ddcda178fc2476a109cf1bd1fa19ed upstream.

The DM crypt target accesses memory beyond allocated space resulting in
a crash on 32 bit x86 systems.

This bug is very old (it dates back to 2.6.25 commit 3a7f6c990ad04 "dm
crypt: use async crypto").  However, this bug was masked by the fact
that kmalloc rounds the size up to the next power of two.  This bug
wasn't exposed until 3.17-rc1 commit 298a9fa08a ("dm crypt: use per-bio
data").  By switching to using per-bio data there was no longer any
padding beyond the end of a dm-crypt allocated memory block.

To minimize allocation overhead dm-crypt puts several structures into one
block allocated with kmalloc.  The block holds struct ablkcipher_request,
cipher-specific scratch pad (crypto_ablkcipher_reqsize(any_tfm(cc))),
struct dm_crypt_request and an initialization vector.

The variable dmreq_start is set to offset of struct dm_crypt_request
within this memory block.  dm-crypt allocates the block with this size:
cc->dmreq_start + sizeof(struct dm_crypt_request) + cc->iv_size.

When accessing the initialization vector, dm-crypt uses the function
iv_of_dmreq, which performs this calculation: ALIGN((unsigned long)(dmreq
+ 1), crypto_ablkcipher_alignmask(any_tfm(cc)) + 1).

dm-crypt allocated "cc->iv_size" bytes beyond the end of dm_crypt_request
structure.  However, when dm-crypt accesses the initialization vector, it
takes a pointer to the end of dm_crypt_request, aligns it, and then uses
it as the initialization vector.  If the end of dm_crypt_request is not
aligned on a crypto_ablkcipher_alignmask(any_tfm(cc)) boundary the
alignment causes the initialization vector to point beyond the allocated
space.

Fix this bug by calculating the variable iv_size_padding and adding it
to the allocated size.

Also correct the alignment of dm_crypt_request.  struct dm_crypt_request
is specific to dm-crypt (it isn't used by the crypto subsystem at all),
so it is aligned on __alignof__(struct dm_crypt_request).

Also align per_bio_data_size on ARCH_KMALLOC_MINALIGN, so that it is
aligned as if the block was allocated with kmalloc.

Reported-by: Krzysztof Kolasa <kkolasa@winsoft.pl>
Tested-by: Milan Broz <gmazyland@gmail.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-crypt.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 5177ba54559b..7409d79729ee 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1506,6 +1506,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	unsigned int key_size, opt_params;
 	unsigned long long tmpll;
 	int ret;
+	size_t iv_size_padding;
 	struct dm_arg_set as;
 	const char *opt_string;
 	char dummy;
@@ -1542,12 +1543,23 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	cc->dmreq_start = sizeof(struct ablkcipher_request);
 	cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc));
-	cc->dmreq_start = ALIGN(cc->dmreq_start, crypto_tfm_ctx_alignment());
-	cc->dmreq_start += crypto_ablkcipher_alignmask(any_tfm(cc)) &
-			   ~(crypto_tfm_ctx_alignment() - 1);
+	cc->dmreq_start = ALIGN(cc->dmreq_start, __alignof__(struct dm_crypt_request));
+
+	if (crypto_ablkcipher_alignmask(any_tfm(cc)) < CRYPTO_MINALIGN) {
+		/* Allocate the padding exactly */
+		iv_size_padding = -(cc->dmreq_start + sizeof(struct dm_crypt_request))
+				& crypto_ablkcipher_alignmask(any_tfm(cc));
+	} else {
+		/*
+		 * If the cipher requires greater alignment than kmalloc
+		 * alignment, we don't know the exact position of the
+		 * initialization vector. We must assume worst case.
+		 */
+		iv_size_padding = crypto_ablkcipher_alignmask(any_tfm(cc));
+	}
 
 	cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start +
-			sizeof(struct dm_crypt_request) + cc->iv_size);
+			sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size);
 	if (!cc->req_pool) {
 		ti->error = "Cannot allocate crypt request mempool";
 		goto bad;

From 08fb11395c6d803a97b45e463375cb220d1c84d5 Mon Sep 17 00:00:00 2001
From: John Sung <penmount.touch@gmail.com>
Date: Tue, 9 Sep 2014 10:06:51 -0700
Subject: [PATCH 0820/1185] Input: serport - add compat handling for SPIOCSTYPE
 ioctl

commit a80d8b02751060a178bb1f7a6b7a93645a7a308b upstream.

When running a 32-bit inputattach utility in a 64-bit system, there will be
error code "inputattach: can't set device type". This is caused by the
serport device driver not supporting compat_ioctl, so that SPIOCSTYPE ioctl
fails.

Signed-off-by: John Sung <penmount.touch@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/serio/serport.c | 47 +++++++++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c
index 8755f5f3ad37..e4ecf3b64794 100644
--- a/drivers/input/serio/serport.c
+++ b/drivers/input/serio/serport.c
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/serio.h>
 #include <linux/tty.h>
+#include <linux/compat.h>
 
 MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>");
 MODULE_DESCRIPTION("Input device TTY line discipline");
@@ -196,29 +197,56 @@ static ssize_t serport_ldisc_read(struct tty_struct * tty, struct file * file, u
 	return 0;
 }
 
+static void serport_set_type(struct tty_struct *tty, unsigned long type)
+{
+	struct serport *serport = tty->disc_data;
+
+	serport->id.proto = type & 0x000000ff;
+	serport->id.id    = (type & 0x0000ff00) >> 8;
+	serport->id.extra = (type & 0x00ff0000) >> 16;
+}
+
 /*
  * serport_ldisc_ioctl() allows to set the port protocol, and device ID
  */
 
-static int serport_ldisc_ioctl(struct tty_struct * tty, struct file * file, unsigned int cmd, unsigned long arg)
+static int serport_ldisc_ioctl(struct tty_struct *tty, struct file *file,
+			       unsigned int cmd, unsigned long arg)
 {
-	struct serport *serport = (struct serport*) tty->disc_data;
-	unsigned long type;
-
 	if (cmd == SPIOCSTYPE) {
+		unsigned long type;
+
 		if (get_user(type, (unsigned long __user *) arg))
 			return -EFAULT;
 
-		serport->id.proto = type & 0x000000ff;
-		serport->id.id	  = (type & 0x0000ff00) >> 8;
-		serport->id.extra = (type & 0x00ff0000) >> 16;
-
+		serport_set_type(tty, type);
 		return 0;
 	}
 
 	return -EINVAL;
 }
 
+#ifdef CONFIG_COMPAT
+#define COMPAT_SPIOCSTYPE	_IOW('q', 0x01, compat_ulong_t)
+static long serport_ldisc_compat_ioctl(struct tty_struct *tty,
+				       struct file *file,
+				       unsigned int cmd, unsigned long arg)
+{
+	if (cmd == COMPAT_SPIOCSTYPE) {
+		void __user *uarg = compat_ptr(arg);
+		compat_ulong_t compat_type;
+
+		if (get_user(compat_type, (compat_ulong_t __user *)uarg))
+			return -EFAULT;
+
+		serport_set_type(tty, compat_type);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+#endif
+
 static void serport_ldisc_write_wakeup(struct tty_struct * tty)
 {
 	struct serport *serport = (struct serport *) tty->disc_data;
@@ -241,6 +269,9 @@ static struct tty_ldisc_ops serport_ldisc = {
 	.close =	serport_ldisc_close,
 	.read =		serport_ldisc_read,
 	.ioctl =	serport_ldisc_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl =	serport_ldisc_compat_ioctl,
+#endif
 	.receive_buf =	serport_ldisc_receive,
 	.write_wakeup =	serport_ldisc_write_wakeup
 };

From 13ea4bd23a2d6abb0d6bab9e138cecb4f55fe211 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sat, 30 Aug 2014 13:51:06 -0700
Subject: [PATCH 0821/1185] Input: synaptics - add support for ForcePads

commit 5715fc764f7753d464dbe094b5ef9cffa6e479a4 upstream.

ForcePads are found on HP EliteBook 1040 laptops. They lack any kind of
physical buttons, instead they generate primary button click when user
presses somewhat hard on the surface of the touchpad. Unfortunately they
also report primary button click whenever there are 2 or more contacts
on the pad, messing up all multi-finger gestures (2-finger scrolling,
multi-finger tapping, etc). To cope with this behavior we introduce a
delay (currently 50 msecs) in reporting primary press in case more
contacts appear.

Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/synaptics.c | 68 +++++++++++++++++++++++++--------
 drivers/input/mouse/synaptics.h | 11 ++++++
 2 files changed, 63 insertions(+), 16 deletions(-)

diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index f36f7b88f260..d1c47d135c07 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -549,10 +549,61 @@ static int synaptics_parse_hw_state(const unsigned char buf[],
 			 ((buf[0] & 0x04) >> 1) |
 			 ((buf[3] & 0x04) >> 2));
 
+		if ((SYN_CAP_ADV_GESTURE(priv->ext_cap_0c) ||
+			SYN_CAP_IMAGE_SENSOR(priv->ext_cap_0c)) &&
+		    hw->w == 2) {
+			synaptics_parse_agm(buf, priv, hw);
+			return 1;
+		}
+
+		hw->x = (((buf[3] & 0x10) << 8) |
+			 ((buf[1] & 0x0f) << 8) |
+			 buf[4]);
+		hw->y = (((buf[3] & 0x20) << 7) |
+			 ((buf[1] & 0xf0) << 4) |
+			 buf[5]);
+		hw->z = buf[2];
+
 		hw->left  = (buf[0] & 0x01) ? 1 : 0;
 		hw->right = (buf[0] & 0x02) ? 1 : 0;
 
-		if (SYN_CAP_CLICKPAD(priv->ext_cap_0c)) {
+		if (SYN_CAP_FORCEPAD(priv->ext_cap_0c)) {
+			/*
+			 * ForcePads, like Clickpads, use middle button
+			 * bits to report primary button clicks.
+			 * Unfortunately they report primary button not
+			 * only when user presses on the pad above certain
+			 * threshold, but also when there are more than one
+			 * finger on the touchpad, which interferes with
+			 * out multi-finger gestures.
+			 */
+			if (hw->z == 0) {
+				/* No contacts */
+				priv->press = priv->report_press = false;
+			} else if (hw->w >= 4 && ((buf[0] ^ buf[3]) & 0x01)) {
+				/*
+				 * Single-finger touch with pressure above
+				 * the threshold. If pressure stays long
+				 * enough, we'll start reporting primary
+				 * button. We rely on the device continuing
+				 * sending data even if finger does not
+				 * move.
+				 */
+				if  (!priv->press) {
+					priv->press_start = jiffies;
+					priv->press = true;
+				} else if (time_after(jiffies,
+						priv->press_start +
+							msecs_to_jiffies(50))) {
+					priv->report_press = true;
+				}
+			} else {
+				priv->press = false;
+			}
+
+			hw->left = priv->report_press;
+
+		} else if (SYN_CAP_CLICKPAD(priv->ext_cap_0c)) {
 			/*
 			 * Clickpad's button is transmitted as middle button,
 			 * however, since it is primary button, we will report
@@ -571,21 +622,6 @@ static int synaptics_parse_hw_state(const unsigned char buf[],
 			hw->down = ((buf[0] ^ buf[3]) & 0x02) ? 1 : 0;
 		}
 
-		if ((SYN_CAP_ADV_GESTURE(priv->ext_cap_0c) ||
-			SYN_CAP_IMAGE_SENSOR(priv->ext_cap_0c)) &&
-		    hw->w == 2) {
-			synaptics_parse_agm(buf, priv, hw);
-			return 1;
-		}
-
-		hw->x = (((buf[3] & 0x10) << 8) |
-			 ((buf[1] & 0x0f) << 8) |
-			 buf[4]);
-		hw->y = (((buf[3] & 0x20) << 7) |
-			 ((buf[1] & 0xf0) << 4) |
-			 buf[5]);
-		hw->z = buf[2];
-
 		if (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) &&
 		    ((buf[0] ^ buf[3]) & 0x02)) {
 			switch (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) & ~0x01) {
diff --git a/drivers/input/mouse/synaptics.h b/drivers/input/mouse/synaptics.h
index e594af0b264b..fb2e076738ae 100644
--- a/drivers/input/mouse/synaptics.h
+++ b/drivers/input/mouse/synaptics.h
@@ -78,6 +78,11 @@
  * 2	0x08	image sensor		image sensor tracks 5 fingers, but only
  *					reports 2.
  * 2	0x20	report min		query 0x0f gives min coord reported
+ * 2	0x80	forcepad		forcepad is a variant of clickpad that
+ *					does not have physical buttons but rather
+ *					uses pressure above certain threshold to
+ *					report primary clicks. Forcepads also have
+ *					clickpad bit set.
  */
 #define SYN_CAP_CLICKPAD(ex0c)		((ex0c) & 0x100000) /* 1-button ClickPad */
 #define SYN_CAP_CLICKPAD2BTN(ex0c)	((ex0c) & 0x000100) /* 2-button ClickPad */
@@ -86,6 +91,7 @@
 #define SYN_CAP_ADV_GESTURE(ex0c)	((ex0c) & 0x080000)
 #define SYN_CAP_REDUCED_FILTERING(ex0c)	((ex0c) & 0x000400)
 #define SYN_CAP_IMAGE_SENSOR(ex0c)	((ex0c) & 0x000800)
+#define SYN_CAP_FORCEPAD(ex0c)		((ex0c) & 0x008000)
 
 /* synaptics modes query bits */
 #define SYN_MODE_ABSOLUTE(m)		((m) & (1 << 7))
@@ -177,6 +183,11 @@ struct synaptics_data {
 	 */
 	struct synaptics_hw_state agm;
 	bool agm_pending;			/* new AGM packet received */
+
+	/* ForcePad handling */
+	unsigned long				press_start;
+	bool					press;
+	bool					report_press;
 };
 
 void synaptics_module_init(void);

From 9ec354827ed22cfdbbe7d597f937df3bbe384bbc Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 8 Sep 2014 14:39:52 -0700
Subject: [PATCH 0822/1185] Input: elantech - fix detection of touchpad on ASUS
 s301l
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 271329b3c798b2102120f5df829071c211ef00ed upstream.

Adjust Elantech signature validation to account for newer models of
touchpads.

Reported-and-tested-by: Màrius Monton <marius.monton@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/elantech.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index 1913301df08f..85e75239c814 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1223,6 +1223,13 @@ static bool elantech_is_signature_valid(const unsigned char *param)
 	if (param[1] == 0)
 		return true;
 
+	/*
+	 * Some models have a revision higher then 20. Meaning param[2] may
+	 * be 10 or 20, skip the rates check for these.
+	 */
+	if (param[0] == 0x46 && (param[1] & 0xef) == 0x0f && param[2] < 40)
+		return true;
+
 	for (i = 0; i < ARRAY_SIZE(rates); i++)
 		if (param[2] == rates[i])
 			return false;

From 2b4e74c40c5d6d068f48dafe3626dfa022aae38d Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 10 Sep 2014 13:50:37 -0700
Subject: [PATCH 0823/1185] Input: atkbd - do not try 'deactivate' keyboard on
 any LG laptops

commit c01206796139e2b1feb7539bc72174fef1c6dc6e upstream.

We are getting more and more reports about LG laptops not having
functioning keyboard if we try to deactivate keyboard during probe.
Given that having keyboard deactivated is merely "nice to have"
instead of a hard requirement for probing, let's disable it on all
LG boxes instead of trying to hunt down particular models.

This change is prompted by patches trying to add "LG Electronics"/"ROCKY"
and "LG Electronics"/"LW60-F27B" to the DMI list.

https://bugzilla.kernel.org/show_bug.cgi?id=77051

Reported-by: Jaime Velasco Juan <jsagarribay@gmail.com>
Reported-by: Georgios Tsalikis <georgios@tsalikis.net>
Tested-by: Jaime Velasco Juan <jsagarribay@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/keyboard/atkbd.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 2dd1d0dd4f7d..6f5d79569136 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -1791,14 +1791,6 @@ static const struct dmi_system_id atkbd_dmi_quirk_table[] __initconst = {
 	{
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "LG Electronics"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "LW25-B7HV"),
-		},
-		.callback = atkbd_deactivate_fixup,
-	},
-	{
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "LG Electronics"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "P1-J273B"),
 		},
 		.callback = atkbd_deactivate_fixup,
 	},

From 6a45d25a83a02e5e4daf51c15439aa38efa2d2d0 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 10 Sep 2014 13:53:37 -0700
Subject: [PATCH 0824/1185] Input: i8042 - add Fujitsu U574 to no_timeout dmi
 table

commit cc18a69c92d0972bc2fc5a047ee3be1e8398171b upstream.

https://bugzilla.kernel.org/show_bug.cgi?id=69731

Reported-by: Jason Robinson <mail@jasonrobinson.me>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/serio/i8042-x86ia64io.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 0ec9abbe31fe..dc37b322c6d1 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -601,6 +601,14 @@ static const struct dmi_system_id __initconst i8042_dmi_notimeout_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv4 Notebook PC"),
 		},
 	},
+	{
+		/* Fujitsu U574 laptop */
+		/* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U574"),
+		},
+	},
 	{ }
 };
 

From 0663d904e1f9451ea8caa460f32ca6657a31e3f5 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 11 Sep 2014 10:10:26 -0700
Subject: [PATCH 0825/1185] Input: i8042 - add nomux quirk for Avatar
 AVIU-145A6

commit d2682118f4bb3ceb835f91c1a694407a31bb7378 upstream.

The sys_vendor / product_name are somewhat generic unfortunately, so this
may lead to some false positives. But nomux usually does no harm, whereas
not having it clearly is causing problems on the Avatar AVIU-145A6.

https://bugzilla.kernel.org/show_bug.cgi?id=77391

Reported-by: Hugo P <saurosii@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/serio/i8042-x86ia64io.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index dc37b322c6d1..1291673bd57e 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -458,6 +458,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv4 Notebook PC"),
 		},
 	},
+	{
+		/* Avatar AVIU-145A6 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Intel"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "IC4I"),
+		},
+	},
 	{ }
 };
 

From 767d0fbb89f87a2e386b953280b03739a74a8f83 Mon Sep 17 00:00:00 2001
From: James Ralston <james.d.ralston@intel.com>
Date: Wed, 27 Aug 2014 14:31:58 -0700
Subject: [PATCH 0826/1185] ata_piix: Add Device IDs for Intel 9 Series PCH

commit 6cad1376954e591c3c41500c4e586e183e7ffe6d upstream.

This patch adds the IDE mode SATA Device IDs for the Intel 9 Series PCH.

Signed-off-by: James Ralston <james.d.ralston@intel.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/ata_piix.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index b92913a528b6..82aa7b550ea5 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -340,6 +340,14 @@ static const struct pci_device_id piix_pci_tbl[] = {
 	{ 0x8086, 0x0F21, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_byt },
 	/* SATA Controller IDE (Coleto Creek) */
 	{ 0x8086, 0x23a6, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata },
+	/* SATA Controller IDE (9 Series) */
+	{ 0x8086, 0x8c88, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_snb },
+	/* SATA Controller IDE (9 Series) */
+	{ 0x8086, 0x8c89, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_snb },
+	/* SATA Controller IDE (9 Series) */
+	{ 0x8086, 0x8c80, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb },
+	/* SATA Controller IDE (9 Series) */
+	{ 0x8086, 0x8c81, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb },
 
 	{ }	/* terminate list */
 };

From f9c27a7c7880c29c7579dea9134dfef355f6c3eb Mon Sep 17 00:00:00 2001
From: Honggang Li <enjoymindful@gmail.com>
Date: Tue, 12 Aug 2014 21:36:15 +0800
Subject: [PATCH 0827/1185] percpu: free percpu allocation info for
 uniprocessor system

commit 3189eddbcafcc4d827f7f19facbeddec4424eba8 upstream.

Currently, only SMP systems free the percpu allocation info.
Uniprocessor systems should free it too. For example, one x86 UML
virtual machine with 256MB memory, UML kernel wastes one page memory.

Signed-off-by: Honggang Li <enjoymindful@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/percpu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/percpu.c b/mm/percpu.c
index 25e2ea52db82..9bc1bf914cc8 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1910,6 +1910,8 @@ void __init setup_per_cpu_areas(void)
 
 	if (pcpu_setup_first_chunk(ai, fc) < 0)
 		panic("Failed to initialize percpu areas.");
+
+	pcpu_free_alloc_info(ai);
 }
 
 #endif	/* CONFIG_SMP */

From 5df644fd609cd0610a77dd13eed9729bd2eefbd5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 15 Aug 2014 16:06:06 -0400
Subject: [PATCH 0828/1185] percpu: fix pcpu_alloc_pages() failure path

commit f0d279654dea22b7a6ad34b9334aee80cda62cde upstream.

When pcpu_alloc_pages() fails midway, pcpu_free_pages() is invoked to
free what has already been allocated.  The invocation is across the
whole requested range and pcpu_free_pages() will try to free all
non-NULL pages; unfortunately, this is incorrect as
pcpu_get_pages_and_bitmap(), unlike what its comment suggests, doesn't
clear the pages array and thus the array may have entries from the
previous invocations making the partial failure path free incorrect
pages.

Fix it by open-coding the partial freeing of the already allocated
pages.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/percpu-vm.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 3707c71ae4cd..8d9bb2c00c68 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -108,7 +108,7 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
 			    int page_start, int page_end)
 {
 	const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
-	unsigned int cpu;
+	unsigned int cpu, tcpu;
 	int i;
 
 	for_each_possible_cpu(cpu) {
@@ -116,14 +116,23 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
 			struct page **pagep = &pages[pcpu_page_idx(cpu, i)];
 
 			*pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
-			if (!*pagep) {
-				pcpu_free_pages(chunk, pages, populated,
-						page_start, page_end);
-				return -ENOMEM;
-			}
+			if (!*pagep)
+				goto err;
 		}
 	}
 	return 0;
+
+err:
+	while (--i >= page_start)
+		__free_page(pages[pcpu_page_idx(cpu, i)]);
+
+	for_each_possible_cpu(tcpu) {
+		if (tcpu == cpu)
+			break;
+		for (i = page_start; i < page_end; i++)
+			__free_page(pages[pcpu_page_idx(tcpu, i)]);
+	}
+	return -ENOMEM;
 }
 
 /**

From 9afaddd9781bb8a856d6cb48cf404e5f3d59b6de Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 15 Aug 2014 16:06:10 -0400
Subject: [PATCH 0829/1185] percpu: perform tlb flush after pcpu_map_pages()
 failure

commit 849f5169097e1ba35b90ac9df76b5bb6f9c0aabd upstream.

If pcpu_map_pages() fails midway, it unmaps the already mapped pages.
Currently, it doesn't flush tlb after the partial unmapping.  This may
be okay in most cases as the established mapping hasn't been used at
that point but it can go wrong and when it goes wrong it'd be
extremely difficult to track down.

Flush tlb after the partial unmapping.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/percpu-vm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 8d9bb2c00c68..51108165f829 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -272,6 +272,7 @@ static int pcpu_map_pages(struct pcpu_chunk *chunk,
 		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
 				   page_end - page_start);
 	}
+	pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
 	return err;
 }
 

From 2e1f0a092aae409d2f1b82a1d4026be8812f714b Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Sun, 24 Aug 2014 17:49:43 -0500
Subject: [PATCH 0830/1185] rtlwifi: rtl8192cu: Add new ID

commit c66517165610b911e4c6d268f28d8c640832dbd1 upstream.

The Sitecom WLA-2102 adapter uses this driver.

Reported-by: Nico Baggus <nico-linux@noci.xs4all.nl>
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Nico Baggus <nico-linux@noci.xs4all.nl>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/rtlwifi/rtl8192cu/sw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c
index 8188dcb512f0..e7a2af3ad05a 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c
@@ -316,6 +316,7 @@ static struct usb_device_id rtl8192c_usb_ids[] = {
 	{RTL_USB_DEVICE(0x0bda, 0x5088, rtl92cu_hal_cfg)}, /*Thinkware-CC&C*/
 	{RTL_USB_DEVICE(0x0df6, 0x0052, rtl92cu_hal_cfg)}, /*Sitecom - Edimax*/
 	{RTL_USB_DEVICE(0x0df6, 0x005c, rtl92cu_hal_cfg)}, /*Sitecom - Edimax*/
+	{RTL_USB_DEVICE(0x0df6, 0x0070, rtl92cu_hal_cfg)}, /*Sitecom - 150N */
 	{RTL_USB_DEVICE(0x0df6, 0x0077, rtl92cu_hal_cfg)}, /*Sitecom-WLA2100V2*/
 	{RTL_USB_DEVICE(0x0eb0, 0x9071, rtl92cu_hal_cfg)}, /*NO Brand - Etop*/
 	{RTL_USB_DEVICE(0x4856, 0x0091, rtl92cu_hal_cfg)}, /*NetweeN - Feixun*/

From d4e4b73470ee5e9a93d543cc0eaa4e4d28f1c2df Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 29 Aug 2014 16:25:50 -0400
Subject: [PATCH 0831/1185] lockd: fix rpcbind crash on lockd startup failure

commit 7c17705e77b12b20fb8afb7c1b15dcdb126c0c12 upstream.

Nikita Yuschenko reported that booting a kernel with init=/bin/sh and
then nfs mounting without portmap or rpcbind running using a busybox
mount resulted in:

  # mount -t nfs 10.30.130.21:/opt /mnt
  svc: failed to register lockdv1 RPC service (errno 111).
  lockd_up: makesock failed, error=-111
  Unable to handle kernel paging request for data at address 0x00000030
  Faulting instruction address: 0xc055e65c
  Oops: Kernel access of bad area, sig: 11 [#1]
  MPC85xx CDS
  Modules linked in:
  CPU: 0 PID: 1338 Comm: mount Not tainted 3.10.44.cge #117
  task: cf29cea0 ti: cf35c000 task.ti: cf35c000
  NIP: c055e65c LR: c0566490 CTR: c055e648
  REGS: cf35dad0 TRAP: 0300   Not tainted  (3.10.44.cge)
  MSR: 00029000 <CE,EE,ME>  CR: 22442488  XER: 20000000
  DEAR: 00000030, ESR: 00000000

  GPR00: c05606f4 cf35db80 cf29cea0 cf0ded80 cf0dedb8 00000001 1dec3086
  00000000
  GPR08: 00000000 c07b1640 00000007 1dec3086 22442482 100b9758 00000000
  10090ae8
  GPR16: 00000000 000186a5 00000000 00000000 100c3018 bfa46edc 100b0000
  bfa46ef0
  GPR24: cf386ae0 c07834f0 00000000 c0565f88 00000001 cf0dedb8 00000000
  cf0ded80
  NIP [c055e65c] call_start+0x14/0x34
  LR [c0566490] __rpc_execute+0x70/0x250
  Call Trace:
  [cf35db80] [00000080] 0x80 (unreliable)
  [cf35dbb0] [c05606f4] rpc_run_task+0x9c/0xc4
  [cf35dbc0] [c0560840] rpc_call_sync+0x50/0xb8
  [cf35dbf0] [c056ee90] rpcb_register_call+0x54/0x84
  [cf35dc10] [c056f24c] rpcb_register+0xf8/0x10c
  [cf35dc70] [c0569e18] svc_unregister.isra.23+0x100/0x108
  [cf35dc90] [c0569e38] svc_rpcb_cleanup+0x18/0x30
  [cf35dca0] [c0198c5c] lockd_up+0x1dc/0x2e0
  [cf35dcd0] [c0195348] nlmclnt_init+0x2c/0xc8
  [cf35dcf0] [c015bb5c] nfs_start_lockd+0x98/0xec
  [cf35dd20] [c015ce6c] nfs_create_server+0x1e8/0x3f4
  [cf35dd90] [c0171590] nfs3_create_server+0x10/0x44
  [cf35dda0] [c016528c] nfs_try_mount+0x158/0x1e4
  [cf35de20] [c01670d0] nfs_fs_mount+0x434/0x8c8
  [cf35de70] [c00cd3bc] mount_fs+0x20/0xbc
  [cf35de90] [c00e4f88] vfs_kern_mount+0x50/0x104
  [cf35dec0] [c00e6e0c] do_mount+0x1d0/0x8e0
  [cf35df10] [c00e75ac] SyS_mount+0x90/0xd0
  [cf35df40] [c000ccf4] ret_from_syscall+0x0/0x3c

The addition of svc_shutdown_net() resulted in two calls to
svc_rpcb_cleanup(); the second is no longer necessary and crashes when
it calls rpcb_register_call with clnt=NULL.

Reported-by: Nikita Yushchenko <nyushchenko@dev.rtsoft.ru>
Fixes: 679b033df484 "lockd: ensure we tear down any live sockets when socket creation fails during lockd_up"
Acked-by: Jeff Layton <jlayton@primarydata.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/lockd/svc.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index d56a9904e52a..9c8a5a6d33df 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -253,13 +253,11 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net)
 
 	error = make_socks(serv, net);
 	if (error < 0)
-		goto err_socks;
+		goto err_bind;
 	set_grace_period(net);
 	dprintk("lockd_up_net: per-net data created; net=%p\n", net);
 	return 0;
 
-err_socks:
-	svc_rpcb_cleanup(serv, net);
 err_bind:
 	ln->nlmsvc_users--;
 	return error;

From 459bd57b368c583f53faf84cf8e483837584f9ae Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 16 Sep 2014 13:38:51 -0600
Subject: [PATCH 0832/1185] genhd: fix leftover might_sleep() in
 blk_free_devt()

commit 46f341ffcfb5d8530f7d1e60f3be06cce6661b62 upstream.

Commit 2da78092 changed the locking from a mutex to a spinlock,
so we no longer sleep in this context. But there was a leftover
might_sleep() in there, which now triggers since we do the final
free from an RCU callback. Get rid of it.

Reported-by: Pontus Fuchs <pontus.fuchs@gmail.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 block/genhd.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 6190449e54e3..e670148c3773 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -445,8 +445,6 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
  */
 void blk_free_devt(dev_t devt)
 {
-	might_sleep();
-
 	if (devt == MKDEV(0, 0))
 		return;
 

From 9e55bd93adac853928050d2c8c820a9e06e93938 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Wed, 27 Aug 2014 16:38:04 -0500
Subject: [PATCH 0833/1185] usb: host: xhci: fix compliance mode workaround

commit 96908589a8b2584b1185f834d365f5cc360e8226 upstream.

Commit 71c731a (usb: host: xhci: Fix Compliance Mode
on SN65LVP3502CP Hardware) implemented a workaround
for a known issue with Texas Instruments' USB 3.0
redriver IC but it left a condition where any xHCI
host would be taken out of reset if port was placed
in compliance mode and there was no device connected
to the port.

That condition would trigger a fake connection to a
non-existent device so that usbcore would trigger a
warm reset of the port, thus taking the link out of
reset.

This has the side-effect of preventing any xHCI host
connected to a Linux machine from starting and running
the USB 3.0 Electrical Compliance Suite because the
port will mysteriously be taken out of compliance mode
and, thus, xHCI won't step through the necessary
compliance patterns for link validation.

This patch fixes the issue by just adding a missing
check for XHCI_COMP_MODE_QUIRK inside
xhci_hub_report_usb3_link_state() when PORT_CAS isn't
set.

This patch should be backported to all kernels containing
commit 71c731a.

Fixes: 71c731a (usb: host: xhci: Fix Compliance Mode on SN65LVP3502CP Hardware)
Cc: Alexis R. Cortes <alexis.cortes@ti.com>
Cc: <stable@vger.kernel.org> # v3.2+
Signed-off-by: Felipe Balbi <balbi@ti.com>
Acked-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-hub.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 7cdcfd024744..d939376c5dee 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -462,7 +462,8 @@ void xhci_test_and_clear_bit(struct xhci_hcd *xhci, __le32 __iomem **port_array,
 }
 
 /* Updates Link Status for super Speed port */
-static void xhci_hub_report_link_state(u32 *status, u32 status_reg)
+static void xhci_hub_report_link_state(struct xhci_hcd *xhci,
+		u32 *status, u32 status_reg)
 {
 	u32 pls = status_reg & PORT_PLS_MASK;
 
@@ -501,7 +502,8 @@ static void xhci_hub_report_link_state(u32 *status, u32 status_reg)
 		 * in which sometimes the port enters compliance mode
 		 * caused by a delay on the host-device negotiation.
 		 */
-		if (pls == USB_SS_PORT_LS_COMP_MOD)
+		if ((xhci->quirks & XHCI_COMP_MODE_QUIRK) &&
+				(pls == USB_SS_PORT_LS_COMP_MOD))
 			pls |= USB_PORT_STAT_CONNECTION;
 	}
 
@@ -686,7 +688,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
 		}
 		/* Update Port Link State for super speed ports*/
 		if (hcd->speed == HCD_USB3) {
-			xhci_hub_report_link_state(&status, temp);
+			xhci_hub_report_link_state(xhci, &status, temp);
 			/*
 			 * Verify if all USB3 Ports Have entered U0 already.
 			 * Delete Compliance Mode Timer if so.

From a42b9e79f38bbd0ec17b065eed66ccc70a152a83 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Tue, 2 Sep 2014 14:57:20 -0500
Subject: [PATCH 0834/1185] usb: dwc3: core: fix order of PM runtime calls

commit fed33afce0eda44a46ae24d93aec1b5198c0bac4 upstream.

Currently, we disable pm_runtime before all register
accesses are done, this is dangerous and might lead
to abort exceptions due to the driver trying to access
a register which is clocked by a clock which was long
gated.

Fix that by moving pm_runtime_put_sync() and pm_runtime_disable()
as the last thing we do before returning from our ->remove()
method.

Fixes: 72246da (usb: Introduce DesignWare USB3 DRD Driver)
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 358375e0b291..60ea5994e2a5 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -606,9 +606,6 @@ static int dwc3_remove(struct platform_device *pdev)
 	usb_phy_set_suspend(dwc->usb2_phy, 1);
 	usb_phy_set_suspend(dwc->usb3_phy, 1);
 
-	pm_runtime_put(&pdev->dev);
-	pm_runtime_disable(&pdev->dev);
-
 	dwc3_debugfs_exit(dwc);
 
 	switch (dwc->mode) {
@@ -631,6 +628,9 @@ static int dwc3_remove(struct platform_device *pdev)
 	dwc3_free_event_buffers(dwc);
 	dwc3_core_exit(dwc);
 
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
 	return 0;
 }
 

From c301a4b0e53bd0f49a3532e14d19061d8d0ea1db Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Wed, 3 Sep 2014 16:13:37 -0500
Subject: [PATCH 0835/1185] usb: dwc3: core: fix ordering for PHY suspend

commit dc99f16f076559235c92d3eb66d03d1310faea08 upstream.

We can't suspend the PHYs before dwc3_core_exit_mode()
has been called, that's because the host and/or device
sides might still need to communicate with the far end
link partner.

Fixes: 8ba007a (usb: dwc3: core: enable the USB2 and USB3 phy in probe)
Suggested-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/core.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 60ea5994e2a5..1d386030d3c4 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -603,9 +603,6 @@ static int dwc3_remove(struct platform_device *pdev)
 {
 	struct dwc3	*dwc = platform_get_drvdata(pdev);
 
-	usb_phy_set_suspend(dwc->usb2_phy, 1);
-	usb_phy_set_suspend(dwc->usb3_phy, 1);
-
 	dwc3_debugfs_exit(dwc);
 
 	switch (dwc->mode) {
@@ -626,6 +623,10 @@ static int dwc3_remove(struct platform_device *pdev)
 
 	dwc3_event_buffers_cleanup(dwc);
 	dwc3_free_event_buffers(dwc);
+
+	usb_phy_set_suspend(dwc->usb2_phy, 1);
+	usb_phy_set_suspend(dwc->usb3_phy, 1);
+
 	dwc3_core_exit(dwc);
 
 	pm_runtime_put_sync(&pdev->dev);

From baa28fb6770eb3ccea48d4880a63bb976608a6ce Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 25 Aug 2014 12:08:09 +0200
Subject: [PATCH 0836/1185] Revert "mac80211: disable uAPSD if all ACs are
 under ACM"

commit bb512ad0732232f1d2693bb68f31a76bed8f22ae upstream.

This reverts commit 24aa11ab8ae03292d38ec0dbd9bc2ac49fe8a6dd.

That commit was wrong since it uses data that hasn't even been set
up yet, but might be a hold-over from a previous connection.

Additionally, it seems like a driver-specific workaround that
shouldn't have been in mac80211 to start with.

Fixes: 24aa11ab8ae0 ("mac80211: disable uAPSD if all ACs are under ACM")
Reviewed-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/mlme.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index fc94937cd7b3..e606e4a113e1 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4395,8 +4395,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 	rcu_read_unlock();
 
 	if (bss->wmm_used && bss->uapsd_supported &&
-	    (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) &&
-	    sdata->wmm_acm != 0xff) {
+	    (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) {
 		assoc_data->uapsd = true;
 		ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED;
 	} else {

From 2b952aeed3c78ce84f1135b896d6346bd467e032 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Tue, 9 Sep 2014 14:51:01 -0700
Subject: [PATCH 0837/1185] kcmp: fix standard comparison bug

commit acbbe6fbb240a927ee1f5994f04d31267d422215 upstream.

The C operator <= defines a perfectly fine total ordering on the set of
values representable in a long.  However, unlike its namesake in the
integers, it is not translation invariant, meaning that we do not have
"b <= c" iff "a+b <= a+c" for all a,b,c.

This means that it is always wrong to try to boil down the relationship
between two longs to a question about the sign of their difference,
because the resulting relation [a LEQ b iff a-b <= 0] is neither
anti-symmetric nor transitive.  The former is due to -LONG_MIN==LONG_MIN
(take any two a,b with a-b = LONG_MIN; then a LEQ b and b LEQ a, but a !=
b).  The latter can either be seen observing that x LEQ x+1 for all x,
implying x LEQ x+1 LEQ x+2 ...  LEQ x-1 LEQ x; or more directly with the
simple example a=LONG_MIN, b=0, c=1, for which a-b < 0, b-c < 0, but a-c >
0.

Note that it makes absolutely no difference that a transmogrifying bijection
has been applied before the comparison is done.  In fact, had the
obfuscation not been done, one could probably not observe the bug
(assuming all values being compared always lie in one half of the address
space, the mathematical value of a-b is always representable in a long).
As it stands, one can easily obtain three file descriptors exhibiting the
non-transitivity of kcmp().

Side note 1: I can't see that ensuring the MSB of the multiplier is
set serves any purpose other than obfuscating the obfuscating code.

Side note 2:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <sys/syscall.h>

/* Values for kcmp()'s `type` argument; this program only passes KCMP_FILE. */
enum kcmp_type {
        KCMP_FILE,
        KCMP_VM,
        KCMP_FILES,
        KCMP_FS,
        KCMP_SIGHAND,
        KCMP_IO,
        KCMP_SYSVSEM,
        KCMP_TYPES,
};
pid_t pid;

/* Thin wrapper: forwards its arguments unchanged to syscall(SYS_kcmp, ...). */
int kcmp(pid_t pid1, pid_t pid2, int type,
	 unsigned long idx1, unsigned long idx2)
{
	return syscall(SYS_kcmp, pid1, pid2, type, idx1, idx2);
}
/*
 * Compare two file descriptors with KCMP_FILE, using the global `pid` for
 * both sides of the comparison.
 *
 * Returns kcmp()'s result, asserted to be 0, 1 or 2.  If kcmp() returns a
 * negative value, prints the error with perror() and exits with status 1.
 */
int cmp_fd(int fd1, int fd2)
{
	int c = kcmp(pid, pid, KCMP_FILE, fd1, fd2);
	if (c < 0) {
		perror("kcmp");
		exit(1);
	}
	assert(0 <= c && c < 3);
	return c;
}
/*
 * qsort() comparator over an array of int file descriptors.
 * Translates cmp_fd()'s result 0/1/2 into the 0/-1/1 that qsort() expects.
 */
int cmp_fdp(const void *a, const void *b)
{
	/* Indexed by cmp_fd()'s return value. */
	static const int normalize[] = {0, -1, 1};
	return normalize[cmp_fd(*(int*)a, *(int*)b)];
}
#define MAX 100 /* This is plenty; I've seen it trigger for MAX==3 */
/*
 * Opens up to MAX descriptors on /dev/null, tallies the cmp_fd() result for
 * every pair, sorts the descriptors with cmp_fdp() and tallies every pair
 * again, printing both tallies.
 *
 * Returns nonzero if, after sorting, any pair (r < s) still compares as
 * "==" or ">", i.e. the sorted order is not consistent with cmp_fd().
 */
int main(int argc, char *argv[])
{
	int r, s, count = 0;
	/* REL[c] counts the pairs for which cmp_fd() returned c. */
	int REL[3] = {0,0,0};
	int fd[MAX];
	pid = getpid();
	/* Open up to MAX descriptors; stop early if open() fails. */
	while (count < MAX) {
		r = open("/dev/null", O_RDONLY);
		if (r < 0)
			break;
		fd[count++] = r;
	}
	printf("opened %d file descriptors\n", count);
	/* Tally every pair (r < s) in the order the descriptors were opened. */
	for (r = 0; r < count; ++r) {
		for (s = r+1; s < count; ++s) {
			REL[cmp_fd(fd[r], fd[s])]++;
		}
	}
	printf("== %d\t< %d\t> %d\n", REL[0], REL[1], REL[2]);
	/* Sort by the ordering kcmp() reports, then reset and redo the tally. */
	qsort(fd, count, sizeof(fd[0]), cmp_fdp);
	memset(REL, 0, sizeof(REL));

	for (r = 0; r < count; ++r) {
		for (s = r+1; s < count; ++s) {
			REL[cmp_fd(fd[r], fd[s])]++;
		}
	}
	printf("== %d\t< %d\t> %d\n", REL[0], REL[1], REL[2]);
	/* Nonzero exit status when any sorted pair was counted as "==" or ">". */
	return (REL[0] + REL[2] != 0);
}

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
"Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/kcmp.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index e30ac0fe61c3..0aa69ea1d8fd 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -44,11 +44,12 @@ static long kptr_obfuscate(long v, int type)
  */
 static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type)
 {
-	long ret;
+	long t1, t2;
 
-	ret = kptr_obfuscate((long)v1, type) - kptr_obfuscate((long)v2, type);
+	t1 = kptr_obfuscate((long)v1, type);
+	t2 = kptr_obfuscate((long)v2, type);
 
-	return (ret < 0) | ((ret > 0) << 1);
+	return (t1 < t2) | ((t1 > t2) << 1);
 }
 
 /* The caller must have pinned the task */

From d1a0f0ccd02dace999ea26813924874167a74774 Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Tue, 9 Sep 2014 14:51:04 -0700
Subject: [PATCH 0838/1185] fsnotify/fdinfo: use named constants instead of
 hardcoded values

commit 1fc98d11cac6dd66342e5580cb2687e5b1e9a613 upstream.

MAX_HANDLE_SZ is equal to 128, but currently the size of pad is only 64
bytes, so exportfs_encode_inode_fh can return an error.

Signed-off-by: Andrey Vagin <avagin@openvz.org>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/notify/fdinfo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 238a5930cb3c..660d33bc1bef 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -42,7 +42,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
 {
 	struct {
 		struct file_handle handle;
-		u8 pad[64];
+		u8 pad[MAX_HANDLE_SZ];
 	} f;
 	int size, ret, i;
 
@@ -50,7 +50,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
 	size = f.handle.handle_bytes >> 2;
 
 	ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0);
-	if ((ret == 255) || (ret == -ENOSPC)) {
+	if ((ret == FILEID_INVALID) || (ret == -ENOSPC)) {
 		WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret);
 		return 0;
 	}

From f88d0f25cf3294f40a13d71c6b7033d23abe2254 Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Tue, 9 Sep 2014 14:51:06 -0700
Subject: [PATCH 0839/1185] fs/notify: don't show f_handle if
 exportfs_encode_inode_fh failed

commit 7e8824816bda16bb11ff5ff1e1212d642e57b0b3 upstream.

Currently we handle only ENOSPC.  In case of other errors the file_handle
variable isn't filled properly and we will show a part of stack.

Signed-off-by: Andrey Vagin <avagin@openvz.org>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/notify/fdinfo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 660d33bc1bef..9d7e2b9659cb 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -50,7 +50,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
 	size = f.handle.handle_bytes >> 2;
 
 	ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0);
-	if ((ret == FILEID_INVALID) || (ret == -ENOSPC)) {
+	if ((ret == FILEID_INVALID) || (ret < 0)) {
 		WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret);
 		return 0;
 	}

From a0778f706909e867fde6304369a65c7ffad28e1e Mon Sep 17 00:00:00 2001
From: Andreas Rohner <andreas.rohner@gmx.net>
Date: Thu, 25 Sep 2014 16:05:14 -0700
Subject: [PATCH 0840/1185] nilfs2: fix data loss with mmap()

commit 56d7acc792c0d98f38f22058671ee715ff197023 upstream.

This bug leads to reproducible silent data loss, despite the use of
msync(), sync() and a clean unmount of the file system.  It is easily
reproducible with the following script:

  ----------------[BEGIN SCRIPT]--------------------
  mkfs.nilfs2 -f /dev/sdb
  mount /dev/sdb /mnt

  dd if=/dev/zero bs=1M count=30 of=/mnt/testfile

  umount /mnt
  mount /dev/sdb /mnt
  CHECKSUM_BEFORE="$(md5sum /mnt/testfile)"

  /root/mmaptest/mmaptest /mnt/testfile 30 10 5

  sync
  CHECKSUM_AFTER="$(md5sum /mnt/testfile)"
  umount /mnt
  mount /dev/sdb /mnt
  CHECKSUM_AFTER_REMOUNT="$(md5sum /mnt/testfile)"
  umount /mnt

  echo "BEFORE MMAP:\t$CHECKSUM_BEFORE"
  echo "AFTER MMAP:\t$CHECKSUM_AFTER"
  echo "AFTER REMOUNT:\t$CHECKSUM_AFTER_REMOUNT"
  ----------------[END SCRIPT]--------------------

The mmaptest tool looks something like this (very simplified, with
error checking removed):

  ----------------[BEGIN mmaptest]--------------------
  data = mmap(NULL, file_size - file_offset, PROT_READ | PROT_WRITE,
              MAP_SHARED, fd, file_offset);

  for (i = 0; i < write_count; ++i) {
        memcpy(data + i * 4096, buf, sizeof(buf));
        msync(data, file_size - file_offset, MS_SYNC);
  }
  ----------------[END mmaptest]--------------------

The output of the script looks something like this:

  BEFORE MMAP:    281ed1d5ae50e8419f9b978aab16de83  /mnt/testfile
  AFTER MMAP:     6604a1c31f10780331a6850371b3a313  /mnt/testfile
  AFTER REMOUNT:  281ed1d5ae50e8419f9b978aab16de83  /mnt/testfile

So it is clear, that the changes done using mmap() do not survive a
remount.  This can be reproduced 100% of the time.  The problem was
introduced in commit 136e8770cd5d ("nilfs2: fix issue of
nilfs_set_page_dirty() for page at EOF boundary").

If the page was read with mpage_readpage() or mpage_readpages() for
example, then it has no buffers attached to it.  In that case
page_has_buffers(page) in nilfs_set_page_dirty() will be false.
Therefore nilfs_set_file_dirty() is never called and the pages are never
collected and never written to disk.

This patch fixes the problem by also calling nilfs_set_file_dirty() if the
page has no buffers attached to it.

[akpm@linux-foundation.org: s/PAGE_SHIFT/PAGE_CACHE_SHIFT/]
Signed-off-by: Andreas Rohner <andreas.rohner@gmx.net>
Tested-by: Andreas Rohner <andreas.rohner@gmx.net>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nilfs2/inode.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index bccfec8343c5..2e1372efbb00 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -24,6 +24,7 @@
 #include <linux/buffer_head.h>
 #include <linux/gfp.h>
 #include <linux/mpage.h>
+#include <linux/pagemap.h>
 #include <linux/writeback.h>
 #include <linux/aio.h>
 #include "nilfs.h"
@@ -219,10 +220,10 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
 
 static int nilfs_set_page_dirty(struct page *page)
 {
+	struct inode *inode = page->mapping->host;
 	int ret = __set_page_dirty_nobuffers(page);
 
 	if (page_has_buffers(page)) {
-		struct inode *inode = page->mapping->host;
 		unsigned nr_dirty = 0;
 		struct buffer_head *bh, *head;
 
@@ -245,6 +246,10 @@ static int nilfs_set_page_dirty(struct page *page)
 
 		if (nr_dirty)
 			nilfs_set_file_dirty(inode, nr_dirty);
+	} else if (ret) {
+		unsigned nr_dirty = 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+		nilfs_set_file_dirty(inode, nr_dirty);
 	}
 	return ret;
 }

From 64bee3b2dab89d8d9e8a81f9485c24c8314fa4c3 Mon Sep 17 00:00:00 2001
From: Joseph Qi <joseph.qi@huawei.com>
Date: Thu, 25 Sep 2014 16:05:16 -0700
Subject: [PATCH 0841/1185] ocfs2/dlm: do not get resource spinlock if lockres
 is new

commit 5760a97c7143c208fa3a8f8cad0ed7dd672ebd28 upstream.

There is a deadlock case which was reported by Guozhonghua:
  https://oss.oracle.com/pipermail/ocfs2-devel/2014-September/010079.html

This case is caused by &res->spinlock and &dlm->master_lock
misordering in different threads.

It was introduced by commit 8d400b81cc83 ("ocfs2/dlm: Clean up refmap
helpers").  Since lockres is new, it does not require the
&res->spinlock.  So remove it.

Fixes: 8d400b81cc83 ("ocfs2/dlm: Clean up refmap helpers")
Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
Reviewed-by: joyce.xue <xuejiufei@huawei.com>
Reported-by: Guozhonghua <guozhonghua@h3c.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/dlm/dlmmaster.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 33ecbe0e6734..2b941113e423 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -653,12 +653,9 @@ void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm,
 	clear_bit(bit, res->refmap);
 }
 
-
-void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
+static void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
 				   struct dlm_lock_resource *res)
 {
-	assert_spin_locked(&res->spinlock);
-
 	res->inflight_locks++;
 
 	mlog(0, "%s: res %.*s, inflight++: now %u, %ps()\n", dlm->name,
@@ -666,6 +663,13 @@ void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
 	     __builtin_return_address(0));
 }
 
+void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
+				   struct dlm_lock_resource *res)
+{
+	assert_spin_locked(&res->spinlock);
+	__dlm_lockres_grab_inflight_ref(dlm, res);
+}
+
 void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
 				   struct dlm_lock_resource *res)
 {
@@ -855,10 +859,8 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
 	/* finally add the lockres to its hash bucket */
 	__dlm_insert_lockres(dlm, res);
 
-	/* Grab inflight ref to pin the resource */
-	spin_lock(&res->spinlock);
-	dlm_lockres_grab_inflight_ref(dlm, res);
-	spin_unlock(&res->spinlock);
+	/* since this lockres is new it doesn't not require the spinlock */
+	__dlm_lockres_grab_inflight_ref(dlm, res);
 
 	/* get an extra ref on the mle in case this is a BLOCK
 	 * if so, the creator of the BLOCK may try to put the last

From 90d52e914efdb0f22779888f181f232ce453592e Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@linux.intel.com>
Date: Wed, 24 Sep 2014 16:38:05 +0800
Subject: [PATCH 0842/1185] sched: Fix unreleased llc_shared_mask bit during
 CPU hotplug

commit 03bd4e1f7265548832a76e7919a81f3137c44fd1 upstream.

The following bug can be triggered by hot adding and removing a large number of
xen domain0's vcpus repeatedly:

	BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 IP: [..] find_busiest_group
	PGD 5a9d5067 PUD 13067 PMD 0
	Oops: 0000 [#3] SMP
	[...]
	Call Trace:
	load_balance
	? _raw_spin_unlock_irqrestore
	idle_balance
	__schedule
	schedule
	schedule_timeout
	? lock_timer_base
	schedule_timeout_uninterruptible
	msleep
	lock_device_hotplug_sysfs
	online_store
	dev_attr_store
	sysfs_write_file
	vfs_write
	SyS_write
	system_call_fastpath

Last level cache shared mask is built during CPU up and the
build_sched_domain() routine takes advantage of it to setup
the sched domain CPU topology.

However, llc_shared_mask is not released during CPU disable,
which leads to an invalid sched domain CPU topology.

This patch fixes it by releasing the llc_shared_mask correctly
during CPU disable.

Yasuaki also reported that this can happen on real hardware:

  https://lkml.org/lkml/2014/7/22/1018

His case is here:

	==
	Here is an example on my system.
	My system has 4 sockets and each socket has 15 cores and HT is
	enabled. In this case, each core of sockets is numbered as
	follows:

		 | CPU#
	Socket#0 | 0-14 , 60-74
	Socket#1 | 15-29, 75-89
	Socket#2 | 30-44, 90-104
	Socket#3 | 45-59, 105-119

	Then llc_shared_mask of CPU#30 has 0x3fff80000001fffc0000000.

	It means that last level cache of Socket#2 is shared with
	CPU#30-44 and 90-104.

	When hot-removing socket#2 and #3, each core of sockets is
	numbered as follows:

		 | CPU#
	Socket#0 | 0-14 , 60-74
	Socket#1 | 15-29, 75-89

	But llc_shared_mask is not cleared. So llc_shared_mask of CPU#30
	remains having 0x3fff80000001fffc0000000.

	After that, when hot-adding socket#2 and #3, each core of
	sockets is numbered as follows:

		 | CPU#
	Socket#0 | 0-14 , 60-74
	Socket#1 | 15-29, 75-89
	Socket#2 | 30-59
	Socket#3 | 90-119

	Then llc_shared_mask of CPU#30 becomes
	0x3fff8000fffffffc0000000. It means that last level cache of
	Socket#2 is shared with CPU#30-59 and 90-104. So the mask has
	the wrong value.

Signed-off-by: Wanpeng Li <wanpeng.li@linux.intel.com>
Tested-by: Linn Crosetto <linn@hp.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Toshi Kani <toshi.kani@hp.com>
Reviewed-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Steven Rostedt <srostedt@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1411547885-48165-1-git-send-email-wanpeng.li@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/smpboot.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fe862750583b..87084ab90d19 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1284,6 +1284,9 @@ static void remove_siblinginfo(int cpu)
 
 	for_each_cpu(sibling, cpu_sibling_mask(cpu))
 		cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling));
+	for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
+		cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
+	cpumask_clear(cpu_llc_shared_mask(cpu));
 	cpumask_clear(cpu_sibling_mask(cpu));
 	cpumask_clear(cpu_core_mask(cpu));
 	c->phys_proc_id = 0;

From 6956c1873a36168d2187980d51eb31ee86139336 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Tue, 26 Aug 2014 12:44:15 +1000
Subject: [PATCH 0843/1185] powerpc/perf: Fix ABIv2 kernel backtraces

commit 85101af13bb854a6572fa540df7c7201958624b9 upstream.

ABIv2 kernels are failing to backtrace through the kernel. An example:

39.30%  readseek2_proce  [kernel.kallsyms]    [k] find_get_entry
            |
            --- find_get_entry
               __GI___libc_read

The problem is in valid_next_sp() where we check that the new stack
pointer is at least STACK_FRAME_OVERHEAD below the previous one.

ABIv1 has a minimum stack frame size of 112 bytes consisting of 48 bytes
and 64 bytes of parameter save area. ABIv2 changes that to 32 bytes
with no parameter save area.

STACK_FRAME_OVERHEAD is in theory the minimum stack frame size,
but we have over 240 uses of it, some of which assume that it includes
space for the parameter area.

We need to work through all our stack defines and rationalise them
but let's fix perf now by creating STACK_FRAME_MIN_SIZE and using
it in valid_next_sp(). This fixes the issue:

30.64%  readseek2_proce  [kernel.kallsyms]    [k] find_get_entry
            |
            --- find_get_entry
               pagecache_get_page
               generic_file_read_iter
               new_sync_read
               vfs_read
               sys_read
               syscall_exit
               __GI___libc_read

Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/include/asm/ptrace.h | 7 +++++++
 arch/powerpc/perf/callchain.c     | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index becc08e6a65c..637c97fcbeb5 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -35,6 +35,12 @@
 					STACK_FRAME_OVERHEAD + 288)
 #define STACK_FRAME_MARKER	12
 
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define STACK_FRAME_MIN_SIZE	32
+#else
+#define STACK_FRAME_MIN_SIZE	STACK_FRAME_OVERHEAD
+#endif
+
 /* Size of dummy stack frame allocated when calling signal handler. */
 #define __SIGNAL_FRAMESIZE	128
 #define __SIGNAL_FRAMESIZE32	64
@@ -46,6 +52,7 @@
 #define STACK_FRAME_REGS_MARKER	ASM_CONST(0x72656773)
 #define STACK_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
 #define STACK_FRAME_MARKER	2
+#define STACK_FRAME_MIN_SIZE	STACK_FRAME_OVERHEAD
 
 /* Size of stack frame allocated when calling signal handler. */
 #define __SIGNAL_FRAMESIZE	64
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 74d1e780748b..2396dda282cd 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -35,7 +35,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
 		return 0;		/* must be 16-byte aligned */
 	if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
 		return 0;
-	if (sp >= prev_sp + STACK_FRAME_OVERHEAD)
+	if (sp >= prev_sp + STACK_FRAME_MIN_SIZE)
 		return 1;
 	/*
 	 * sp could decrease when we jump off an interrupt stack

From 178ba7e09f5fc6847b2b1a814506e6e0e354013d Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin@bell.net>
Date: Mon, 22 Sep 2014 20:54:50 -0400
Subject: [PATCH 0844/1185] parisc: Only use -mfast-indirect-calls option for
 32-bit kernel builds

commit d26a7730b5874a5fa6779c62f4ad7c5065a94723 upstream.

In spite of what the GCC manual says, the -mfast-indirect-calls has
never been supported in the 64-bit parisc compiler. Indirect calls have
always been done using function descriptors irrespective of the
-mfast-indirect-calls option.

Recently, it was noticed that a function descriptor was always requested
when the -mfast-indirect-calls option was specified. This caused
problems when the option was used in application code and doesn't make
any sense because the whole point of the option is to avoid using a
function descriptor for indirect calls.

Fixing this broke 64-bit kernel builds.

I will fix GCC but for now we need the attached change. This results in
the same kernel code as before.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/parisc/Makefile | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 96ec3982be8d..94607bfa273d 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -46,7 +46,12 @@ cflags-y	:= -pipe
 
 # These flags should be implied by an hppa-linux configuration, but they
 # are not in gcc 3.2.
-cflags-y	+= -mno-space-regs -mfast-indirect-calls
+cflags-y	+= -mno-space-regs
+
+# -mfast-indirect-calls is only relevant for 32-bit kernels.
+ifndef CONFIG_64BIT
+cflags-y	+= -mfast-indirect-calls
+endif
 
 # Currently we save and restore fpregs on all kernel entry/interruption paths.
 # If that gets optimized, we might need to disable the use of fpregs in the

From 5cebda5d05729708008637b3161076beb5855a71 Mon Sep 17 00:00:00 2001
From: Richard Larocque <rlarocque@google.com>
Date: Tue, 9 Sep 2014 18:31:04 -0700
Subject: [PATCH 0845/1185] alarmtimer: Do not signal SIGEV_NONE timers

commit 265b81d23a46c39df0a735a3af4238954b41a4c2 upstream.

Avoids sending a signal to alarm timers created with sigev_notify set to
SIGEV_NONE by checking for that special case in the timeout callback.

The regular posix timers avoid sending signals to SIGEV_NONE timers by
not scheduling any callbacks for them in the first place.  Although it
would be possible to do something similar for alarm timers, it's simpler
to handle this as a special case in the timeout.

Prior to this patch, the alarm timer would ignore the sigev_notify value
and try to deliver signals to the process anyway.  Even worse, the
sanity check for the value of sigev_signo is skipped when SIGEV_NONE was
specified, so the signal number could be bogus.  If sigev_signo was an
uninitialized value (as it often would be if SIGEV_NONE is used), then
it's hard to predict which signal will be sent.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Sharvil Nanavati <sharvil@google.com>
Signed-off-by: Richard Larocque <rlarocque@google.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/time/alarmtimer.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 294bf4ef1f47..a2d2a480701c 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -421,8 +421,10 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
 {
 	struct k_itimer *ptr = container_of(alarm, struct k_itimer,
 						it.alarm.alarmtimer);
-	if (posix_timer_event(ptr, 0) != 0)
-		ptr->it_overrun++;
+	if ((ptr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) {
+		if (posix_timer_event(ptr, 0) != 0)
+			ptr->it_overrun++;
+	}
 
 	/* Re-add periodic timers */
 	if (ptr->it.alarm.interval.tv64) {

From 3c47864204b4df3e0d506c5a53cc7f1111467411 Mon Sep 17 00:00:00 2001
From: Richard Larocque <rlarocque@google.com>
Date: Tue, 9 Sep 2014 18:31:05 -0700
Subject: [PATCH 0846/1185] alarmtimer: Lock k_itimer during timer callback

commit 474e941bed9262f5fa2394f9a4a67e24499e5926 upstream.

Locks the k_itimer's it_lock member when handling the alarm timer's
expiry callback.

The regular posix timers defined in posix-timers.c have this lock held
during timeout processing because their callbacks are routed through
posix_timer_fn().  The alarm timers follow a different path, so they
ought to grab the lock somewhere else.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Sharvil Nanavati <sharvil@google.com>
Signed-off-by: Richard Larocque <rlarocque@google.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/time/alarmtimer.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index a2d2a480701c..7d19fca0617e 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -419,8 +419,12 @@ static enum alarmtimer_type clock2alarm(clockid_t clockid)
 static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
 							ktime_t now)
 {
+	unsigned long flags;
 	struct k_itimer *ptr = container_of(alarm, struct k_itimer,
 						it.alarm.alarmtimer);
+	enum alarmtimer_restart result = ALARMTIMER_NORESTART;
+
+	spin_lock_irqsave(&ptr->it_lock, flags);
 	if ((ptr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) {
 		if (posix_timer_event(ptr, 0) != 0)
 			ptr->it_overrun++;
@@ -430,9 +434,11 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
 	if (ptr->it.alarm.interval.tv64) {
 		ptr->it_overrun += alarm_forward(alarm, now,
 						ptr->it.alarm.interval);
-		return ALARMTIMER_RESTART;
+		result = ALARMTIMER_RESTART;
 	}
-	return ALARMTIMER_NORESTART;
+	spin_unlock_irqrestore(&ptr->it_lock, flags);
+
+	return result;
 }
 
 /**

From a63607b953785b32fad97f621b90df882d177ea3 Mon Sep 17 00:00:00 2001
From: Cong Wang <cwang@twopensource.com>
Date: Tue, 2 Sep 2014 15:27:20 -0700
Subject: [PATCH 0847/1185] perf: Fix a race condition in
 perf_remove_from_context()

commit 3577af70a2ce4853d58e57d832e687d739281479 upstream.

We saw a kernel soft lockup in perf_remove_from_context(),
it looks like the `perf` process, when exiting, could not go
out of the retry loop. Meanwhile, the target process was forking
a child. So either the target process should execute the smp
function call to deactivate the event (if it was running) or it should
do a context switch which deactivates the event.

It seems we optimize out a context switch in perf_event_context_sched_out(),
and what's more important, we still test an obsolete task pointer when
retrying, so no one actually would deactivate that event in this situation.
Fix it directly by reloading the task pointer in perf_remove_from_context().

This should cure the above soft lockup.

Signed-off-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1409696840-843-1-git-send-email-xiyou.wangcong@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/events/core.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 459b94c94721..6bf387a60399 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1398,6 +1398,11 @@ static void perf_remove_from_context(struct perf_event *event, bool detach_group
 	 */
 	if (ctx->is_active) {
 		raw_spin_unlock_irq(&ctx->lock);
+		/*
+		 * Reload the task pointer, it might have been changed by
+		 * a concurrent perf_event_context_sched_out().
+		 */
+		task = ctx->task;
 		goto retry;
 	}
 
@@ -1829,6 +1834,11 @@ perf_install_in_context(struct perf_event_context *ctx,
 	 */
 	if (ctx->is_active) {
 		raw_spin_unlock_irq(&ctx->lock);
+		/*
+		 * Reload the task pointer, it might have been changed by
+		 * a concurrent perf_event_context_sched_out().
+		 */
+		task = ctx->task;
 		goto retry;
 	}
 

From e7fa68ba61691d21444df28ba7b6eb91517c99da Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 12 Sep 2013 18:39:36 +0200
Subject: [PATCH 0848/1185] perf kmem: Make it work again on non NUMA machines

commit 4921e320244e099bdf237fd10428594ce5f5b87d upstream.

The commit '2814eb0 perf kmem: Remove die() calls' disabled 'perf kmem'
command for machines without numa support. It made the command fail if
'/sys/devices/system/node' dir wasn't found.

Skip the NUMA-based initialization if the directory is not found and
continue execution.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1379003976-5839-5-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: zhangzhiqiang <zhangzhiqiang.zhang@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/builtin-kmem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 46878daca5cc..c9eac3edfe4d 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -101,7 +101,7 @@ static int setup_cpunode_map(void)
 
 	dir1 = opendir(PATH_SYS_NODE);
 	if (!dir1)
-		return -1;
+		return 0;
 
 	while ((dent1 = readdir(dir1)) != NULL) {
 		if (dent1->d_type != DT_DIR ||

From c2d331797b18cedebacc294c173e3855de81b4d0 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cam.ac.uk>
Date: Mon, 22 Sep 2014 01:53:03 +0100
Subject: [PATCH 0849/1185] Fix nasty 32-bit overflow bug in buffer i/o code.

commit f2d5a94436cc7cc0221b9a81bba2276a25187dd3 upstream.

On 32-bit architectures, the legacy buffer_head functions are not always
handling the sector number with the proper 64-bit types, and will thus
fail on 4TB+ disks.

Any code that uses __getblk() (and thus bread(), breadahead(),
sb_bread(), sb_breadahead(), sb_getblk()), and calls it using a 64-bit
block on a 32-bit arch (where "long" is 32-bit) causes an infinite loop
in __getblk_slow() with an infinite stream of errors logged to dmesg
like this:

  __find_get_block_slow() failed. block=6740375944, b_blocknr=2445408648
  b_state=0x00000020, b_size=512
  device sda1 blocksize: 512

Note how in hex block is 0x191C1F988 and b_blocknr is 0x91C1F988 i.e. the
top 32-bits are missing (in this case the 0x1 at the top).

This is because grow_dev_page() is broken and has a 32-bit overflow due
to shifting the page index value (a pgoff_t - which is just 32 bits on
32-bit architectures) left-shifted as the block number.  But the top
bits get lost as the pgoff_t is not type cast to sector_t / 64-bit
before the shift.

This patch fixes this issue by type casting "index" to sector_t before
doing the left shift.

Note this is not a theoretical bug but has been seen in the field on a
4TiB hard drive with logical sector size 512 bytes.

This patch has been verified to fix the infinite loop problem on 3.17-rc5
kernel using a 4TB disk image mounted using "-o loop".  Without this patch
doing a "find /nt" where /nt is an NTFS volume causes the infinite loop
100% reproducibly whilst with the patch it works fine as expected.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/buffer.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 75964d734444..10fca21ee8aa 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -985,7 +985,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 		bh = page_buffers(page);
 		if (bh->b_size == size) {
 			end_block = init_page_buffers(page, bdev,
-						index << sizebits, size);
+						(sector_t)index << sizebits,
+						size);
 			goto done;
 		}
 		if (!try_to_free_buffers(page))
@@ -1006,7 +1007,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 	 */
 	spin_lock(&inode->i_mapping->private_lock);
 	link_dev_buffers(page, bh);
-	end_block = init_page_buffers(page, bdev, index << sizebits, size);
+	end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
+			size);
 	spin_unlock(&inode->i_mapping->private_lock);
 done:
 	ret = (block < end_block) ? 1 : -ENXIO;

From 7a0a059f9866f6083f9c6e0c1ccc63f2d07ffcd6 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Tue, 26 Aug 2014 02:59:53 -0300
Subject: [PATCH 0850/1185] media: cx18: fix kernel oops with tda8290 tuner

commit 6a03dc92cc2edfa2257502557b9f714893987383 upstream.

This was caused by an uninitialized setup.config field.

Based on a suggestion from Devin Heitmueller.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Thanks-to: Devin Heitmueller <dheitmueller@kernellabs.com>
Reported-by: Scott Robinson <scott.robinson55@gmail.com>
Tested-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/pci/cx18/cx18-driver.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/media/pci/cx18/cx18-driver.c b/drivers/media/pci/cx18/cx18-driver.c
index 16e89f026bca..018cb9045330 100644
--- a/drivers/media/pci/cx18/cx18-driver.c
+++ b/drivers/media/pci/cx18/cx18-driver.c
@@ -1092,6 +1092,7 @@ static int cx18_probe(struct pci_dev *pci_dev,
 		setup.addr = ADDR_UNSET;
 		setup.type = cx->options.tuner;
 		setup.mode_mask = T_ANALOG_TV;  /* matches TV tuners */
+		setup.config = NULL;
 		if (cx->options.radio > 0)
 			setup.mode_mask |= T_RADIO;
 		setup.tuner_callback = (setup.type == TUNER_XC2028) ?

From a8f165f517656c5c84dcb059ea2d51fa73f2571c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 18 Sep 2014 11:09:04 +1000
Subject: [PATCH 0851/1185] md/raid1: fix_read_error should act on all
 non-faulty devices.

commit b8cb6b4c121e1bf1963c16ed69e7adcb1bc301cd upstream.

If a device is being recovered it is not InSync and is not Faulty.

If a read error is experienced on that device, fix_read_error()
will be called, but it ignores non-InSync devices.  So it will
neither fix the error nor fail the device.

It is incorrect that fix_read_error() ignores non-InSync devices.
It should only ignore Faulty devices.  So fix it.

This became a bug when we allowed reading from a device that was being
recovered.  It is suitable for any subsequent -stable kernel.

Fixes: da8840a747c0dbf49506ec906757a6b87b9741e9
Reported-by: Alexander Lyakas <alex.bolshoy@gmail.com>
Tested-by: Alexander Lyakas <alex.bolshoy@gmail.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/raid1.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a176791509f6..e885dbf08c40 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2051,7 +2051,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 			d--;
 			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
-			    test_bit(In_sync, &rdev->flags))
+			    !test_bit(Faulty, &rdev->flags))
 				r1_sync_page_io(rdev, sect, s,
 						conf->tmppage, WRITE);
 		}
@@ -2063,7 +2063,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 			d--;
 			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
-			    test_bit(In_sync, &rdev->flags)) {
+			    !test_bit(Faulty, &rdev->flags)) {
 				if (r1_sync_page_io(rdev, sect, s,
 						    conf->tmppage, READ)) {
 					atomic_add(s, &rdev->corrected_errors);

From a5114ef14123a6da5a62d389ed3dacbfc3a94541 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Thu, 10 Jul 2014 09:24:01 +0300
Subject: [PATCH 0852/1185] ipvs: avoid netns exit crash on
 ip_vs_conn_drop_conntrack

commit 2627b7e15c5064ddd5e578e4efd948d48d531a3f upstream.

commit 8f4e0a18682d91 ("IPVS netns exit causes crash in conntrack")
added a second ip_vs_conn_drop_conntrack call instead of just adding
the needed check. As a result, the first call can still cause a
crash on netns exit. Remove it.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/ipvs/ip_vs_conn.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index a083bda322b6..90e756cf6e52 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -797,7 +797,6 @@ static void ip_vs_conn_expire(unsigned long data)
 			ip_vs_control_del(cp);
 
 		if (cp->flags & IP_VS_CONN_F_NFCT) {
-			ip_vs_conn_drop_conntrack(cp);
 			/* Do not access conntracks during subsys cleanup
 			 * because nf_conntrack_find_get can not be used after
 			 * conntrack cleanup for the net.

From 787fcb84098e4f9fdea75d777f010229c393690d Mon Sep 17 00:00:00 2001
From: Alex Gartrell <agartrell@fb.com>
Date: Wed, 16 Jul 2014 15:57:34 -0700
Subject: [PATCH 0853/1185] ipvs: Maintain all DSCP and ECN bits for ipv6 tun
 forwarding

commit 76f084bc10004b3050b2cff9cfac29148f1f6088 upstream.

Previously, only the four high bits of the tclass were maintained in the
ipv6 case.  This matches the behavior of ipv4, though whether or not we
should reflect ECN bits may be up for debate.

Signed-off-by: Alex Gartrell <agartrell@fb.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/ipvs/ip_vs_xmit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 7f0e1cf2d7e8..1692e7534759 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -967,8 +967,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	iph->nexthdr		=	IPPROTO_IPV6;
 	iph->payload_len	=	old_iph->payload_len;
 	be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
-	iph->priority		=	old_iph->priority;
 	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
+	ipv6_change_dsfield(iph, 0, ipv6_get_dsfield(old_iph));
 	iph->daddr = cp->daddr.in6;
 	iph->saddr = saddr;
 	iph->hop_limit		=	old_iph->hop_limit;

From a1b7f13b63e431c141715ecc183d92409982ed63 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Fri, 22 Aug 2014 17:53:41 +0300
Subject: [PATCH 0854/1185] ipvs: fix ipv6 hook registration for local replies

commit eb90b0c734ad793d5f5bf230a9e9a4dcc48df8aa upstream.

commit fc604767613b6d2036cdc35b660bc39451040a47
("ipvs: changes for local real server") from 2.6.37
introduced DNAT support to local real server but the
IPv6 LOCAL_OUT handler ip_vs_local_reply6() is
registered incorrectly as IPv4 hook causing any outgoing
IPv4 traffic to be dropped depending on the IP header values.

Chris tracked down the problem to CONFIG_IP_VS_IPV6=y
Bug report: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1349768

Reported-by: Chris J Arges <chris.j.arges@canonical.com>
Tested-by: Chris J Arges <chris.j.arges@canonical.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/ipvs/ip_vs_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 663042e84e81..26b9a986a87f 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1898,7 +1898,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	{
 		.hook		= ip_vs_local_reply6,
 		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
+		.pf		= NFPROTO_IPV6,
 		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP6_PRI_NAT_DST + 1,
 	},

From e89547b87ae44b464fef54572d29c0cd32cb9caa Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 26 May 2014 13:40:47 +0200
Subject: [PATCH 0855/1185] PM / sleep: Add state field to pm_states[] entries

commit 27ddcc6596e50cb8f03d2e83248897667811d8f6 upstream.

To allow sleep states corresponding to the "mem", "standby" and
"freeze" labels to be different from the pm_states[] indexes of
those strings, introduce struct pm_sleep_state, consisting of
a string label and a state number, and turn pm_states[] into an
array of objects of that type.

This modification should not lead to any functional changes.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/power/main.c         | 16 ++++++++--------
 kernel/power/power.h        |  7 ++++++-
 kernel/power/suspend.c      | 12 ++++++------
 kernel/power/suspend_test.c | 22 ++++++++++------------
 4 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/kernel/power/main.c b/kernel/power/main.c
index d77663bfedeb..a387bfdc7b09 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -293,12 +293,12 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
 {
 	char *s = buf;
 #ifdef CONFIG_SUSPEND
-	int i;
+	suspend_state_t i;
+
+	for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++)
+		if (valid_state(i))
+			s += sprintf(s,"%s ", pm_states[i].label);
 
-	for (i = 0; i < PM_SUSPEND_MAX; i++) {
-		if (pm_states[i] && valid_state(i))
-			s += sprintf(s,"%s ", pm_states[i]);
-	}
 #endif
 #ifdef CONFIG_HIBERNATION
 	s += sprintf(s, "%s\n", "disk");
@@ -314,7 +314,7 @@ static suspend_state_t decode_state(const char *buf, size_t n)
 {
 #ifdef CONFIG_SUSPEND
 	suspend_state_t state = PM_SUSPEND_MIN;
-	const char * const *s;
+	struct pm_sleep_state *s;
 #endif
 	char *p;
 	int len;
@@ -328,7 +328,7 @@ static suspend_state_t decode_state(const char *buf, size_t n)
 
 #ifdef CONFIG_SUSPEND
 	for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++)
-		if (*s && len == strlen(*s) && !strncmp(buf, *s, len))
+		if (len == strlen(s->label) && !strncmp(buf, s->label, len))
 			return state;
 #endif
 
@@ -446,7 +446,7 @@ static ssize_t autosleep_show(struct kobject *kobj,
 #ifdef CONFIG_SUSPEND
 	if (state < PM_SUSPEND_MAX)
 		return sprintf(buf, "%s\n", valid_state(state) ?
-						pm_states[state] : "error");
+					pm_states[state].label : "error");
 #endif
 #ifdef CONFIG_HIBERNATION
 	return sprintf(buf, "disk\n");
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 7d4b7ffb3c1d..10d907516b28 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -175,8 +175,13 @@ extern void swsusp_show_speed(struct timeval *, struct timeval *,
 				unsigned int, char *);
 
 #ifdef CONFIG_SUSPEND
+struct pm_sleep_state {
+	const char *label;
+	suspend_state_t state;
+};
+
 /* kernel/power/suspend.c */
-extern const char *const pm_states[];
+extern struct pm_sleep_state pm_states[];
 
 extern bool valid_state(suspend_state_t state);
 extern int suspend_devices_and_enter(suspend_state_t state);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index bef86d121eb2..cc1fc82e7901 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -29,10 +29,10 @@
 
 #include "power.h"
 
-const char *const pm_states[PM_SUSPEND_MAX] = {
-	[PM_SUSPEND_FREEZE]	= "freeze",
-	[PM_SUSPEND_STANDBY]	= "standby",
-	[PM_SUSPEND_MEM]	= "mem",
+struct pm_sleep_state pm_states[PM_SUSPEND_MAX] = {
+	[PM_SUSPEND_FREEZE] = { "freeze", PM_SUSPEND_FREEZE },
+	[PM_SUSPEND_STANDBY] = { "standby", PM_SUSPEND_STANDBY },
+	[PM_SUSPEND_MEM] = { "mem", PM_SUSPEND_MEM },
 };
 
 static const struct platform_suspend_ops *suspend_ops;
@@ -337,7 +337,7 @@ static int enter_state(suspend_state_t state)
 	sys_sync();
 	printk("done.\n");
 
-	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
+	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state].label);
 	error = suspend_prepare(state);
 	if (error)
 		goto Unlock;
@@ -345,7 +345,7 @@ static int enter_state(suspend_state_t state)
 	if (suspend_test(TEST_FREEZER))
 		goto Finish;
 
-	pr_debug("PM: Entering %s sleep\n", pm_states[state]);
+	pr_debug("PM: Entering %s sleep\n", pm_states[state].label);
 	pm_restrict_gfp_mask();
 	error = suspend_devices_and_enter(state);
 	pm_restore_gfp_mask();
diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c
index 9b2a1d58558d..d4e3ab167a73 100644
--- a/kernel/power/suspend_test.c
+++ b/kernel/power/suspend_test.c
@@ -92,13 +92,13 @@ static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
 	}
 
 	if (state == PM_SUSPEND_MEM) {
-		printk(info_test, pm_states[state]);
+		printk(info_test, pm_states[state].label);
 		status = pm_suspend(state);
 		if (status == -ENODEV)
 			state = PM_SUSPEND_STANDBY;
 	}
 	if (state == PM_SUSPEND_STANDBY) {
-		printk(info_test, pm_states[state]);
+		printk(info_test, pm_states[state].label);
 		status = pm_suspend(state);
 	}
 	if (status < 0)
@@ -136,18 +136,16 @@ static char warn_bad_state[] __initdata =
 
 static int __init setup_test_suspend(char *value)
 {
-	unsigned i;
+	suspend_state_t i;
 
 	/* "=mem" ==> "mem" */
 	value++;
-	for (i = 0; i < PM_SUSPEND_MAX; i++) {
-		if (!pm_states[i])
-			continue;
-		if (strcmp(pm_states[i], value) != 0)
-			continue;
-		test_state = (__force suspend_state_t) i;
-		return 0;
-	}
+	for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++)
+		if (!strcmp(pm_states[i].label, value)) {
+			test_state = pm_states[i].state;
+			return 0;
+		}
+
 	printk(warn_bad_state, value);
 	return 0;
 }
@@ -165,7 +163,7 @@ static int __init test_suspend(void)
 	if (test_state == PM_SUSPEND_ON)
 		goto done;
 	if (!valid_state(test_state)) {
-		printk(warn_bad_state, pm_states[test_state]);
+		printk(warn_bad_state, pm_states[test_state].label);
 		goto done;
 	}
 

From 5c95fef642043842d7168ee3ae662f80a84fb285 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 26 May 2014 13:40:53 +0200
Subject: [PATCH 0856/1185] PM / sleep: Use valid_state() for
 platform-dependent sleep states only

commit 43e8317b0bba1d6eb85f38a4a233d82d7c20d732 upstream.

Use the observation that, for platform-dependent sleep states
(PM_SUSPEND_STANDBY, PM_SUSPEND_MEM), a given state is either
always supported or always unsupported and store that information
in pm_states[] instead of calling valid_state() every time we
need to check it.

Also do not use valid_state() for PM_SUSPEND_FREEZE, which is always
valid, and move the pm_test_level validity check for PM_SUSPEND_FREEZE
directly into enter_state().

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/power/main.c         |  9 +++---
 kernel/power/power.h        |  2 --
 kernel/power/suspend.c      | 60 ++++++++++++++++++-------------------
 kernel/power/suspend_test.c |  2 +-
 4 files changed, 36 insertions(+), 37 deletions(-)

diff --git a/kernel/power/main.c b/kernel/power/main.c
index a387bfdc7b09..312c1b2c725d 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -296,7 +296,7 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
 	suspend_state_t i;
 
 	for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++)
-		if (valid_state(i))
+		if (pm_states[i].state)
 			s += sprintf(s,"%s ", pm_states[i].label);
 
 #endif
@@ -328,8 +328,9 @@ static suspend_state_t decode_state(const char *buf, size_t n)
 
 #ifdef CONFIG_SUSPEND
 	for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++)
-		if (len == strlen(s->label) && !strncmp(buf, s->label, len))
-			return state;
+		if (s->state && len == strlen(s->label)
+		    && !strncmp(buf, s->label, len))
+			return s->state;
 #endif
 
 	return PM_SUSPEND_ON;
@@ -445,7 +446,7 @@ static ssize_t autosleep_show(struct kobject *kobj,
 
 #ifdef CONFIG_SUSPEND
 	if (state < PM_SUSPEND_MAX)
-		return sprintf(buf, "%s\n", valid_state(state) ?
+		return sprintf(buf, "%s\n", pm_states[state].state ?
 					pm_states[state].label : "error");
 #endif
 #ifdef CONFIG_HIBERNATION
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 10d907516b28..f770cad3666c 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -183,14 +183,12 @@ struct pm_sleep_state {
 /* kernel/power/suspend.c */
 extern struct pm_sleep_state pm_states[];
 
-extern bool valid_state(suspend_state_t state);
 extern int suspend_devices_and_enter(suspend_state_t state);
 #else /* !CONFIG_SUSPEND */
 static inline int suspend_devices_and_enter(suspend_state_t state)
 {
 	return -ENOSYS;
 }
-static inline bool valid_state(suspend_state_t state) { return false; }
 #endif /* !CONFIG_SUSPEND */
 
 #ifdef CONFIG_PM_TEST_SUSPEND
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index cc1fc82e7901..903c517b14da 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -30,9 +30,9 @@
 #include "power.h"
 
 struct pm_sleep_state pm_states[PM_SUSPEND_MAX] = {
-	[PM_SUSPEND_FREEZE] = { "freeze", PM_SUSPEND_FREEZE },
-	[PM_SUSPEND_STANDBY] = { "standby", PM_SUSPEND_STANDBY },
-	[PM_SUSPEND_MEM] = { "mem", PM_SUSPEND_MEM },
+	[PM_SUSPEND_FREEZE] = { .label = "freeze", .state = PM_SUSPEND_FREEZE },
+	[PM_SUSPEND_STANDBY] = { .label = "standby", },
+	[PM_SUSPEND_MEM] = { .label = "mem", },
 };
 
 static const struct platform_suspend_ops *suspend_ops;
@@ -62,42 +62,34 @@ void freeze_wake(void)
 }
 EXPORT_SYMBOL_GPL(freeze_wake);
 
+static bool valid_state(suspend_state_t state)
+{
+	/*
+	 * PM_SUSPEND_STANDBY and PM_SUSPEND_MEM states need low level
+	 * support and need to be valid to the low level
+	 * implementation, no valid callback implies that none are valid.
+	 */
+	return suspend_ops && suspend_ops->valid && suspend_ops->valid(state);
+}
+
 /**
  * suspend_set_ops - Set the global suspend method table.
  * @ops: Suspend operations to use.
  */
 void suspend_set_ops(const struct platform_suspend_ops *ops)
 {
+	suspend_state_t i;
+
 	lock_system_sleep();
+
 	suspend_ops = ops;
+	for (i = PM_SUSPEND_STANDBY; i <= PM_SUSPEND_MEM; i++)
+		pm_states[i].state = valid_state(i) ? i : 0;
+
 	unlock_system_sleep();
 }
 EXPORT_SYMBOL_GPL(suspend_set_ops);
 
-bool valid_state(suspend_state_t state)
-{
-	if (state == PM_SUSPEND_FREEZE) {
-#ifdef CONFIG_PM_DEBUG
-		if (pm_test_level != TEST_NONE &&
-		    pm_test_level != TEST_FREEZER &&
-		    pm_test_level != TEST_DEVICES &&
-		    pm_test_level != TEST_PLATFORM) {
-			printk(KERN_WARNING "Unsupported pm_test mode for "
-					"freeze state, please choose "
-					"none/freezer/devices/platform.\n");
-			return false;
-		}
-#endif
-			return true;
-	}
-	/*
-	 * PM_SUSPEND_STANDBY and PM_SUSPEND_MEMORY states need lowlevel
-	 * support and need to be valid to the lowlevel
-	 * implementation, no valid callback implies that none are valid.
-	 */
-	return suspend_ops && suspend_ops->valid && suspend_ops->valid(state);
-}
-
 /**
  * suspend_valid_only_mem - Generic memory-only valid callback.
  *
@@ -324,9 +316,17 @@ static int enter_state(suspend_state_t state)
 {
 	int error;
 
-	if (!valid_state(state))
-		return -ENODEV;
-
+	if (state == PM_SUSPEND_FREEZE) {
+#ifdef CONFIG_PM_DEBUG
+		if (pm_test_level != TEST_NONE && pm_test_level <= TEST_CPUS) {
+			pr_warning("PM: Unsupported test mode for freeze state,"
+				   "please choose none/freezer/devices/platform.\n");
+			return -EAGAIN;
+		}
+#endif
+	} else if (!valid_state(state)) {
+		return -EINVAL;
+	}
 	if (!mutex_trylock(&pm_mutex))
 		return -EBUSY;
 
diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c
index d4e3ab167a73..269b097e78ea 100644
--- a/kernel/power/suspend_test.c
+++ b/kernel/power/suspend_test.c
@@ -162,7 +162,7 @@ static int __init test_suspend(void)
 	/* PM is initialized by now; is that state testable? */
 	if (test_state == PM_SUSPEND_ON)
 		goto done;
-	if (!valid_state(test_state)) {
+	if (!pm_states[test_state].state) {
 		printk(warn_bad_state, pm_states[test_state].label);
 		goto done;
 	}

From 961a14671f6f79285672b76740eb87eb44ff5058 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 13 Jun 2013 17:31:28 +0200
Subject: [PATCH 0857/1185] netfilter: nf_conntrack: avoid large timeout for
 mid-stream pickup

commit 6547a221871f139cc56328a38105d47c14874cbe upstream.

When loose tracking is enabled (default), non-syn packets cause
creation of new conntracks in established state with default timeout for
established state (5 days).  This causes the table to fill up with UNREPLIED
when the 'new ack' packet happened to be the last-ack of a previous,
already timed-out connection.

Consider:

A 192.168.x.52792 > 10.184.y.80: F, 426:426(0) ack 9237 win 255
B 10.184.y.80 > 192.168.x.52792: ., ack 427 win 123
<61 second pause>
C 10.184.y.80 > 192.168.x.52792: F, 9237:9237(0) ack 427 win 123
D 192.168.x.52792 > 10.184.y.80: ., ack 9238 win 255

B moves conntrack to CLOSE_WAIT and will kill it after 60 second timeout,
C is ignored (FIN set), but last packet (D) causes new ct with 5-days timeout.

Use UNACK timeout (5 minutes) instead to get rid of these entries sooner
when in ESTABLISHED state without having seen traffic in both directions.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: Florian Koch <florian.koch1981@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nf_conntrack_proto_tcp.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 4d4d8f1d01fc..7dcc376eea5f 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1043,6 +1043,12 @@ static int tcp_packet(struct nf_conn *ct,
 			nf_ct_kill_acct(ct, ctinfo, skb);
 			return NF_ACCEPT;
 		}
+		/* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
+		 * pickup with loose=1. Avoid large ESTABLISHED timeout.
+		 */
+		if (new_state == TCP_CONNTRACK_ESTABLISHED &&
+		    timeout > timeouts[TCP_CONNTRACK_UNACK])
+			timeout = timeouts[TCP_CONNTRACK_UNACK];
 	} else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
 		   && (old_state == TCP_CONNTRACK_SYN_RECV
 		       || old_state == TCP_CONNTRACK_ESTABLISHED)

From ab3e7055e9b5a99767cbe3a96db798cc68d850cc Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 5 Jun 2013 11:25:13 +0100
Subject: [PATCH 0858/1185] ARM: 7748/1: oabi: handle faults when loading swi
 instruction from userspace

commit 1aa2b3b7a6c4f3dbd3671171113a20e6a6190e3b upstream.

Running an OABI_COMPAT kernel on an SMP platform can lead to fun and
games with page aging.

If one CPU issues a swi instruction immediately before another CPU
decides to mkold the page containing the swi instruction, then we will
fault attempting to load the instruction during the vector_swi handler
in order to retrieve its immediate field. Since this fault is not
currently dealt with by our exception tables, this results in a panic:

  Unable to handle kernel paging request at virtual address 4020841c
  pgd = c490c000
  [4020841c] *pgd=84451831, *pte=bf05859d, *ppte=00000000
  Internal error: Oops: 17 [#1] PREEMPT SMP ARM
  Modules linked in: hid_sony(O)
  CPU: 1    Tainted: G        W  O  (3.4.0-perf-gf496dca-01162-gcbcc62b #1)
  PC is at vector_swi+0x28/0x88
  LR is at 0x40208420

This patch wraps all of the swi instruction loads with the USER macro
and provides a shared exception table entry which simply rewinds the
saved user PC and returns from the system call (without setting tbl, so
there's no worries with tracing or syscall restarting). Returning to
userspace will re-enter the page fault handler, from where we will
probably send SIGSEGV to the current task.

Reported-by: Wang, Yalin <yalin.wang@sonymobile.com>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/kernel/entry-common.S | 42 +++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index bc5bc0a97131..4bc816a74a2e 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -362,6 +362,16 @@ ENTRY(vector_swi)
 	str	r0, [sp, #S_OLD_R0]		@ Save OLD_R0
 	zero_fp
 
+#ifdef CONFIG_ALIGNMENT_TRAP
+	ldr	ip, __cr_alignment
+	ldr	ip, [ip]
+	mcr	p15, 0, ip, c1, c0		@ update control register
+#endif
+
+	enable_irq
+	ct_user_exit
+	get_thread_info tsk
+
 	/*
 	 * Get the system call number.
 	 */
@@ -375,9 +385,9 @@ ENTRY(vector_swi)
 #ifdef CONFIG_ARM_THUMB
 	tst	r8, #PSR_T_BIT
 	movne	r10, #0				@ no thumb OABI emulation
-	ldreq	r10, [lr, #-4]			@ get SWI instruction
+ USER(	ldreq	r10, [lr, #-4]		)	@ get SWI instruction
 #else
-	ldr	r10, [lr, #-4]			@ get SWI instruction
+ USER(	ldr	r10, [lr, #-4]		)	@ get SWI instruction
 #endif
 #ifdef CONFIG_CPU_ENDIAN_BE8
 	rev	r10, r10			@ little endian instruction
@@ -392,22 +402,13 @@ ENTRY(vector_swi)
 	/* Legacy ABI only, possibly thumb mode. */
 	tst	r8, #PSR_T_BIT			@ this is SPSR from save_user_regs
 	addne	scno, r7, #__NR_SYSCALL_BASE	@ put OS number in
-	ldreq	scno, [lr, #-4]
+ USER(	ldreq	scno, [lr, #-4]		)
 
 #else
 	/* Legacy ABI only. */
-	ldr	scno, [lr, #-4]			@ get SWI instruction
+ USER(	ldr	scno, [lr, #-4]		)	@ get SWI instruction
 #endif
 
-#ifdef CONFIG_ALIGNMENT_TRAP
-	ldr	ip, __cr_alignment
-	ldr	ip, [ip]
-	mcr	p15, 0, ip, c1, c0		@ update control register
-#endif
-	enable_irq
-	ct_user_exit
-
-	get_thread_info tsk
 	adr	tbl, sys_call_table		@ load syscall table pointer
 
 #if defined(CONFIG_OABI_COMPAT)
@@ -442,6 +443,21 @@ local_restart:
 	eor	r0, scno, #__NR_SYSCALL_BASE	@ put OS number back
 	bcs	arm_syscall	
 	b	sys_ni_syscall			@ not private func
+
+#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI)
+	/*
+	 * We failed to handle a fault trying to access the page
+	 * containing the swi instruction, but we're not really in a
+	 * position to return -EFAULT. Instead, return back to the
+	 * instruction and re-enter the user fault handling path trying
+	 * to page it in. This will likely result in sending SEGV to the
+	 * current task.
+	 */
+9001:
+	sub	lr, lr, #4
+	str	lr, [sp, #S_PC]
+	b	ret_fast_syscall
+#endif
 ENDPROC(vector_swi)
 
 	/*

From 1828891837182b3ae80cc83e85b4506df016557f Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Mon, 28 Apr 2014 15:59:56 +0300
Subject: [PATCH 0859/1185] serial: 8250_dma: check the result of TX buffer
 mapping

commit d4089a332883ad969700aac5dd4dd5f1c4fee825 upstream.

Use dma_mapping_error() to make sure the TX buffer mapping did not
fail.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Cc: "Petallo, MauriceX R" <mauricex.r.petallo@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_dma.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index ab9096dc3849..148ffe4c232f 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -192,21 +192,28 @@ int serial8250_request_dma(struct uart_8250_port *p)
 
 	dma->rx_buf = dma_alloc_coherent(dma->rxchan->device->dev, dma->rx_size,
 					&dma->rx_addr, GFP_KERNEL);
-	if (!dma->rx_buf) {
-		dma_release_channel(dma->rxchan);
-		dma_release_channel(dma->txchan);
-		return -ENOMEM;
-	}
+	if (!dma->rx_buf)
+		goto err;
 
 	/* TX buffer */
 	dma->tx_addr = dma_map_single(dma->txchan->device->dev,
 					p->port.state->xmit.buf,
 					UART_XMIT_SIZE,
 					DMA_TO_DEVICE);
+	if (dma_mapping_error(dma->txchan->device->dev, dma->tx_addr)) {
+		dma_free_coherent(dma->rxchan->device->dev, dma->rx_size,
+				  dma->rx_buf, dma->rx_addr);
+		goto err;
+	}
 
 	dev_dbg_ratelimited(p->port.dev, "got both dma channels\n");
 
 	return 0;
+err:
+	dma_release_channel(dma->rxchan);
+	dma_release_channel(dma->txchan);
+
+	return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(serial8250_request_dma);
 

From b4644ca460891d3b5a841c99dee1d7d218eed71e Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 5 Nov 2013 01:15:38 +0100
Subject: [PATCH 0860/1185] ext2: Fix fs corruption in ext2_get_xip_mem()

commit 7ba3ec5749ddb61f79f7be17b5fd7720eebc52de upstream.

Commit 8e3dffc651cb "Ext2: mark inode dirty after the function
dquot_free_block_nodirty is called" unveiled a bug in __ext2_get_block()
called from ext2_get_xip_mem(). That function called ext2_get_block()
mistakenly asking it to map 0 blocks while 1 was intended. Before the
above mentioned commit things worked out fine by luck but after that commit
we started returning that we allocated 0 blocks while we in fact
allocated 1 block and thus allocation was looping until all blocks in
the filesystem were exhausted.

Fix the problem by properly asking for one block and also add assertion
in ext2_get_blocks() to catch similar problems.

Reported-and-tested-by: Andiry Xu <andiry.xu@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext2/inode.c | 2 ++
 fs/ext2/xip.c   | 1 +
 2 files changed, 3 insertions(+)

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 0a87bb10998d..99d84ce038b8 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -632,6 +632,8 @@ static int ext2_get_blocks(struct inode *inode,
 	int count = 0;
 	ext2_fsblk_t first_block = 0;
 
+	BUG_ON(maxblocks == 0);
+
 	depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
 
 	if (depth == 0)
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index 1c3312858fcf..e98171a11cfe 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -35,6 +35,7 @@ __ext2_get_block(struct inode *inode, pgoff_t pgoff, int create,
 	int rc;
 
 	memset(&tmp, 0, sizeof(struct buffer_head));
+	tmp.b_size = 1 << inode->i_blkbits;
 	rc = ext2_get_block(inode, pgoff, &tmp, create);
 	*result = tmp.b_blocknr;
 

From 4479103c7e6e3c5588a6b695d935cd5201947c3a Mon Sep 17 00:00:00 2001
From: Soren Brinkmann <soren.brinkmann@xilinx.com>
Date: Wed, 19 Jun 2013 10:53:03 -0700
Subject: [PATCH 0861/1185] arm: multi_v7_defconfig: Enable Zynq UART driver

commit 90de827b9c238f8d8209bc7adc70190575514315 upstream.

Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/configs/multi_v7_defconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 9ce8ba1a1433..adb9aa5c88c7 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -48,6 +48,8 @@ CONFIG_SERIAL_SIRFSOC=y
 CONFIG_SERIAL_SIRFSOC_CONSOLE=y
 CONFIG_SERIAL_VT8500=y
 CONFIG_SERIAL_VT8500_CONSOLE=y
+CONFIG_SERIAL_XILINX_PS_UART=y
+CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
 CONFIG_IPMI_HANDLER=y
 CONFIG_IPMI_SI=y
 CONFIG_I2C=y

From beed61068fba1c90dc4cd553726c8b68a252a68f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 3 Jul 2013 15:08:30 -0700
Subject: [PATCH 0862/1185] kernel/fork.c:copy_process(): unify
 CLONE_THREAD-or-thread_group_leader code

commit 80628ca06c5d42929de6bc22c0a41589a834d151 upstream.

Cleanup and preparation for the next changes.

Move the "if (clone_flags & CLONE_THREAD)" code down under "if
(likely(p->pid))" and turn it into the "else" branch.  This makes the
process/thread initialization more symmetrical and removes one check.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Sergey Dyasly <dserrg@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Li Zefan <lizefan@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/fork.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 814363a69b80..c271467da4f9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1448,14 +1448,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto bad_fork_free_pid;
 	}
 
-	if (clone_flags & CLONE_THREAD) {
-		current->signal->nr_threads++;
-		atomic_inc(&current->signal->live);
-		atomic_inc(&current->signal->sigcnt);
-		p->group_leader = current->group_leader;
-		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
-	}
-
 	if (likely(p->pid)) {
 		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
 
@@ -1472,6 +1464,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			list_add_tail(&p->sibling, &p->real_parent->children);
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
 			__this_cpu_inc(process_counts);
+		} else {
+			current->signal->nr_threads++;
+			atomic_inc(&current->signal->live);
+			atomic_inc(&current->signal->sigcnt);
+			p->group_leader = current->group_leader;
+			list_add_tail_rcu(&p->thread_group,
+					  &p->group_leader->thread_group);
 		}
 		attach_pid(p, PIDTYPE_PID, pid);
 		nr_threads++;

From 641bc58deb5e9c1064db00e80acea791eef90622 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 21 Jan 2014 15:49:56 -0800
Subject: [PATCH 0863/1185] introduce for_each_thread() to replace the buggy
 while_each_thread()

commit 0c740d0afc3bff0a097ad03a1c8df92757516f5c upstream.

while_each_thread() and next_thread() should die, almost every lockless
usage is wrong.

1. Unless g == current, the lockless while_each_thread() is not safe.

   while_each_thread(g, t) can loop forever if g exits, next_thread()
   can't reach the unhashed thread in this case. Note that this can
   happen even if g is the group leader, it can exec.

2. Even if while_each_thread() itself was correct, people often use
   it wrongly.

   It was never safe to just take rcu_read_lock() and loop unless
   you verify that pid_alive(g) == T, even the first next_thread()
   can point to the already freed/reused memory.

This patch adds signal_struct->thread_head and task->thread_node to
create the normal rcu-safe list with the stable head.  The new
for_each_thread(g, t) helper is always safe under rcu_read_lock() as
long as this task_struct can't go away.

Note: of course it is ugly to have both task_struct->thread_node and the
old task_struct->thread_group, we will kill it later, after we change
the users of while_each_thread() to use for_each_thread().

Perhaps we can kill it even before we convert all users, we can
reimplement next_thread(t) using the new thread_head/thread_node.  But
we can't do this right now because this will lead to subtle behavioural
changes.  For example, do/while_each_thread() always sees at least one
task, while for_each_thread() can do nothing if the whole thread group
has died.  Or thread_group_empty(), currently its semantics is not clear
unless thread_group_leader(p) and we need to audit the callers before we
can change it.

So this patch adds the new interface which has to coexist with the old
one for some time, hopefully the next changes will be more or less
straightforward and the old one will go away soon.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Sergey Dyasly <dserrg@gmail.com>
Tested-by: Sergey Dyasly <dserrg@gmail.com>
Reviewed-by: Sameer Nanda <snanda@chromium.org>
Acked-by: David Rientjes <rientjes@google.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mandeep Singh Baines <msb@chromium.org>
Cc: "Ma, Xindong" <xindong.ma@intel.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: "Tu, Xiaobing" <xiaobing.tu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Li Zefan <lizefan@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/init_task.h |  2 ++
 include/linux/sched.h     | 12 ++++++++++++
 kernel/exit.c             |  1 +
 kernel/fork.c             |  7 +++++++
 4 files changed, 22 insertions(+)

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 5cd0f0949927..998f4dfedecf 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -40,6 +40,7 @@ extern struct fs_struct init_fs;
 
 #define INIT_SIGNALS(sig) {						\
 	.nr_threads	= 1,						\
+	.thread_head	= LIST_HEAD_INIT(init_task.thread_node),	\
 	.wait_chldexit	= __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
 	.shared_pending	= { 						\
 		.list = LIST_HEAD_INIT(sig.shared_pending.list),	\
@@ -213,6 +214,7 @@ extern struct task_group root_task_group;
 		[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),		\
 	},								\
 	.thread_group	= LIST_HEAD_INIT(tsk.thread_group),		\
+	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),	\
 	INIT_IDS							\
 	INIT_PERF_EVENTS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 597c8ab005a0..8293545ac9b7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -480,6 +480,7 @@ struct signal_struct {
 	atomic_t		sigcnt;
 	atomic_t		live;
 	int			nr_threads;
+	struct list_head	thread_head;
 
 	wait_queue_head_t	wait_chldexit;	/* for wait4() */
 
@@ -1160,6 +1161,7 @@ struct task_struct {
 	/* PID/PID hash table linkage. */
 	struct pid_link pids[PIDTYPE_MAX];
 	struct list_head thread_group;
+	struct list_head thread_node;
 
 	struct completion *vfork_done;		/* for vfork() */
 	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
@@ -2167,6 +2169,16 @@ extern bool current_is_single_threaded(void);
 #define while_each_thread(g, t) \
 	while ((t = next_thread(t)) != g)
 
+#define __for_each_thread(signal, t)	\
+	list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)
+
+#define for_each_thread(p, t)		\
+	__for_each_thread((p)->signal, t)
+
+/* Careful: this is a double loop, 'break' won't work as expected. */
+#define for_each_process_thread(p, t)	\
+	for_each_process(p) for_each_thread(p, t)
+
 static inline int get_nr_threads(struct task_struct *tsk)
 {
 	return tsk->signal->nr_threads;
diff --git a/kernel/exit.c b/kernel/exit.c
index 6682b2ea5b11..717efbd7cb72 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -74,6 +74,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
 		__this_cpu_dec(process_counts);
 	}
 	list_del_rcu(&p->thread_group);
+	list_del_rcu(&p->thread_node);
 }
 
 /*
diff --git a/kernel/fork.c b/kernel/fork.c
index c271467da4f9..2c76e11ba939 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1045,6 +1045,11 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->nr_threads = 1;
 	atomic_set(&sig->live, 1);
 	atomic_set(&sig->sigcnt, 1);
+
+	/* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */
+	sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node);
+	tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head);
+
 	init_waitqueue_head(&sig->wait_chldexit);
 	sig->curr_target = tsk;
 	init_sigpending(&sig->shared_pending);
@@ -1471,6 +1476,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			p->group_leader = current->group_leader;
 			list_add_tail_rcu(&p->thread_group,
 					  &p->group_leader->thread_group);
+			list_add_tail_rcu(&p->thread_node,
+					  &p->signal->thread_head);
 		}
 		attach_pid(p, PIDTYPE_PID, pid);
 		nr_threads++;

From 4940a48e7d4112053daa11be3e928e62dc52c407 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 21 Jan 2014 15:49:58 -0800
Subject: [PATCH 0864/1185] oom_kill: change oom_kill.c to use
 for_each_thread()

commit 1da4db0cd5c8a31d4468ec906b413e75e604b465 upstream.

Change oom_kill.c to use for_each_thread() rather than the racy
while_each_thread() which can loop forever if we race with exit.

Note also that most users were buggy even if while_each_thread() was
fine, the task can exit even _before_ rcu_read_lock().

Fortunately the new for_each_thread() only requires the stable
task_struct, so this change fixes both problems.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Sergey Dyasly <dserrg@gmail.com>
Tested-by: Sergey Dyasly <dserrg@gmail.com>
Reviewed-by: Sameer Nanda <snanda@chromium.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mandeep Singh Baines <msb@chromium.org>
Cc: "Ma, Xindong" <xindong.ma@intel.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: "Tu, Xiaobing" <xiaobing.tu@intel.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Li Zefan <lizefan@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/oom_kill.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index dfa94ed3c7fa..76be55645da2 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -59,7 +59,7 @@ static bool has_intersects_mems_allowed(struct task_struct *tsk,
 {
 	struct task_struct *start = tsk;
 
-	do {
+	for_each_thread(start, tsk) {
 		if (mask) {
 			/*
 			 * If this is a mempolicy constrained oom, tsk's
@@ -77,7 +77,7 @@ static bool has_intersects_mems_allowed(struct task_struct *tsk,
 			if (cpuset_mems_allowed_intersects(current, tsk))
 				return true;
 		}
-	} while_each_thread(start, tsk);
+	}
 
 	return false;
 }
@@ -97,14 +97,14 @@ static bool has_intersects_mems_allowed(struct task_struct *tsk,
  */
 struct task_struct *find_lock_task_mm(struct task_struct *p)
 {
-	struct task_struct *t = p;
+	struct task_struct *t;
 
-	do {
+	for_each_thread(p, t) {
 		task_lock(t);
 		if (likely(t->mm))
 			return t;
 		task_unlock(t);
-	} while_each_thread(p, t);
+	}
 
 	return NULL;
 }
@@ -301,7 +301,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 	unsigned long chosen_points = 0;
 
 	rcu_read_lock();
-	do_each_thread(g, p) {
+	for_each_process_thread(g, p) {
 		unsigned int points;
 
 		switch (oom_scan_process_thread(p, totalpages, nodemask,
@@ -323,7 +323,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 			chosen = p;
 			chosen_points = points;
 		}
-	} while_each_thread(g, p);
+	}
 	if (chosen)
 		get_task_struct(chosen);
 	rcu_read_unlock();
@@ -406,7 +406,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 {
 	struct task_struct *victim = p;
 	struct task_struct *child;
-	struct task_struct *t = p;
+	struct task_struct *t;
 	struct mm_struct *mm;
 	unsigned int victim_points = 0;
 	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
@@ -437,7 +437,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	 * still freeing memory.
 	 */
 	read_lock(&tasklist_lock);
-	do {
+	for_each_thread(p, t) {
 		list_for_each_entry(child, &t->children, sibling) {
 			unsigned int child_points;
 
@@ -455,7 +455,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 				get_task_struct(victim);
 			}
 		}
-	} while_each_thread(p, t);
+	}
 	read_unlock(&tasklist_lock);
 
 	rcu_read_lock();

From a214c050ee476860bccfef17e24028b9a0750360 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 21 Jan 2014 15:50:00 -0800
Subject: [PATCH 0865/1185] oom_kill: has_intersects_mems_allowed() needs
 rcu_read_lock()

commit ad96244179fbd55b40c00f10f399bc04739b8e1f upstream.

At least out_of_memory() calls has_intersects_mems_allowed() without
even rcu_read_lock(), this is obviously buggy.

Add the necessary rcu_read_lock().  This means that we can not simply
return from the loop, we need "bool ret" and "break".

While at it, swap the names of task_struct's (the argument and the
local).  This cleans up the code a little bit and avoids the unnecessary
initialization.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Sergey Dyasly <dserrg@gmail.com>
Tested-by: Sergey Dyasly <dserrg@gmail.com>
Reviewed-by: Sameer Nanda <snanda@chromium.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mandeep Singh Baines <msb@chromium.org>
Cc: "Ma, Xindong" <xindong.ma@intel.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: "Tu, Xiaobing" <xiaobing.tu@intel.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Li Zefan <lizefan@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/oom_kill.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 76be55645da2..4f584d43e3b1 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -47,18 +47,20 @@ static DEFINE_SPINLOCK(zone_scan_lock);
 #ifdef CONFIG_NUMA
 /**
  * has_intersects_mems_allowed() - check task eligiblity for kill
- * @tsk: task struct of which task to consider
+ * @start: task struct of which task to consider
  * @mask: nodemask passed to page allocator for mempolicy ooms
  *
  * Task eligibility is determined by whether or not a candidate task, @tsk,
  * shares the same mempolicy nodes as current if it is bound by such a policy
  * and whether or not it has the same set of allowed cpuset nodes.
  */
-static bool has_intersects_mems_allowed(struct task_struct *tsk,
+static bool has_intersects_mems_allowed(struct task_struct *start,
 					const nodemask_t *mask)
 {
-	struct task_struct *start = tsk;
+	struct task_struct *tsk;
+	bool ret = false;
 
+	rcu_read_lock();
 	for_each_thread(start, tsk) {
 		if (mask) {
 			/*
@@ -67,19 +69,20 @@ static bool has_intersects_mems_allowed(struct task_struct *tsk,
 			 * mempolicy intersects current, otherwise it may be
 			 * needlessly killed.
 			 */
-			if (mempolicy_nodemask_intersects(tsk, mask))
-				return true;
+			ret = mempolicy_nodemask_intersects(tsk, mask);
 		} else {
 			/*
 			 * This is not a mempolicy constrained oom, so only
 			 * check the mems of tsk's cpuset.
 			 */
-			if (cpuset_mems_allowed_intersects(current, tsk))
-				return true;
+			ret = cpuset_mems_allowed_intersects(current, tsk);
 		}
+		if (ret)
+			break;
 	}
+	rcu_read_unlock();
 
-	return false;
+	return ret;
 }
 #else
 static bool has_intersects_mems_allowed(struct task_struct *tsk,

From d081edee3aa2695765ff836197846b89ab712b05 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 21 Jan 2014 15:50:01 -0800
Subject: [PATCH 0866/1185] oom_kill: add rcu_read_lock() into
 find_lock_task_mm()

commit 4d4048be8a93769350efa31d2482a038b7de73d0 upstream.

find_lock_task_mm() expects it is called under rcu or tasklist lock, but
it seems that at least oom_unkillable_task()->task_in_mem_cgroup() and
mem_cgroup_out_of_memory()->oom_badness() can call it lockless.

Perhaps we could fix the callers, but this patch simply adds rcu lock
into find_lock_task_mm().  This also allows us to simplify one of its
callers, oom_kill_process(), a bit.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Sergey Dyasly <dserrg@gmail.com>
Cc: Sameer Nanda <snanda@chromium.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mandeep Singh Baines <msb@chromium.org>
Cc: "Ma, Xindong" <xindong.ma@intel.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: "Tu, Xiaobing" <xiaobing.tu@intel.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Li Zefan <lizefan@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/oom_kill.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 4f584d43e3b1..8e40908e724a 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -102,14 +102,19 @@ struct task_struct *find_lock_task_mm(struct task_struct *p)
 {
 	struct task_struct *t;
 
+	rcu_read_lock();
+
 	for_each_thread(p, t) {
 		task_lock(t);
 		if (likely(t->mm))
-			return t;
+			goto found;
 		task_unlock(t);
 	}
+	t = NULL;
+found:
+	rcu_read_unlock();
 
-	return NULL;
+	return t;
 }
 
 /* return true if the task is not adequate as candidate victim task. */
@@ -461,10 +466,8 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	}
 	read_unlock(&tasklist_lock);
 
-	rcu_read_lock();
 	p = find_lock_task_mm(victim);
 	if (!p) {
-		rcu_read_unlock();
 		put_task_struct(victim);
 		return;
 	} else if (victim != p) {
@@ -490,6 +493,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	 * That thread will now get access to memory reserves since it has a
 	 * pending fatal signal.
 	 */
+	rcu_read_lock();
 	for_each_process(p)
 		if (p->mm == mm && !same_thread_group(p, victim) &&
 		    !(p->flags & PF_KTHREAD)) {

From 3581832971e7f7bac0d9d1c11c7d985b1edef7c9 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 8 Aug 2014 14:19:17 -0700
Subject: [PATCH 0867/1185] vm_is_stack: use for_each_thread() rather than
 buggy while_each_thread()

commit 4449a51a7c281602d3a385044ab928322a122a02 upstream.

Aleksei hit the soft lockup during reading /proc/PID/smaps.  David
investigated the problem and suggested the right fix.

while_each_thread() is racy and should die; this patch updates
vm_is_stack() to use for_each_thread() instead.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Aleksei Besogonov <alex.besogonov@gmail.com>
Tested-by: Aleksei Besogonov <alex.besogonov@gmail.com>
Suggested-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Li Zefan <lizefan@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/util.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/mm/util.c b/mm/util.c
index ab1424dbe2e6..0b1725254ff1 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -272,17 +272,14 @@ pid_t vm_is_stack(struct task_struct *task,
 
 	if (in_group) {
 		struct task_struct *t;
-		rcu_read_lock();
-		if (!pid_alive(task))
-			goto done;
 
-		t = task;
-		do {
+		rcu_read_lock();
+		for_each_thread(task, t) {
 			if (vm_is_stack_for_task(t, vma)) {
 				ret = t->pid;
 				goto done;
 			}
-		} while_each_thread(task, t);
+		}
 done:
 		rcu_read_unlock();
 	}

From 926719debff484d9678a324c71e948b7dd60352b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 5 Oct 2014 14:54:30 -0700
Subject: [PATCH 0868/1185] Linux 3.10.56

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 6141df04fcb5..03bd927522f7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 10
-SUBLEVEL = 55
+SUBLEVEL = 56
 EXTRAVERSION =
 NAME = TOSSUG Baby Fish
 

From defd110d5c8bd987868c8269f8dc7860fd6daec1 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 6 Oct 2014 17:26:36 +0100
Subject: [PATCH 0869/1185] arm64: fpsimd: Fix mismerge

Thanks to AKASHI Takahiro for identifying the issue.

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 arch/arm64/kernel/fpsimd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 845fae3aeb26..5ba0217df39b 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -158,6 +158,7 @@ void fpsimd_flush_thread(void)
 	preempt_disable();
 	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
+	preempt_enable();
 }
 
 /*

From abbfed9ed1a78701ef3db74f5287958feb897035 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Thu, 4 Sep 2014 14:54:29 +0100
Subject: [PATCH 0870/1185] arm64: ptrace: add PTRACE_SET_SYSCALL

Note: This patch is from v6 of Takahiro's proposed
"arm64: add seccomp support" patchset (leecam@google.com)

To allow tracer to be able to change/skip a system call by re-writing
a syscall number, there are several approaches:

(1) modify x8 register with ptrace(PTRACE_SETREGSET), and handle this case
    later on in syscall_trace_enter(), or
(2) support ptrace(PTRACE_SET_SYSCALL) as on arm

Given that user_pt_regs doesn't expose 'syscallno' to the tracer, and that
secure_computing() expects a changed syscall number (especially -1) to be
visible before this function returns in syscall_trace_enter(), we'd better
take (2).

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm64/include/uapi/asm/ptrace.h |  1 +
 arch/arm64/kernel/ptrace.c           | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 6913643bbe54..49c61746297d 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -23,6 +23,7 @@
 
 #include <asm/hwcap.h>
 
+#define PTRACE_SET_SYSCALL	23
 
 /*
  * PSR bits
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index ee856d9f6f64..56a62ebf2baf 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1064,7 +1064,19 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
 long arch_ptrace(struct task_struct *child, long request,
 		 unsigned long addr, unsigned long data)
 {
-	return ptrace_request(child, request, addr, data);
+	int ret;
+
+	switch (request) {
+		case PTRACE_SET_SYSCALL:
+			task_pt_regs(child)->syscallno = data;
+			ret = 0;
+			break;
+		default:
+			ret = ptrace_request(child, request, addr, data);
+			break;
+	}
+
+	return ret;
 }
 
 enum ptrace_syscall_dir {

From feb28436457d33fef9f264635291432df4b74122 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Thu, 4 Sep 2014 15:20:53 +0100
Subject: [PATCH 0871/1185] arm64: ptrace: allow tracer to skip a system call

Note: This patch is from v6 of Takahiro's proposed
"arm64: add seccomp support" patchset (leecam@google.com)

If the tracer specifies -1 as a syscall number, the traced system call
should be skipped, with the value in x0 used as the return value.
This patch enables these semantics, but there is a restriction here:

   when syscall(-1) is issued by user, tracer cannot skip this system call
   and modify a return value at syscall entry.

To lift this restriction, we would need to treat whatever value is in x0
as a return value, but this might result in a bogus value being returned,
especially when the tracer doesn't do anything at this syscall.
So we always return -ENOSYS instead, while the tracer has another chance
to change the return value at syscall exit.

Please also note:
* syscall entry tracing and syscall exit tracing (ftrace tracepoint and
  audit) are always executed, if enabled, even when skipping a system call
  (that is, -1).
  In this way, we can avoid a potential bug where audit_syscall_entry()
  might be called without audit_syscall_exit() at the previous system call
  being called, that would cause OOPs in audit_syscall_entry().

* syscallno may also be set to -1 if a fatal signal (SIGKILL) is detected
  in tracehook_report_syscall_entry(), but since a value set to x0 (ENOSYS)
  is not used in this case, we may neglect the case.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>

Conflicts:
	arch/arm64/kernel/entry.S

Change-Id: Ifcdcdbcb7c8cf97e5b5f1086a1ea4107e1d4f9a8
---
 arch/arm64/include/asm/ptrace.h |  8 ++++++++
 arch/arm64/kernel/entry.S       |  4 ++++
 arch/arm64/kernel/ptrace.c      | 20 ++++++++++++++++++++
 3 files changed, 32 insertions(+)

diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index dbe3b00d1eb7..c0ebe6fade63 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -60,6 +60,14 @@
 #define COMPAT_PT_TEXT_ADDR		0x10000
 #define COMPAT_PT_DATA_ADDR		0x10004
 #define COMPAT_PT_TEXT_END_ADDR		0x10008
+
+/*
+ * used to skip a system call when tracer changes its number to -1
+ * with ptrace(PTRACE_SET_SYSCALL)
+ */
+#define RET_SKIP_SYSCALL	-1
+#define IS_SKIP_SYSCALL(no)	((int)(no & 0xffffffff) == -1)
+
 #ifndef __ASSEMBLY__
 
 /* sizeof(struct user) for AArch32 */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 3c93e1a9ea8d..a30fab9ea2a3 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -25,6 +25,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/errno.h>
 #include <asm/esr.h>
+#include <asm/ptrace.h>
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
 
@@ -662,6 +663,8 @@ __sys_trace:
 	mov	x0, sp
 	bl	syscall_trace_enter
 	adr	lr, __sys_trace_return		// return address
+	cmp	w0, #RET_SKIP_SYSCALL		// skip syscall?
+	b.eq	__sys_trace_return_skipped
 	uxtw	scno, w0			// syscall number (possibly new)
 	mov	x1, sp				// pointer to regs
 	cmp	scno, sc_nr			// check upper syscall limit
@@ -675,6 +678,7 @@ __sys_trace:
 
 __sys_trace_return:
 	str	x0, [sp]			// save returned x0
+__sys_trace_return_skipped:			// x0 already in regs[0]
 	mov	x0, sp
 	bl	syscall_trace_exit
 	b	ret_to_user
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 56a62ebf2baf..2a6edfb0079b 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1108,9 +1108,29 @@ static void tracehook_report_syscall(struct pt_regs *regs,
 
 asmlinkage int syscall_trace_enter(struct pt_regs *regs)
 {
+	unsigned int saved_syscallno = regs->syscallno;
+
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
 
+	if (IS_SKIP_SYSCALL(regs->syscallno)) {
+		/*
+		 * RESTRICTION: we can't modify a return value of user
+		 * issued syscall(-1) here. In order to ease this flavor,
+		 * we need to treat whatever value in x0 as a return value,
+		 * but this might result in a bogus value being returned.
+		 */
+		/*
+		 * NOTE: syscallno may also be set to -1 if fatal signal is
+		 * detected in tracehook_report_syscall_entry(), but since
+		 * a value set to x0 here is not used in this case, we may
+		 * neglect the case.
+		 */
+		if (!test_thread_flag(TIF_SYSCALL_TRACE) ||
+				(IS_SKIP_SYSCALL(saved_syscallno)))
+			regs->regs[0] = -ENOSYS;
+	}
+
 	audit_syscall_entry(syscall_get_arch(), regs->syscallno,
 		regs->orig_x0, regs->regs[1], regs->regs[2], regs->regs[3]);
 

From dab10731da65a0deba46402ca9fadf6974676cc8 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Thu, 4 Sep 2014 15:34:14 +0100
Subject: [PATCH 0872/1185] asm-generic: add generic seccomp.h for secure
 computing mode 1

Note: This patch is from v6 of Takahiro's proposed
"arm64: add seccomp support" patchset (leecam@google.com)

Those values (__NR_seccomp_*) are used solely in secure_computing()
to identify mode 1 system calls. If compat system calls have different
syscall numbers, asm/seccomp.h may override them.

Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 include/asm-generic/seccomp.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 include/asm-generic/seccomp.h

diff --git a/include/asm-generic/seccomp.h b/include/asm-generic/seccomp.h
new file mode 100644
index 000000000000..663ac3dc02ea
--- /dev/null
+++ b/include/asm-generic/seccomp.h
@@ -0,0 +1,29 @@
+/*
+ * include/asm-generic/seccomp.h
+ *
+ * Copyright (C) 2014 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi <at> linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _ASM_GENERIC_SECCOMP_H
+#define _ASM_GENERIC_SECCOMP_H
+
+#include <asm-generic/unistd.h>
+
+#if defined(CONFIG_COMPAT) && !defined(__NR_seccomp_read_32)
+#define __NR_seccomp_read_32		__NR_read
+#define __NR_seccomp_write_32		__NR_write
+#define __NR_seccomp_exit_32		__NR_exit
+#define __NR_seccomp_sigreturn_32	__NR_rt_sigreturn
+#endif /* CONFIG_COMPAT && ! already defined */
+
+#define __NR_seccomp_read		__NR_read
+#define __NR_seccomp_write		__NR_write
+#define __NR_seccomp_exit		__NR_exit
+#define __NR_seccomp_sigreturn		__NR_rt_sigreturn
+
+#endif /* _ASM_GENERIC_SECCOMP_H */
+

From 4f12b53f28a751406a27ef7501a22f9e32a9c30b Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Thu, 4 Sep 2014 15:39:13 +0100
Subject: [PATCH 0873/1185] add seccomp syscall for compat task

Note: This patch is from v6 of Takahiro's proposed
"arm64: add seccomp support" patchset (leecam@google.com)

This patch allows compat task to issue seccomp() system call.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>

Conflicts:
	arch/arm64/include/asm/unistd32.h

Change-Id: I63d38f68da72b3333327256b4cacba2c3ddb39fc
---
 arch/arm64/include/asm/unistd.h   | 2 +-
 arch/arm64/include/asm/unistd32.h | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index a8f8f6992987..53f67c64a6af 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -40,7 +40,7 @@
 #define __ARM_NR_compat_cacheflush	(__ARM_NR_COMPAT_BASE+2)
 #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE+5)
 
-#define __NR_compat_syscalls		378
+#define __NR_compat_syscalls		384
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index dd336c150f3f..63513ae2b59e 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -781,3 +781,11 @@ __SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev)
 __SYSCALL(__NR_kcmp, sys_kcmp)
 #define __NR_finit_module 379
 __SYSCALL(__NR_finit_module, sys_finit_module)
+/* #define __NR_sched_setattr 380 */
+__SYSCALL(380, sys_ni_syscall)
+/* #define __NR_sched_getattr 381 */
+__SYSCALL(381, sys_ni_syscall)
+/* #define __NR_renameat2 382 */
+__SYSCALL(382, sys_ni_syscall)
+#define __NR_seccomp 383
+__SYSCALL(__NR_seccomp, sys_ni_syscall)

From 77227239d20ac6381fb1aee7b7cc902f0d14cd85 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Thu, 4 Sep 2014 15:48:01 +0100
Subject: [PATCH 0874/1185] arm64: add SIGSYS siginfo for compat task

Note: This patch is from v6 of Takahiro's proposed
"arm64: add seccomp support" patchset (leecam@google.com)

SIGSYS is primarily used in secure computing to notify tracer.
This patch allows a signal handler in a compat task to get correct
information with SA_SIGINFO specified when this signal is delivered.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm64/include/asm/compat.h | 7 +++++++
 arch/arm64/kernel/signal32.c    | 8 ++++++++
 2 files changed, 15 insertions(+)

diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index ae0004fe6c23..a79da08994f2 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -191,6 +191,13 @@ typedef struct compat_siginfo {
 			compat_long_t _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
 			int _fd;
 		} _sigpoll;
+
+		/* SIGSYS */
+		struct {
+			compat_uptr_t _call_addr; /* calling user insn */
+			int _syscall;	/* triggering system call number */
+			unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
+		} _sigsys;
 	} _sifields;
 } compat_siginfo_t;
 
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index b4692837e7ab..e1b8b9fb274e 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -211,6 +211,14 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
 		err |= __put_user(from->si_uid, &to->si_uid);
 		err |= __put_user((compat_uptr_t)(unsigned long)from->si_ptr, &to->si_ptr);
 		break;
+#ifdef __ARCH_SIGSYS
+	case __SI_SYS:
+		err |= __put_user((compat_uptr_t)(unsigned long)
+				from->si_call_addr, &to->si_call_addr);
+		err |= __put_user(from->si_syscall, &to->si_syscall);
+		err |= __put_user(from->si_arch, &to->si_arch);
+		break;
+#endif
 	default: /* this is just in case for now ... */
 		err |= __put_user(from->si_pid, &to->si_pid);
 		err |= __put_user(from->si_uid, &to->si_uid);

From 9499cd23f9d05ba159fac6d55dc35a7f49f9ce76 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 11 Mar 2014 12:48:43 -0400
Subject: [PATCH 0875/1185] syscall_get_arch: remove useless function arguments

Every caller of syscall_get_arch() uses current for the task and no
implementors of the function need args.  So just get rid of both of
those things.  Admittedly, since these are inline functions we aren't
wasting stack space, but it just makes the prototypes better.

Signed-off-by: Eric Paris <eparis@redhat.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-mips@linux-mips.org
Cc: linux390@de.ibm.com
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-s390@vger.kernel.org
Cc: linux-arch@vger.kernel.org

Conflicts:
	arch/mips/include/asm/syscall.h
	arch/mips/kernel/ptrace.c
---
 arch/arm/include/asm/syscall.h  | 3 +--
 arch/s390/include/asm/syscall.h | 5 ++---
 arch/x86/include/asm/syscall.h  | 8 +++-----
 include/asm-generic/syscall.h   | 4 +---
 kernel/seccomp.c                | 4 ++--
 5 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index f1d96d4e8092..6db3caacb31a 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -97,8 +97,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
 	memcpy(&regs->ARM_r0 + i, args, n * sizeof(args[0]));
 }
 
-static inline int syscall_get_arch(struct task_struct *task,
-				   struct pt_regs *regs)
+static inline int syscall_get_arch(void)
 {
 	/* ARM tasks don't change audit architectures on the fly. */
 	return AUDIT_ARCH_ARM;
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index cd29d2f4e4f3..bebc0bd8abc2 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -89,11 +89,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
 		regs->orig_gpr2 = args[0];
 }
 
-static inline int syscall_get_arch(struct task_struct *task,
-				   struct pt_regs *regs)
+static inline int syscall_get_arch(void)
 {
 #ifdef CONFIG_COMPAT
-	if (test_tsk_thread_flag(task, TIF_31BIT))
+	if (test_tsk_thread_flag(current, TIF_31BIT))
 		return AUDIT_ARCH_S390;
 #endif
 	return sizeof(long) == 8 ? AUDIT_ARCH_S390X : AUDIT_ARCH_S390;
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 2e188d68397c..f106908a12ec 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -90,8 +90,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
 	memcpy(&regs->bx + i, args, n * sizeof(args[0]));
 }
 
-static inline int syscall_get_arch(struct task_struct *task,
-				   struct pt_regs *regs)
+static inline int syscall_get_arch(void)
 {
 	return AUDIT_ARCH_I386;
 }
@@ -220,8 +219,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
 		}
 }
 
-static inline int syscall_get_arch(struct task_struct *task,
-				   struct pt_regs *regs)
+static inline int syscall_get_arch(void)
 {
 #ifdef CONFIG_IA32_EMULATION
 	/*
@@ -233,7 +231,7 @@ static inline int syscall_get_arch(struct task_struct *task,
 	 *
 	 * x32 tasks should be considered AUDIT_ARCH_X86_64.
 	 */
-	if (task_thread_info(task)->status & TS_COMPAT)
+	if (task_thread_info(current)->status & TS_COMPAT)
 		return AUDIT_ARCH_I386;
 #endif
 	/* Both x32 and x86_64 are considered "64-bit". */
diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h
index 5b09392db673..d401e5463fb0 100644
--- a/include/asm-generic/syscall.h
+++ b/include/asm-generic/syscall.h
@@ -144,8 +144,6 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
 
 /**
  * syscall_get_arch - return the AUDIT_ARCH for the current system call
- * @task:	task of interest, must be in system call entry tracing
- * @regs:	task_pt_regs() of @task
  *
  * Returns the AUDIT_ARCH_* based on the system call convention in use.
  *
@@ -155,5 +153,5 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
  * Architectures which permit CONFIG_HAVE_ARCH_SECCOMP_FILTER must
  * provide an implementation of this.
  */
-int syscall_get_arch(struct task_struct *task, struct pt_regs *regs);
+int syscall_get_arch(void);
 #endif	/* _ASM_SYSCALL_H */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index b7a10048a32c..eda2da3df822 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -95,7 +95,7 @@ u32 seccomp_bpf_load(int off)
 	if (off == BPF_DATA(nr))
 		return syscall_get_nr(current, regs);
 	if (off == BPF_DATA(arch))
-		return syscall_get_arch(current, regs);
+		return syscall_get_arch();
 	if (off >= BPF_DATA(args[0]) && off < BPF_DATA(args[6])) {
 		unsigned long value;
 		int arg = (off - BPF_DATA(args[0])) / sizeof(u64);
@@ -351,7 +351,7 @@ static void seccomp_send_sigsys(int syscall, int reason)
 	info.si_code = SYS_SECCOMP;
 	info.si_call_addr = (void __user *)KSTK_EIP(current);
 	info.si_errno = reason;
-	info.si_arch = syscall_get_arch(current, task_pt_regs(current));
+	info.si_arch = syscall_get_arch();
 	info.si_syscall = syscall;
 	force_sig_info(SIGSYS, &info, current);
 }

From 210957c2bb3b4d111963bb296e2c42beb8721929 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Thu, 4 Sep 2014 16:01:08 +0100
Subject: [PATCH 0876/1185] arm64: add seccomp support

Note: This patch is from v6 of Takahiro's proposed
"arm64: add seccomp support" patchset (leecam@google.com)

secure_computing() is called first in syscall_trace_enter() so that a system
call will be aborted quickly without doing succeeding syscall tracing,
contrary to other cases, if seccomp rules deny that system call.

On compat tasks, syscall numbers for system calls allowed in seccomp mode 1
are different from those on normal tasks, and so the __NR_seccomp_xxx_32
macros need to be redefined.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>

Conflicts:
	arch/arm64/Kconfig
	arch/arm64/kernel/entry.S

Change-Id: I5ec44507d7e536df7ec9d62d30a418c26ef15100
---
 arch/arm64/Kconfig               | 15 +++++++++++++++
 arch/arm64/include/asm/ptrace.h  |  1 +
 arch/arm64/include/asm/seccomp.h | 25 +++++++++++++++++++++++++
 arch/arm64/include/asm/unistd.h  |  3 +++
 arch/arm64/kernel/entry.S        |  2 ++
 arch/arm64/kernel/ptrace.c       |  5 +++++
 6 files changed, 51 insertions(+)
 create mode 100644 arch/arm64/include/asm/seccomp.h

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 2221396c2a6c..43f1a2ee307c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -20,6 +20,7 @@ config ARM64
 	select GENERIC_TIME_VSYSCALL
 	select HARDIRQS_SW_RESEND
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DEBUG_BUGVERBOSE
 	select HAVE_DEBUG_KMEMLEAK
@@ -232,6 +233,20 @@ config ARMV7_COMPAT_CPUINFO
 
 source "mm/Kconfig"
 
+config SECCOMP
+	bool "Enable seccomp to safely compute untrusted bytecode"
+	---help---
+	  This kernel feature is useful for number crunching applications
+	  that may need to compute untrusted bytecode during their
+	  execution. By using pipes or other transports made available to
+	  the process as file descriptors supporting the read/write
+	  syscalls, it's possible to isolate those applications in
+	  their own address space using seccomp. Once seccomp is
+	  enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
+	  and the task is only allowed to execute a few safe syscalls
+	  defined by each seccomp mode.
+
+
 endmenu
 
 menu "Boot options"
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index c0ebe6fade63..58768774c7f4 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -66,6 +66,7 @@
  * with ptrace(PTRACE_SET_SYSCALL)
  */
 #define RET_SKIP_SYSCALL	-1
+#define RET_SKIP_SYSCALL_TRACE	-2
 #define IS_SKIP_SYSCALL(no)	((int)(no & 0xffffffff) == -1)
 
 #ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h
new file mode 100644
index 000000000000..bec3a43f7b17
--- /dev/null
+++ b/arch/arm64/include/asm/seccomp.h
@@ -0,0 +1,25 @@
+/*
+ * arch/arm64/include/asm/seccomp.h
+ *
+ * Copyright (C) 2014 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi <at> linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
+
+#include <asm/unistd.h>
+
+#ifdef CONFIG_COMPAT
+#define __NR_seccomp_read_32		__NR_compat_read
+#define __NR_seccomp_write_32		__NR_compat_write
+#define __NR_seccomp_exit_32		__NR_compat_exit
+#define __NR_seccomp_sigreturn_32	__NR_compat_rt_sigreturn
+#endif /* CONFIG_COMPAT */
+
+#include <asm-generic/seccomp.h>
+
+#endif /* _ASM_SECCOMP_H */
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 53f67c64a6af..2a3957faa702 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -30,6 +30,9 @@
  * Compat syscall numbers used by the AArch64 kernel.
  */
 #define __NR_compat_restart_syscall	0
+#define __NR_compat_exit		1
+#define __NR_compat_read		3
+#define __NR_compat_write		4
 #define __NR_compat_sigreturn		119
 #define __NR_compat_rt_sigreturn	173
 
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index a30fab9ea2a3..8927e52be32f 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -663,6 +663,8 @@ __sys_trace:
 	mov	x0, sp
 	bl	syscall_trace_enter
 	adr	lr, __sys_trace_return		// return address
+	cmp	w0, #RET_SKIP_SYSCALL_TRACE	// skip syscall and tracing?
+	b.eq	ret_to_user
 	cmp	w0, #RET_SKIP_SYSCALL		// skip syscall?
 	b.eq	__sys_trace_return_skipped
 	uxtw	scno, w0			// syscall number (possibly new)
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 2a6edfb0079b..f47e70fed6af 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -27,6 +27,7 @@
 #include <linux/smp.h>
 #include <linux/ptrace.h>
 #include <linux/user.h>
+#include <linux/seccomp.h>
 #include <linux/security.h>
 #include <linux/init.h>
 #include <linux/signal.h>
@@ -1110,6 +1111,10 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs)
 {
 	unsigned int saved_syscallno = regs->syscallno;
 
+	/* Do the secure computing check first; failures should be fast. */
+	if (secure_computing(regs->syscallno) == -1)
+		return RET_SKIP_SYSCALL_TRACE;
+
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
 

From a03a2426ea9f1d9dada33cf4a824f63e8f916c9d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 6 Nov 2013 14:57:36 +0100
Subject: [PATCH 0877/1185] arch: Introduce smp_load_acquire(),
 smp_store_release()

A number of situations currently require the heavyweight smp_mb(),
even though there is no need to order prior stores against later
loads.  Many architectures have much cheaper ways to handle these
situations, but the Linux kernel currently has no portable way
to make use of them.

This commit therefore supplies smp_load_acquire() and
smp_store_release() to remedy this situation.  The new
smp_load_acquire() primitive orders the specified load against
any subsequent reads or writes, while the new smp_store_release()
primitive orders the specified store against any prior reads or
writes.  These primitives allow array-based circular FIFOs to be
implemented without an smp_mb(), and also allow a theoretical
hole in rcu_assign_pointer() to be closed at no additional
expense on most architectures.

In addition, the RCU experience transitioning from explicit
smp_read_barrier_depends() and smp_wmb() to rcu_dereference()
and rcu_assign_pointer(), respectively, resulted in substantial
improvements in readability.  It therefore seems likely that
replacing other explicit barriers with smp_load_acquire() and
smp_store_release() will provide similar benefits.  It appears
that roughly half of the explicit barriers in core kernel code
might be so replaced.

[Changelog by PaulMck]
(cherry picked from commit 47933ad41a86a4a9b50bed7c9b9bd2ba242aac63)

Reviewed-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Michael Ellerman <michael@ellerman.id.au>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Victor Kaplansky <VICTORK@il.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Link: http://lkml.kernel.org/r/20131213150640.908486364@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm/include/asm/barrier.h      | 15 +++++++++
 arch/arm64/include/asm/barrier.h    | 50 +++++++++++++++++++++++++++++
 arch/ia64/include/asm/barrier.h     | 23 +++++++++++++
 arch/metag/include/asm/barrier.h    | 15 +++++++++
 arch/mips/include/asm/barrier.h     | 15 +++++++++
 arch/powerpc/include/asm/barrier.h  | 21 +++++++++++-
 arch/s390/include/asm/barrier.h     | 15 +++++++++
 arch/sparc/include/asm/barrier_64.h | 15 +++++++++
 arch/x86/include/asm/barrier.h      | 43 ++++++++++++++++++++++++-
 include/asm-generic/barrier.h       | 15 +++++++++
 include/linux/compiler.h            |  9 ++++++
 11 files changed, 234 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 8dcd9c702d90..b00ef075bc2e 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -59,6 +59,21 @@
 #define smp_wmb()	dmb()
 #endif
 
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	___p1;								\
+})
+
 #define read_barrier_depends()		do { } while(0)
 #define smp_read_barrier_depends()	do { } while(0)
 
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index d4a63338a53c..78e20ba8806b 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -35,10 +35,60 @@
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
+
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	___p1;								\
+})
+
 #else
+
 #define smp_mb()	asm volatile("dmb ish" : : : "memory")
 #define smp_rmb()	asm volatile("dmb ishld" : : : "memory")
 #define smp_wmb()	asm volatile("dmb ishst" : : : "memory")
+
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	switch (sizeof(*p)) {						\
+	case 4:								\
+		asm volatile ("stlr %w1, %0"				\
+				: "=Q" (*p) : "r" (v) : "memory");	\
+		break;							\
+	case 8:								\
+		asm volatile ("stlr %1, %0"				\
+				: "=Q" (*p) : "r" (v) : "memory");	\
+		break;							\
+	}								\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1;						\
+	compiletime_assert_atomic_type(*p);				\
+	switch (sizeof(*p)) {						\
+	case 4:								\
+		asm volatile ("ldar %w0, %1"				\
+			: "=r" (___p1) : "Q" (*p) : "memory");		\
+		break;							\
+	case 8:								\
+		asm volatile ("ldar %0, %1"				\
+			: "=r" (___p1) : "Q" (*p) : "memory");		\
+		break;							\
+	}								\
+	___p1;								\
+})
+
 #endif
 
 #define read_barrier_depends()		do { } while(0)
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index 60576e06b6fb..d0a69aa35e27 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -45,13 +45,36 @@
 # define smp_rmb()	rmb()
 # define smp_wmb()	wmb()
 # define smp_read_barrier_depends()	read_barrier_depends()
+
 #else
+
 # define smp_mb()	barrier()
 # define smp_rmb()	barrier()
 # define smp_wmb()	barrier()
 # define smp_read_barrier_depends()	do { } while(0)
+
 #endif
 
+/*
+ * IA64 GCC turns volatile stores into st.rel and volatile loads into ld.acq no
+ * need for asm trickery!
+ */
+
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	___p1;								\
+})
+
 /*
  * XXX check on this ---I suspect what Linus really wants here is
  * acquire vs release semantics but we can't discuss this stuff with
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index c90bfc6bf648..5d6b4b407dda 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -82,4 +82,19 @@ static inline void fence(void)
 #define smp_read_barrier_depends()     do { } while (0)
 #define set_mb(var, value) do { var = value; smp_mb(); } while (0)
 
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	___p1;								\
+})
+
 #endif /* _ASM_METAG_BARRIER_H */
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index 314ab5532019..52c5b61d7aba 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -180,4 +180,19 @@
 #define nudge_writes() mb()
 #endif
 
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	___p1;								\
+})
+
 #endif /* __ASM_BARRIER_H */
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index ae782254e731..f89da808ce31 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -45,11 +45,15 @@
 #    define SMPWMB      eieio
 #endif
 
+#define __lwsync()	__asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+
 #define smp_mb()	mb()
-#define smp_rmb()	__asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+#define smp_rmb()	__lwsync()
 #define smp_wmb()	__asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
 #define smp_read_barrier_depends()	read_barrier_depends()
 #else
+#define __lwsync()	barrier()
+
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
@@ -65,4 +69,19 @@
 #define data_barrier(x)	\
 	asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory");
 
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	__lwsync();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	__lwsync();							\
+	___p1;								\
+})
+
 #endif /* _ASM_POWERPC_BARRIER_H */
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 16760eeb79b0..578680f6207a 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -32,4 +32,19 @@
 
 #define set_mb(var, value)		do { var = value; mb(); } while (0)
 
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	___p1;								\
+})
+
 #endif /* __ASM_BARRIER_H */
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 95d45986f908..b5aad964558e 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -53,4 +53,19 @@ do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
 
 #define smp_read_barrier_depends()	do { } while(0)
 
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	___p1;								\
+})
+
 #endif /* !(__SPARC64_BARRIER_H) */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index c6cd358a1eec..04a48903b2eb 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -92,12 +92,53 @@
 #endif
 #define smp_read_barrier_depends()	read_barrier_depends()
 #define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#else
+#else /* !SMP */
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
 #define smp_read_barrier_depends()	do { } while (0)
 #define set_mb(var, value) do { var = value; barrier(); } while (0)
+#endif /* SMP */
+
+#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
+
+/*
+ * For either of these options x86 doesn't have a strong TSO memory
+ * model and we should fall back to full barriers.
+ */
+
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	___p1;								\
+})
+
+#else /* regular x86 TSO memory ordering */
+
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	___p1;								\
+})
+
 #endif
 
 /*
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 639d7a4d033b..01613b382b0e 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -46,5 +46,20 @@
 #define read_barrier_depends()		do {} while (0)
 #define smp_read_barrier_depends()	do {} while (0)
 
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	smp_mb();							\
+	___p1;								\
+})
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __ASM_GENERIC_BARRIER_H */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 92669cd182a6..fe7a686dfd8d 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -298,6 +298,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
 #endif
 
+/* Is this type a native word size -- useful for atomic operations */
+#ifndef __native_word
+# define __native_word(t) (sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
+#endif
+
 /* Compile time object size, -1 for unknown */
 #ifndef __compiletime_object_size
 # define __compiletime_object_size(obj) -1
@@ -337,6 +342,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 #define compiletime_assert(condition, msg) \
 	_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
 
+#define compiletime_assert_atomic_type(t)				\
+	compiletime_assert(__native_word(t),				\
+		"Need native word sized stores/loads for atomicity.")
+
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),

From 987a0f1102321853565c4bfecde6a5a58ac6db11 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 21 Jan 2014 15:49:56 -0800
Subject: [PATCH 0878/1185] introduce for_each_thread() to replace the buggy
 while_each_thread()

while_each_thread() and next_thread() should die, almost every lockless
usage is wrong.

1. Unless g == current, the lockless while_each_thread() is not safe.

   while_each_thread(g, t) can loop forever if g exits, next_thread()
   can't reach the unhashed thread in this case. Note that this can
   happen even if g is the group leader, it can exec.

2. Even if while_each_thread() itself was correct, people often use
   it wrongly.

   It was never safe to just take rcu_read_lock() and loop unless
   you verify that pid_alive(g) == T, even the first next_thread()
   can point to the already freed/reused memory.

This patch adds signal_struct->thread_head and task->thread_node to
create the normal rcu-safe list with the stable head.  The new
for_each_thread(g, t) helper is always safe under rcu_read_lock() as
long as this task_struct can't go away.

Note: of course it is ugly to have both task_struct->thread_node and the
old task_struct->thread_group, we will kill it later, after we change
the users of while_each_thread() to use for_each_thread().

Perhaps we can kill it even before we convert all users, we can
reimplement next_thread(t) using the new thread_head/thread_node.  But
we can't do this right now because this will lead to subtle behavioural
changes.  For example, do/while_each_thread() always sees at least one
task, while for_each_thread() can do nothing if the whole thread group
has died.  Or thread_group_empty(), currently its semantics is not clear
unless thread_group_leader(p) and we need to audit the callers before we
can change it.

So this patch adds the new interface which has to coexist with the old
one for some time, hopefully the next changes will be more or less
straightforward and the old one will go away soon.

Bug 200004307

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Sergey Dyasly <dserrg@gmail.com>
Tested-by: Sergey Dyasly <dserrg@gmail.com>
Reviewed-by: Sameer Nanda <snanda@chromium.org>
Acked-by: David Rientjes <rientjes@google.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mandeep Singh Baines <msb@chromium.org>
Cc: "Ma, Xindong" <xindong.ma@intel.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: "Tu, Xiaobing" <xiaobing.tu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
(cherry picked from commit 0c740d0afc3bff0a097ad03a1c8df92757516f5c)
Signed-off-by: Sri Krishna chowdary <schowdary@nvidia.com>
Change-Id: Id689cb1383ceba2561b66188d88258619b68f5c6
Reviewed-on: http://git-master/r/419041
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 include/linux/init_task.h |  2 ++
 include/linux/sched.h     | 12 ++++++++++++
 kernel/exit.c             |  1 +
 kernel/fork.c             |  8 ++++++++
 4 files changed, 23 insertions(+)

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 5cd0f0949927..998f4dfedecf 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -40,6 +40,7 @@ extern struct fs_struct init_fs;
 
 #define INIT_SIGNALS(sig) {						\
 	.nr_threads	= 1,						\
+	.thread_head	= LIST_HEAD_INIT(init_task.thread_node),	\
 	.wait_chldexit	= __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
 	.shared_pending	= { 						\
 		.list = LIST_HEAD_INIT(sig.shared_pending.list),	\
@@ -213,6 +214,7 @@ extern struct task_group root_task_group;
 		[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),		\
 	},								\
 	.thread_group	= LIST_HEAD_INIT(tsk.thread_group),		\
+	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),	\
 	INIT_IDS							\
 	INIT_PERF_EVENTS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7ccfeb1e4067..036d74cf457f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -476,6 +476,7 @@ struct signal_struct {
 	atomic_t		sigcnt;
 	atomic_t		live;
 	int			nr_threads;
+	struct list_head	thread_head;
 
 	wait_queue_head_t	wait_chldexit;	/* for wait4() */
 
@@ -1156,6 +1157,7 @@ struct task_struct {
 	/* PID/PID hash table linkage. */
 	struct pid_link pids[PIDTYPE_MAX];
 	struct list_head thread_group;
+	struct list_head thread_node;
 
 	struct completion *vfork_done;		/* for vfork() */
 	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
@@ -2166,6 +2168,16 @@ extern bool current_is_single_threaded(void);
 #define while_each_thread(g, t) \
 	while ((t = next_thread(t)) != g)
 
+#define __for_each_thread(signal, t)	\
+	list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)
+
+#define for_each_thread(p, t)		\
+	__for_each_thread((p)->signal, t)
+
+/* Careful: this is a double loop, 'break' won't work as expected. */
+#define for_each_process_thread(p, t)	\
+	for_each_process(p) for_each_thread(p, t)
+
 static inline int get_nr_threads(struct task_struct *tsk)
 {
 	return tsk->signal->nr_threads;
diff --git a/kernel/exit.c b/kernel/exit.c
index 6a057750ebbb..8e2166363b4b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -74,6 +74,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
 		__this_cpu_dec(process_counts);
 	}
 	list_del_rcu(&p->thread_group);
+	list_del_rcu(&p->thread_node);
 }
 
 /*
diff --git a/kernel/fork.c b/kernel/fork.c
index 41671a5d637d..32a78ff21949 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1061,6 +1061,11 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->nr_threads = 1;
 	atomic_set(&sig->live, 1);
 	atomic_set(&sig->sigcnt, 1);
+
+	/* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */
+	sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node);
+	tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head);
+
 	init_waitqueue_head(&sig->wait_chldexit);
 	sig->curr_target = tsk;
 	init_sigpending(&sig->shared_pending);
@@ -1487,6 +1492,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			list_add_tail(&p->sibling, &p->real_parent->children);
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
 			__this_cpu_inc(process_counts);
+		} else {
+			list_add_tail_rcu(&p->thread_node,
+					  &p->signal->thread_head);
 		}
 		attach_pid(p, PIDTYPE_PID, pid);
 		nr_threads++;

From 2a30a4386e4a7e1283157c4cf4cfcc0306b22ac8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 21 May 2014 15:02:11 -0700
Subject: [PATCH 0879/1185] seccomp: create internal mode-setting function

In preparation for having other callers of the seccomp mode setting
logic, split the prctl entry point away from the core logic that performs
seccomp mode setting.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 kernel/seccomp.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index eda2da3df822..d445b9c24d27 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -467,7 +467,7 @@ long prctl_get_seccomp(void)
 }
 
 /**
- * prctl_set_seccomp: configures current->seccomp.mode
+ * seccomp_set_mode: internal function for setting seccomp mode
  * @seccomp_mode: requested mode to use
  * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
  *
@@ -480,7 +480,7 @@ long prctl_get_seccomp(void)
  *
  * Returns 0 on success or -EINVAL on failure.
  */
-long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
+static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter)
 {
 	long ret = -EINVAL;
 
@@ -511,3 +511,15 @@ long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
 out:
 	return ret;
 }
+
+/**
+ * prctl_set_seccomp: configures current->seccomp.mode
+ * @seccomp_mode: requested mode to use
+ * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
+ *
+ * Returns 0 on success or -EINVAL on failure.
+ */
+long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
+{
+	return seccomp_set_mode(seccomp_mode, filter);
+}

From b8a9cff6dbe9cfddbb4d17e2dea496e523544687 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 25 Jun 2014 15:38:02 -0700
Subject: [PATCH 0880/1185] seccomp: extract check/assign mode helpers

To support splitting mode 1 from mode 2, extract the mode checking and
assignment logic into common functions.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 kernel/seccomp.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d445b9c24d27..b5f2538755bf 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -219,7 +219,23 @@ static u32 seccomp_run_filters(int syscall)
 	}
 	return ret;
 }
+#endif /* CONFIG_SECCOMP_FILTER */
 
+static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
+{
+	if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
+		return false;
+
+	return true;
+}
+
+static inline void seccomp_assign_mode(unsigned long seccomp_mode)
+{
+	current->seccomp.mode = seccomp_mode;
+	set_tsk_thread_flag(current, TIF_SECCOMP);
+}
+
+#ifdef CONFIG_SECCOMP_FILTER
 /**
  * seccomp_attach_filter: Attaches a seccomp filter to current.
  * @fprog: BPF program to install
@@ -484,8 +500,7 @@ static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter)
 {
 	long ret = -EINVAL;
 
-	if (current->seccomp.mode &&
-	    current->seccomp.mode != seccomp_mode)
+	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
 	switch (seccomp_mode) {
@@ -506,8 +521,7 @@ static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter)
 		goto out;
 	}
 
-	current->seccomp.mode = seccomp_mode;
-	set_thread_flag(TIF_SECCOMP);
+	seccomp_assign_mode(seccomp_mode);
 out:
 	return ret;
 }

From 8908dde5a7fdca974374b0dbe6dfb10f69df7216 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 25 Jun 2014 15:55:25 -0700
Subject: [PATCH 0881/1185] seccomp: split mode setting routines

Separates the two mode setting paths to make things more readable with
fewer #ifdefs within function bodies.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 kernel/seccomp.c | 71 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 23 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index b5f2538755bf..dab81904040f 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -483,48 +483,66 @@ long prctl_get_seccomp(void)
 }
 
 /**
- * seccomp_set_mode: internal function for setting seccomp mode
- * @seccomp_mode: requested mode to use
- * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
- *
- * This function may be called repeatedly with a @seccomp_mode of
- * SECCOMP_MODE_FILTER to install additional filters.  Every filter
- * successfully installed will be evaluated (in reverse order) for each system
- * call the task makes.
+ * seccomp_set_mode_strict: internal function for setting strict seccomp
  *
  * Once current->seccomp.mode is non-zero, it may not be changed.
  *
  * Returns 0 on success or -EINVAL on failure.
  */
-static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter)
+static long seccomp_set_mode_strict(void)
 {
+	const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
 	long ret = -EINVAL;
 
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
-	switch (seccomp_mode) {
-	case SECCOMP_MODE_STRICT:
-		ret = 0;
 #ifdef TIF_NOTSC
-		disable_TSC();
+	disable_TSC();
 #endif
-		break;
+	seccomp_assign_mode(seccomp_mode);
+	ret = 0;
+
+out:
+
+	return ret;
+}
+
 #ifdef CONFIG_SECCOMP_FILTER
-	case SECCOMP_MODE_FILTER:
-		ret = seccomp_attach_user_filter(filter);
-		if (ret)
-			goto out;
-		break;
-#endif
-	default:
+/**
+ * seccomp_set_mode_filter: internal function for setting seccomp filter
+ * @filter: struct sock_fprog containing filter
+ *
+ * This function may be called repeatedly to install additional filters.
+ * Every filter successfully installed will be evaluated (in reverse order)
+ * for each system call the task makes.
+ *
+ * Once current->seccomp.mode is non-zero, it may not be changed.
+ *
+ * Returns 0 on success or -EINVAL on failure.
+ */
+static long seccomp_set_mode_filter(char __user *filter)
+{
+	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
+	long ret = -EINVAL;
+
+	if (!seccomp_may_assign_mode(seccomp_mode))
+		goto out;
+
+	ret = seccomp_attach_user_filter(filter);
+	if (ret)
 		goto out;
-	}
 
 	seccomp_assign_mode(seccomp_mode);
 out:
 	return ret;
 }
+#else
+static inline long seccomp_set_mode_filter(char __user *filter)
+{
+	return -EINVAL;
+}
+#endif
 
 /**
  * prctl_set_seccomp: configures current->seccomp.mode
@@ -535,5 +553,12 @@ static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter)
  */
 long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
 {
-	return seccomp_set_mode(seccomp_mode, filter);
+	switch (seccomp_mode) {
+	case SECCOMP_MODE_STRICT:
+		return seccomp_set_mode_strict();
+	case SECCOMP_MODE_FILTER:
+		return seccomp_set_mode_filter(filter);
+	default:
+		return -EINVAL;
+	}
 }

From e985fd474debedb269fba27006eda50d0b6f07ef Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 25 Jun 2014 16:08:24 -0700
Subject: [PATCH 0882/1185] seccomp: add "seccomp" syscall

This adds the new "seccomp" syscall with both an "operation" and "flags"
parameter for future expansion. The third argument is a pointer value,
used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must
be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...).

In addition to the TSYNC flag later in this patch series, there is a
non-zero chance that this syscall could be used for configuring a fixed
argument area for seccomp-tracer-aware processes to pass syscall arguments
in the future. Hence, the use of "seccomp" not simply "seccomp_add_filter"
for this syscall. Additionally, this syscall uses operation, flags,
and user pointer for arguments because strictly passing arguments via
a user pointer would mean seccomp itself would be unable to trivially
filter the seccomp syscall itself.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>

Conflicts:
	arch/x86/syscalls/syscall_32.tbl
	arch/x86/syscalls/syscall_64.tbl
	include/uapi/asm-generic/unistd.h
	kernel/seccomp.c

And fixup of unistd32.h to truly enable sys_seccomp.

Change-Id: I95bea02382c52007d22e5e9dc563c7d055c2c83f
---
 arch/Kconfig                      |  1 +
 arch/arm64/include/asm/unistd32.h |  2 +-
 arch/x86/syscalls/syscall_32.tbl  |  1 +
 arch/x86/syscalls/syscall_64.tbl  |  1 +
 include/linux/syscalls.h          |  2 ++
 include/uapi/asm-generic/unistd.h | 12 ++++++-
 include/uapi/linux/seccomp.h      |  4 +++
 kernel/seccomp.c                  | 55 ++++++++++++++++++++++++++++---
 kernel/sys_ni.c                   |  3 ++
 9 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index a4429bcd609e..84c94a89e75b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -331,6 +331,7 @@ config HAVE_ARCH_SECCOMP_FILTER
 	  - secure_computing is called from a ptrace_event()-safe context
 	  - secure_computing return value is checked and a return value of -1
 	    results in the system call being skipped immediately.
+	  - seccomp syscall wired up
 
 config SECCOMP_FILTER
 	def_bool y
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 63513ae2b59e..76d094565090 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -788,4 +788,4 @@ __SYSCALL(381, sys_ni_syscall)
 /* #define __NR_renameat2 382 */
 __SYSCALL(382, sys_ni_syscall)
 #define __NR_seccomp 383
-__SYSCALL(__NR_seccomp, sys_ni_syscall)
+__SYSCALL(__NR_seccomp, sys_seccomp)
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index aabfb8380a1c..8605e9e0f19a 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -357,3 +357,4 @@
 348	i386	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
 349	i386	kcmp			sys_kcmp
 350	i386	finit_module		sys_finit_module
+351	i386	seccomp			sys_seccomp
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 38ae65dfd14f..4cb491567b85 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -320,6 +320,7 @@
 311	64	process_vm_writev	sys_process_vm_writev
 312	common	kcmp			sys_kcmp
 313	common	finit_module		sys_finit_module
+314	common	seccomp			sys_seccomp
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 4147d700a293..2a955dcc863c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -841,4 +841,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
 			 unsigned long idx1, unsigned long idx2);
 asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
+asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
+			    const char __user *uargs);
 #endif
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 0cc74c4403e4..b422ad5d238b 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -692,9 +692,19 @@ __SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
 __SYSCALL(__NR_kcmp, sys_kcmp)
 #define __NR_finit_module 273
 __SYSCALL(__NR_finit_module, sys_finit_module)
+/* Backporting seccomp, skip a few ...
+ * #define __NR_sched_setattr 274
+__SYSCALL(__NR_sched_setattr, sys_sched_setattr)
+ * #define __NR_sched_getattr 275
+__SYSCALL(__NR_sched_getattr, sys_sched_getattr)
+ * #define __NR_renameat2 276
+__SYSCALL(__NR_renameat2, sys_renameat2)
+ */
+#define __NR_seccomp 277
+__SYSCALL(__NR_seccomp, sys_seccomp)
 
 #undef __NR_syscalls
-#define __NR_syscalls 274
+#define __NR_syscalls 278
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index ac2dc9f72973..b258878ba754 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -10,6 +10,10 @@
 #define SECCOMP_MODE_STRICT	1 /* uses hard-coded filter. */
 #define SECCOMP_MODE_FILTER	2 /* uses user-supplied filter. */
 
+/* Valid operations for seccomp syscall. */
+#define SECCOMP_SET_MODE_STRICT	0
+#define SECCOMP_SET_MODE_FILTER	1
+
 /*
  * All BPF programs must return a 32-bit value.
  * The bottom 16-bits are for optional return data.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index dab81904040f..d4b61b967423 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -18,6 +18,7 @@
 #include <linux/compat.h>
 #include <linux/sched.h>
 #include <linux/seccomp.h>
+#include <linux/syscalls.h>
 
 /* #define SECCOMP_DEBUG 1 */
 
@@ -309,7 +310,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
  *
  * Returns 0 on success and non-zero otherwise.
  */
-long seccomp_attach_user_filter(char __user *user_filter)
+static long seccomp_attach_user_filter(const char __user *user_filter)
 {
 	struct sock_fprog fprog;
 	long ret = -EFAULT;
@@ -511,6 +512,7 @@ static long seccomp_set_mode_strict(void)
 #ifdef CONFIG_SECCOMP_FILTER
 /**
  * seccomp_set_mode_filter: internal function for setting seccomp filter
+ * @flags:  flags to change filter behavior
  * @filter: struct sock_fprog containing filter
  *
  * This function may be called repeatedly to install additional filters.
@@ -521,11 +523,16 @@ static long seccomp_set_mode_strict(void)
  *
  * Returns 0 on success or -EINVAL on failure.
  */
-static long seccomp_set_mode_filter(char __user *filter)
+static long seccomp_set_mode_filter(unsigned int flags,
+				    const char __user *filter)
 {
 	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
 	long ret = -EINVAL;
 
+	/* Validate flags. */
+	if (flags != 0)
+		goto out;
+
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
@@ -538,12 +545,35 @@ static long seccomp_set_mode_filter(char __user *filter)
 	return ret;
 }
 #else
-static inline long seccomp_set_mode_filter(char __user *filter)
+static inline long seccomp_set_mode_filter(unsigned int flags,
+					   const char __user *filter)
 {
 	return -EINVAL;
 }
 #endif
 
+/* Common entry point for both prctl and syscall. */
+static long do_seccomp(unsigned int op, unsigned int flags,
+		       const char __user *uargs)
+{
+	switch (op) {
+	case SECCOMP_SET_MODE_STRICT:
+		if (flags != 0 || uargs != NULL)
+			return -EINVAL;
+		return seccomp_set_mode_strict();
+	case SECCOMP_SET_MODE_FILTER:
+		return seccomp_set_mode_filter(flags, uargs);
+	default:
+		return -EINVAL;
+	}
+}
+
+SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
+			 const char __user *, uargs)
+{
+	return do_seccomp(op, flags, uargs);
+}
+
 /**
  * prctl_set_seccomp: configures current->seccomp.mode
  * @seccomp_mode: requested mode to use
@@ -553,12 +583,27 @@ static inline long seccomp_set_mode_filter(char __user *filter)
  */
 long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
 {
+	unsigned int op;
+	char __user *uargs;
+
 	switch (seccomp_mode) {
 	case SECCOMP_MODE_STRICT:
-		return seccomp_set_mode_strict();
+		op = SECCOMP_SET_MODE_STRICT;
+		/*
+		 * Setting strict mode through prctl always ignored filter,
+		 * so make sure it is always NULL here to pass the internal
+		 * check in do_seccomp().
+		 */
+		uargs = NULL;
+		break;
 	case SECCOMP_MODE_FILTER:
-		return seccomp_set_mode_filter(filter);
+		op = SECCOMP_SET_MODE_FILTER;
+		uargs = filter;
+		break;
 	default:
 		return -EINVAL;
 	}
+
+	/* prctl interface doesn't have flags, so they are always zero. */
+	return do_seccomp(op, 0, uargs);
 }
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 7078052284fd..7e7fc0a082c4 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -209,3 +209,6 @@ cond_syscall(compat_sys_open_by_handle_at);
 
 /* compare kernel pointers */
 cond_syscall(sys_kcmp);
+
+/* operate on Secure Computing state */
+cond_syscall(sys_seccomp);

From 9d0ff694bc22fb458acb763811a677696c60725b Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 21 May 2014 15:23:46 -0700
Subject: [PATCH 0883/1185] sched: move no_new_privs into new atomic flags

Since seccomp transitions between threads require updates to the
no_new_privs flag to be atomic, the flag must be part of an atomic flag
set. This moves the nnp flag into a separate task field, and introduces
accessors.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>

Conflicts:
	kernel/sys.c
---
 fs/exec.c                  |  4 ++--
 include/linux/sched.h      | 18 +++++++++++++++---
 kernel/seccomp.c           |  2 +-
 kernel/sys.c               |  4 ++--
 security/apparmor/domain.c |  4 ++--
 5 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index ffd7a813ad3d..cb5fb9aa320e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1239,7 +1239,7 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
 	 * This isn't strictly necessary, but it makes it harder for LSMs to
 	 * mess up.
 	 */
-	if (current->no_new_privs)
+	if (task_no_new_privs(current))
 		bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
 
 	n_fs = 1;
@@ -1286,7 +1286,7 @@ int prepare_binprm(struct linux_binprm *bprm)
 	bprm->cred->egid = current_egid();
 
 	if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
-	    !current->no_new_privs &&
+	    !task_no_new_privs(current) &&
 	    kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
 	    kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
 		/* Set-uid? */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 036d74cf457f..da5fe76a069b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1118,13 +1118,12 @@ struct task_struct {
 				 * execve */
 	unsigned in_iowait:1;
 
-	/* task may not gain privileges */
-	unsigned no_new_privs:1;
-
 	/* Revert to default priority/policy when forking */
 	unsigned sched_reset_on_fork:1;
 	unsigned sched_contributes_to_load:1;
 
+	unsigned long atomic_flags; /* Flags needing atomic access. */
+
 	pid_t pid;
 	pid_t tgid;
 
@@ -1689,6 +1688,19 @@ static inline void memalloc_noio_restore(unsigned int flags)
 	current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
 }
 
+/* Per-process atomic flags. */
+#define PFA_NO_NEW_PRIVS 0x00000001	/* May not gain new privileges. */
+
+static inline bool task_no_new_privs(struct task_struct *p)
+{
+	return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
+}
+
+static inline void task_set_no_new_privs(struct task_struct *p)
+{
+	set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
+}
+
 /*
  * task->jobctl flags
  */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d4b61b967423..5390739066a6 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -264,7 +264,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	 * This avoids scenarios where unprivileged tasks can affect the
 	 * behavior of privileged children.
 	 */
-	if (!current->no_new_privs &&
+	if (!task_no_new_privs(current) &&
 	    security_capable_noaudit(current_cred(), current_user_ns(),
 				     CAP_SYS_ADMIN) != 0)
 		return -EACCES;
diff --git a/kernel/sys.c b/kernel/sys.c
index 65d3e55bd282..0b08c9f000f3 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2427,12 +2427,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		if (arg2 != 1 || arg3 || arg4 || arg5)
 			return -EINVAL;
 
-		current->no_new_privs = 1;
+		task_set_no_new_privs(current);
 		break;
 	case PR_GET_NO_NEW_PRIVS:
 		if (arg2 || arg3 || arg4 || arg5)
 			return -EINVAL;
-		return current->no_new_privs ? 1 : 0;
+		return task_no_new_privs(current) ? 1 : 0;
 	case PR_SET_VMA:
 		error = prctl_set_vma(arg2, arg3, arg4, arg5);
 		break;
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 859abdaac1ea..9aaa4e72cc1f 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -629,7 +629,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
 	 * There is no exception for unconfined as change_hat is not
 	 * available.
 	 */
-	if (current->no_new_privs)
+	if (task_no_new_privs(current))
 		return -EPERM;
 
 	/* released below */
@@ -780,7 +780,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec,
 	 * no_new_privs is set because this aways results in a reduction
 	 * of permissions.
 	 */
-	if (current->no_new_privs && !unconfined(profile)) {
+	if (task_no_new_privs(current) && !unconfined(profile)) {
 		put_cred(cred);
 		return -EPERM;
 	}

From b6a12bf4dd762236c7f637b19cfe10a268304b9b Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 27 Jun 2014 15:16:33 -0700
Subject: [PATCH 0884/1185] seccomp: split filter prep from check and apply

In preparation for adding seccomp locking, move filter creation away
from where it is checked and applied. This will allow for locking where
no memory allocation is happening. The validation, filter attachment,
and seccomp mode setting can all happen under the future locks.

For extreme defensiveness, I've added a BUG_ON check for the calculated
size of the buffer allocation in case BPF_MAXINSN ever changes, which
shouldn't ever happen. The compiler should actually optimize out this
check since the test above it makes it impossible.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>

Conflicts:
	kernel/seccomp.c
---
 kernel/seccomp.c | 89 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 66 insertions(+), 23 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 5390739066a6..1afc10b05eca 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -18,6 +18,7 @@
 #include <linux/compat.h>
 #include <linux/sched.h>
 #include <linux/seccomp.h>
+#include <linux/slab.h>
 #include <linux/syscalls.h>
 
 /* #define SECCOMP_DEBUG 1 */
@@ -27,7 +28,6 @@
 #include <linux/filter.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
-#include <linux/slab.h>
 #include <linux/tracehook.h>
 #include <linux/uaccess.h>
 
@@ -238,12 +238,12 @@ static inline void seccomp_assign_mode(unsigned long seccomp_mode)
 
 #ifdef CONFIG_SECCOMP_FILTER
 /**
- * seccomp_attach_filter: Attaches a seccomp filter to current.
+ * seccomp_prepare_filter: Prepares a seccomp filter for use.
  * @fprog: BPF program to install
  *
- * Returns 0 on success or an errno on failure.
+ * Returns filter on success or an ERR_PTR on failure.
  */
-static long seccomp_attach_filter(struct sock_fprog *fprog)
+static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
 {
 	struct seccomp_filter *filter;
 	unsigned long fp_size = fprog->len * sizeof(struct sock_filter);
@@ -251,12 +251,13 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	long ret;
 
 	if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
+	BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
 
 	for (filter = current->seccomp.filter; filter; filter = filter->prev)
 		total_insns += filter->len + 4;  /* include a 4 instr penalty */
 	if (total_insns > MAX_INSNS_PER_PATH)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	/*
 	 * Installing a seccomp filter requires that the task have
@@ -267,13 +268,13 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	if (!task_no_new_privs(current) &&
 	    security_capable_noaudit(current_cred(), current_user_ns(),
 				     CAP_SYS_ADMIN) != 0)
-		return -EACCES;
+		return ERR_PTR(-EACCES);
 
 	/* Allocate a new seccomp_filter */
 	filter = kzalloc(sizeof(struct seccomp_filter) + fp_size,
 			 GFP_KERNEL|__GFP_NOWARN);
 	if (!filter)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);;
 	atomic_set(&filter->usage, 1);
 	filter->len = fprog->len;
 
@@ -292,28 +293,24 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	if (ret)
 		goto fail;
 
-	/*
-	 * If there is an existing filter, make it the prev and don't drop its
-	 * task reference.
-	 */
-	filter->prev = current->seccomp.filter;
-	current->seccomp.filter = filter;
-	return 0;
+	return filter;
+
 fail:
 	kfree(filter);
-	return ret;
+	return ERR_PTR(ret);
 }
 
 /**
- * seccomp_attach_user_filter - attaches a user-supplied sock_fprog
+ * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
  * @user_filter: pointer to the user data containing a sock_fprog.
  *
  * Returns 0 on success and non-zero otherwise.
  */
-static long seccomp_attach_user_filter(const char __user *user_filter)
+static struct seccomp_filter *
+seccomp_prepare_user_filter(const char __user *user_filter)
 {
 	struct sock_fprog fprog;
-	long ret = -EFAULT;
+	struct seccomp_filter *filter = ERR_PTR(-EFAULT);
 
 #ifdef CONFIG_COMPAT
 	if (is_compat_task()) {
@@ -326,9 +323,39 @@ static long seccomp_attach_user_filter(const char __user *user_filter)
 #endif
 	if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
 		goto out;
-	ret = seccomp_attach_filter(&fprog);
+	filter = seccomp_prepare_filter(&fprog);
 out:
-	return ret;
+	return filter;
+}
+
+/**
+ * seccomp_attach_filter: validate and attach filter
+ * @flags:  flags to change filter behavior
+ * @filter: seccomp filter to add to the current process
+ *
+ * Returns 0 on success, -ve on error.
+ */
+static long seccomp_attach_filter(unsigned int flags,
+				  struct seccomp_filter *filter)
+{
+	unsigned long total_insns;
+	struct seccomp_filter *walker;
+
+	/* Validate resulting filter length. */
+	total_insns = filter->len;
+	for (walker = current->seccomp.filter; walker; walker = walker->prev)
+		total_insns += walker->len + 4;  /* 4 instr penalty */
+	if (total_insns > MAX_INSNS_PER_PATH)
+		return -ENOMEM;
+
+	/*
+	 * If there is an existing filter, make it the prev and don't drop its
+	 * task reference.
+	 */
+	filter->prev = current->seccomp.filter;
+	current->seccomp.filter = filter;
+
+	return 0;
 }
 
 /* get_seccomp_filter - increments the reference count of the filter on @tsk */
@@ -341,6 +368,13 @@ void get_seccomp_filter(struct task_struct *tsk)
 	atomic_inc(&orig->usage);
 }
 
+static inline void seccomp_filter_free(struct seccomp_filter *filter)
+{
+	if (filter) {
+		kfree(filter);
+	}
+}
+
 /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
 void put_seccomp_filter(struct task_struct *tsk)
 {
@@ -349,7 +383,7 @@ void put_seccomp_filter(struct task_struct *tsk)
 	while (orig && atomic_dec_and_test(&orig->usage)) {
 		struct seccomp_filter *freeme = orig;
 		orig = orig->prev;
-		kfree(freeme);
+		seccomp_filter_free(freeme);
 	}
 }
 
@@ -527,21 +561,30 @@ static long seccomp_set_mode_filter(unsigned int flags,
 				    const char __user *filter)
 {
 	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
+	struct seccomp_filter *prepared = NULL;
 	long ret = -EINVAL;
 
 	/* Validate flags. */
 	if (flags != 0)
 		goto out;
 
+	/* Prepare the new filter before holding any locks. */
+	prepared = seccomp_prepare_user_filter(filter);
+	if (IS_ERR(prepared))
+		return PTR_ERR(prepared);
+
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
-	ret = seccomp_attach_user_filter(filter);
+	ret = seccomp_attach_filter(flags, prepared);
 	if (ret)
 		goto out;
+	/* Do not free the successfully attached filter. */
+	prepared = NULL;
 
 	seccomp_assign_mode(seccomp_mode);
 out:
+	seccomp_filter_free(prepared);
 	return ret;
 }
 #else

From 61b6b882a0abfeb627d25a069cfa1d232b84c8eb Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 27 Jun 2014 15:18:48 -0700
Subject: [PATCH 0885/1185] seccomp: introduce writer locking

Normally, task_struct.seccomp.filter is only ever read or modified by
the task that owns it (current). This property aids in fast access
during system call filtering as read access is lockless.

Updating the pointer from another task, however, opens up race
conditions. To allow cross-thread filter pointer updates, writes to the
seccomp fields are now protected by the sighand spinlock (which is shared
by all threads in the thread group). Read access remains lockless because
pointer updates themselves are atomic.  However, writes (or cloning)
often entail additional checking (like maximum instruction counts)
which require locking to perform safely.

In the case of cloning threads, the child is invisible to the system
until it enters the task list. To make sure a child can't be cloned from
a thread and left in a prior state, seccomp duplication is additionally
moved under the sighand lock. Then parent and child are certain to have
the same seccomp state when they exit the lock.

Based on patches by Will Drewry and David Drysdale.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>

Conflicts:
	kernel/fork.c
---
 include/linux/seccomp.h |  6 ++---
 kernel/fork.c           | 49 ++++++++++++++++++++++++++++++++++++++++-
 kernel/seccomp.c        | 16 +++++++++++++-
 3 files changed, 66 insertions(+), 5 deletions(-)

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 6f19cfd1840e..9ab63a574d40 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -14,11 +14,11 @@ struct seccomp_filter;
  *
  * @mode:  indicates one of the valid values above for controlled
  *         system calls available to a process.
- * @filter: The metadata and ruleset for determining what system calls
- *          are allowed for a task.
+ * @filter: must always point to a valid seccomp-filter or NULL as it is
+ *          accessed without locking during system call entry.
  *
  *          @filter must only be accessed from the context of current as there
- *          is no locking.
+ *          is no read locking.
  */
 struct seccomp {
 	int mode;
diff --git a/kernel/fork.c b/kernel/fork.c
index 32a78ff21949..a0abbb536e9a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -327,6 +327,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		goto free_ti;
 
 	tsk->stack = ti;
+#ifdef CONFIG_SECCOMP
+	/*
+	 * We must handle setting up seccomp filters once we're under
+	 * the sighand lock in case orig has changed between now and
+	 * then. Until then, filter must be NULL to avoid messing up
+	 * the usage counts on the error path calling free_task.
+	 */
+	tsk->seccomp.filter = NULL;
+#endif
 
 	setup_thread_stack(tsk, orig);
 	clear_user_return_notifier(tsk);
@@ -1107,6 +1116,39 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 	p->flags = new_flags;
 }
 
+static void copy_seccomp(struct task_struct *p)
+{
+#ifdef CONFIG_SECCOMP
+	/*
+	 * Must be called with sighand->lock held, which is common to
+	 * all threads in the group. Holding cred_guard_mutex is not
+	 * needed because this new task is not yet running and cannot
+	 * be racing exec.
+	 */
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
+	/* Ref-count the new filter user, and assign it. */
+	get_seccomp_filter(current);
+	p->seccomp = current->seccomp;
+
+	/*
+	 * Explicitly enable no_new_privs here in case it got set
+	 * between the task_struct being duplicated and holding the
+	 * sighand lock. The seccomp state and nnp must be in sync.
+	 */
+	if (task_no_new_privs(current))
+		task_set_no_new_privs(p);
+
+	/*
+	 * If the parent gained a seccomp mode after copying thread
+	 * flags and before we held the sighand lock, we have
+	 * to manually enable the seccomp thread flag here.
+	 */
+	if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
+		set_tsk_thread_flag(p, TIF_SECCOMP);
+#endif
+}
+
 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 {
 	current->clear_child_tid = tidptr;
@@ -1210,7 +1252,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto fork_out;
 
 	ftrace_graph_init_task(p);
-	get_seccomp_filter(p);
 
 	rt_mutex_init_task(p);
 
@@ -1452,6 +1493,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	spin_lock(&current->sighand->siglock);
 
+	/*
+	 * Copy seccomp details explicitly here, in case they were changed
+	 * before holding sighand lock.
+	 */
+	copy_seccomp(p);
+
 	/*
 	 * Process group and session signals need to be delivered to just the
 	 * parent before the fork or both the parent and the child after the
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 1afc10b05eca..421d0f87ffed 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -224,6 +224,8 @@ static u32 seccomp_run_filters(int syscall)
 
 static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
 {
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
 	if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
 		return false;
 
@@ -232,6 +234,8 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
 
 static inline void seccomp_assign_mode(unsigned long seccomp_mode)
 {
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
 	current->seccomp.mode = seccomp_mode;
 	set_tsk_thread_flag(current, TIF_SECCOMP);
 }
@@ -333,6 +337,8 @@ seccomp_prepare_user_filter(const char __user *user_filter)
  * @flags:  flags to change filter behavior
  * @filter: seccomp filter to add to the current process
  *
+ * Caller must be holding current->sighand->siglock lock.
+ *
  * Returns 0 on success, -ve on error.
  */
 static long seccomp_attach_filter(unsigned int flags,
@@ -341,6 +347,8 @@ static long seccomp_attach_filter(unsigned int flags,
 	unsigned long total_insns;
 	struct seccomp_filter *walker;
 
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
 	/* Validate resulting filter length. */
 	total_insns = filter->len;
 	for (walker = current->seccomp.filter; walker; walker = walker->prev)
@@ -529,6 +537,8 @@ static long seccomp_set_mode_strict(void)
 	const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
 	long ret = -EINVAL;
 
+	spin_lock_irq(&current->sighand->siglock);
+
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
@@ -539,6 +549,7 @@ static long seccomp_set_mode_strict(void)
 	ret = 0;
 
 out:
+	spin_unlock_irq(&current->sighand->siglock);
 
 	return ret;
 }
@@ -566,13 +577,15 @@ static long seccomp_set_mode_filter(unsigned int flags,
 
 	/* Validate flags. */
 	if (flags != 0)
-		goto out;
+		return -EINVAL;
 
 	/* Prepare the new filter before holding any locks. */
 	prepared = seccomp_prepare_user_filter(filter);
 	if (IS_ERR(prepared))
 		return PTR_ERR(prepared);
 
+	spin_lock_irq(&current->sighand->siglock);
+
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
@@ -584,6 +597,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
 
 	seccomp_assign_mode(seccomp_mode);
 out:
+	spin_unlock_irq(&current->sighand->siglock);
 	seccomp_filter_free(prepared);
 	return ret;
 }

From c852ef778224ecf5fe995d74ad96087038778bca Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 27 Jun 2014 15:01:35 -0700
Subject: [PATCH 0886/1185] seccomp: allow mode setting across threads

This changes the mode setting helper to allow threads to change the
seccomp mode from another thread. We must maintain barriers to keep
TIF_SECCOMP synchronized with the rest of the seccomp state.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>

Conflicts:
	kernel/seccomp.c
---
 kernel/seccomp.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 421d0f87ffed..5f2962e4aee4 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -202,19 +202,23 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
  */
 static u32 seccomp_run_filters(int syscall)
 {
-	struct seccomp_filter *f;
+	struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
 	u32 ret = SECCOMP_RET_ALLOW;
 
 	/* Ensure unexpected behavior doesn't result in failing open. */
-	if (WARN_ON(current->seccomp.filter == NULL))
+	if (unlikely(WARN_ON(f == NULL)))
 		return SECCOMP_RET_KILL;
 
+	/* Make sure cross-thread synced filter points somewhere sane. */
+	smp_read_barrier_depends();
+
 	/*
 	 * All filters in the list are evaluated and the lowest BPF return
 	 * value always takes priority (ignoring the DATA).
 	 */
-	for (f = current->seccomp.filter; f; f = f->prev) {
+	for (; f; f = f->prev) {
 		u32 cur_ret = sk_run_filter(NULL, f->insns);
+		
 		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
 			ret = cur_ret;
 	}
@@ -232,12 +236,18 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
 	return true;
 }
 
-static inline void seccomp_assign_mode(unsigned long seccomp_mode)
+static inline void seccomp_assign_mode(struct task_struct *task,
+				       unsigned long seccomp_mode)
 {
-	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+	BUG_ON(!spin_is_locked(&task->sighand->siglock));
 
-	current->seccomp.mode = seccomp_mode;
-	set_tsk_thread_flag(current, TIF_SECCOMP);
+	task->seccomp.mode = seccomp_mode;
+	/*
+	 * Make sure TIF_SECCOMP cannot be set before the mode (and
+	 * filter) is set.
+	 */
+	smp_mb();
+	set_tsk_thread_flag(task, TIF_SECCOMP);
 }
 
 #ifdef CONFIG_SECCOMP_FILTER
@@ -435,12 +445,17 @@ static int mode1_syscalls_32[] = {
 
 int __secure_computing(int this_syscall)
 {
-	int mode = current->seccomp.mode;
 	int exit_sig = 0;
 	int *syscall;
 	u32 ret;
 
-	switch (mode) {
+	/*
+	 * Make sure that any changes to mode from another thread have
+	 * been seen after TIF_SECCOMP was seen.
+	 */
+	rmb();
+
+	switch (current->seccomp.mode) {
 	case SECCOMP_MODE_STRICT:
 		syscall = mode1_syscalls;
 #ifdef CONFIG_COMPAT
@@ -545,7 +560,7 @@ static long seccomp_set_mode_strict(void)
 #ifdef TIF_NOTSC
 	disable_TSC();
 #endif
-	seccomp_assign_mode(seccomp_mode);
+	seccomp_assign_mode(current, seccomp_mode);
 	ret = 0;
 
 out:
@@ -595,7 +610,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	/* Do not free the successfully attached filter. */
 	prepared = NULL;
 
-	seccomp_assign_mode(seccomp_mode);
+	seccomp_assign_mode(current, seccomp_mode);
 out:
 	spin_unlock_irq(&current->sighand->siglock);
 	seccomp_filter_free(prepared);

From f14a5db2398afed8f416d244e6da6b23940997c6 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 5 Jun 2014 00:23:17 -0700
Subject: [PATCH 0887/1185] seccomp: implement SECCOMP_FILTER_FLAG_TSYNC

Applying restrictive seccomp filter programs to large or diverse
codebases often requires handling threads which may be started early in
the process lifetime (e.g., by code that is linked in). While it is
possible to apply permissive programs prior to process start up, it is
difficult to further restrict the kernel ABI to those threads after that
point.

This change adds a new seccomp syscall flag to SECCOMP_SET_MODE_FILTER for
synchronizing thread group seccomp filters at filter installation time.

When calling seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
filter) an attempt will be made to synchronize all threads in current's
threadgroup to its new seccomp filter program. This is possible iff all
threads are using a filter that is an ancestor to the filter current is
attempting to synchronize to. NULL filters (where the task is running as
SECCOMP_MODE_DISABLED) are also treated as ancestors allowing threads to be
transitioned into SECCOMP_MODE_FILTER. If prctl(PR_SET_NO_NEW_PRIVS,
...) has been set on the calling thread, no_new_privs will be set for
all synchronized threads too. On success, 0 is returned. On failure,
the pid of one of the failing threads will be returned and no filters
will have been applied.

The race conditions against another thread are:
- requesting TSYNC (already handled by sighand lock)
- performing a clone (already handled by sighand lock)
- changing its filter (already handled by sighand lock)
- calling exec (handled by cred_guard_mutex)
The clone case is assisted by the fact that new threads will have their
seccomp state duplicated from their parent before appearing on the tasklist.

Holding cred_guard_mutex means that seccomp filters cannot be assigned
while in the middle of another thread's exec (potentially bypassing
no_new_privs or similar). The call to de_thread() may kill threads waiting
for the mutex.

Changes across threads to the filter pointer include a barrier.

Based on patches by Will Drewry.

Suggested-by: Julien Tinnes <jln@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 fs/exec.c                    |   2 +-
 include/linux/seccomp.h      |   2 +
 include/uapi/linux/seccomp.h |   3 +
 kernel/seccomp.c             | 135 ++++++++++++++++++++++++++++++++++-
 4 files changed, 140 insertions(+), 2 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index cb5fb9aa320e..c568bdce6413 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1220,7 +1220,7 @@ EXPORT_SYMBOL(install_exec_creds);
 /*
  * determine how safe it is to execute the proposed program
  * - the caller must hold ->cred_guard_mutex to protect against
- *   PTRACE_ATTACH
+ *   PTRACE_ATTACH or seccomp thread-sync
  */
 static int check_unsafe_exec(struct linux_binprm *bprm)
 {
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 9ab63a574d40..9687691799ff 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -3,6 +3,8 @@
 
 #include <uapi/linux/seccomp.h>
 
+#define SECCOMP_FILTER_FLAG_MASK	(SECCOMP_FILTER_FLAG_TSYNC)
+
 #ifdef CONFIG_SECCOMP
 
 #include <linux/thread_info.h>
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index b258878ba754..0f238a43ff1e 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -14,6 +14,9 @@
 #define SECCOMP_SET_MODE_STRICT	0
 #define SECCOMP_SET_MODE_FILTER	1
 
+/* Valid flags for SECCOMP_SET_MODE_FILTER */
+#define SECCOMP_FILTER_FLAG_TSYNC	1
+
 /*
  * All BPF programs must return a 32-bit value.
  * The bottom 16-bits are for optional return data.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 5f2962e4aee4..ebdaaf427de2 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -26,6 +26,7 @@
 #ifdef CONFIG_SECCOMP_FILTER
 #include <asm/syscall.h>
 #include <linux/filter.h>
+#include <linux/pid.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
 #include <linux/tracehook.h>
@@ -251,6 +252,114 @@ static inline void seccomp_assign_mode(struct task_struct *task,
 }
 
 #ifdef CONFIG_SECCOMP_FILTER
+/* Returns 1 if the parent is an ancestor of the child. */
+static int is_ancestor(struct seccomp_filter *parent,
+		       struct seccomp_filter *child)
+{
+	/* NULL is the root ancestor. */
+	if (parent == NULL)
+		return 1;
+	for (; child; child = child->prev)
+		if (child == parent)
+			return 1;
+	return 0;
+}
+
+/**
+ * seccomp_can_sync_threads: checks if all threads can be synchronized
+ *
+ * Expects sighand and cred_guard_mutex locks to be held.
+ *
+ * Returns 0 on success, -ve on error, or the pid of a thread which was
+ * either not in the correct seccomp mode or it did not have an ancestral
+ * seccomp filter.
+ */
+static inline pid_t seccomp_can_sync_threads(void)
+{
+	struct task_struct *thread, *caller;
+
+	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
+	/* Validate all threads being eligible for synchronization. */
+	caller = current;
+	for_each_thread(caller, thread) {
+		pid_t failed;
+
+		/* Skip current, since it is initiating the sync. */
+		if (thread == caller)
+			continue;
+
+		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
+		    (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
+		     is_ancestor(thread->seccomp.filter,
+				 caller->seccomp.filter)))
+			continue;
+
+		/* Return the first thread that cannot be synchronized. */
+		failed = task_pid_vnr(thread);
+		/* If the pid cannot be resolved, then return -ESRCH */
+		if (unlikely(WARN_ON(failed == 0)))
+			failed = -ESRCH;
+		return failed;
+	}
+
+	return 0;
+}
+
+/**
+ * seccomp_sync_threads: sets all threads to use current's filter
+ *
+ * Expects sighand and cred_guard_mutex locks to be held, and for
+ * seccomp_can_sync_threads() to have returned success already
+ * without dropping the locks.
+ *
+ */
+static inline void seccomp_sync_threads(void)
+{
+	struct task_struct *thread, *caller;
+
+	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
+	/* Synchronize all threads. */
+	caller = current;
+	for_each_thread(caller, thread) {
+		/* Skip current, since it needs no changes. */
+		if (thread == caller)
+			continue;
+
+		/* Get a task reference for the new leaf node. */
+		get_seccomp_filter(caller);
+		/*
+		 * Drop the task reference to the shared ancestor since
+		 * current's path will hold a reference.  (This also
+		 * allows a put before the assignment.)
+		 */
+		put_seccomp_filter(thread);
+		smp_store_release(&thread->seccomp.filter,
+				  caller->seccomp.filter);
+		/*
+		 * Opt the other thread into seccomp if needed.
+		 * As threads are considered to be trust-realm
+		 * equivalent (see ptrace_may_access), it is safe to
+		 * allow one thread to transition the other.
+		 */
+		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
+			/*
+			 * Don't let an unprivileged task work around
+			 * the no_new_privs restriction by creating
+			 * a thread that sets it up, enters seccomp,
+			 * then dies.
+			 */
+			if (task_no_new_privs(caller))
+				task_set_no_new_privs(thread);
+
+			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
+		}
+	}
+}
+
 /**
  * seccomp_prepare_filter: Prepares a seccomp filter for use.
  * @fprog: BPF program to install
@@ -366,6 +475,15 @@ static long seccomp_attach_filter(unsigned int flags,
 	if (total_insns > MAX_INSNS_PER_PATH)
 		return -ENOMEM;
 
+	/* If thread sync has been requested, check that it is possible. */
+	if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
+		int ret;
+
+		ret = seccomp_can_sync_threads();
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * If there is an existing filter, make it the prev and don't drop its
 	 * task reference.
@@ -373,6 +491,10 @@ static long seccomp_attach_filter(unsigned int flags,
 	filter->prev = current->seccomp.filter;
 	current->seccomp.filter = filter;
 
+	/* Now that the new filter is in place, synchronize to all threads. */
+	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
+		seccomp_sync_threads();
+
 	return 0;
 }
 
@@ -591,7 +713,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	long ret = -EINVAL;
 
 	/* Validate flags. */
-	if (flags != 0)
+	if (flags & ~SECCOMP_FILTER_FLAG_MASK)
 		return -EINVAL;
 
 	/* Prepare the new filter before holding any locks. */
@@ -599,6 +721,14 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	if (IS_ERR(prepared))
 		return PTR_ERR(prepared);
 
+	/*
+	 * Make sure we cannot change seccomp or nnp state via TSYNC
+	 * while another thread is in the middle of calling exec.
+	 */
+	if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
+	    mutex_lock_killable(&current->signal->cred_guard_mutex))
+		goto out_free;
+
 	spin_lock_irq(&current->sighand->siglock);
 
 	if (!seccomp_may_assign_mode(seccomp_mode))
@@ -613,6 +743,9 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	seccomp_assign_mode(current, seccomp_mode);
 out:
 	spin_unlock_irq(&current->sighand->siglock);
+	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
+		mutex_unlock(&current->signal->cred_guard_mutex);
+out_free:
 	seccomp_filter_free(prepared);
 	return ret;
 }

From c4673ca7850dae7146ecc35c69e8f17f2337c7d2 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 12 Sep 2014 22:17:23 +0200
Subject: [PATCH 0888/1185] mm: export symbol dependencies of is_zero_pfn()

In order to make the static inline function is_zero_pfn() callable by
modules, export its symbol dependencies 'zero_pfn' and (for s390 and
mips) 'zero_page_mask'.

We need this for KVM, as CONFIG_KVM is a tristate for all supported
architectures except ARM and arm64, and testing a pfn whether it refers
to the zero page is required to correctly distinguish the zero page
from other special RAM ranges that may also have the PG_reserved bit
set, but need to be treated as MMIO memory.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0b70068e47e8f0c813a902dc3d6def601fd15acb)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/mips/mm/init.c | 1 +
 arch/s390/mm/init.c | 1 +
 mm/memory.c         | 2 ++
 3 files changed, 4 insertions(+)

diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 9b973e0af9cb..d340d53c345b 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -74,6 +74,7 @@
  */
 unsigned long empty_zero_page, zero_page_mask;
 EXPORT_SYMBOL_GPL(empty_zero_page);
+EXPORT_SYMBOL(zero_page_mask);
 
 /*
  * Not static inline because used by IP27 special magic initialization code
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index eba15f18fd38..a4dfc0bd05db 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -43,6 +43,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
 
 unsigned long empty_zero_page, zero_page_mask;
 EXPORT_SYMBOL(empty_zero_page);
+EXPORT_SYMBOL(zero_page_mask);
 
 static void __init setup_zero_pages(void)
 {
diff --git a/mm/memory.c b/mm/memory.c
index 5a35443c01ad..20bb9e901781 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -118,6 +118,8 @@ __setup("norandmaps", disable_randmaps);
 unsigned long zero_pfn __read_mostly;
 unsigned long highest_memmap_pfn __read_mostly;
 
+EXPORT_SYMBOL(zero_pfn);
+
 /*
  * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
  */

From 331a9a729183832840d656036c5bc2b81942c294 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 12 Sep 2014 15:16:00 +0200
Subject: [PATCH 0889/1185] KVM: check for !is_zero_pfn() in kvm_is_mmio_pfn()

Read-only memory ranges may be backed by the zero page, so avoid
misidentifying it as an MMIO pfn.

This fixes another issue I identified when testing QEMU+KVM_UEFI, where
a read to an uninitialized emulated NOR flash brought in the zero page,
but mapped as a read-write device region, because kvm_is_mmio_pfn()
misidentifies it as a MMIO pfn due to its PG_reserved bit being set.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Fixes: b88657674d39 ("ARM: KVM: user_mem_abort: support stage 2 MMIO page mapping")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 85c8555ff07ef09261bd50d603cd4290cff5a8cc)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f019669674a5..b64d44219f27 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -110,7 +110,7 @@ static bool largepages_enabled = true;
 bool kvm_is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
-		return PageReserved(pfn_to_page(pfn));
+		return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
 
 	return true;
 }

From b8a669d29702a8fb529f4fae450a86b8676b0e42 Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sam.bobroff@au1.ibm.com>
Date: Fri, 19 Sep 2014 09:40:41 +1000
Subject: [PATCH 0890/1185] KVM: correct null pid check in kvm_vcpu_yield_to()

Correct a simple mistake of checking the wrong variable
before a dereference, resulting in the dereference not being
properly protected by rcu_dereference().

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 27fbe64bfa63cfb9da025975b59d96568caa2d53)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b64d44219f27..9cae94206f41 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1728,7 +1728,7 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 	rcu_read_lock();
 	pid = rcu_dereference(target->pid);
 	if (pid)
-		task = get_pid_task(target->pid, PIDTYPE_PID);
+		task = get_pid_task(pid, PIDTYPE_PID);
 	rcu_read_unlock();
 	if (!task)
 		return ret;

From 9ac860041db860a59bfd6ac82b31d6b6f76ebb52 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 10 Aug 2014 20:50:30 -0700
Subject: [PATCH 0891/1185] seccomp: Replace BUG(!spin_is_locked()) with
 assert_spin_lock

Current upstream kernel hangs with mips and powerpc targets in
uniprocessor mode if SECCOMP is configured.

Bisect points to commit dbd952127d11 ("seccomp: introduce writer locking").
Turns out that code such as
	BUG_ON(!spin_is_locked(&list_lock));
can not be used in uniprocessor mode because spin_is_locked() always
returns false in this configuration, and that assert_spin_locked()
exists for that very purpose and must be used instead.

Fixes: dbd952127d11 ("seccomp: introduce writer locking")
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/fork.c    |  2 +-
 kernel/seccomp.c | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index a0abbb536e9a..0ff07d94e07c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1125,7 +1125,7 @@ static void copy_seccomp(struct task_struct *p)
 	 * needed because this new task is not yet running and cannot
 	 * be racing exec.
 	 */
-	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+	assert_spin_locked(&current->sighand->siglock);
 
 	/* Ref-count the new filter user, and assign it. */
 	get_seccomp_filter(current);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ebdaaf427de2..1fbb1a2bc459 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -229,7 +229,7 @@ static u32 seccomp_run_filters(int syscall)
 
 static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
 {
-	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+	assert_spin_locked(&current->sighand->siglock);
 
 	if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
 		return false;
@@ -240,7 +240,7 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
 static inline void seccomp_assign_mode(struct task_struct *task,
 				       unsigned long seccomp_mode)
 {
-	BUG_ON(!spin_is_locked(&task->sighand->siglock));
+	assert_spin_locked(&task->sighand->siglock);
 
 	task->seccomp.mode = seccomp_mode;
 	/*
@@ -279,7 +279,7 @@ static inline pid_t seccomp_can_sync_threads(void)
 	struct task_struct *thread, *caller;
 
 	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
-	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+	assert_spin_locked(&current->sighand->siglock);
 
 	/* Validate all threads being eligible for synchronization. */
 	caller = current;
@@ -320,7 +320,7 @@ static inline void seccomp_sync_threads(void)
 	struct task_struct *thread, *caller;
 
 	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
-	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+	assert_spin_locked(&current->sighand->siglock);
 
 	/* Synchronize all threads. */
 	caller = current;
@@ -466,7 +466,7 @@ static long seccomp_attach_filter(unsigned int flags,
 	unsigned long total_insns;
 	struct seccomp_filter *walker;
 
-	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+	assert_spin_locked(&current->sighand->siglock);
 
 	/* Validate resulting filter length. */
 	total_insns = filter->len;

From 900e9fd0d5d15c596cacfb89ce007c933cea6e1c Mon Sep 17 00:00:00 2001
From: Lee Campbell <leecam@chromium.org>
Date: Wed, 8 Oct 2014 14:40:22 -0700
Subject: [PATCH 0892/1185] seccomp: fix syscall numbers for x86 and x86_64

Correcting syscall numbers for seccomp

Signed-off-by: Lee Campbell <leecam@chromium.org>
---
 arch/x86/syscalls/syscall_32.tbl | 5 ++++-
 arch/x86/syscalls/syscall_64.tbl | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 8605e9e0f19a..01ed50255473 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -357,4 +357,7 @@
 348	i386	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
 349	i386	kcmp			sys_kcmp
 350	i386	finit_module		sys_finit_module
-351	i386	seccomp			sys_seccomp
+# 351	i386	sched_setattr		sys_sched_setattr
+# 352	i386	sched_getattr		sys_sched_getattr
+# 353	i386	renameat2		sys_renameat2
+354	i386	seccomp			sys_seccomp
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 4cb491567b85..a3c38bbd6f01 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -320,7 +320,10 @@
 311	64	process_vm_writev	sys_process_vm_writev
 312	common	kcmp			sys_kcmp
 313	common	finit_module		sys_finit_module
-314	common	seccomp			sys_seccomp
+# 314	common	sched_setattr		sys_sched_setattr
+# 315	common	sched_getattr		sys_sched_getattr
+# 316	common	renameat2		sys_renameat2
+317	common	seccomp			sys_seccomp
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact

From 07d209bd092d023976fdb881ba6d4b30fe18aebe Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 4 Sep 2014 14:06:55 +0200
Subject: [PATCH 0893/1185] udf: Avoid infinite loop when processing indirect
 ICBs

commit c03aa9f6e1f938618e6db2e23afef0574efeeb65 upstream.

We did not implement any bound on number of indirect ICBs we follow when
loading inode. Thus corrupted medium could cause kernel to go into an
infinite loop, possibly causing a stack overflow.

Fix the possible stack overflow by removing recursion from
__udf_read_inode() and limit number of indirect ICBs we follow to avoid
infinite loops.

Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Chuck Ebbert <cebbert.lkml@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/udf/inode.c | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index b6d15d349810..aa023283cc8a 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1270,13 +1270,22 @@ int udf_setsize(struct inode *inode, loff_t newsize)
 	return 0;
 }
 
+/*
+ * Maximum length of linked list formed by ICB hierarchy. The chosen number is
+ * arbitrary - just that we hopefully don't limit any real use of rewritten
+ * inode on write-once media but avoid looping for too long on corrupted media.
+ */
+#define UDF_MAX_ICB_NESTING 1024
+
 static void __udf_read_inode(struct inode *inode)
 {
 	struct buffer_head *bh = NULL;
 	struct fileEntry *fe;
 	uint16_t ident;
 	struct udf_inode_info *iinfo = UDF_I(inode);
+	unsigned int indirections = 0;
 
+reread:
 	/*
 	 * Set defaults, but the inode is still incomplete!
 	 * Note: get_new_inode() sets the following on a new inode:
@@ -1313,28 +1322,26 @@ static void __udf_read_inode(struct inode *inode)
 		ibh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 1,
 					&ident);
 		if (ident == TAG_IDENT_IE && ibh) {
-			struct buffer_head *nbh = NULL;
 			struct kernel_lb_addr loc;
 			struct indirectEntry *ie;
 
 			ie = (struct indirectEntry *)ibh->b_data;
 			loc = lelb_to_cpu(ie->indirectICB.extLocation);
 
-			if (ie->indirectICB.extLength &&
-				(nbh = udf_read_ptagged(inode->i_sb, &loc, 0,
-							&ident))) {
-				if (ident == TAG_IDENT_FE ||
-					ident == TAG_IDENT_EFE) {
-					memcpy(&iinfo->i_location,
-						&loc,
-						sizeof(struct kernel_lb_addr));
-					brelse(bh);
-					brelse(ibh);
-					brelse(nbh);
-					__udf_read_inode(inode);
+			if (ie->indirectICB.extLength) {
+				brelse(bh);
+				brelse(ibh);
+				memcpy(&iinfo->i_location, &loc,
+				       sizeof(struct kernel_lb_addr));
+				if (++indirections > UDF_MAX_ICB_NESTING) {
+					udf_err(inode->i_sb,
+						"too many ICBs in ICB hierarchy"
+						" (max %d supported)\n",
+						UDF_MAX_ICB_NESTING);
+					make_bad_inode(inode);
 					return;
 				}
-				brelse(nbh);
+				goto reread;
 			}
 		}
 		brelse(ibh);

From bee870fc1af7c5109a0f167af3bfe7002a02e7f3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 2 Oct 2014 16:17:02 -0700
Subject: [PATCH 0894/1185] perf: fix perf bug in fork()

commit 6c72e3501d0d62fc064d3680e5234f3463ec5a86 upstream.

Oleg noticed that a cleanup by Sylvain actually uncovered a bug; by
calling perf_event_free_task() when failing sched_fork() we will not yet
have done the memset() on ->perf_event_ctxp[] and will therefore try and
'free' the inherited contexts, which are still in use by the parent
process.  This is bad..

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Sylvain 'ythier' Hitier <sylvain.hitier@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/events/core.c | 4 +++-
 kernel/fork.c        | 5 +++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6bf387a60399..0b4733447151 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7482,8 +7482,10 @@ int perf_event_init_task(struct task_struct *child)
 
 	for_each_task_context_nr(ctxn) {
 		ret = perf_event_init_context(child, ctxn);
-		if (ret)
+		if (ret) {
+			perf_event_free_task(child);
 			return ret;
+		}
 	}
 
 	return 0;
diff --git a/kernel/fork.c b/kernel/fork.c
index 2c76e11ba939..514dbc40f98f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1324,7 +1324,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto bad_fork_cleanup_policy;
 	retval = audit_alloc(p);
 	if (retval)
-		goto bad_fork_cleanup_policy;
+		goto bad_fork_cleanup_perf;
 	/* copy all the process information */
 	retval = copy_semundo(clone_flags, p);
 	if (retval)
@@ -1522,8 +1522,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	exit_sem(p);
 bad_fork_cleanup_audit:
 	audit_free(p);
-bad_fork_cleanup_policy:
+bad_fork_cleanup_perf:
 	perf_event_free_task(p);
+bad_fork_cleanup_policy:
 #ifdef CONFIG_NUMA
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:

From 0000372a96216b393bacfa50fb0253c40f8cf3d1 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Fri, 3 Oct 2014 16:19:24 -0700
Subject: [PATCH 0895/1185] init/Kconfig: Fix HAVE_FUTEX_CMPXCHG to not break
 up the EXPERT menu

commit 62b4d2041117f35ab2409c9f5c4b8d3dc8e59d0f upstream.

commit 03b8c7b623c80af264c4c8d6111e5c6289933666 ("futex: Allow
architectures to skip futex_atomic_cmpxchg_inatomic() test") added the
HAVE_FUTEX_CMPXCHG symbol right below FUTEX.  This placed it right in
the middle of the options for the EXPERT menu.  However,
HAVE_FUTEX_CMPXCHG does not depend on EXPERT or FUTEX, so Kconfig stops
placing items in the EXPERT menu, and displays the remaining several
EXPERT items (starting with EPOLL) directly in the General Setup menu.

Since both users of HAVE_FUTEX_CMPXCHG only select it "if FUTEX", make
HAVE_FUTEX_CMPXCHG itself depend on FUTEX.  With this change, the
subsequent items display as part of the EXPERT menu again; the EMBEDDED
menu now appears as the next top-level item in the General Setup menu,
which makes General Setup much shorter and more usable.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 init/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/init/Kconfig b/init/Kconfig
index 5d6febaea56d..8fa4f758821a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1367,6 +1367,7 @@ config FUTEX
 
 config HAVE_FUTEX_CMPXCHG
 	bool
+	depends on FUTEX
 	help
 	  Architectures should select this if futex_atomic_cmpxchg_inatomic()
 	  is implemented and always working. This removes a couple of runtime

From 78a3db11cb0e9521572c7d0effbc63f2bd5dac12 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Thu, 2 Oct 2014 16:51:18 -0400
Subject: [PATCH 0896/1185] ring-buffer: Fix infinite spin in reading buffer

commit 24607f114fd14f2f37e3e0cb3d47bce96e81e848 upstream.

Commit 651e22f2701b "ring-buffer: Always reset iterator to reader page"
fixed one bug but in the process caused another one. The reset is to
update the header page, but that fix also changed the way the cached
reads were updated. The cache reads are used to test if an iterator
needs to be updated or not.

A ring buffer iterator, when created, disables writes to the ring buffer
but does not stop other readers or consuming reads from happening.
Although all readers are synchronized via a lock, they are only
synchronized when in the ring buffer functions. Those functions may
be called by any number of readers. The iterator continues down when
it's not interrupted by a consuming reader. If a consuming read
occurs, the iterator starts from the beginning of the buffer.

The way the iterator sees that a consuming read has happened since
its last read is by checking the reader "cache". The cache holds the
last counts of the read and the reader page itself.

Commit 651e22f2701b changed what was saved by the cache_read when
the rb_iter_reset() occurred, making the iterator never match the cache.
Then if the iterator calls rb_iter_reset(), it will go into an
infinite loop by checking if the cache doesn't match, doing the reset
and retrying, just to see that the cache still doesn't match! Which
should never happen as the reset is supposed to set the cache to the
current value and there are locks that keep a consuming reader from
having access to the data.

Fixes: 651e22f2701b "ring-buffer: Always reset iterator to reader page"
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/ring_buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0bc181b0524c..3d9fee3a80b3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3371,7 +3371,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
 	iter->head = cpu_buffer->reader_page->read;
 
 	iter->cache_reader_page = iter->head_page;
-	iter->cache_read = iter->head;
+	iter->cache_read = cpu_buffer->read;
 
 	if (iter->head)
 		iter->read_stamp = cpu_buffer->read_stamp;

From 183c062c51c7e7663752b94ac399c830e4cb3c44 Mon Sep 17 00:00:00 2001
From: Waiman Long <Waiman.Long@hp.com>
Date: Wed, 6 Aug 2014 16:05:36 -0700
Subject: [PATCH 0897/1185] mm, thp: move invariant bug check out of loop in
 __split_huge_page_map

commit f8303c2582b889351e261ff18c4d8eb197a77db2 upstream.

In __split_huge_page_map(), the check for page_mapcount(page) is
invariant within the for loop.  Because of the fact that the macro is
implemented using atomic_read(), the redundant check cannot be optimized
away by the compiler leading to unnecessary read to the page structure.

This patch moves the invariant bug check out of the loop so that it will
be done only once.  On a 3.16-rc1 based kernel, the execution time of a
microbenchmark that broke up 1000 transparent huge pages using munmap()
had an execution time of 38,245us and 38,548us with and without the
patch respectively.  The performance gain is about 1%.

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Scott J Norton <scott.norton@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/huge_memory.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index eb00e81601a5..b9a096a36edc 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1733,6 +1733,8 @@ static int __split_huge_page_map(struct page *page,
 	if (pmd) {
 		pgtable = pgtable_trans_huge_withdraw(mm);
 		pmd_populate(mm, &_pmd, pgtable);
+		if (pmd_write(*pmd))
+			BUG_ON(page_mapcount(page) != 1);
 
 		haddr = address;
 		for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
@@ -1742,8 +1744,6 @@ static int __split_huge_page_map(struct page *page,
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 			if (!pmd_write(*pmd))
 				entry = pte_wrprotect(entry);
-			else
-				BUG_ON(page_mapcount(page) != 1);
 			if (!pmd_young(*pmd))
 				entry = pte_mkold(entry);
 			if (pmd_numa(*pmd))

From f35407acce23bab3727190a94468362dc8f030a1 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Thu, 2 Oct 2014 19:47:42 +0100
Subject: [PATCH 0898/1185] mm: numa: Do not mark PTEs pte_numa when splitting
 huge pages

commit abc40bd2eeb77eb7c2effcaf63154aad929a1d5f upstream.

This patch reverts 1ba6e0b50b ("mm: numa: split_huge_page: transfer the
NUMA type from the pmd to the pte"). If a huge page is being split due to
a protection change and the tail will be in a PROT_NONE vma then NUMA
hinting PTEs are temporarily created in the protected VMA.

 VM_RW|VM_PROTNONE
|-----------------|
      ^
      split here

In the specific case above, it should get fixed up by change_pte_range()
but there is a window of opportunity for weirdness to happen. Similarly,
if a huge page is shrunk and split during a protection update but before
pmd_numa is cleared then a pte_numa can be left behind.

Instead of adding complexity trying to deal with the case, this patch
will not mark PTEs NUMA when splitting a huge page. NUMA hinting faults
will not be triggered which is marginal in comparison to the complexity
in dealing with the corner cases during THP split.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/huge_memory.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index b9a096a36edc..d21c9ef0943c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1740,14 +1740,17 @@ static int __split_huge_page_map(struct page *page,
 		for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
 			pte_t *pte, entry;
 			BUG_ON(PageCompound(page+i));
+			/*
+			 * Note that pmd_numa is not transferred deliberately
+			 * to avoid any possibility that pte_numa leaks to
+			 * a PROT_NONE VMA by accident.
+			 */
 			entry = mk_pte(page + i, vma->vm_page_prot);
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 			if (!pmd_write(*pmd))
 				entry = pte_wrprotect(entry);
 			if (!pmd_young(*pmd))
 				entry = pte_mkold(entry);
-			if (pmd_numa(*pmd))
-				entry = pte_mknuma(entry);
 			pte = pte_offset_map(&_pmd, haddr);
 			BUG_ON(!pte_none(*pte));
 			set_pte_at(mm, haddr, pte, entry);

From f5d34b7cae6c6ddddb1797ebc0d0918954544108 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Sat, 20 Sep 2014 16:16:35 -0300
Subject: [PATCH 0899/1185] media: vb2: fix VBI/poll regression

commit 58d75f4b1ce26324b4d809b18f94819843a98731 upstream.

The recent conversion of saa7134 to vb2 uncovered a poll() bug that
broke the teletext applications alevt and mtt. These applications
expect that calling poll() without having called VIDIOC_STREAMON will
cause poll() to return POLLERR. That did not happen in vb2.

This patch fixes that behavior. It also fixes what should happen when
poll() is called when STREAMON is called but no buffers have been
queued. In that case poll() will also return POLLERR, but only for
capture queues since output queues will always return POLLOUT
anyway in that situation.

This brings the vb2 behavior in line with the old videobuf behavior.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/v4l2-core/videobuf2-core.c | 15 +++++++++++++--
 include/media/videobuf2-core.h           |  4 ++++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index e3bdc3be91e1..5e47ba479e53 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -666,6 +666,7 @@ static int __reqbufs(struct vb2_queue *q, struct v4l2_requestbuffers *req)
 	 * to the userspace.
 	 */
 	req->count = allocated_buffers;
+	q->waiting_for_buffers = !V4L2_TYPE_IS_OUTPUT(q->type);
 
 	return 0;
 }
@@ -714,6 +715,7 @@ static int __create_bufs(struct vb2_queue *q, struct v4l2_create_buffers *create
 		memset(q->plane_sizes, 0, sizeof(q->plane_sizes));
 		memset(q->alloc_ctx, 0, sizeof(q->alloc_ctx));
 		q->memory = create->memory;
+		q->waiting_for_buffers = !V4L2_TYPE_IS_OUTPUT(q->type);
 	}
 
 	num_buffers = min(create->count, VIDEO_MAX_FRAME - q->num_buffers);
@@ -1355,6 +1357,7 @@ int vb2_qbuf(struct vb2_queue *q, struct v4l2_buffer *b)
 	 * dequeued in dqbuf.
 	 */
 	list_add_tail(&vb->queued_entry, &q->queued_list);
+	q->waiting_for_buffers = false;
 	vb->state = VB2_BUF_STATE_QUEUED;
 
 	/*
@@ -1724,6 +1727,7 @@ int vb2_streamoff(struct vb2_queue *q, enum v4l2_buf_type type)
 	 * and videobuf, effectively returning control over them to userspace.
 	 */
 	__vb2_queue_cancel(q);
+	q->waiting_for_buffers = !V4L2_TYPE_IS_OUTPUT(q->type);
 
 	dprintk(3, "Streamoff successful\n");
 	return 0;
@@ -2009,9 +2013,16 @@ unsigned int vb2_poll(struct vb2_queue *q, struct file *file, poll_table *wait)
 	}
 
 	/*
-	 * There is nothing to wait for if no buffers have already been queued.
+	 * There is nothing to wait for if the queue isn't streaming.
 	 */
-	if (list_empty(&q->queued_list))
+	if (!vb2_is_streaming(q))
+		return res | POLLERR;
+	/*
+	 * For compatibility with vb1: if QBUF hasn't been called yet, then
+	 * return POLLERR as well. This only affects capture queues, output
+	 * queues will always initialize waiting_for_buffers to false.
+	 */
+	if (q->waiting_for_buffers)
 		return res | POLLERR;
 
 	if (list_empty(&q->done_list))
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index d88a098d1aff..2cc4e0df9c5d 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -318,6 +318,9 @@ struct v4l2_fh;
  * @done_wq:	waitqueue for processes waiting for buffers ready to be dequeued
  * @alloc_ctx:	memory type/allocator-specific contexts for each plane
  * @streaming:	current streaming state
+ * @waiting_for_buffers: used in poll() to check if vb2 is still waiting for
+ *		buffers. Only set for capture queues if qbuf has not yet been
+ *		called since poll() needs to return POLLERR in that situation.
  * @fileio:	file io emulator internal data, used only if emulator is active
  */
 struct vb2_queue {
@@ -350,6 +353,7 @@ struct vb2_queue {
 	unsigned int			plane_sizes[VIDEO_MAX_PLANES];
 
 	unsigned int			streaming:1;
+	unsigned int			waiting_for_buffers:1;
 
 	struct vb2_fileio_data		*fileio;
 };

From 06905ff8a6f07cb59b20311694f5d1454654808f Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 2 Oct 2014 13:45:00 +1000
Subject: [PATCH 0900/1185] md/raid5: disable 'DISCARD' by default due to
 safety concerns.

commit 8e0e99ba64c7ba46133a7c8a3e3f7de01f23bd93 upstream.

It has come to my attention (thanks Martin) that 'discard_zeroes_data'
is only a hint.  Some devices in some cases don't do what it
says on the label.

The use of DISCARD in RAID5 depends on reads from discarded regions
being predictably zero.  If a write to a previously discarded region
performs a read-modify-write cycle it assumes that the parity block
was consistent with the data blocks.  If all were zero, this would
be the case.  If some are and some aren't this would not be the case.
This could lead to data corruption after a device failure when
data needs to be reconstructed from the parity.

As we cannot trust 'discard_zeroes_data', ignore it by default
and so disallow DISCARD on all raid4/5/6 arrays.

As many devices are trustworthy, and as there are benefits to using
DISCARD, add a module parameter to over-ride this caution and cause
DISCARD to work if discard_zeroes_data is set.

If a site wants to enable DISCARD on some arrays but not on others they
should select DISCARD support at the filesystem level, and set the
raid456 module parameter.
    raid456.devices_handle_discard_safely=Y

As this is a data-safety issue, I believe this patch is suitable for
-stable.
DISCARD support for RAID456 was added in 3.7

Cc: Shaohua Li <shli@kernel.org>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Heinz Mauelshagen <heinzm@redhat.com>
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Fixes: 620125f2bf8ff0c4969b79653b54d7bcc9d40637
Signed-off-by: NeilBrown <neilb@suse.de>
[bwh: Backported to 3.10: adjust context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/raid5.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 774f81423d78..2332b5ced0dd 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -60,6 +60,10 @@
 #include "raid0.h"
 #include "bitmap.h"
 
+static bool devices_handle_discard_safely = false;
+module_param(devices_handle_discard_safely, bool, 0644);
+MODULE_PARM_DESC(devices_handle_discard_safely,
+		 "Set to Y if all devices in each array reliably return zeroes on reads from discarded regions");
 /*
  * Stripe cache
  */
@@ -5611,7 +5615,7 @@ static int run(struct mddev *mddev)
 		mddev->queue->limits.discard_granularity = stripe;
 		/*
 		 * unaligned part of discard request will be ignored, so can't
-		 * guarantee discard_zerors_data
+		 * guarantee discard_zeroes_data
 		 */
 		mddev->queue->limits.discard_zeroes_data = 0;
 
@@ -5636,6 +5640,18 @@ static int run(struct mddev *mddev)
 			    !bdev_get_queue(rdev->bdev)->
 						limits.discard_zeroes_data)
 				discard_supported = false;
+			/* Unfortunately, discard_zeroes_data is not currently
+			 * a guarantee - just a hint.  So we only allow DISCARD
+			 * if the sysadmin has confirmed that only safe devices
+			 * are in use by setting a module parameter.
+			 */
+			if (!devices_handle_discard_safely) {
+				if (discard_supported) {
+					pr_info("md/raid456: discard support disabled due to uncertainty.\n");
+					pr_info("Set raid456.devices_handle_discard_safely=Y to override.\n");
+				}
+				discard_supported = false;
+			}
 		}
 
 		if (discard_supported &&

From 00790d4526bd88e711999b9af04a0e896cfbf5a8 Mon Sep 17 00:00:00 2001
From: Andrew Hunter <ahh@google.com>
Date: Thu, 4 Sep 2014 14:17:16 -0700
Subject: [PATCH 0901/1185] jiffies: Fix timeval conversion to jiffies

commit d78c9300c51d6ceed9f6d078d4e9366f259de28c upstream.

timeval_to_jiffies tried to round a timeval up to an integral number
of jiffies, but the logic for doing so was incorrect: intervals
corresponding to exactly N jiffies would become N+1. This manifested
itself particularly when repeatedly stopping/starting an itimer:

setitimer(ITIMER_PROF, &val, NULL);
setitimer(ITIMER_PROF, NULL, &val);

would add a full tick to val, _even if it was exactly representable in
terms of jiffies_ (say, the result of a previous rounding.)  Doing
this repeatedly would cause unbounded growth in val.  So fix the math.

Here's what was wrong with the conversion: we essentially computed
(eliding seconds)

jiffies = usec  * (NSEC_PER_USEC/TICK_NSEC)

by using scaling arithmetic, which took the best approximation of
NSEC_PER_USEC/TICK_NSEC with denominator of 2^USEC_JIFFIE_SC =
x/(2^USEC_JIFFIE_SC), and computed:

jiffies = (usec * x) >> USEC_JIFFIE_SC

and rounded this calculation up in the intermediate form (since we
can't necessarily exactly represent TICK_NSEC in usec.) But the
scaling arithmetic is a (very slight) *over*approximation of the true
value; that is, instead of dividing by (1 usec/ 1 jiffie), we
effectively divided by (1 usec/1 jiffie)-epsilon (rounding
down). This would normally be fine, but we want to round timeouts up,
and we did so by adding 2^USEC_JIFFIE_SC - 1 before the shift; this
would be fine if our division was exact, but dividing this by the
slightly smaller factor was equivalent to adding just _over_ 1 to the
final result (instead of just _under_ 1, as desired.)

In particular, with HZ=1000, we consistently computed that 10000 usec
was 11 jiffies; the same was true for any exact multiple of
TICK_NSEC.

We could possibly still round in the intermediate form, adding
something less than 2^USEC_JIFFIE_SC - 1, but easier still is to
convert usec->nsec, round in nanoseconds, and then convert using
time*spec*_to_jiffies.  This adds one constant multiplication, and is
not observably slower in microbenchmarks on recent x86 hardware.

Tested: the following program:

int main() {
  struct itimerval zero = {{0, 0}, {0, 0}};
  /* Initially set to 10 ms. */
  struct itimerval initial = zero;
  initial.it_interval.tv_usec = 10000;
  setitimer(ITIMER_PROF, &initial, NULL);
  /* Save and restore several times. */
  for (size_t i = 0; i < 10; ++i) {
    struct itimerval prev;
    setitimer(ITIMER_PROF, &zero, &prev);
    /* on old kernels, this goes up by TICK_USEC every iteration */
    printf("previous value: %ld %ld %ld %ld\n",
           prev.it_interval.tv_sec, prev.it_interval.tv_usec,
           prev.it_value.tv_sec, prev.it_value.tv_usec);
    setitimer(ITIMER_PROF, &prev, NULL);
  }
    return 0;
}


Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul Turner <pjt@google.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Reviewed-by: Paul Turner <pjt@google.com>
Reported-by: Aaron Jacobs <jacobsa@google.com>
Signed-off-by: Andrew Hunter <ahh@google.com>
[jstultz: Tweaked to apply to 3.17-rc]
Signed-off-by: John Stultz <john.stultz@linaro.org>
[bwh: Backported to 3.16: adjust filename]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/jiffies.h | 12 ---------
 kernel/time.c           | 54 +++++++++++++++++++++++------------------
 2 files changed, 30 insertions(+), 36 deletions(-)

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 7b5d4a8ab199..c039fe1315eb 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -254,23 +254,11 @@ extern unsigned long preset_lpj;
 #define SEC_JIFFIE_SC (32 - SHIFT_HZ)
 #endif
 #define NSEC_JIFFIE_SC (SEC_JIFFIE_SC + 29)
-#define USEC_JIFFIE_SC (SEC_JIFFIE_SC + 19)
 #define SEC_CONVERSION ((unsigned long)((((u64)NSEC_PER_SEC << SEC_JIFFIE_SC) +\
                                 TICK_NSEC -1) / (u64)TICK_NSEC))
 
 #define NSEC_CONVERSION ((unsigned long)((((u64)1 << NSEC_JIFFIE_SC) +\
                                         TICK_NSEC -1) / (u64)TICK_NSEC))
-#define USEC_CONVERSION  \
-                    ((unsigned long)((((u64)NSEC_PER_USEC << USEC_JIFFIE_SC) +\
-                                        TICK_NSEC -1) / (u64)TICK_NSEC))
-/*
- * USEC_ROUND is used in the timeval to jiffie conversion.  See there
- * for more details.  It is the scaled resolution rounding value.  Note
- * that it is a 64-bit value.  Since, when it is applied, we are already
- * in jiffies (albit scaled), it is nothing but the bits we will shift
- * off.
- */
-#define USEC_ROUND (u64)(((u64)1 << USEC_JIFFIE_SC) - 1)
 /*
  * The maximum jiffie value is (MAX_INT >> 1).  Here we translate that
  * into seconds.  The 64-bit case will overflow if we are not careful,
diff --git a/kernel/time.c b/kernel/time.c
index d3617dbd3dca..d21398e6da87 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -496,17 +496,20 @@ EXPORT_SYMBOL(usecs_to_jiffies);
  * that a remainder subtract here would not do the right thing as the
  * resolution values don't fall on second boundries.  I.e. the line:
  * nsec -= nsec % TICK_NSEC; is NOT a correct resolution rounding.
+ * Note that due to the small error in the multiplier here, this
+ * rounding is incorrect for sufficiently large values of tv_nsec, but
+ * well formed timespecs should have tv_nsec < NSEC_PER_SEC, so we're
+ * OK.
  *
  * Rather, we just shift the bits off the right.
  *
  * The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec
  * value to a scaled second value.
  */
-unsigned long
-timespec_to_jiffies(const struct timespec *value)
+static unsigned long
+__timespec_to_jiffies(unsigned long sec, long nsec)
 {
-	unsigned long sec = value->tv_sec;
-	long nsec = value->tv_nsec + TICK_NSEC - 1;
+	nsec = nsec + TICK_NSEC - 1;
 
 	if (sec >= MAX_SEC_IN_JIFFIES){
 		sec = MAX_SEC_IN_JIFFIES;
@@ -517,6 +520,13 @@ timespec_to_jiffies(const struct timespec *value)
 		 (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
 
 }
+
+unsigned long
+timespec_to_jiffies(const struct timespec *value)
+{
+	return __timespec_to_jiffies(value->tv_sec, value->tv_nsec);
+}
+
 EXPORT_SYMBOL(timespec_to_jiffies);
 
 void
@@ -533,31 +543,27 @@ jiffies_to_timespec(const unsigned long jiffies, struct timespec *value)
 }
 EXPORT_SYMBOL(jiffies_to_timespec);
 
-/* Same for "timeval"
+/*
+ * We could use a similar algorithm to timespec_to_jiffies (with a
+ * different multiplier for usec instead of nsec). But this has a
+ * problem with rounding: we can't exactly add TICK_NSEC - 1 to the
+ * usec value, since it's not necessarily integral.
  *
- * Well, almost.  The problem here is that the real system resolution is
- * in nanoseconds and the value being converted is in micro seconds.
- * Also for some machines (those that use HZ = 1024, in-particular),
- * there is a LARGE error in the tick size in microseconds.
-
- * The solution we use is to do the rounding AFTER we convert the
- * microsecond part.  Thus the USEC_ROUND, the bits to be shifted off.
- * Instruction wise, this should cost only an additional add with carry
- * instruction above the way it was done above.
+ * We could instead round in the intermediate scaled representation
+ * (i.e. in units of 1/2^(large scale) jiffies) but that's also
+ * perilous: the scaling introduces a small positive error, which
+ * combined with a division-rounding-upward (i.e. adding 2^(scale) - 1
+ * units to the intermediate before shifting) leads to accidental
+ * overflow and overestimates.
+ *
+ * At the cost of one additional multiplication by a constant, just
+ * use the timespec implementation.
  */
 unsigned long
 timeval_to_jiffies(const struct timeval *value)
 {
-	unsigned long sec = value->tv_sec;
-	long usec = value->tv_usec;
-
-	if (sec >= MAX_SEC_IN_JIFFIES){
-		sec = MAX_SEC_IN_JIFFIES;
-		usec = 0;
-	}
-	return (((u64)sec * SEC_CONVERSION) +
-		(((u64)usec * USEC_CONVERSION + USEC_ROUND) >>
-		 (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
+	return __timespec_to_jiffies(value->tv_sec,
+				     value->tv_usec * NSEC_PER_USEC);
 }
 EXPORT_SYMBOL(timeval_to_jiffies);
 

From 6353c97aa7c7dd6b0c3fe717eeacb39e3873259e Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Wed, 9 Jul 2014 21:18:32 +0200
Subject: [PATCH 0902/1185] drbd: fix regression 'out of mem, failed to invoke
 fence-peer helper'

commit bbc1c5e8ad6dfebf9d13b8a4ccdf66c92913eac9 upstream.

Since linux kernel 3.13, kthread_run() internally uses
wait_for_completion_killable().  We sometimes may use kthread_run()
while we still have a signal pending, which we used to kick our threads
out of potentially blocking network functions, causing kthread_run() to
mistake that as a new fatal signal and fail.

Fix: flush_signals() before kthread_run().

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/drbd/drbd_nl.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 9e3f441e7e84..9c37f3d896a2 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -514,6 +514,12 @@ void conn_try_outdate_peer_async(struct drbd_tconn *tconn)
 	struct task_struct *opa;
 
 	kref_get(&tconn->kref);
+	/* We may just have force_sig()'ed this thread
+	 * to get it out of some blocking network function.
+	 * Clear signals; otherwise kthread_run(), which internally uses
+	 * wait_on_completion_killable(), will mistake our pending signal
+	 * for a new fatal signal and fail. */
+	flush_signals(current);
 	opa = kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h");
 	if (IS_ERR(opa)) {
 		conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n");

From 7dd311128022551d7876b26b7193157883494cd3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 30 Jul 2014 14:55:26 +0200
Subject: [PATCH 0903/1185] nl80211: clear skb cb before passing to netlink

commit bd8c78e78d5011d8111bc2533ee73b13a3bd6c42 upstream.

In testmode and vendor command reply/event SKBs we use the
skb cb data to store nl80211 parameters between allocation
and sending. This causes the code for CONFIG_NETLINK_MMAP
to get confused, because it takes ownership of the skb cb
data when the SKB is handed off to netlink, and it doesn't
explicitly clear it.

Clear the skb cb explicitly when we're done and before it
gets passed to netlink to avoid this issue.

Reported-by: Assaf Azulay <assaf.azulay@intel.com>
Reported-by: David Spinadel <david.spinadel@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/wireless/nl80211.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 448c034184e2..62aebed7c6e2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6568,6 +6568,9 @@ int cfg80211_testmode_reply(struct sk_buff *skb)
 	void *hdr = ((void **)skb->cb)[1];
 	struct nlattr *data = ((void **)skb->cb)[2];
 
+	/* clear CB data for netlink core to own from now on */
+	memset(skb->cb, 0, sizeof(skb->cb));
+
 	if (WARN_ON(!rdev->testmode_info)) {
 		kfree_skb(skb);
 		return -EINVAL;
@@ -6594,6 +6597,9 @@ void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp)
 	void *hdr = ((void **)skb->cb)[1];
 	struct nlattr *data = ((void **)skb->cb)[2];
 
+	/* clear CB data for netlink core to own from now on */
+	memset(skb->cb, 0, sizeof(skb->cb));
+
 	nla_nest_end(skb, data);
 	genlmsg_end(skb, hdr);
 	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), skb, 0,

From 35c239149f6e5794da2285f30bdeb3b4dd4df3b6 Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@linux-m68k.org>
Date: Sat, 7 Sep 2013 18:35:08 +0200
Subject: [PATCH 0904/1185] cpufreq: Fix wrong time unit conversion

commit a857c0b9e24e39fe5be82451b65377795f9538d8 upstream.

The time spent by a CPU under a given frequency is stored in jiffies unit
in the cpu var cpufreq_stats_table->time_in_state[i], i being the index of
the frequency.

This is what is displayed in the following file on the right column:

     cat /sys/devices/system/cpu/cpuX/cpufreq/stats/time_in_state
     2301000 19835820
     2300000 3172
     [...]

Now cpufreq converts this jiffies unit delta to clock_t before returning it
to the user as in the above file. And that conversion is achieved using the API
cputime64_to_clock_t().

Although it accidentally works on traditional tick based cputime accounting, where
cputime_t maps directly to jiffies, it doesn't work with other types of cputime
accounting such as CONFIG_VIRT_CPU_ACCOUNTING_* where cputime_t can map to nsecs
or any granularity preferred by the architecture.

For example we get a buggy zero delta on full dyntick configurations:

     cat /sys/devices/system/cpu/cpuX/cpufreq/stats/time_in_state
     2301000 0
     2300000 0
     [...]

Fix this by using the proper jiffies_64_to_clock_t() conversion.

Reported-and-tested-by: Carsten Emde <C.Emde@osadl.org>
Signed-off-by: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cpufreq/cpufreq_stats.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index bfd6273fd873..7fb600239059 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -81,7 +81,7 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf)
 	for (i = 0; i < stat->state_num; i++) {
 		len += sprintf(buf + len, "%u %llu\n", stat->freq_table[i],
 			(unsigned long long)
-			cputime64_to_clock_t(stat->time_in_state[i]));
+			jiffies_64_to_clock_t(stat->time_in_state[i]));
 	}
 	return len;
 }

From bed5396573366682b2e07d79a08aefde1c5a8f52 Mon Sep 17 00:00:00 2001
From: Stratos Karafotis <stratosk@semaphore.gr>
Date: Wed, 5 Jun 2013 19:01:25 +0300
Subject: [PATCH 0905/1185] cpufreq: ondemand: Change the calculation of target
 frequency

commit dfa5bb622555d9da0df21b50f46ebdeef390041b upstream.

The ondemand governor calculates load in terms of frequency and
increases it only if load_freq is greater than up_threshold
multiplied by the current or average frequency.  This appears to
produce oscillations of frequency between min and max because,
for example, a relatively small load can easily saturate minimum
frequency and lead the CPU to the max.  Then, it will decrease
back to the min due to small load_freq.

Change the calculation method of load and target frequency on the
basis of the following two observations:

 - Load computation should not depend on the current or average
   measured frequency.  For example, absolute load of 80% at 100MHz
   is not necessarily equivalent to 8% at 1000MHz in the next
   sampling interval.

 - It should be possible to increase the target frequency to any
   value present in the frequency table proportional to the absolute
   load, rather than to the max only, so that:

   Target frequency = C * load

   where we take C = policy->cpuinfo.max_freq / 100.

Tested on Intel i7-3770 CPU @ 3.40GHz and on Quad core 1500MHz Krait.
Phoronix benchmark of Linux Kernel Compilation 3.1 test shows an
increase of ~1.5% in performance. cpufreq_stats (time_in_state) shows
that middle frequencies are used more, with this patch.  Highest
and lowest frequencies were used less by ~9%.

[rjw: We have run multiple other tests on kernels with this
 change applied and in the vast majority of cases it turns out
 that the resulting performance improvement also leads to reduced
 consumption of energy.  The change is additionally justified by
 the overall simplification of the code in question.]

Signed-off-by: Stratos Karafotis <stratosk@semaphore.gr>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cpufreq/cpufreq_governor.c | 10 +-------
 drivers/cpufreq/cpufreq_governor.h |  1 -
 drivers/cpufreq/cpufreq_ondemand.c | 39 ++++++------------------------
 3 files changed, 8 insertions(+), 42 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 28a0b32c73b3..27b0e2a295ea 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -97,7 +97,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 
 	policy = cdbs->cur_policy;
 
-	/* Get Absolute Load (in terms of freq for ondemand gov) */
+	/* Get Absolute Load */
 	for_each_cpu(j, policy->cpus) {
 		struct cpu_dbs_common_info *j_cdbs;
 		u64 cur_wall_time, cur_idle_time;
@@ -148,14 +148,6 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 
 		load = 100 * (wall_time - idle_time) / wall_time;
 
-		if (dbs_data->cdata->governor == GOV_ONDEMAND) {
-			int freq_avg = __cpufreq_driver_getavg(policy, j);
-			if (freq_avg <= 0)
-				freq_avg = policy->cur;
-
-			load *= freq_avg;
-		}
-
 		if (load > max_load)
 			max_load = load;
 	}
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 0d9e6befe1d5..4a9058aeb57e 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -169,7 +169,6 @@ struct od_dbs_tuners {
 	unsigned int sampling_rate;
 	unsigned int sampling_down_factor;
 	unsigned int up_threshold;
-	unsigned int adj_up_threshold;
 	unsigned int powersave_bias;
 	unsigned int io_is_busy;
 };
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index c087347d6688..25438bbf96bb 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -29,11 +29,9 @@
 #include "cpufreq_governor.h"
 
 /* On-demand governor macros */
-#define DEF_FREQUENCY_DOWN_DIFFERENTIAL		(10)
 #define DEF_FREQUENCY_UP_THRESHOLD		(80)
 #define DEF_SAMPLING_DOWN_FACTOR		(1)
 #define MAX_SAMPLING_DOWN_FACTOR		(100000)
-#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL	(3)
 #define MICRO_FREQUENCY_UP_THRESHOLD		(95)
 #define MICRO_FREQUENCY_MIN_SAMPLE_RATE		(10000)
 #define MIN_FREQUENCY_UP_THRESHOLD		(11)
@@ -161,14 +159,10 @@ static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
 
 /*
  * Every sampling_rate, we check, if current idle time is less than 20%
- * (default), then we try to increase frequency. Every sampling_rate, we look
- * for the lowest frequency which can sustain the load while keeping idle time
- * over 30%. If such a frequency exist, we try to decrease to this frequency.
- *
- * Any frequency increase takes it to the maximum frequency. Frequency reduction
- * happens at minimum steps of 5% (default) of current frequency
+ * (default), then we try to increase frequency. Else, we adjust the frequency
+ * proportional to load.
  */
-static void od_check_cpu(int cpu, unsigned int load_freq)
+static void od_check_cpu(int cpu, unsigned int load)
 {
 	struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
 	struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy;
@@ -178,29 +172,17 @@ static void od_check_cpu(int cpu, unsigned int load_freq)
 	dbs_info->freq_lo = 0;
 
 	/* Check for frequency increase */
-	if (load_freq > od_tuners->up_threshold * policy->cur) {
+	if (load > od_tuners->up_threshold) {
 		/* If switching to max speed, apply sampling_down_factor */
 		if (policy->cur < policy->max)
 			dbs_info->rate_mult =
 				od_tuners->sampling_down_factor;
 		dbs_freq_increase(policy, policy->max);
 		return;
-	}
-
-	/* Check for frequency decrease */
-	/* if we cannot reduce the frequency anymore, break out early */
-	if (policy->cur == policy->min)
-		return;
-
-	/*
-	 * The optimal frequency is the frequency that is the lowest that can
-	 * support the current CPU usage without triggering the up policy. To be
-	 * safe, we focus 10 points under the threshold.
-	 */
-	if (load_freq < od_tuners->adj_up_threshold
-			* policy->cur) {
+	} else {
+		/* Calculate the next frequency proportional to load */
 		unsigned int freq_next;
-		freq_next = load_freq / od_tuners->adj_up_threshold;
+		freq_next = load * policy->cpuinfo.max_freq / 100;
 
 		/* No longer fully busy, reset rate_mult */
 		dbs_info->rate_mult = 1;
@@ -374,9 +356,6 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
 			input < MIN_FREQUENCY_UP_THRESHOLD) {
 		return -EINVAL;
 	}
-	/* Calculate the new adj_up_threshold */
-	od_tuners->adj_up_threshold += input;
-	od_tuners->adj_up_threshold -= od_tuners->up_threshold;
 
 	od_tuners->up_threshold = input;
 	return count;
@@ -525,8 +504,6 @@ static int od_init(struct dbs_data *dbs_data)
 	if (idle_time != -1ULL) {
 		/* Idle micro accounting is supported. Use finer thresholds */
 		tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
-		tuners->adj_up_threshold = MICRO_FREQUENCY_UP_THRESHOLD -
-			MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
 		/*
 		 * In nohz/micro accounting case we set the minimum frequency
 		 * not depending on HZ, but fixed (very low). The deferred
@@ -535,8 +512,6 @@ static int od_init(struct dbs_data *dbs_data)
 		dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
 	} else {
 		tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
-		tuners->adj_up_threshold = DEF_FREQUENCY_UP_THRESHOLD -
-			DEF_FREQUENCY_DOWN_DIFFERENTIAL;
 
 		/* For correct statistics, we need 10 ticks for each measure */
 		dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO *

From f41c15f2c9a00489735036846ec7e474e52b14a6 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 9 Oct 2014 12:18:54 -0700
Subject: [PATCH 0906/1185] Linux 3.10.57

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 03bd927522f7..9df630a513b7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 10
-SUBLEVEL = 56
+SUBLEVEL = 57
 EXTRAVERSION =
 NAME = TOSSUG Baby Fish
 

From a9ba4285aa5722a3b4d84888e78ba8adc0046b28 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 10 Jun 2014 15:40:23 -0700
Subject: [PATCH 0907/1185] ARM: add seccomp syscall

Wires up the new seccomp syscall.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>

Conflicts:
	arch/arm/include/uapi/asm/unistd.h
	arch/arm/kernel/calls.S

Signed-off-by: Lee Campbell <leecam@chromium.org>
---
 arch/arm/include/asm/unistd.h      | 2 +-
 arch/arm/include/uapi/asm/unistd.h | 6 ++++++
 arch/arm/kernel/calls.S            | 5 +++++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 141baa3f9a72..acabef1a75df 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -15,7 +15,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define __NR_syscalls  (380)
+#define __NR_syscalls  (384)
 #define __ARM_NR_cmpxchg		(__ARM_NR_BASE+0x00fff0)
 
 #define __ARCH_WANT_STAT64
diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index af33b44990ed..17407c92c0da 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -406,6 +406,12 @@
 #define __NR_process_vm_writev		(__NR_SYSCALL_BASE+377)
 #define __NR_kcmp			(__NR_SYSCALL_BASE+378)
 #define __NR_finit_module		(__NR_SYSCALL_BASE+379)
+/* Reserve for later
+#define __NR_sched_setattr		(__NR_SYSCALL_BASE+380)
+#define __NR_sched_getattr		(__NR_SYSCALL_BASE+381)
+#define __NR_renameat2			(__NR_SYSCALL_BASE+382)
+*/
+#define __NR_seccomp			(__NR_SYSCALL_BASE+383)
 
 /*
  * This may need to be greater than __NR_last_syscall+1 in order to
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index c6ca7e376773..725f844926ea 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -389,6 +389,11 @@
 		CALL(sys_process_vm_writev)
 		CALL(sys_kcmp)
 		CALL(sys_finit_module)
+/* 380 */	CALL(sys_ni_syscall)		/* reserved sys_sched_setattr */
+		CALL(sys_ni_syscall)		/* reserved sys_sched_getattr */
+		CALL(sys_ni_syscall)		/* reserved sys_renameat2     */
+		CALL(sys_seccomp)
+
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted

From 41900903483eb96602dd72e719a798c208118aad Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 27 Jun 2014 17:01:47 +0100
Subject: [PATCH 0908/1185] ARM: 8087/1: ptrace: reload syscall number after
 secure_computing() check

On the syscall tracing path, we call out to secure_computing() to allow
seccomp to check the syscall number being attempted. As part of this, a
SIGTRAP may be sent to the tracer and the syscall could be re-written by
a subsequent SET_SYSCALL ptrace request. Unfortunately, this new syscall
is ignored by the current code unless TIF_SYSCALL_TRACE is also set on
the current thread.

This patch slightly reworks the enter path of the syscall tracing code
so that we always reload the syscall number from
current_thread_info()->syscall after the potential ptrace traps.

Acked-by: Kees Cook <keescook@chromium.org>
Tested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/ptrace.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeffd9f6d..394424b25254 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -916,7 +916,7 @@ enum ptrace_syscall_dir {
 	PTRACE_SYSCALL_EXIT,
 };
 
-static int tracehook_report_syscall(struct pt_regs *regs,
+static void tracehook_report_syscall(struct pt_regs *regs,
 				    enum ptrace_syscall_dir dir)
 {
 	unsigned long ip;
@@ -934,7 +934,6 @@ static int tracehook_report_syscall(struct pt_regs *regs,
 		current_thread_info()->syscall = -1;
 
 	regs->ARM_ip = ip;
-	return current_thread_info()->syscall;
 }
 
 asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
@@ -946,7 +945,9 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
 		return -1;
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
-		scno = tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
+		tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
+
+	scno = current_thread_info()->syscall;
 
 	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
 		trace_sys_enter(regs, scno);

From 5d71177430d317a321c39f6183853ee46616a9dc Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Tue, 12 Aug 2014 10:35:19 +0200
Subject: [PATCH 0909/1185] myri10ge: check for DMA mapping errors

[ Upstream commit 10545937e866ccdbb7ab583031dbdcc6b14e4eb4 ]

On IOMMU systems DMA mapping can fail, so we need to check for
that possibility.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../net/ethernet/myricom/myri10ge/myri10ge.c  | 88 ++++++++++++-------
 1 file changed, 58 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 7be9788ed0f6..4fb93c5b5563 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -856,6 +856,10 @@ static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
 		return -ENOMEM;
 	dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE,
 				   DMA_BIDIRECTIONAL);
+	if (unlikely(pci_dma_mapping_error(mgp->pdev, dmatest_bus))) {
+		__free_page(dmatest_page);
+		return -ENOMEM;
+	}
 
 	/* Run a small DMA test.
 	 * The magic multipliers to the length tell the firmware
@@ -1191,6 +1195,7 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
 			int bytes, int watchdog)
 {
 	struct page *page;
+	dma_addr_t bus;
 	int idx;
 #if MYRI10GE_ALLOC_SIZE > 4096
 	int end_offset;
@@ -1215,11 +1220,21 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
 					rx->watchdog_needed = 1;
 				return;
 			}
+
+			bus = pci_map_page(mgp->pdev, page, 0,
+					   MYRI10GE_ALLOC_SIZE,
+					   PCI_DMA_FROMDEVICE);
+			if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
+				__free_pages(page, MYRI10GE_ALLOC_ORDER);
+				if (rx->fill_cnt - rx->cnt < 16)
+					rx->watchdog_needed = 1;
+				return;
+			}
+
 			rx->page = page;
 			rx->page_offset = 0;
-			rx->bus = pci_map_page(mgp->pdev, page, 0,
-					       MYRI10GE_ALLOC_SIZE,
-					       PCI_DMA_FROMDEVICE);
+			rx->bus = bus;
+
 		}
 		rx->info[idx].page = rx->page;
 		rx->info[idx].page_offset = rx->page_offset;
@@ -2576,6 +2591,35 @@ myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src,
 	mb();
 }
 
+static void myri10ge_unmap_tx_dma(struct myri10ge_priv *mgp,
+				  struct myri10ge_tx_buf *tx, int idx)
+{
+	unsigned int len;
+	int last_idx;
+
+	/* Free any DMA resources we've alloced and clear out the skb slot */
+	last_idx = (idx + 1) & tx->mask;
+	idx = tx->req & tx->mask;
+	do {
+		len = dma_unmap_len(&tx->info[idx], len);
+		if (len) {
+			if (tx->info[idx].skb != NULL)
+				pci_unmap_single(mgp->pdev,
+						 dma_unmap_addr(&tx->info[idx],
+								bus), len,
+						 PCI_DMA_TODEVICE);
+			else
+				pci_unmap_page(mgp->pdev,
+					       dma_unmap_addr(&tx->info[idx],
+							      bus), len,
+					       PCI_DMA_TODEVICE);
+			dma_unmap_len_set(&tx->info[idx], len, 0);
+			tx->info[idx].skb = NULL;
+		}
+		idx = (idx + 1) & tx->mask;
+	} while (idx != last_idx);
+}
+
 /*
  * Transmit a packet.  We need to split the packet so that a single
  * segment does not cross myri10ge->tx_boundary, so this makes segment
@@ -2599,7 +2643,7 @@ static netdev_tx_t myri10ge_xmit(struct sk_buff *skb,
 	u32 low;
 	__be32 high_swapped;
 	unsigned int len;
-	int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
+	int idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
 	u16 pseudo_hdr_offset, cksum_offset, queue;
 	int cum_len, seglen, boundary, rdma_count;
 	u8 flags, odd_flag;
@@ -2696,9 +2740,12 @@ static netdev_tx_t myri10ge_xmit(struct sk_buff *skb,
 
 	/* map the skb for DMA */
 	len = skb_headlen(skb);
+	bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
+	if (unlikely(pci_dma_mapping_error(mgp->pdev, bus)))
+		goto drop;
+
 	idx = tx->req & tx->mask;
 	tx->info[idx].skb = skb;
-	bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
 	dma_unmap_addr_set(&tx->info[idx], bus, bus);
 	dma_unmap_len_set(&tx->info[idx], len, len);
 
@@ -2797,12 +2844,16 @@ static netdev_tx_t myri10ge_xmit(struct sk_buff *skb,
 			break;
 
 		/* map next fragment for DMA */
-		idx = (count + tx->req) & tx->mask;
 		frag = &skb_shinfo(skb)->frags[frag_idx];
 		frag_idx++;
 		len = skb_frag_size(frag);
 		bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len,
 				       DMA_TO_DEVICE);
+		if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
+			myri10ge_unmap_tx_dma(mgp, tx, idx);
+			goto drop;
+		}
+		idx = (count + tx->req) & tx->mask;
 		dma_unmap_addr_set(&tx->info[idx], bus, bus);
 		dma_unmap_len_set(&tx->info[idx], len, len);
 	}
@@ -2833,31 +2884,8 @@ static netdev_tx_t myri10ge_xmit(struct sk_buff *skb,
 	return NETDEV_TX_OK;
 
 abort_linearize:
-	/* Free any DMA resources we've alloced and clear out the skb
-	 * slot so as to not trip up assertions, and to avoid a
-	 * double-free if linearizing fails */
+	myri10ge_unmap_tx_dma(mgp, tx, idx);
 
-	last_idx = (idx + 1) & tx->mask;
-	idx = tx->req & tx->mask;
-	tx->info[idx].skb = NULL;
-	do {
-		len = dma_unmap_len(&tx->info[idx], len);
-		if (len) {
-			if (tx->info[idx].skb != NULL)
-				pci_unmap_single(mgp->pdev,
-						 dma_unmap_addr(&tx->info[idx],
-								bus), len,
-						 PCI_DMA_TODEVICE);
-			else
-				pci_unmap_page(mgp->pdev,
-					       dma_unmap_addr(&tx->info[idx],
-							      bus), len,
-					       PCI_DMA_TODEVICE);
-			dma_unmap_len_set(&tx->info[idx], len, 0);
-			tx->info[idx].skb = NULL;
-		}
-		idx = (idx + 1) & tx->mask;
-	} while (idx != last_idx);
 	if (skb_is_gso(skb)) {
 		netdev_err(mgp->dev, "TSO but wanted to linearize?!?!?\n");
 		goto drop;

From f2c58cc43693776826659b9840e39f0843b14016 Mon Sep 17 00:00:00 2001
From: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Date: Thu, 14 Aug 2014 15:27:20 +0300
Subject: [PATCH 0910/1185] sit: Fix ipip6_tunnel_lookup device matching
 criteria

[ Upstream commit bc8fc7b8f825ef17a0fb9e68c18ce94fa66ab337 ]

As of 4fddbf5d78 ("sit: strictly restrict incoming traffic to tunnel link device"),
when looking up a tunnel, tunnel's underlying interface (t->parms.link)
is verified to match incoming traffic's ingress device.

However the comparison was incorrectly based on skb->dev->iflink.

Instead, dev->ifindex should be used, which correctly represents the
interface from which the IP stack hands the ipip6 packets.

This allows setting up sit tunnels bound to vlan interfaces (otherwise
incoming ipip6 traffic on the vlan interface was dropped due to
ipip6_tunnel_lookup match failure).

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/sit.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8d22460a811b..4ddf67c6355b 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -101,19 +101,19 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
 	for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
 		if (local == t->parms.iph.saddr &&
 		    remote == t->parms.iph.daddr &&
-		    (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+		    (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
 		    (t->dev->flags & IFF_UP))
 			return t;
 	}
 	for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
 		if (remote == t->parms.iph.daddr &&
-		    (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+		    (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
 		    (t->dev->flags & IFF_UP))
 			return t;
 	}
 	for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
 		if (local == t->parms.iph.saddr &&
-		    (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+		    (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
 		    (t->dev->flags & IFF_UP))
 			return t;
 	}

From 5f80f4d8203911275c29edb54bd2e94826b21487 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Thu, 14 Aug 2014 12:40:05 -0400
Subject: [PATCH 0911/1185] tcp: fix tcp_release_cb() to dispatch via address
 family for mtu_reduced()

[ Upstream commit 4fab9071950c2021d846e18351e0f46a1cffd67b ]

Make sure we use the correct address-family-specific function for
handling MTU reductions from within tcp_release_cb().

Previously AF_INET6 sockets were incorrectly always using the IPv6
code path when sometimes they were handling IPv4 traffic and thus had
an IPv4 dst.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Diagnosed-by: Willem de Bruijn <willemb@google.com>
Fixes: 563d34d057862 ("tcp: dont drop MTU reduction indications")
Reviewed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/inet_connection_sock.h | 1 +
 include/net/sock.h                 | 1 -
 include/net/tcp.h                  | 1 +
 net/ipv4/tcp_ipv4.c                | 5 +++--
 net/ipv4/tcp_output.c              | 2 +-
 net/ipv6/tcp_ipv6.c                | 3 ++-
 6 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index de2c78529afa..0a8f6f961baa 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -62,6 +62,7 @@ struct inet_connection_sock_af_ops {
 	void	    (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
 	int	    (*bind_conflict)(const struct sock *sk,
 				     const struct inet_bind_bucket *tb, bool relax);
+	void	    (*mtu_reduced)(struct sock *sk);
 };
 
 /** inet_connection_sock - INET connection oriented sock
diff --git a/include/net/sock.h b/include/net/sock.h
index 26b15c0780be..c0aad07160ef 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -932,7 +932,6 @@ struct proto {
 						struct sk_buff *skb);
 
 	void		(*release_cb)(struct sock *sk);
-	void		(*mtu_reduced)(struct sock *sk);
 
 	/* Keeping track of sk's, looking them up, and port selection methods. */
 	void			(*hash)(struct sock *sk);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6f87f0873843..29a1a63cd303 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -460,6 +460,7 @@ extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
  */
 
 extern void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
+void tcp_v4_mtu_reduced(struct sock *sk);
 extern int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
 extern struct sock * tcp_create_openreq_child(struct sock *sk,
 					      struct request_sock *req,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5d87806d3ade..e025c1c788a1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -268,7 +268,7 @@ EXPORT_SYMBOL(tcp_v4_connect);
  * It can be called through tcp_release_cb() if socket was owned by user
  * at the time tcp_v4_err() was called to handle ICMP message.
  */
-static void tcp_v4_mtu_reduced(struct sock *sk)
+void tcp_v4_mtu_reduced(struct sock *sk)
 {
 	struct dst_entry *dst;
 	struct inet_sock *inet = inet_sk(sk);
@@ -298,6 +298,7 @@ static void tcp_v4_mtu_reduced(struct sock *sk)
 		tcp_simple_retransmit(sk);
 	} /* else let the usual retransmit timer handle it */
 }
+EXPORT_SYMBOL(tcp_v4_mtu_reduced);
 
 static void do_redirect(struct sk_buff *skb, struct sock *sk)
 {
@@ -2142,6 +2143,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
 	.compat_setsockopt = compat_ip_setsockopt,
 	.compat_getsockopt = compat_ip_getsockopt,
 #endif
+	.mtu_reduced	   = tcp_v4_mtu_reduced,
 };
 EXPORT_SYMBOL(ipv4_specific);
 
@@ -2867,7 +2869,6 @@ struct proto tcp_prot = {
 	.sendpage		= tcp_sendpage,
 	.backlog_rcv		= tcp_v4_do_rcv,
 	.release_cb		= tcp_release_cb,
-	.mtu_reduced		= tcp_v4_mtu_reduced,
 	.hash			= inet_hash,
 	.unhash			= inet_unhash,
 	.get_port		= inet_csk_get_port,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 56e29f0e230e..62aff23d19d1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -775,7 +775,7 @@ void tcp_release_cb(struct sock *sk)
 		__sock_put(sk);
 	}
 	if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
-		sk->sk_prot->mtu_reduced(sk);
+		inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
 		__sock_put(sk);
 	}
 }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 66c718854e5a..1a87659a6139 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1651,6 +1651,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
 	.compat_setsockopt = compat_ipv6_setsockopt,
 	.compat_getsockopt = compat_ipv6_getsockopt,
 #endif
+	.mtu_reduced	   = tcp_v6_mtu_reduced,
 };
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -1682,6 +1683,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
 	.compat_setsockopt = compat_ipv6_setsockopt,
 	.compat_getsockopt = compat_ipv6_getsockopt,
 #endif
+	.mtu_reduced	   = tcp_v4_mtu_reduced,
 };
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -1919,7 +1921,6 @@ struct proto tcpv6_prot = {
 	.sendpage		= tcp_sendpage,
 	.backlog_rcv		= tcp_v6_do_rcv,
 	.release_cb		= tcp_release_cb,
-	.mtu_reduced		= tcp_v6_mtu_reduced,
 	.hash			= tcp_v6_hash,
 	.unhash			= inet_unhash,
 	.get_port		= inet_csk_get_port,

From 4035ed7bbef045a41c9f2876e78d9b6fb687ac3a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 15 Aug 2014 09:16:04 -0700
Subject: [PATCH 0912/1185] packet: handle too big packets for PACKET_V3

[ Upstream commit dc808110bb62b64a448696ecac3938902c92e1ab ]

af_packet can currently overwrite kernel memory by out of bound
accesses, because it assumed a [new] block can always hold one frame.

This is not generally the case, even if most existing tools do it right.

This patch clamps too long frames as API permits, and issues a one time
error on syslog.

[  394.357639] tpacket_rcv: packet too big, clamped from 5042 to 3966. macoff=82

In this example, packet header tp_snaplen was set to 3966,
and tp_len was set to 5042 (skb->len)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.")
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c | 17 +++++++++++++++++
 net/packet/internal.h  |  1 +
 2 files changed, 18 insertions(+)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e8b5a0dfca21..81b4b816f131 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -565,6 +565,7 @@ static void init_prb_bdqc(struct packet_sock *po,
 	p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
 	p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
 
+	p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
 	prb_init_ft_ops(p1, req_u);
 	prb_setup_retire_blk_timer(po, tx_ring);
 	prb_open_block(p1, pbd);
@@ -1803,6 +1804,18 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 			if ((int)snaplen < 0)
 				snaplen = 0;
 		}
+	} else if (unlikely(macoff + snaplen >
+			    GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
+		u32 nval;
+
+		nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff;
+		pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n",
+			    snaplen, nval, macoff);
+		snaplen = nval;
+		if (unlikely((int)snaplen < 0)) {
+			snaplen = 0;
+			macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
+		}
 	}
 	spin_lock(&sk->sk_receive_queue.lock);
 	h.raw = packet_current_rx_frame(po, skb,
@@ -3642,6 +3655,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 			goto out;
 		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
 			goto out;
+		if (po->tp_version >= TPACKET_V3 &&
+		    (int)(req->tp_block_size -
+			  BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0)
+			goto out;
 		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
 					po->tp_reserve))
 			goto out;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 1035fa2d909c..ca086c0c2c08 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -29,6 +29,7 @@ struct tpacket_kbdq_core {
 	char		*pkblk_start;
 	char		*pkblk_end;
 	int		kblk_size;
+	unsigned int	max_frame_len;
 	unsigned int	knum_blocks;
 	uint64_t	knxt_seq_num;
 	char		*prev;

From 522ad79b7fd042f7f735bf1826f2e5b564d8f015 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 21 Aug 2014 21:33:44 +0200
Subject: [PATCH 0913/1185] openvswitch: fix panic with multiple vlan headers

[ Upstream commit 2ba5af42a7b59ef01f9081234d8855140738defd ]

When there are multiple vlan headers present in a received frame, the first
one is put into vlan_tci and protocol is set to ETH_P_8021Q. Anything in the
skb beyond the VLAN TPID may be still non-linear, including the inner TCI
and ethertype. While ovs_flow_extract takes care of IP and IPv6 headers, it
does nothing with ETH_P_8021Q. Later, if OVS_ACTION_ATTR_POP_VLAN is
executed, __pop_vlan_tci pulls the next vlan header into vlan_tci.

This leads to two things:

1. Part of the resulting ethernet header is in the non-linear part of the
   skb. When eth_type_trans is called later as the result of
   OVS_ACTION_ATTR_OUTPUT, kernel BUGs in __skb_pull. Also, __pop_vlan_tci
   is in fact accessing random data when it reads past the TPID.

2. network_header points into the ethernet header instead of behind it.
   mac_len is set to a wrong value (10), too.

Reported-by: Yulong Pei <ypei@redhat.com>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/openvswitch/actions.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 894b6cbdd929..c4779ca59032 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -40,6 +40,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 
 static int make_writable(struct sk_buff *skb, int write_len)
 {
+	if (!pskb_may_pull(skb, write_len))
+		return -ENOMEM;
+
 	if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
 		return 0;
 
@@ -68,6 +71,8 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
 
 	vlan_set_encap_proto(skb, vhdr);
 	skb->mac_header += VLAN_HLEN;
+	if (skb_network_offset(skb) < ETH_HLEN)
+		skb_set_network_header(skb, ETH_HLEN);
 	skb_reset_mac_len(skb);
 
 	return 0;

From 696c5d5f340f57bf1085bc7cc95937d2349988cb Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Wed, 3 Sep 2014 14:12:55 +0200
Subject: [PATCH 0914/1185] l2tp: fix race while getting PMTU on PPP
 pseudo-wire

[ Upstream commit eed4d839b0cdf9d84b0a9bc63de90fd5e1e886fb ]

Use dst_entry held by sk_dst_get() to retrieve tunnel's PMTU.

The dst_mtu(__sk_dst_get(tunnel->sock)) call was racy. __sk_dst_get()
could return NULL if tunnel->sock->sk_dst_cache was reset just before the
call, thus making dst_mtu() dereference a NULL pointer:

[ 1937.661598] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
[ 1937.664005] IP: [<ffffffffa049db88>] pppol2tp_connect+0x33d/0x41e [l2tp_ppp]
[ 1937.664005] PGD daf0c067 PUD d9f93067 PMD 0
[ 1937.664005] Oops: 0000 [#1] SMP
[ 1937.664005] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core ip6table_filter ip6_tables iptable_filter ip_tables ebtable_nat ebtables x_tables udp_tunnel pppoe pppox ppp_generic slhc deflate ctr twofish_generic twofish_x86_64_3way xts lrw gf128mul glue_helper twofish_x86_64 twofish_common blowfish_generic blowfish_x86_64 blowfish_common des_generic cbc xcbc rmd160 sha512_generic hmac crypto_null af_key xfrm_algo 8021q garp bridge stp llc tun atmtcp clip atm ext3 mbcache jbd iTCO_wdt coretemp kvm_intel iTCO_vendor_support kvm pcspkr evdev ehci_pci lpc_ich mfd_core i5400_edac edac_core i5k_amb shpchp button processor thermal_sys xfs crc32c_generic libcrc32c dm_mod usbhid sg hid sr_mod sd_mod cdrom crc_t10dif crct10dif_common ata_generic ahci ata_piix tg3 libahci libata uhci_hcd ptp ehci_hcd pps_core usbcore scsi_mod libphy usb_common [last unloaded: l2tp_core]
[ 1937.664005] CPU: 0 PID: 10022 Comm: l2tpstress Tainted: G           O   3.17.0-rc1 #1
[ 1937.664005] Hardware name: HP ProLiant DL160 G5, BIOS O12 08/22/2008
[ 1937.664005] task: ffff8800d8fda790 ti: ffff8800c43c4000 task.ti: ffff8800c43c4000
[ 1937.664005] RIP: 0010:[<ffffffffa049db88>]  [<ffffffffa049db88>] pppol2tp_connect+0x33d/0x41e [l2tp_ppp]
[ 1937.664005] RSP: 0018:ffff8800c43c7de8  EFLAGS: 00010282
[ 1937.664005] RAX: ffff8800da8a7240 RBX: ffff8800d8c64600 RCX: 000001c325a137b5
[ 1937.664005] RDX: 8c6318c6318c6320 RSI: 000000000000010c RDI: 0000000000000000
[ 1937.664005] RBP: ffff8800c43c7ea8 R08: 0000000000000000 R09: 0000000000000000
[ 1937.664005] R10: ffffffffa048e2c0 R11: ffff8800d8c64600 R12: ffff8800ca7a5000
[ 1937.664005] R13: ffff8800c439bf40 R14: 000000000000000c R15: 0000000000000009
[ 1937.664005] FS:  00007fd7f610f700(0000) GS:ffff88011a600000(0000) knlGS:0000000000000000
[ 1937.664005] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 1937.664005] CR2: 0000000000000020 CR3: 00000000d9d75000 CR4: 00000000000027e0
[ 1937.664005] Stack:
[ 1937.664005]  ffffffffa049da80 ffff8800d8fda790 000000000000005b ffff880000000009
[ 1937.664005]  ffff8800daf3f200 0000000000000003 ffff8800c43c7e48 ffffffff81109b57
[ 1937.664005]  ffffffff81109b0e ffffffff8114c566 0000000000000000 0000000000000000
[ 1937.664005] Call Trace:
[ 1937.664005]  [<ffffffffa049da80>] ? pppol2tp_connect+0x235/0x41e [l2tp_ppp]
[ 1937.664005]  [<ffffffff81109b57>] ? might_fault+0x9e/0xa5
[ 1937.664005]  [<ffffffff81109b0e>] ? might_fault+0x55/0xa5
[ 1937.664005]  [<ffffffff8114c566>] ? rcu_read_unlock+0x1c/0x26
[ 1937.664005]  [<ffffffff81309196>] SYSC_connect+0x87/0xb1
[ 1937.664005]  [<ffffffff813e56f7>] ? sysret_check+0x1b/0x56
[ 1937.664005]  [<ffffffff8107590d>] ? trace_hardirqs_on_caller+0x145/0x1a1
[ 1937.664005]  [<ffffffff81213dee>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[ 1937.664005]  [<ffffffff8114c262>] ? spin_lock+0x9/0xb
[ 1937.664005]  [<ffffffff813092b4>] SyS_connect+0x9/0xb
[ 1937.664005]  [<ffffffff813e56d2>] system_call_fastpath+0x16/0x1b
[ 1937.664005] Code: 10 2a 84 81 e8 65 76 bd e0 65 ff 0c 25 10 bb 00 00 4d 85 ed 74 37 48 8b 85 60 ff ff ff 48 8b 80 88 01 00 00 48 8b b8 10 02 00 00 <48> 8b 47 20 ff 50 20 85 c0 74 0f 83 e8 28 89 83 10 01 00 00 89
[ 1937.664005] RIP  [<ffffffffa049db88>] pppol2tp_connect+0x33d/0x41e [l2tp_ppp]
[ 1937.664005]  RSP <ffff8800c43c7de8>
[ 1937.664005] CR2: 0000000000000020
[ 1939.559375] ---[ end trace 82d44500f28f8708 ]---

Fixes: f34c4a35d879 ("l2tp: take PMTU from tunnel UDP socket")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/l2tp/l2tp_ppp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 164fa9dcd97d..c3ae2411650c 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -756,7 +756,8 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	/* If PMTU discovery was enabled, use the MTU that was discovered */
 	dst = sk_dst_get(tunnel->sock);
 	if (dst != NULL) {
-		u32 pmtu = dst_mtu(__sk_dst_get(tunnel->sock));
+		u32 pmtu = dst_mtu(dst);
+
 		if (pmtu != 0)
 			session->mtu = session->mru = pmtu -
 				PPPOL2TP_HEADER_OVERHEAD;

From 654850db1159769fc8a1c3c26ee07cd25abbddb0 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Thu, 18 Sep 2014 10:31:17 -0400
Subject: [PATCH 0915/1185] tg3: Work around HW/FW limitations with vlan
 encapsulated frames

[ Upstream commit 476c18850c6cbaa3f2bb661ae9710645081563b9 ]

TG3 appears to have an issue performing TSO and checksum offloading
correctly when the frame has been vlan encapsulated (non-accelerated).
In these cases, tcp checksum is not correctly updated.

This patch attempts to work around this issue.  After the patch,
802.1ad vlans start working correctly over tg3 devices.

CC: Prashant Sreedharan <prashant@broadcom.com>
CC: Michael Chan <mchan@broadcom.com>
Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/tg3.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 4942ddf9c8ae..a0e1901c4ac4 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -7759,8 +7759,6 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	entry = tnapi->tx_prod;
 	base_flags = 0;
-	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		base_flags |= TXD_FLAG_TCPUDP_CSUM;
 
 	mss = skb_shinfo(skb)->gso_size;
 	if (mss) {
@@ -7776,6 +7774,13 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb) - ETH_HLEN;
 
+		/* HW/FW can not correctly segment packets that have been
+		 * vlan encapsulated.
+		 */
+		if (skb->protocol == htons(ETH_P_8021Q) ||
+		    skb->protocol == htons(ETH_P_8021AD))
+			return tg3_tso_bug(tp, skb);
+
 		if (!skb_is_gso_v6(skb)) {
 			iph->check = 0;
 			iph->tot_len = htons(mss + hdr_len);
@@ -7822,6 +7827,17 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 				base_flags |= tsflags << 12;
 			}
 		}
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		/* HW/FW can not correctly checksum packets that have been
+		 * vlan encapsulated.
+		 */
+		if (skb->protocol == htons(ETH_P_8021Q) ||
+		    skb->protocol == htons(ETH_P_8021AD)) {
+			if (skb_checksum_help(skb))
+				goto drop;
+		} else  {
+			base_flags |= TXD_FLAG_TCPUDP_CSUM;
+		}
 	}
 
 	if (tg3_flag(tp, USE_JUMBO_BDFLAG) &&

From 58345c2e8626ac1ee93723c771f9f11ecdf125ba Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Tue, 30 Sep 2014 19:39:36 -0400
Subject: [PATCH 0916/1185] tg3: Allow for recieve of full-size 8021AD frames

[ Upstream commit 7d3083ee36b51e425b6abd76778a2046906b0fd3 ]

When receiving a vlan-tagged frame that still contains
a vlan header, the length of the packet will be greater
than MTU+ETH_HLEN since it will account for the extra
vlan header.  TG3 checks this for the case for 802.1Q,
but not for 802.1ad.  As a result, full sized 802.1ad
frames get dropped by the card.

Add a check for 802.1ad protocol when receiving full
sized frames.

Suggested-by: Prashant Sreedharan <prashant@broadcom.com>
CC: Prashant Sreedharan <prashant@broadcom.com>
CC: Michael Chan <mchan@broadcom.com>
Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/tg3.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index a0e1901c4ac4..3de4069f020e 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6767,7 +6767,8 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
 		skb->protocol = eth_type_trans(skb, tp->dev);
 
 		if (len > (tp->dev->mtu + ETH_HLEN) &&
-		    skb->protocol != htons(ETH_P_8021Q)) {
+		    skb->protocol != htons(ETH_P_8021Q) &&
+		    skb->protocol != htons(ETH_P_8021AD)) {
 			dev_kfree_skb(skb);
 			goto drop_it_no_recycle;
 		}

From f9aceca39d40c373f5f1a0866059f8347101a3d5 Mon Sep 17 00:00:00 2001
From: KY Srinivasan <kys@microsoft.com>
Date: Sun, 28 Sep 2014 22:16:43 -0700
Subject: [PATCH 0917/1185] hyperv: Fix a bug in netvsc_start_xmit()

[ Upstream commit dedb845ded56ded1c62f5398a94ffa8615d4592d ]

After the packet is successfully sent, we should not touch the skb
as it may have been freed. This patch is based on the work done by
Long Li <longli@microsoft.com>.

In this version of the patch I have fixed issues pointed out by David.
David, please queue this up for stable.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Tested-by: Long Li <longli@microsoft.com>
Tested-by: Sitsofe Wheeler <sitsofe@yahoo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/hyperv/netvsc_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index aea78fc2e48f..59e9c56e5b8a 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -138,6 +138,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 	struct hv_netvsc_packet *packet;
 	int ret;
 	unsigned int i, num_pages, npg_data;
+	u32 skb_length = skb->len;
 
 	/* Add multipages for skb->data and additional 2 for RNDIS */
 	npg_data = (((unsigned long)skb->data + skb_headlen(skb) - 1)
@@ -208,7 +209,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 	ret = rndis_filter_send(net_device_ctx->device_ctx,
 				  packet);
 	if (ret == 0) {
-		net->stats.tx_bytes += skb->len;
+		net->stats.tx_bytes += skb_length;
 		net->stats.tx_packets++;
 	} else {
 		kfree(packet);

From ae3b8ed5dadbb278498090b18ee70e6211ef2763 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 2 Oct 2014 18:26:49 +0200
Subject: [PATCH 0918/1185] ip6_gre: fix flowi6_proto value in xmit path

[ Upstream commit 3be07244b7337760a3269d56b2f4a63e72218648 ]

In xmit path, we build a flowi6 which will be used for the output route lookup.
We are sending a GRE packet, neither IPv4 nor IPv6 encapsulated packet, thus the
protocol should be IPPROTO_GRE.

Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
Reported-by: Matthieu Ternisien d'Ouville <matthieu.tdo@6wind.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/ip6_gre.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 7dca7c43fdf1..250a73e77f57 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -787,7 +787,7 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
 		encap_limit = t->parms.encap_limit;
 
 	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-	fl6.flowi6_proto = IPPROTO_IPIP;
+	fl6.flowi6_proto = IPPROTO_GRE;
 
 	dsfield = ipv4_get_dsfield(iph);
 
@@ -837,7 +837,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
 		encap_limit = t->parms.encap_limit;
 
 	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-	fl6.flowi6_proto = IPPROTO_IPV6;
+	fl6.flowi6_proto = IPPROTO_GRE;
 
 	dsfield = ipv6_get_dsfield(ipv6h);
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)

From 2d435f096dd8618919b86c3575aeb0815bf799b4 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Fri, 3 Oct 2014 18:16:20 -0400
Subject: [PATCH 0919/1185] sctp: handle association restarts when the socket
 is closed.

[ Upstream commit bdf6fa52f01b941d4a80372d56de465bdbbd1d23 ]

Currently association restarts do not take into consideration the
state of the socket.  When a restart happens, the current association
simply transitions into established state.  This creates a condition
where a remote system, through the restart procedure, may create a
local association that is in no way reachable by the user.  The conditions
to trigger this are as follows:
  1) Remote does not acknowledge some data causing data to remain
     outstanding.
  2) Local application calls close() on the socket.  Since data
     is still outstanding, the association is placed in SHUTDOWN_PENDING
     state.  However, the socket is closed.
  3) The remote tries to create a new association, triggering a restart
     on the local system.  The association moves from SHUTDOWN_PENDING
     to ESTABLISHED.  At this point, it is no longer reachable by
     any socket on the local system.

This patch addresses the above situation by moving the newly ESTABLISHED
association into SHUTDOWN-SENT state and bundling a SHUTDOWN after
the COOKIE-ACK chunk.  This way, the restarted association immediately
enters the shutdown procedure and forces the termination of the
unreachable association.

Reported-by: David Laight <David.Laight@aculab.com>
Signed-off-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/sctp/command.h |  2 +-
 net/sctp/sm_statefuns.c    | 19 ++++++++++++++++---
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index 35247271e557..5f39c1cc0766 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -118,7 +118,7 @@ typedef enum {
  * analysis of the state functions, but in reality just taken from
  * thin air in the hopes othat we don't trigger a kernel panic.
  */
-#define SCTP_MAX_NUM_COMMANDS 14
+#define SCTP_MAX_NUM_COMMANDS 20
 
 typedef union {
 	__s32 i32;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 6eb26403de6a..edc204b05c82 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1782,9 +1782,22 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
 	/* Update the content of current association. */
 	sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
 	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
-	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
-			SCTP_STATE(SCTP_STATE_ESTABLISHED));
-	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+	if (sctp_state(asoc, SHUTDOWN_PENDING) &&
+	    (sctp_sstate(asoc->base.sk, CLOSING) ||
+	     sock_flag(asoc->base.sk, SOCK_DEAD))) {
+		/* if were currently in SHUTDOWN_PENDING, but the socket
+		 * has been closed by user, don't transition to ESTABLISHED.
+		 * Instead trigger SHUTDOWN bundled with COOKIE_ACK.
+		 */
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+		return sctp_sf_do_9_2_start_shutdown(net, ep, asoc,
+						     SCTP_ST_CHUNK(0), NULL,
+						     commands);
+	} else {
+		sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+				SCTP_STATE(SCTP_STATE_ESTABLISHED));
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+	}
 	return SCTP_DISPOSITION_CONSUME;
 
 nomem_ev:

From dc5a170002bd45394f1a483e7af6d91b01e91dca Mon Sep 17 00:00:00 2001
From: Per Hurtig <per.hurtig@kau.se>
Date: Thu, 12 Jun 2014 17:08:32 +0200
Subject: [PATCH 0920/1185] tcp: fixing TLP's FIN recovery

[ Upstream commit bef1909ee3ed1ca39231b260a8d3b4544ecd0c8f ]

Fix to a problem observed when losing a FIN segment that does not
contain data.  In such situations, TLP is unable to recover from
*any* tail loss and instead adds at least PTO ms to the
retransmission process, i.e., RTO = RTO + PTO.

Signed-off-by: Per Hurtig <per.hurtig@kau.se>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Nandita Dukkipati <nanditad@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_output.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 62aff23d19d1..11ef25c9cf43 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2036,9 +2036,7 @@ void tcp_send_loss_probe(struct sock *sk)
 	if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
 		goto rearm_timer;
 
-	/* Probe with zero data doesn't trigger fast recovery. */
-	if (skb->len > 0)
-		err = __tcp_retransmit_skb(sk, skb);
+	err = __tcp_retransmit_skb(sk, skb);
 
 	/* Record snd_nxt for loss detection. */
 	if (likely(!err))

From 8eb99ef81326fae50f26974e33fc1a7c07cb00f5 Mon Sep 17 00:00:00 2001
From: Gao feng <gaofeng@cn.fujitsu.com>
Date: Fri, 24 Jan 2014 16:29:11 +0800
Subject: [PATCH 0921/1185] ipv6: reallocate addrconf router for ipv6 address
 when lo device up

[ Upstream commit 33d99113b1102c2d2f8603b9ba72d89d915c13f5 ]

commit 25fb6ca4ed9cad72f14f61629b68dc03c0d9713f
"net IPv6 : Fix broken IPv6 routing table after loopback down-up"
allocates addrconf router for ipv6 address when lo device up.
but commit a881ae1f625c599b460cc8f8a7fcb1c438f699ad
"ipv6:don't call addrconf_dst_alloc again when enable lo" breaks
this behavior.

Since the addrconf router is moved to the garbage list when
lo device down, we should release this router and reallocate
a new one for ipv6 address when lo device up.

This patch solves bug 67951 on bugzilla
https://bugzilla.kernel.org/show_bug.cgi?id=67951

change from v1:
use ip6_rt_put to replace ip6_del_rt, thanks Hannes!
change code style, suggested by Sergei.

CC: Sabrina Dubroca <sd@queasysnail.net>
CC: Hannes Frederic Sowa <hannes@stressinduktion.org>
Reported-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/addrconf.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7bcdd0df68db..d0912acd9522 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2691,8 +2691,18 @@ static void init_loopback(struct net_device *dev)
 			if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))
 				continue;
 
-			if (sp_ifa->rt)
-				continue;
+			if (sp_ifa->rt) {
+				/* This dst has been added to garbage list when
+				 * lo device down, release this obsolete dst and
+				 * reallocate a new router for ifa.
+				 */
+				if (sp_ifa->rt->dst.obsolete > 0) {
+					ip6_rt_put(sp_ifa->rt);
+					sp_ifa->rt = NULL;
+				} else {
+					continue;
+				}
+			}
 
 			sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0);
 

From 084a2fd4fb44bc3a72e6a091b6f473e8b6176c77 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Fri, 19 Sep 2014 10:13:50 +0800
Subject: [PATCH 0922/1185] USB: Add device quirk for ASUS T100 Base Station
 keyboard

commit ddbe1fca0bcb87ca8c199ea873a456ca8a948567 upstream.

This full-speed USB device generates spurious remote wakeup event
as soon as USB_DEVICE_REMOTE_WAKEUP feature is set. As the result,
Linux can't enter system suspend and S0ix power saving modes once
this keyboard is used.

This patch tries to introduce USB_QUIRK_IGNORE_REMOTE_WAKEUP quirk.
With this quirk set, wakeup capability will be ignored during
device configure.

This patch could be back-ported to kernels as old as 2.6.39.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c     | 6 ++++--
 drivers/usb/core/quirks.c  | 4 ++++
 include/linux/usb/quirks.h | 3 +++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index b5d42fee8a84..c9f56ffdba9a 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1951,8 +1951,10 @@ void usb_set_device_state(struct usb_device *udev,
 					|| new_state == USB_STATE_SUSPENDED)
 				;	/* No change to wakeup settings */
 			else if (new_state == USB_STATE_CONFIGURED)
-				wakeup = udev->actconfig->desc.bmAttributes
-					 & USB_CONFIG_ATT_WAKEUP;
+				wakeup = (udev->quirks &
+					USB_QUIRK_IGNORE_REMOTE_WAKEUP) ? 0 :
+					udev->actconfig->desc.bmAttributes &
+					USB_CONFIG_ATT_WAKEUP;
 			else
 				wakeup = 0;
 		}
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 1053eb651b2f..a301b3fa622b 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -162,6 +162,10 @@ static const struct usb_device_id usb_interface_quirk_list[] = {
 	{ USB_VENDOR_AND_INTERFACE_INFO(0x046d, USB_CLASS_VIDEO, 1, 0),
 	  .driver_info = USB_QUIRK_RESET_RESUME },
 
+	/* ASUS Base Station(T100) */
+	{ USB_DEVICE(0x0b05, 0x17e0), .driver_info =
+			USB_QUIRK_IGNORE_REMOTE_WAKEUP },
+
 	{ }  /* terminating entry must be last */
 };
 
diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index 52f944dfe2fd..49587dc22f5d 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -30,4 +30,7 @@
    descriptor */
 #define USB_QUIRK_DELAY_INIT		0x00000040
 
+/* device generates spurious wakeup, ignore remote wakeup capability */
+#define USB_QUIRK_IGNORE_REMOTE_WAKEUP	0x00000200
+
 #endif /* __LINUX_USB_QUIRKS_H */

From 8ea18089585657066fc23f1ab8cad6f1fa5e529e Mon Sep 17 00:00:00 2001
From: Joe Savage <joe.savage@goketra.com>
Date: Sat, 20 Sep 2014 08:01:16 -0500
Subject: [PATCH 0923/1185] USB: serial: cp210x: added Ketra N1 wireless
 interface support

commit bfc2d7dfdd761ae3beccdb26abebe03cef042f46 upstream.

Added support for Ketra N1 wireless interface, which uses the
Silicon Labs' CP2104 USB to UART bridge with customized PID 8946.

Signed-off-by: Joe Savage <joe.savage@goketra.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index b14379659e35..163e63ce8fcd 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -122,6 +122,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */
 	{ USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */
 	{ USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */
+	{ USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */
 	{ USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */

From a17c0d3ba5d6b06bb6cb3efc9418c4375c10f26c Mon Sep 17 00:00:00 2001
From: Andreas Bomholtz <andreas@seluxit.com>
Date: Mon, 22 Sep 2014 09:50:43 +0200
Subject: [PATCH 0924/1185] USB: cp210x: add support for Seluxit USB dongle

commit dee80ad12d2b1b304286a707fde7ab05d1fc7bab upstream.

Added the Seluxit ApS USB Serial Dongle to cp210x driver.

Signed-off-by: Andreas Bomholtz <andreas@seluxit.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 163e63ce8fcd..b22a4bc308e2 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -156,6 +156,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */
 	{ USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */
 	{ USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */
+	{ USB_DEVICE(0x1D6F, 0x0010) }, /* Seluxit ApS RF Dongle */
 	{ USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */
 	{ USB_DEVICE(0x1E29, 0x0501) }, /* Festo CMSP */
 	{ USB_DEVICE(0x1FB9, 0x0100) }, /* Lake Shore Model 121 Current Source */

From 0b9c3a46536a5be91175e280882f74890e617eef Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 15 Oct 2014 08:32:29 +0200
Subject: [PATCH 0925/1185] Linux 3.10.58

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 9df630a513b7..c27454b8ca3e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 10
-SUBLEVEL = 57
+SUBLEVEL = 58
 EXTRAVERSION =
 NAME = TOSSUG Baby Fish
 

From 5ffb57932ec09ebc9956a1c74f703ca8389e3a90 Mon Sep 17 00:00:00 2001
From: Ruchi Kandoi <kandoiruchi@google.com>
Date: Tue, 14 Oct 2014 17:43:21 -0700
Subject: [PATCH 0926/1185] power: Avoids bogus error messages for the suspend
 aborts.

Avoids printing bogus error message "tasks refusing to freeze", in cases
where pending wakeup source caused the suspend abort.

Signed-off-by: Ruchi Kandoi <kandoiruchi@google.com>
Change-Id: I913ad290f501b31cd536d039834c8d24c6f16928
---
 kernel/power/process.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/kernel/power/process.c b/kernel/power/process.c
index fc0df8486449..d26dcb5dff87 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -82,23 +82,24 @@ static int try_to_freeze_tasks(bool user_only)
 	do_div(elapsed_msecs64, NSEC_PER_MSEC);
 	elapsed_msecs = elapsed_msecs64;
 
-	if (todo) {
+	if (wakeup) {
 		printk("\n");
-		printk(KERN_ERR "Freezing of tasks %s after %d.%03d seconds "
-		       "(%d tasks refusing to freeze, wq_busy=%d):\n",
-		       wakeup ? "aborted" : "failed",
+		printk(KERN_ERR "Freezing of tasks aborted after %d.%03d seconds",
+		       elapsed_msecs / 1000, elapsed_msecs % 1000);
+	} else if (todo) {
+		printk("\n");
+		printk(KERN_ERR "Freezing of tasks failed after %d.%03d seconds"
+		       " (%d tasks refusing to freeze, wq_busy=%d):\n",
 		       elapsed_msecs / 1000, elapsed_msecs % 1000,
 		       todo - wq_busy, wq_busy);
 
-		if (!wakeup) {
-			read_lock(&tasklist_lock);
-			do_each_thread(g, p) {
-				if (p != current && !freezer_should_skip(p)
-				    && freezing(p) && !frozen(p))
-					sched_show_task(p);
-			} while_each_thread(g, p);
-			read_unlock(&tasklist_lock);
-		}
+		read_lock(&tasklist_lock);
+		do_each_thread(g, p) {
+			if (p != current && !freezer_should_skip(p)
+			    && freezing(p) && !frozen(p))
+				sched_show_task(p);
+		} while_each_thread(g, p);
+		read_unlock(&tasklist_lock);
 	} else {
 		printk("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000,
 			elapsed_msecs % 1000);

From 13224a7d4e4e4ac6ade52deec32e9f9c28533659 Mon Sep 17 00:00:00 2001
From: Ruchi Kandoi <kandoiruchi@google.com>
Date: Tue, 21 Oct 2014 13:55:04 -0700
Subject: [PATCH 0927/1185] cpufreq: Avoid using global variable total_cpus
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The change is to compile on kernels where cpufreq stats are compiled as
a module (CONFIG_CPU_FREQ_STAT=m), because total_cpus is not exported for
module use.

Reported-By: Emilio López <elopez93@gmail.com>
Signed-off-by: Ruchi Kandoi <kandoiruchi@google.com>
Change-Id: I4f3c74f0fac5e8d9449655b26bf3b407b0fe4290
---
 drivers/cpufreq/cpufreq_stats.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 7a2bcac3ad7f..d811f5d4b32b 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -282,19 +282,19 @@ static void cpufreq_stats_free_sysfs(unsigned int cpu)
 
 static void cpufreq_allstats_free(void)
 {
-	int i;
+	int cpu;
 	struct all_cpufreq_stats *all_stat;
 
 	sysfs_remove_file(cpufreq_global_kobject,
 						&_attr_all_time_in_state.attr);
 
-	for (i = 0; i < total_cpus; i++) {
-		all_stat = per_cpu(all_cpufreq_stats, i);
+	for_each_possible_cpu(cpu) {
+		all_stat = per_cpu(all_cpufreq_stats, cpu);
 		if (!all_stat)
 			continue;
 		kfree(all_stat->time_in_state);
 		kfree(all_stat);
-		per_cpu(all_cpufreq_stats, i) = NULL;
+		per_cpu(all_cpufreq_stats, cpu) = NULL;
 	}
 	if (all_freq_table) {
 		kfree(all_freq_table->freq_table);

From 3774b37a724bbdd3811f677b991d66b204328257 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 2 May 2014 16:24:11 +0100
Subject: [PATCH 0928/1185] arm64: barriers: wire up new barrier options

Now that all callers of the barrier macros are updated to pass the
mandatory options, update the macros so the option is actually used.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 493e68747e07b69da3d746352525a1ebd6b61d82)
Signed-off-by: Mark Brown <broonie@kernel.org>

Conflicts:
	arch/arm64/include/asm/barrier.h
---
 arch/arm64/include/asm/barrier.h | 48 +++++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index c98d0a88916a..9a4c3d5b402e 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -25,21 +25,55 @@
 #define wfi()		asm volatile("wfi" : : : "memory")
 
 #define isb()		asm volatile("isb" : : : "memory")
-#define dmb(opt)	asm volatile("dmb sy" : : : "memory")
-#define dsb(opt)	asm volatile("dsb sy" : : : "memory")
+#define dmb(opt)	asm volatile("dmb " #opt : : : "memory")
+#define dsb(opt)	asm volatile("dsb " #opt : : : "memory")
 
 #define mb()		dsb(sy)
-#define rmb()		asm volatile("dsb ld" : : : "memory")
-#define wmb()		asm volatile("dsb st" : : : "memory")
+#define rmb()		dsb(ld)
+#define wmb()		dsb(st)
 
 #ifndef CONFIG_SMP
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
 #else
-#define smp_mb()	asm volatile("dmb ish" : : : "memory")
-#define smp_rmb()	asm volatile("dmb ishld" : : : "memory")
-#define smp_wmb()	asm volatile("dmb ishst" : : : "memory")
+
+#define smp_mb()	dmb(ish)
+#define smp_rmb()	dmb(ishld)
+#define smp_wmb()	dmb(ishst)
+
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	switch (sizeof(*p)) {						\
+	case 4:								\
+		asm volatile ("stlr %w1, %0"				\
+				: "=Q" (*p) : "r" (v) : "memory");	\
+		break;							\
+	case 8:								\
+		asm volatile ("stlr %1, %0"				\
+				: "=Q" (*p) : "r" (v) : "memory");	\
+		break;							\
+	}								\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1;						\
+	compiletime_assert_atomic_type(*p);				\
+	switch (sizeof(*p)) {						\
+	case 4:								\
+		asm volatile ("ldar %w0, %1"				\
+			: "=r" (___p1) : "Q" (*p) : "memory");		\
+		break;							\
+	case 8:								\
+		asm volatile ("ldar %0, %1"				\
+			: "=r" (___p1) : "Q" (*p) : "memory");		\
+		break;							\
+	}								\
+	___p1;								\
+})
+
 #endif
 
 #define read_barrier_depends()		do { } while(0)

From b817da4cf03a33c8c40e3bef04d3f3ed64277d47 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 2 May 2014 16:24:12 +0100
Subject: [PATCH 0929/1185] arm64: barriers: use barrier() instead of smp_mb()
 when !SMP

The recently introduced acquire/release accessors refer to smp_mb()
in the !CONFIG_SMP case. This is confusing when reading the code, so use
barrier() directly when we know we're UP.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit be6209a6107e0f63544e3e7d00fd5c95434ec80a)
Signed-off-by: Mark Brown <broonie@kernel.org>

Conflicts:
	arch/arm64/include/asm/barrier.h
---
 arch/arm64/include/asm/barrier.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 9a4c3d5b402e..709f1f6d6bbd 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -36,6 +36,22 @@
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
+
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	___p1;								\
+})
+
 #else
 
 #define smp_mb()	dmb(ish)

From 7af7a7d021416dfdbb30e6b31957297d484ebb97 Mon Sep 17 00:00:00 2001
From: Ruchi Kandoi <kandoiruchi@google.com>
Date: Wed, 29 Oct 2014 10:36:27 -0700
Subject: [PATCH 0930/1185] power: Adds functionality to log the last suspend
 abort reason.

Extends the last_resume_reason to log suspend abort reason. The abort
reasons will have "Abort:" prepended to distinguish them from the
resume reason.

Signed-off-by: Ruchi Kandoi <kandoiruchi@google.com>
Change-Id: I3207f1844e3d87c706dfc298fb10e1c648814c5f
---
 drivers/base/power/main.c     | 13 +++++++++++
 drivers/base/power/wakeup.c   | 16 +++++++++++++
 drivers/base/syscore.c        |  3 +++
 include/linux/suspend.h       |  2 +-
 include/linux/wakeup_reason.h |  4 +++-
 kernel/irq/pm.c               |  7 +++++-
 kernel/power/process.c        |  6 ++++-
 kernel/power/suspend.c        | 19 +++++++++++++---
 kernel/power/wakeup_reason.c  | 42 ++++++++++++++++++++++++++++-------
 9 files changed, 97 insertions(+), 15 deletions(-)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 6a33dd85c044..5131ad8ca17f 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -30,6 +30,7 @@
 #include <linux/suspend.h>
 #include <linux/cpuidle.h>
 #include <linux/timer.h>
+#include <linux/wakeup_reason.h>
 
 #include "../base.h"
 #include "power.h"
@@ -938,6 +939,7 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state)
 static int dpm_suspend_noirq(pm_message_t state)
 {
 	ktime_t starttime = ktime_get();
+	char suspend_abort[MAX_SUSPEND_ABORT_LEN];
 	int error = 0;
 
 	cpuidle_pause();
@@ -965,6 +967,9 @@ static int dpm_suspend_noirq(pm_message_t state)
 		put_device(dev);
 
 		if (pm_wakeup_pending()) {
+			pm_get_active_wakeup_sources(suspend_abort,
+				MAX_SUSPEND_ABORT_LEN);
+			log_suspend_abort_reason(suspend_abort);
 			error = -EBUSY;
 			break;
 		}
@@ -1023,6 +1028,7 @@ static int device_suspend_late(struct device *dev, pm_message_t state)
 static int dpm_suspend_late(pm_message_t state)
 {
 	ktime_t starttime = ktime_get();
+	char suspend_abort[MAX_SUSPEND_ABORT_LEN];
 	int error = 0;
 
 	mutex_lock(&dpm_list_mtx);
@@ -1048,6 +1054,9 @@ static int dpm_suspend_late(pm_message_t state)
 		put_device(dev);
 
 		if (pm_wakeup_pending()) {
+			pm_get_active_wakeup_sources(suspend_abort,
+				MAX_SUSPEND_ABORT_LEN);
+			log_suspend_abort_reason(suspend_abort);
 			error = -EBUSY;
 			break;
 		}
@@ -1115,6 +1124,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 	char *info = NULL;
 	int error = 0;
 	struct dpm_watchdog wd;
+	char suspend_abort[MAX_SUSPEND_ABORT_LEN];
 
 	dpm_wait_for_children(dev, async);
 
@@ -1131,6 +1141,9 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 		pm_wakeup_event(dev, 0);
 
 	if (pm_wakeup_pending()) {
+		pm_get_active_wakeup_sources(suspend_abort,
+			MAX_SUSPEND_ABORT_LEN);
+		log_suspend_abort_reason(suspend_abort);
 		async_error = -EBUSY;
 		goto Complete;
 	}
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 79715e7fa43e..bea700736f24 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -659,6 +659,22 @@ void pm_wakeup_event(struct device *dev, unsigned int msec)
 }
 EXPORT_SYMBOL_GPL(pm_wakeup_event);
 
+/* Fill @pending_wakeup_source (@max bytes) with the active wakeup source names. */
+void pm_get_active_wakeup_sources(char *pending_wakeup_source, size_t max)
+{
+	struct wakeup_source *ws;
+	int len;
+	rcu_read_lock();
+	len = scnprintf(pending_wakeup_source, max, "Pending Wakeup Sources: ");
+	list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
+		/* scnprintf() returns bytes actually written: len never passes max */
+		if (ws->active)
+			len += scnprintf(pending_wakeup_source + len,
+					 max - len, "%s ", ws->name);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(pm_get_active_wakeup_sources);
+
 static void print_active_wakeup_sources(void)
 {
 	struct wakeup_source *ws;
diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c
index e8d11b6630ee..0ab546558c4e 100644
--- a/drivers/base/syscore.c
+++ b/drivers/base/syscore.c
@@ -10,6 +10,7 @@
 #include <linux/mutex.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
+#include <linux/wakeup_reason.h>
 
 static LIST_HEAD(syscore_ops_list);
 static DEFINE_MUTEX(syscore_ops_lock);
@@ -73,6 +74,8 @@ int syscore_suspend(void)
 	return 0;
 
  err_out:
+	log_suspend_abort_reason("System core suspend callback %pF failed",
+		ops->suspend);
 	pr_err("PM: System core suspend callback %pF failed.\n", ops->suspend);
 
 	list_for_each_entry_continue(ops, &syscore_ops_list, node)
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index d4e3f16d5e89..a34821358ae5 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -363,7 +363,7 @@ extern bool pm_wakeup_pending(void);
 extern bool pm_get_wakeup_count(unsigned int *count, bool block);
 extern bool pm_save_wakeup_count(unsigned int count);
 extern void pm_wakep_autosleep_enabled(bool set);
-
+extern void pm_get_active_wakeup_sources(char *pending_sources, size_t max);
 static inline void lock_system_sleep(void)
 {
 	current->flags |= PF_FREEZER_SKIP;
diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h
index 7ce50f0debc4..5f095da2c977 100644
--- a/include/linux/wakeup_reason.h
+++ b/include/linux/wakeup_reason.h
@@ -18,6 +18,8 @@
 #ifndef _LINUX_WAKEUP_REASON_H
 #define _LINUX_WAKEUP_REASON_H
 
-void log_wakeup_reason(int irq);
+#define MAX_SUSPEND_ABORT_LEN 256
 
+void log_wakeup_reason(int irq);
+void log_suspend_abort_reason(const char *fmt, ...);
 #endif /* _LINUX_WAKEUP_REASON_H */
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index fe4b09cf829c..08d0916150d5 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -10,7 +10,7 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/syscore_ops.h>
-
+#include <linux/wakeup_reason.h>
 #include "internals.h"
 
 /**
@@ -100,11 +100,16 @@ EXPORT_SYMBOL_GPL(resume_device_irqs);
 int check_wakeup_irqs(void)
 {
 	struct irq_desc *desc;
+	char suspend_abort[MAX_SUSPEND_ABORT_LEN];
 	int irq;
 
 	for_each_irq_desc(irq, desc) {
 		if (irqd_is_wakeup_set(&desc->irq_data)) {
 			if (desc->istate & IRQS_PENDING) {
+				log_suspend_abort_reason("Wakeup IRQ %d %s pending",
+					irq,
+					desc->action && desc->action->name ?
+					desc->action->name : "");
 				pr_info("Wakeup IRQ %d %s pending, suspend aborted\n",
 					irq,
 					desc->action && desc->action->name ?
diff --git a/kernel/power/process.c b/kernel/power/process.c
index d26dcb5dff87..86a40fa35095 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -17,7 +17,7 @@
 #include <linux/delay.h>
 #include <linux/workqueue.h>
 #include <linux/kmod.h>
-
+#include <linux/wakeup_reason.h>
 /* 
  * Timeout for stopping processes
  */
@@ -34,6 +34,7 @@ static int try_to_freeze_tasks(bool user_only)
 	unsigned int elapsed_msecs;
 	bool wakeup = false;
 	int sleep_usecs = USEC_PER_MSEC;
+	char suspend_abort[MAX_SUSPEND_ABORT_LEN];
 
 	do_gettimeofday(&start);
 
@@ -63,6 +64,9 @@ static int try_to_freeze_tasks(bool user_only)
 			break;
 
 		if (pm_wakeup_pending()) {
+			pm_get_active_wakeup_sources(suspend_abort,
+				MAX_SUSPEND_ABORT_LEN);
+			log_suspend_abort_reason(suspend_abort);
 			wakeup = true;
 			break;
 		}
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 454568e6c8d2..7c53fea31cba 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -27,6 +27,7 @@
 #include <linux/ftrace.h>
 #include <linux/rtc.h>
 #include <trace/events/power.h>
+#include <linux/wakeup_reason.h>
 
 #include "power.h"
 
@@ -147,7 +148,7 @@ static int suspend_prepare(suspend_state_t state)
 	error = suspend_freeze_processes();
 	if (!error)
 		return 0;
-
+	log_suspend_abort_reason("One or more tasks refusing to freeze");
 	suspend_stats.failed_freeze++;
 	dpm_save_failed_step(SUSPEND_FREEZE);
  Finish:
@@ -177,7 +178,8 @@ void __attribute__ ((weak)) arch_suspend_enable_irqs(void)
  */
 static int suspend_enter(suspend_state_t state, bool *wakeup)
 {
-	int error;
+	char suspend_abort[MAX_SUSPEND_ABORT_LEN];
+	int error, last_dev;
 
 	if (need_suspend_ops(state) && suspend_ops->prepare) {
 		error = suspend_ops->prepare();
@@ -187,7 +189,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 
 	error = dpm_suspend_end(PMSG_SUSPEND);
 	if (error) {
+		last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1;
+		last_dev %= REC_FAILED_NUM;
 		printk(KERN_ERR "PM: Some devices failed to power down\n");
+		log_suspend_abort_reason("%s device failed to power down",
+			suspend_stats.failed_devs[last_dev]);
 		goto Platform_finish;
 	}
 
@@ -212,8 +218,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 	}
 
 	error = disable_nonboot_cpus();
-	if (error || suspend_test(TEST_CPUS))
+	if (error || suspend_test(TEST_CPUS)) {
+		log_suspend_abort_reason("Disabling non-boot cpus failed");
 		goto Enable_cpus;
+	}
 
 	arch_suspend_disable_irqs();
 	BUG_ON(!irqs_disabled());
@@ -224,6 +232,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 		if (!(suspend_test(TEST_CORE) || *wakeup)) {
 			error = suspend_ops->enter(state);
 			events_check_enabled = false;
+		} else {
+			pm_get_active_wakeup_sources(suspend_abort,
+				MAX_SUSPEND_ABORT_LEN);
+			log_suspend_abort_reason(suspend_abort);
 		}
 		syscore_resume();
 	}
@@ -271,6 +283,7 @@ int suspend_devices_and_enter(suspend_state_t state)
 	error = dpm_suspend_start(PMSG_SUSPEND);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to suspend\n");
+		log_suspend_abort_reason("Some devices failed to suspend");
 		goto Recover_platform;
 	}
 	suspend_test_finish("suspend devices");
diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c
index 187e4e9105fb..2aacc34ef17c 100644
--- a/kernel/power/wakeup_reason.c
+++ b/kernel/power/wakeup_reason.c
@@ -31,6 +31,8 @@
 #define MAX_WAKEUP_REASON_IRQS 32
 static int irq_list[MAX_WAKEUP_REASON_IRQS];
 static int irqcount;
+static bool suspend_abort;
+static char abort_reason[MAX_SUSPEND_ABORT_LEN];
 static struct kobject *wakeup_reason;
 static spinlock_t resume_reason_lock;
 
@@ -40,14 +42,18 @@ static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribu
 	int irq_no, buf_offset = 0;
 	struct irq_desc *desc;
 	spin_lock(&resume_reason_lock);
-	for (irq_no = 0; irq_no < irqcount; irq_no++) {
-		desc = irq_to_desc(irq_list[irq_no]);
-		if (desc && desc->action && desc->action->name)
-			buf_offset += sprintf(buf + buf_offset, "%d %s\n",
-					irq_list[irq_no], desc->action->name);
-		else
-			buf_offset += sprintf(buf + buf_offset, "%d\n",
-					irq_list[irq_no]);
+	if (suspend_abort) {
+		buf_offset = sprintf(buf, "Abort: %s", abort_reason);
+	} else {
+		for (irq_no = 0; irq_no < irqcount; irq_no++) {
+			desc = irq_to_desc(irq_list[irq_no]);
+			if (desc && desc->action && desc->action->name)
+				buf_offset += sprintf(buf + buf_offset, "%d %s\n",
+						irq_list[irq_no], desc->action->name);
+			else
+				buf_offset += sprintf(buf + buf_offset, "%d\n",
+						irq_list[irq_no]);
+		}
 	}
 	spin_unlock(&resume_reason_lock);
 	return buf_offset;
@@ -89,6 +95,25 @@ void log_wakeup_reason(int irq)
 	spin_unlock(&resume_reason_lock);
 }
 
+/* Record the first suspend-abort reason of a suspend attempt (printf-style). */
+void log_suspend_abort_reason(const char *fmt, ...)
+{
+	va_list args;
+
+	spin_lock(&resume_reason_lock);
+	/* Suspend abort reason has already been logged; keep the first one. */
+	if (suspend_abort) {
+		spin_unlock(&resume_reason_lock);
+		return;
+	}
+	suspend_abort = true;
+	va_start(args, fmt);
+	/* vsnprintf(), not snprintf(): args is a va_list, not the varargs. */
+	vsnprintf(abort_reason, MAX_SUSPEND_ABORT_LEN, fmt, args);
+	va_end(args);
+	spin_unlock(&resume_reason_lock);
+}
+
 /* Detects a suspend and clears all the previous wake up reasons*/
 static int wakeup_reason_pm_event(struct notifier_block *notifier,
 		unsigned long pm_event, void *unused)
@@ -97,6 +122,7 @@ static int wakeup_reason_pm_event(struct notifier_block *notifier,
 	case PM_SUSPEND_PREPARE:
 		spin_lock(&resume_reason_lock);
 		irqcount = 0;
+		suspend_abort = false;
 		spin_unlock(&resume_reason_lock);
 		break;
 	default:

From d619a776f8885f626c831e49bb8858597cf8b3fa Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Thu, 18 Sep 2014 11:30:44 -0400
Subject: [PATCH 0931/1185] Btrfs: try not to ENOSPC on log replay

commit 1d52c78afbbf80b58299e076a159617d6b42fe3c upstream.

When doing log replay we may have to update inodes, which traditionally goes
through our delayed inode stuff.  This will try to move space over from the
trans handle, but we don't reserve space in our trans handle on replay since we
don't know how much we will need, so instead we try to flush.  But because we
have a trans handle open we won't flush anything, so if we are out of reserve
space we will simply return ENOSPC.  Since we know that if an operation made it
into the log then we definitely had space before the box bought the farm then we
don't need to worry about doing this space reservation.  Use the
fs_info->log_root_recovering flag to skip the delayed inode stuff and update the
item directly.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/inode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8fcd2424e7f9..187911fbabce 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3545,7 +3545,8 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
 	 * without delay
 	 */
 	if (!btrfs_is_free_space_inode(inode)
-	    && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
+	    && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
+	    && !root->fs_info->log_root_recovering) {
 		btrfs_update_root_times(trans, root);
 
 		ret = btrfs_delayed_update_inode(trans, root, inode);

From 5525f742eb55f40c1080c5ffb39a09978d5c50ba Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Fri, 19 Sep 2014 15:43:34 -0400
Subject: [PATCH 0932/1185] Btrfs: fix build_backref_tree issue with multiple
 shared blocks

commit bbe9051441effce51c9a533d2c56440df64db2d7 upstream.

Marc Merlin sent me a broken fs image months ago where it would blow up in the
upper->checked BUG_ON() in build_backref_tree.  This is because we had a
scenario like this

block a -- level 4 (not shared)
   |
block b -- level 3 (reloc block, shared)
   |
block c -- level 2 (not shared)
   |
block d -- level 1 (shared)
   |
block e -- level 0 (shared)

We go to build a backref tree for block e, we notice block d is shared and add
it to the list of blocks to look up its backrefs for.  Now when we loop around
we will check edges for the block, so we will see we looked up block c last
time.  So we lookup block d and then see that the block that points to it is
block c and we can just skip that edge since we've already been up this path.
The problem is because we clear need_check when we see block d (as it is shared)
we never add block b as needing to be checked.  And because block c is in our
path already we bail out before we walk up to block b and add it to the backref
check list.

To fix this we need to reset need_check if we trip over a block that doesn't
need to be checked.  This will make sure that any subsequent blocks in the path
as we're walking up afterwards are added to the list to be processed.  With this
patch I can now mount Marc's fs image and it'll complete the balance without
panicking.  Thanks,

Reported-by: Marc MERLIN <marc@merlins.org>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/relocation.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b3896d5f233a..0e7f7765b3bb 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -967,8 +967,11 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
 					need_check = false;
 					list_add_tail(&edge->list[UPPER],
 						      &list);
-				} else
+				} else {
+					if (upper->checked)
+						need_check = true;
 					INIT_LIST_HEAD(&edge->list[UPPER]);
+				}
 			} else {
 				upper = rb_entry(rb_node, struct backref_node,
 						 rb_node);

From aea9dd519b41025500e678587908705ad499ac38 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@redhat.com>
Date: Fri, 26 Sep 2014 08:30:06 -0700
Subject: [PATCH 0933/1185] Btrfs: fix race in WAIT_SYNC ioctl

commit 42383020beb1cfb05f5d330cc311931bc4917a97 upstream.

We check whether transid is already committed via last_trans_committed and
then search through trans_list for pending transactions.  If
last_trans_committed is updated by btrfs_commit_transaction after we check
it (there is no locking), we will fail to find the committed transaction
and return EINVAL to the caller.  This has been observed occasionally by
ceph-osd (which uses this ioctl heavily).

Fix by rechecking whether the provided transid <= last_trans_committed
after the search fails, and if so return 0.

Signed-off-by: Sage Weil <sage@redhat.com>
Signed-off-by: Chris Mason <clm@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/transaction.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 0544587d74f4..1f214689fa5e 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -524,7 +524,6 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
 		if (transid <= root->fs_info->last_trans_committed)
 			goto out;
 
-		ret = -EINVAL;
 		/* find specified transaction */
 		spin_lock(&root->fs_info->trans_lock);
 		list_for_each_entry(t, &root->fs_info->trans_list, list) {
@@ -540,9 +539,16 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
 			}
 		}
 		spin_unlock(&root->fs_info->trans_lock);
-		/* The specified transaction doesn't exist */
-		if (!cur_trans)
+
+		/*
+		 * The specified transaction doesn't exist, or we
+		 * raced with btrfs_commit_transaction
+		 */
+		if (!cur_trans) {
+			if (transid > root->fs_info->last_trans_committed)
+				ret = -EINVAL;
 			goto out;
+		}
 	} else {
 		/* find newest transaction that is committing | committed */
 		spin_lock(&root->fs_info->trans_lock);

From a7dbb3e347aa3916f681b10cfbc7d12ed6ae7b34 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Wed, 8 Oct 2014 12:32:47 -0700
Subject: [PATCH 0934/1185] fs: Add a missing permission check to do_umount

commit a1480dcc3c706e309a88884723446f2e84fedd5b upstream.

Accessing do_remount_sb should require global CAP_SYS_ADMIN, but
only one of the two call sites was appropriately protected.

Fixes CVE-2014-7975.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/namespace.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/namespace.c b/fs/namespace.c
index 00409add4d96..7f6a9348c589 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1274,6 +1274,8 @@ static int do_umount(struct mount *mnt, int flags)
 		 * Special case for "unmounting" root ...
 		 * we just try to remount it readonly.
 		 */
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
 		down_write(&sb->s_umount);
 		if (!(sb->s_flags & MS_RDONLY))
 			retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);

From 973af5283c036d199360a4f82ce442815bf5378b Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Mon, 18 Aug 2014 15:46:07 -0700
Subject: [PATCH 0935/1185] kvm: x86: fix stale mmio cache bug

commit 56f17dd3fbc44adcdbc3340fe3988ddb833a47a7 upstream.

The following events can lead to an incorrect KVM_EXIT_MMIO bubbling
up to userspace:

(1) Guest accesses gpa X without a memory slot. The gfn is cached in
struct kvm_vcpu_arch (mmio_gfn). On Intel EPT-enabled hosts, KVM sets
the SPTE write-execute-noread so that future accesses cause
EPT_MISCONFIGs.

(2) Host userspace creates a memory slot via KVM_SET_USER_MEMORY_REGION
covering the page just accessed.

(3) Guest attempts to read or write to gpa X again. On Intel, this
generates an EPT_MISCONFIG. The memory slot generation number that
was incremented in (2) would normally take care of this but we fast
path mmio faults through quickly_check_mmio_pf(), which only checks
the per-vcpu mmio cache. Since we hit the cache, KVM passes a
KVM_EXIT_MMIO up to userspace.

This patch fixes the issue by using the memslot generation number
to validate the mmio cache.

Signed-off-by: David Matlack <dmatlack@google.com>
[xiaoguangrong: adjust the code to make it simpler for stable-tree fix.]
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Tested-by: David Matlack <dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu.c              |  2 +-
 arch/x86/kvm/x86.h              | 20 +++++++++++++++-----
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f7f20f7fac3c..373058c9b75d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -463,6 +463,7 @@ struct kvm_vcpu_arch {
 	u64 mmio_gva;
 	unsigned access;
 	gfn_t mmio_gfn;
+	u64 mmio_gen;
 
 	struct kvm_pmu pmu;
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 711c649f80b7..e14b1f8667bb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3072,7 +3072,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 		return;
 
-	vcpu_clear_mmio_info(vcpu, ~0ul);
+	vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
 	kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
 	if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 3186542f2fa3..7626d3efa064 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -78,15 +78,23 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
 	vcpu->arch.mmio_gva = gva & PAGE_MASK;
 	vcpu->arch.access = access;
 	vcpu->arch.mmio_gfn = gfn;
+	vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation;
+}
+
+static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.mmio_gen == kvm_memslots(vcpu->kvm)->generation;
 }
 
 /*
- * Clear the mmio cache info for the given gva,
- * specially, if gva is ~0ul, we clear all mmio cache info.
+ * Clear the mmio cache info for the given gva. If gva is MMIO_GVA_ANY, we
+ * clear all mmio cache info.
  */
+#define MMIO_GVA_ANY (~(gva_t)0)
+
 static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva)
 {
-	if (gva != (~0ul) && vcpu->arch.mmio_gva != (gva & PAGE_MASK))
+	if (gva != MMIO_GVA_ANY && vcpu->arch.mmio_gva != (gva & PAGE_MASK))
 		return;
 
 	vcpu->arch.mmio_gva = 0;
@@ -94,7 +102,8 @@ static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva)
 
 static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
 {
-	if (vcpu->arch.mmio_gva && vcpu->arch.mmio_gva == (gva & PAGE_MASK))
+	if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gva &&
+	      vcpu->arch.mmio_gva == (gva & PAGE_MASK))
 		return true;
 
 	return false;
@@ -102,7 +111,8 @@ static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
 
 static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
 {
-	if (vcpu->arch.mmio_gfn && vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT)
+	if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gfn &&
+	      vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT)
 		return true;
 
 	return false;

From 68249f03d07ff212ee1b0170450985d896378f88 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Wed, 3 Sep 2014 16:21:32 +0200
Subject: [PATCH 0936/1185] KVM: s390: unintended fallthrough for external call

commit f346026e55f1efd3949a67ddd1dcea7c1b9a615e upstream.

We must not fallthrough if the conditions for external call are not met.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/kvm/interrupt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 5c948177529e..bc79ab00536f 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -71,6 +71,7 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
 			return 0;
 		if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
 			return 1;
+		return 0;
 	case KVM_S390_INT_EMERGENCY:
 		if (psw_extint_disabled(vcpu))
 			return 0;

From 7889ddde2798154828d49da8d7c8863b7573e62d Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 19 Sep 2014 16:03:25 -0700
Subject: [PATCH 0937/1185] kvm: don't take vcpu mutex for obviously invalid
 vcpu ioctls

commit 2ea75be3219571d0ec009ce20d9971e54af96e09 upstream.

vcpu ioctls can hang the calling thread if issued while a vcpu is running.
However, invalid ioctls can happen when userspace tries to probe the kind
of file descriptors (e.g. isatty() calls ioctl(TCGETS)); in that case,
we know the ioctl is going to be rejected as invalid anyway and we can
fail before trying to take the vcpu mutex.

This patch does not change functionality, it just makes invalid ioctls
fail faster.

Signed-off-by: David Matlack <dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 virt/kvm/kvm_main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8cf1cd2fadaa..a17f190be58e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -52,6 +52,7 @@
 
 #include <asm/processor.h>
 #include <asm/io.h>
+#include <asm/ioctl.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
@@ -1981,6 +1982,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
 	if (vcpu->kvm->mm != current->mm)
 		return -EIO;
 
+	if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
+		return -EINVAL;
+
 #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
 	/*
 	 * Special cases: vcpu ioctls that are asynchronous to vcpu execution,

From 75b6cf03dd5cae671b84f60e018a6c08e5f6d4b7 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <pure.logic@nexus-software.ie>
Date: Wed, 24 Sep 2014 00:26:24 +0100
Subject: [PATCH 0938/1185] x86/intel/quark: Switch off CR4.PGE so TLB flush
 uses CR3 instead

commit ee1b5b165c0a2f04d2107e634e51f05d0eb107de upstream.

Quark X1000 advertises PGE via the standard CPUID method, and
PGE bits exist in Quark X1000's PTEs. However, in order to flush
an individual PTE it is necessary to reload CR3 irrespective
of the PTE.PGE bit.

See Quark Core_DevMan_001.pdf section 6.4.11

This bug was fixed in Galileo kernels, unfixed vanilla kernels are expected to
crash and burn on this platform.

Signed-off-by: Bryan O'Donoghue <pure.logic@nexus-software.ie>
Cc: Borislav Petkov <bp@alien8.de>
Link: http://lkml.kernel.org/r/1411514784-14885-1-git-send-email-pure.logic@nexus-software.ie
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/intel.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index f187806dfc18..8533e69d2b89 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -154,6 +154,21 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 			setup_clear_cpu_cap(X86_FEATURE_ERMS);
 		}
 	}
+
+	/*
+	 * Intel Quark Core DevMan_001.pdf section 6.4.11
+	 * "The operating system also is required to invalidate (i.e., flush)
+	 *  the TLB when any changes are made to any of the page table entries.
+	 *  The operating system must reload CR3 to cause the TLB to be flushed"
+	 *
+	 * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should
+	 * be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
+	 * to be modified
+	 */
+	if (c->x86 == 5 && c->x86_model == 9) {
+		pr_info("Disabling PGE capability bit\n");
+		setup_clear_cpu_cap(X86_FEATURE_PGE);
+	}
 }
 
 #ifdef CONFIG_X86_32

From 3f8ae85b8ab60e025c806305f95e262d451831ac Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 18 Sep 2014 20:08:51 +0300
Subject: [PATCH 0939/1185] spi: dw-mid: respect 8 bit mode

commit b41583e7299046abdc578c33f25ed83ee95b9b31 upstream.

In case of 8 bit mode and DMA usage we end up with every second byte written as
0. We have to respect the bits_per_word setting, which is what this patch does.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/spi/spi-dw-mid.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c
index b9f0192758d6..58fa14d27ffa 100644
--- a/drivers/spi/spi-dw-mid.c
+++ b/drivers/spi/spi-dw-mid.c
@@ -136,7 +136,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
 	txconf.dst_addr = dws->dma_addr;
 	txconf.dst_maxburst = LNW_DMA_MSIZE_16;
 	txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	txconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+	txconf.dst_addr_width = dws->dma_width;
 	txconf.device_fc = false;
 
 	txchan->device->device_control(txchan, DMA_SLAVE_CONFIG,
@@ -159,7 +159,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
 	rxconf.src_addr = dws->dma_addr;
 	rxconf.src_maxburst = LNW_DMA_MSIZE_16;
 	rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	rxconf.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+	rxconf.src_addr_width = dws->dma_width;
 	rxconf.device_fc = false;
 
 	rxchan->device->device_control(rxchan, DMA_SLAVE_CONFIG,

From 0ee097ac8eebdef530c6757d09bd82e08b455a08 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 12 Sep 2014 15:11:58 +0300
Subject: [PATCH 0940/1185] spi: dw-mid: check that DMA was inited before exit

commit fb57862ead652454ceeb659617404c5f13bc34b5 upstream.

If the driver was compiled with DMA support, but DMA channels weren't acquired
for some reason, mid_spi_dma_exit() will crash the kernel.

Fixes: 7063c0d942a1 (spi/dw_spi: add DMA support)
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/spi/spi-dw-mid.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c
index 58fa14d27ffa..b8ac40c8a22d 100644
--- a/drivers/spi/spi-dw-mid.c
+++ b/drivers/spi/spi-dw-mid.c
@@ -89,6 +89,8 @@ static int mid_spi_dma_init(struct dw_spi *dws)
 
 static void mid_spi_dma_exit(struct dw_spi *dws)
 {
+	if (!dws->dma_inited)
+		return;
 	dma_release_channel(dws->txchan);
 	dma_release_channel(dws->rxchan);
 }

From d9aa4aecb2e94ca383789147769291e8723ecf86 Mon Sep 17 00:00:00 2001
From: Xiubo Li <Li.Xiubo@freescale.com>
Date: Sun, 28 Sep 2014 11:35:25 +0800
Subject: [PATCH 0941/1185] regmap: debugfs: fix possbile NULL pointer
 dereference

commit 2c98e0c1cc6b8e86f1978286c3d4e0769ee9d733 upstream.

If 'map->dev' is NULL, calling dev_name() on it leads to a NULL pointer
dereference. So before calling dev_name(), we need to check the map->dev
pointer.

We should also make sure that the 'name' pointer passed to
debugfs_create_dir() is not NULL. So use a default "dummy" debugfs name when
the 'name' pointer and 'map->dev' are both NULL.

Signed-off-by: Xiubo Li <Li.Xiubo@freescale.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/regmap/regmap-debugfs.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c
index 975719bc3450..b41994fd8460 100644
--- a/drivers/base/regmap/regmap-debugfs.c
+++ b/drivers/base/regmap/regmap-debugfs.c
@@ -460,16 +460,20 @@ void regmap_debugfs_init(struct regmap *map, const char *name)
 {
 	struct rb_node *next;
 	struct regmap_range_node *range_node;
+	const char *devname = "dummy";
 
 	INIT_LIST_HEAD(&map->debugfs_off_cache);
 	mutex_init(&map->cache_lock);
 
+	if (map->dev)
+		devname = dev_name(map->dev);
+
 	if (name) {
 		map->debugfs_name = kasprintf(GFP_KERNEL, "%s-%s",
-					      dev_name(map->dev), name);
+					      devname, name);
 		name = map->debugfs_name;
 	} else {
-		name = dev_name(map->dev);
+		name = devname;
 	}
 
 	map->debugfs = debugfs_create_dir(name, regmap_debugfs_root);

From a110e6d6ebac622c30c9d219b5b154d3d3167a20 Mon Sep 17 00:00:00 2001
From: Pankaj Dubey <pankaj.dubey@samsung.com>
Date: Sat, 27 Sep 2014 09:47:55 +0530
Subject: [PATCH 0942/1185] regmap: fix NULL pointer dereference in
 _regmap_write/read

commit 5336be8416a71b5568d2cf54a2f2066abe9f2a53 upstream.

If LOG_DEVICE is defined and map->dev is NULL it will lead to a NULL
pointer dereference. This patch fixes this issue by adding a check for
map->dev being NULL in all such places in regmap.c

Signed-off-by: Pankaj Dubey <pankaj.dubey@samsung.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/regmap/regmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 4b5cf2e34e9a..6a66f0b7d3d4 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -1177,7 +1177,7 @@ int _regmap_write(struct regmap *map, unsigned int reg,
 	}
 
 #ifdef LOG_DEVICE
-	if (strcmp(dev_name(map->dev), LOG_DEVICE) == 0)
+	if (map->dev && strcmp(dev_name(map->dev), LOG_DEVICE) == 0)
 		dev_info(map->dev, "%x <= %x\n", reg, val);
 #endif
 
@@ -1437,7 +1437,7 @@ static int _regmap_read(struct regmap *map, unsigned int reg,
 	ret = map->reg_read(context, reg, val);
 	if (ret == 0) {
 #ifdef LOG_DEVICE
-		if (strcmp(dev_name(map->dev), LOG_DEVICE) == 0)
+		if (map->dev && strcmp(dev_name(map->dev), LOG_DEVICE) == 0)
 			dev_info(map->dev, "%x => %x\n", reg, *val);
 #endif
 

From 39d6457473676ef1ceeec2f384bbabb32adbd888 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Mon, 29 Sep 2014 13:55:41 -0500
Subject: [PATCH 0943/1185] be2iscsi: check ip buffer before copying

commit a41a9ad3bbf61fae0b6bfb232153da60d14fdbd9 upstream.

Dan Carpenter found an issue where be2iscsi would copy the ip
from userspace to the driver buffer before checking the len
of the data being copied:
http://marc.info/?l=linux-scsi&m=140982651504251&w=2

This patch just has us only copy what the driver buffer
can support.

Tested-by: John Soni Jose <sony.john-n@emulex.com>
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/be2iscsi/be_mgmt.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/be2iscsi/be_mgmt.c b/drivers/scsi/be2iscsi/be_mgmt.c
index 245a9595a93a..ef0a78b0d730 100644
--- a/drivers/scsi/be2iscsi/be_mgmt.c
+++ b/drivers/scsi/be2iscsi/be_mgmt.c
@@ -812,17 +812,20 @@ mgmt_static_ip_modify(struct beiscsi_hba *phba,
 
 	if (ip_action == IP_ACTION_ADD) {
 		memcpy(req->ip_params.ip_record.ip_addr.addr, ip_param->value,
-		       ip_param->len);
+		       sizeof(req->ip_params.ip_record.ip_addr.addr));
 
 		if (subnet_param)
 			memcpy(req->ip_params.ip_record.ip_addr.subnet_mask,
-			       subnet_param->value, subnet_param->len);
+			       subnet_param->value,
+			       sizeof(req->ip_params.ip_record.ip_addr.subnet_mask));
 	} else {
 		memcpy(req->ip_params.ip_record.ip_addr.addr,
-		       if_info->ip_addr.addr, ip_param->len);
+		       if_info->ip_addr.addr,
+		       sizeof(req->ip_params.ip_record.ip_addr.addr));
 
 		memcpy(req->ip_params.ip_record.ip_addr.subnet_mask,
-		       if_info->ip_addr.subnet_mask, ip_param->len);
+		       if_info->ip_addr.subnet_mask,
+		       sizeof(req->ip_params.ip_record.ip_addr.subnet_mask));
 	}
 
 	rc = mgmt_exec_nonemb_cmd(phba, &nonemb_cmd, NULL, 0);
@@ -850,7 +853,7 @@ static int mgmt_modify_gateway(struct beiscsi_hba *phba, uint8_t *gt_addr,
 	req->action = gtway_action;
 	req->ip_addr.ip_type = BE2_IPV4;
 
-	memcpy(req->ip_addr.addr, gt_addr, param_len);
+	memcpy(req->ip_addr.addr, gt_addr, sizeof(req->ip_addr.addr));
 
 	return mgmt_exec_nonemb_cmd(phba, &nonemb_cmd, NULL, 0);
 }

From e6f0636b610650b66849476f0de8663f9cf5ff15 Mon Sep 17 00:00:00 2001
From: Chris J Arges <chris.j.arges@canonical.com>
Date: Tue, 23 Sep 2014 09:22:25 -0500
Subject: [PATCH 0944/1185] mptfusion: enable no_write_same for vmware scsi
 disks

commit 4089b71cc820a426d601283c92fcd4ffeb5139c2 upstream.

When using a virtual SCSI disk in a VMWare VM if blkdev_issue_zeroout is used
data can be improperly zeroed out using the mptfusion driver. This patch
disables write_same for this driver and the vmware subsystem_vendor which
ensures that manual zeroing out is used instead.

BugLink: http://bugs.launchpad.net/bugs/1371591
Reported-by: Bruce Lucas <bruce.lucas@mongodb.com>
Tested-by: Chris J Arges <chris.j.arges@canonical.com>
Signed-off-by: Chris J Arges <chris.j.arges@canonical.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/message/fusion/mptspi.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index 5653e505f91f..424f51d1e2ce 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c
@@ -1422,6 +1422,11 @@ mptspi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto out_mptspi_probe;
         }
 
+	/* VMWare emulation doesn't properly implement WRITE_SAME
+	 */
+	if (pdev->subsystem_vendor == 0x15AD)
+		sh->no_write_same = 1;
+
 	spin_lock_irqsave(&ioc->FreeQlock, flags);
 
 	/* Attach the SCSI Host to the IOC structure

From e166c65c41b14aa35a1bd2dcbd4da08958321a3b Mon Sep 17 00:00:00 2001
From: Arun Easi <arun.easi@qlogic.com>
Date: Thu, 25 Sep 2014 06:14:45 -0400
Subject: [PATCH 0945/1185] qla2xxx: Use correct offset to req-q-out for
 reserve calculation

commit 75554b68ac1e018bca00d68a430b92ada8ab52dd upstream.

Signed-off-by: Arun Easi <arun.easi@qlogic.com>
Signed-off-by: Saurav Kashyap <saurav.kashyap@qlogic.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/qla2xxx/qla_target.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index f033b191a022..e6884940d107 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -1514,12 +1514,10 @@ static inline void qlt_unmap_sg(struct scsi_qla_host *vha,
 static int qlt_check_reserve_free_req(struct scsi_qla_host *vha,
 	uint32_t req_cnt)
 {
-	struct qla_hw_data *ha = vha->hw;
-	device_reg_t __iomem *reg = ha->iobase;
 	uint32_t cnt;
 
 	if (vha->req->cnt < (req_cnt + 2)) {
-		cnt = (uint16_t)RD_REG_DWORD(&reg->isp24.req_q_out);
+		cnt = (uint16_t)RD_REG_DWORD(vha->req->req_q_out);
 
 		ql_dbg(ql_dbg_tgt, vha, 0xe00a,
 		    "Request ring circled: cnt=%d, vha->->ring_index=%d, "

From 71e82363fbf9b4c90bd70fc3b19fd4cea58b16a9 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 18 Sep 2014 11:25:37 -0700
Subject: [PATCH 0946/1185] firmware_class: make sure fw requests contain a
 name

commit 471b095dfe0d693a8d624cbc716d1ee4d74eb437 upstream.

An empty firmware request name will trigger warnings when building
device names. Make sure this is caught earlier and rejected.

The warning was visible via the test_firmware.ko module interface:

echo -ne "\x00" > /sys/devices/virtual/misc/test_firmware/trigger_request

Reported-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/firmware_class.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 01e21037d8fe..00a565676583 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -1021,6 +1021,9 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
 	if (!firmware_p)
 		return -EINVAL;
 
+	if (!name || name[0] == '\0')
+		return -EINVAL;
+
 	ret = _request_firmware_prepare(&fw, name, device);
 	if (ret <= 0) /* error or already assigned */
 		goto out;

From 55a5b2114b4783803cd5dfb6d237be2158329b6b Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Wed, 27 Aug 2014 16:25:31 -0700
Subject: [PATCH 0947/1185] Drivers: hv: vmbus: Cleanup vmbus_post_msg()

commit fdeebcc62279119dbeafbc1a2e39e773839025fd upstream.

Posting messages to the host can fail because of transient resource
related failures. Correctly deal with these failures and increase the
number of attempts to post the message before giving up.

In this version of the patch, I have normalized the error code to
Linux error code.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Tested-by: Sitsofe Wheeler <sitsofe@yahoo.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/connection.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index b9f5d295cbec..a3b555808768 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -393,10 +393,21 @@ int vmbus_post_msg(void *buffer, size_t buflen)
 	 * insufficient resources. Retry the operation a couple of
 	 * times before giving up.
 	 */
-	while (retries < 3) {
-		ret =  hv_post_message(conn_id, 1, buffer, buflen);
-		if (ret != HV_STATUS_INSUFFICIENT_BUFFERS)
+	while (retries < 10) {
+		ret = hv_post_message(conn_id, 1, buffer, buflen);
+
+		switch (ret) {
+		case HV_STATUS_INSUFFICIENT_BUFFERS:
+			ret = -ENOMEM;
+		case -ENOMEM:
+			break;
+		case HV_STATUS_SUCCESS:
 			return ret;
+		default:
+			pr_err("hv_post_msg() failed; error code:%d\n", ret);
+			return -EINVAL;
+		}
+
 		retries++;
 		msleep(100);
 	}

From a443461e9e0068d13ca632610f1bcb300f7f8768 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Wed, 27 Aug 2014 16:25:32 -0700
Subject: [PATCH 0948/1185] Drivers: hv: vmbus: Cleanup vmbus_teardown_gpadl()

commit 66be653083057358724d56d817e870e53fb81ca7 upstream.

Eliminate calls to BUG_ON() by properly handling errors. In cases where
rollback is possible, we will return the appropriate error to have the
calling code decide how to rollback state. In the case where we are
transferring ownership of the guest physical pages to the host,
we will wait for the host to respond.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Tested-by: Sitsofe Wheeler <sitsofe@yahoo.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 0b122f8c7005..a367365b3613 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -464,7 +464,7 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
 	struct vmbus_channel_gpadl_teardown *msg;
 	struct vmbus_channel_msginfo *info;
 	unsigned long flags;
-	int ret, t;
+	int ret;
 
 	info = kmalloc(sizeof(*info) +
 		       sizeof(struct vmbus_channel_gpadl_teardown), GFP_KERNEL);
@@ -486,11 +486,12 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
 	ret = vmbus_post_msg(msg,
 			       sizeof(struct vmbus_channel_gpadl_teardown));
 
-	BUG_ON(ret != 0);
-	t = wait_for_completion_timeout(&info->waitevent, 5*HZ);
-	BUG_ON(t == 0);
+	if (ret)
+		goto post_msg_err;
 
-	/* Received a torndown response */
+	wait_for_completion(&info->waitevent);
+
+post_msg_err:
 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 	list_del(&info->msglistentry);
 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

From b5e03dd7bfe9a3a9cdeb160f869b3c264d51706f Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Wed, 27 Aug 2014 16:25:34 -0700
Subject: [PATCH 0949/1185] Drivers: hv: vmbus: Cleanup vmbus_establish_gpadl()

commit 72c6b71c245dac8f371167d97ef471b367d0b66b upstream.

Eliminate the call to BUG_ON() by waiting for the host to respond. We are
trying to reclaim the ownership of memory that was given to the host and so
we will have to wait until the host responds.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Tested-by: Sitsofe Wheeler <sitsofe@yahoo.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index a367365b3613..d34db9fdc518 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -392,7 +392,6 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
 	u32 next_gpadl_handle;
 	unsigned long flags;
 	int ret = 0;
-	int t;
 
 	next_gpadl_handle = atomic_read(&vmbus_connection.next_gpadl_handle);
 	atomic_inc(&vmbus_connection.next_gpadl_handle);
@@ -439,9 +438,7 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
 
 		}
 	}
-	t = wait_for_completion_timeout(&msginfo->waitevent, 5*HZ);
-	BUG_ON(t == 0);
-
+	wait_for_completion(&msginfo->waitevent);
 
 	/* At this point, we received the gpadl created msg */
 	*gpadl_handle = gpadlmsg->gpadl;

From bd6174827fa6862fc2b59c2e0eeee5e5329befa2 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Wed, 27 Aug 2014 16:25:35 -0700
Subject: [PATCH 0950/1185] Drivers: hv: vmbus: Fix a bug in vmbus_open()

commit 45d727cee9e200f5b351528b9fb063b69cf702c8 upstream.

Fix a bug in vmbus_open() and properly propagate the error. I would
like to thank Dexuan Cui <decui@microsoft.com> for identifying the
issue.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Tested-by: Sitsofe Wheeler <sitsofe@yahoo.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index d34db9fdc518..92f34de7aee9 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -199,8 +199,10 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 	ret = vmbus_post_msg(open_msg,
 			       sizeof(struct vmbus_channel_open_channel));
 
-	if (ret != 0)
+	if (ret != 0) {
+		err = ret;
 		goto error1;
+	}
 
 	t = wait_for_completion_timeout(&open_info->waitevent, 5*HZ);
 	if (t == 0) {

From 394359bd95c76762ee2919f6bcec47f241e80e77 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 28 Sep 2014 10:50:06 +0200
Subject: [PATCH 0951/1185] m68k: Disable/restore interrupts in
 hwreg_present()/hwreg_write()

commit e4dc601bf99ccd1c95b7e6eef1d3cf3c4b0d4961 upstream.

hwreg_present() and hwreg_write() temporarily change the VBR register to
another vector table. This table contains a valid bus error handler
only, all other entries point to arbitrary addresses.

If an interrupt comes in while the temporary table is active, the
processor will start executing at such an arbitrary address, and the
kernel will crash.

While most callers run early, before interrupts are enabled, or
explicitly disable interrupts, Finn Thain pointed out that macsonic has
one callsite that doesn't, causing intermittent boot crashes.
There's another unsafe callsite in hilkbd.

Fix this for good by disabling and restoring interrupts inside
hwreg_present() and hwreg_write().

Explicitly disabling interrupts can be removed from the callsites later.

Reported-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/m68k/mm/hwtest.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/m68k/mm/hwtest.c b/arch/m68k/mm/hwtest.c
index 2c7dde3c6430..2a5259fd23eb 100644
--- a/arch/m68k/mm/hwtest.c
+++ b/arch/m68k/mm/hwtest.c
@@ -28,9 +28,11 @@
 int hwreg_present( volatile void *regp )
 {
     int	ret = 0;
+    unsigned long flags;
     long	save_sp, save_vbr;
     long	tmp_vectors[3];
 
+    local_irq_save(flags);
     __asm__ __volatile__
 	(	"movec	%/vbr,%2\n\t"
 		"movel	#Lberr1,%4@(8)\n\t"
@@ -46,6 +48,7 @@ int hwreg_present( volatile void *regp )
 		: "=&d" (ret), "=&r" (save_sp), "=&r" (save_vbr)
 		: "a" (regp), "a" (tmp_vectors)
                 );
+    local_irq_restore(flags);
 
     return( ret );
 }
@@ -58,9 +61,11 @@ EXPORT_SYMBOL(hwreg_present);
 int hwreg_write( volatile void *regp, unsigned short val )
 {
 	int		ret;
+	unsigned long flags;
 	long	save_sp, save_vbr;
 	long	tmp_vectors[3];
 
+	local_irq_save(flags);
 	__asm__ __volatile__
 	(	"movec	%/vbr,%2\n\t"
 		"movel	#Lberr2,%4@(8)\n\t"
@@ -78,6 +83,7 @@ int hwreg_write( volatile void *regp, unsigned short val )
 		: "=&d" (ret), "=&r" (save_sp), "=&r" (save_vbr)
 		: "a" (regp), "a" (tmp_vectors), "g" (val)
 	);
+	local_irq_restore(flags);
 
 	return( ret );
 }

From c60cd0d07d35b42b5e30ed4d5155bd68605a0f2d Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Sat, 27 Sep 2014 12:31:35 +0200
Subject: [PATCH 0952/1185] Documentation: lzo: document part of the encoding

commit d98a0526434d27e261f622cf9d2e0028b5ff1a00 upstream.

Add a complete description of the LZO format as processed by the
decompressor. I have not found a public specification of this format
hence this analysis, which will be used to better understand the code.

Cc: Willem Pinckaers <willem@lekkertech.net>
Cc: "Don A. Bailey" <donb@securitymouse.com>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/lzo.txt | 164 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 164 insertions(+)
 create mode 100644 Documentation/lzo.txt

diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt
new file mode 100644
index 000000000000..ea45dd3901e3
--- /dev/null
+++ b/Documentation/lzo.txt
@@ -0,0 +1,164 @@
+
+LZO stream format as understood by Linux's LZO decompressor
+===========================================================
+
+Introduction
+
+  This is not a specification. No specification seems to be publicly available
+  for the LZO stream format. This document describes what input format the LZO
+  decompressor as implemented in the Linux kernel understands. The file subject
+  of this analysis is lib/lzo/lzo1x_decompress_safe.c. No analysis was made on
+  the compressor nor on any other implementations though it seems likely that
+  the format matches the standard one. The purpose of this document is to
+  better understand what the code does in order to propose more efficient fixes
+  for future bug reports.
+
+Description
+
+  The stream is composed of a series of instructions, operands, and data. The
+  instructions consist of a few bits representing an opcode, and bits forming
+  the operands for the instruction, whose size and position depend on the
+  opcode and on the number of literals copied by previous instruction. The
+  operands are used to indicate :
+
+    - a distance when copying data from the dictionary (past output buffer)
+    - a length (number of bytes to copy from dictionary)
+    - the number of literals to copy, which is retained in variable "state"
+      as a piece of information for next instructions.
+
+  Optionally depending on the opcode and operands, extra data may follow. These
+  extra data can be a complement for the operand (eg: a length or a distance
+  encoded on larger values), or a literal to be copied to the output buffer.
+
+  The first byte of the block follows a different encoding from other bytes, it
+  seems to be optimized for literal use only, since there is no dictionary yet
+  prior to that byte.
+
+  Lengths are always encoded on a variable size starting with a small number
+  of bits in the operand. If the number of bits isn't enough to represent the
+  length, up to 255 may be added in increments by consuming more bytes with a
+  rate of at most 255 per extra byte (thus the compression ratio cannot exceed
+  around 255:1). The variable length encoding using #bits is always the same :
+
+       length = byte & ((1 << #bits) - 1)
+       if (!length) {
+               length = ((1 << #bits) - 1)
+               length += 255*(number of zero bytes)
+               length += first-non-zero-byte
+       }
+       length += constant (generally 2 or 3)
+
+  For references to the dictionary, distances are relative to the output
+  pointer. Distances are encoded using very few bits belonging to certain
+  ranges, resulting in multiple copy instructions using different encodings.
+  Certain encodings involve one extra byte, others involve two extra bytes
+  forming a little-endian 16-bit quantity (marked LE16 below).
+
+  After any instruction except the large literal copy, 0, 1, 2 or 3 literals
+  are copied before starting the next instruction. The number of literals that
+  were copied may change the meaning and behaviour of the next instruction. In
+  practice, only one instruction needs to know whether 0, less than 4, or more
+  literals were copied. This is the information stored in the <state> variable
+  in this implementation. This number of immediate literals to be copied is
+  generally encoded in the last two bits of the instruction but may also be
+  taken from the last two bits of an extra operand (eg: distance).
+
+  End of stream is declared when a block copy of distance 0 is seen. Only one
+  instruction may encode this distance (0001HLLL), it takes one LE16 operand
+  for the distance, thus requiring 3 bytes.
+
+  IMPORTANT NOTE : in the code some length checks are missing because certain
+  instructions are called under the assumption that a certain number of bytes
+  follow because it has already been guaranteed before parsing the instructions.
+  They just have to "refill" this credit if they consume extra bytes. This is
+  an implementation design choice independent of the algorithm or encoding.
+
+Byte sequences
+
+  First byte encoding :
+
+      0..17   : follow regular instruction encoding, see below. It is worth
+                noting that codes 16 and 17 will represent a block copy from
+                the dictionary which is empty, and that they will always be
+                invalid at this place.
+
+      18..21  : copy 0..3 literals
+                state = (byte - 17) = 0..3  [ copy <state> literals ]
+                skip byte
+
+      22..255 : copy literal string
+                length = (byte - 17) = 4..238
+                state = 4 [ don't copy extra literals ]
+                skip byte
+
+  Instruction encoding :
+
+      0 0 0 0 X X X X  (0..15)
+        Depends on the number of literals copied by the last instruction.
+        If last instruction did not copy any literal (state == 0), this
+        encoding will be a copy of 4 or more literals, and must be interpreted
+        like this :
+
+           0 0 0 0 L L L L  (0..15)  : copy long literal string
+           length = 3 + (L ?: 15 + (zero_bytes * 255) + non_zero_byte)
+           state = 4  (no extra literals are copied)
+
+        If last instruction used to copy between 1 to 3 literals (encoded in
+        the instruction's opcode or distance), the instruction is a copy of a
+        2-byte block from the dictionary within a 1kB distance. It is worth
+        noting that this instruction provides little savings since it uses 2
+        bytes to encode a copy of 2 other bytes but it encodes the number of
+        following literals for free. It must be interpreted like this :
+
+           0 0 0 0 D D S S  (0..15)  : copy 2 bytes from <= 1kB distance
+           length = 2
+           state = S (copy S literals after this block)
+         Always followed by exactly one byte : H H H H H H H H
+           distance = (H << 2) + D + 1
+
+        If last instruction used to copy 4 or more literals (as detected by
+        state == 4), the instruction becomes a copy of a 3-byte block from the
+        dictionary from a 2..3kB distance, and must be interpreted like this :
+
+           0 0 0 0 D D S S  (0..15)  : copy 3 bytes from 2..3 kB distance
+           length = 3
+           state = S (copy S literals after this block)
+         Always followed by exactly one byte : H H H H H H H H
+           distance = (H << 2) + D + 2049
+
+      0 0 0 1 H L L L  (16..31)
+           Copy of a block within 16..48kB distance (preferably less than 10B)
+           length = 2 + (L ?: 7 + (zero_bytes * 255) + non_zero_byte)
+        Always followed by exactly one LE16 :  D D D D D D D D : D D D D D D S S
+           distance = 16384 + (H << 14) + D
+           state = S (copy S literals after this block)
+           End of stream is reached if distance == 16384
+
+      0 0 1 L L L L L  (32..63)
+           Copy of small block within 16kB distance (preferably less than 34B)
+           length = 2 + (L ?: 31 + (zero_bytes * 255) + non_zero_byte)
+        Always followed by exactly one LE16 :  D D D D D D D D : D D D D D D S S
+           distance = D + 1
+           state = S (copy S literals after this block)
+
+      0 1 L D D D S S  (64..127)
+           Copy 3-4 bytes from block within 2kB distance
+           state = S (copy S literals after this block)
+           length = 3 + L
+         Always followed by exactly one byte : H H H H H H H H
+           distance = (H << 3) + D + 1
+
+      1 L L D D D S S  (128..255)
+           Copy 5-8 bytes from block within 2kB distance
+           state = S (copy S literals after this block)
+           length = 5 + L
+         Always followed by exactly one byte : H H H H H H H H
+           distance = (H << 3) + D + 1
+
+Authors
+
+  This document was written by Willy Tarreau <w@1wt.eu> on 2014/07/19 during an
+  analysis of the decompression code available in Linux 3.16-rc5. The code is
+  tricky, it is possible that this document contains mistakes or that a few
+  corner cases were overlooked. In any case, please report any doubt, fix, or
+  proposed updates to the author(s) so that the document can be updated.

From f7939e1eb8de872933fc2e3fa9934aa55567a541 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Sat, 27 Sep 2014 12:31:36 +0200
Subject: [PATCH 0953/1185] Revert "lzo: properly check for overruns"

commit af958a38a60c7ca3d8a39c918c1baa2ff7b6b233 upstream.

This reverts commit 206a81c ("lzo: properly check for overruns").

As analysed by Willem Pinckaers, this fix is still incomplete on
certain rare corner cases, and it is easier to restart from the
original code.

Reported-by: Willem Pinckaers <willem@lekkertech.net>
Cc: "Don A. Bailey" <donb@securitymouse.com>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/lzo/lzo1x_decompress_safe.c | 62 +++++++++++----------------------
 1 file changed, 21 insertions(+), 41 deletions(-)

diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c
index 8563081e8da3..569985d522d5 100644
--- a/lib/lzo/lzo1x_decompress_safe.c
+++ b/lib/lzo/lzo1x_decompress_safe.c
@@ -19,31 +19,11 @@
 #include <linux/lzo.h>
 #include "lzodefs.h"
 
-#define HAVE_IP(t, x)					\
-	(((size_t)(ip_end - ip) >= (size_t)(t + x)) &&	\
-	 (((t + x) >= t) && ((t + x) >= x)))
-
-#define HAVE_OP(t, x)					\
-	(((size_t)(op_end - op) >= (size_t)(t + x)) &&	\
-	 (((t + x) >= t) && ((t + x) >= x)))
-
-#define NEED_IP(t, x)					\
-	do {						\
-		if (!HAVE_IP(t, x))			\
-			goto input_overrun;		\
-	} while (0)
-
-#define NEED_OP(t, x)					\
-	do {						\
-		if (!HAVE_OP(t, x))			\
-			goto output_overrun;		\
-	} while (0)
-
-#define TEST_LB(m_pos)					\
-	do {						\
-		if ((m_pos) < out)			\
-			goto lookbehind_overrun;	\
-	} while (0)
+#define HAVE_IP(x)      ((size_t)(ip_end - ip) >= (size_t)(x))
+#define HAVE_OP(x)      ((size_t)(op_end - op) >= (size_t)(x))
+#define NEED_IP(x)      if (!HAVE_IP(x)) goto input_overrun
+#define NEED_OP(x)      if (!HAVE_OP(x)) goto output_overrun
+#define TEST_LB(m_pos)  if ((m_pos) < out) goto lookbehind_overrun
 
 int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 			  unsigned char *out, size_t *out_len)
@@ -78,14 +58,14 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 					while (unlikely(*ip == 0)) {
 						t += 255;
 						ip++;
-						NEED_IP(1, 0);
+						NEED_IP(1);
 					}
 					t += 15 + *ip++;
 				}
 				t += 3;
 copy_literal_run:
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-				if (likely(HAVE_IP(t, 15) && HAVE_OP(t, 15))) {
+				if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) {
 					const unsigned char *ie = ip + t;
 					unsigned char *oe = op + t;
 					do {
@@ -101,8 +81,8 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 				} else
 #endif
 				{
-					NEED_OP(t, 0);
-					NEED_IP(t, 3);
+					NEED_OP(t);
+					NEED_IP(t + 3);
 					do {
 						*op++ = *ip++;
 					} while (--t > 0);
@@ -115,7 +95,7 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 				m_pos -= t >> 2;
 				m_pos -= *ip++ << 2;
 				TEST_LB(m_pos);
-				NEED_OP(2, 0);
+				NEED_OP(2);
 				op[0] = m_pos[0];
 				op[1] = m_pos[1];
 				op += 2;
@@ -139,10 +119,10 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 				while (unlikely(*ip == 0)) {
 					t += 255;
 					ip++;
-					NEED_IP(1, 0);
+					NEED_IP(1);
 				}
 				t += 31 + *ip++;
-				NEED_IP(2, 0);
+				NEED_IP(2);
 			}
 			m_pos = op - 1;
 			next = get_unaligned_le16(ip);
@@ -157,10 +137,10 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 				while (unlikely(*ip == 0)) {
 					t += 255;
 					ip++;
-					NEED_IP(1, 0);
+					NEED_IP(1);
 				}
 				t += 7 + *ip++;
-				NEED_IP(2, 0);
+				NEED_IP(2);
 			}
 			next = get_unaligned_le16(ip);
 			ip += 2;
@@ -174,7 +154,7 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
 		if (op - m_pos >= 8) {
 			unsigned char *oe = op + t;
-			if (likely(HAVE_OP(t, 15))) {
+			if (likely(HAVE_OP(t + 15))) {
 				do {
 					COPY8(op, m_pos);
 					op += 8;
@@ -184,7 +164,7 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 					m_pos += 8;
 				} while (op < oe);
 				op = oe;
-				if (HAVE_IP(6, 0)) {
+				if (HAVE_IP(6)) {
 					state = next;
 					COPY4(op, ip);
 					op += next;
@@ -192,7 +172,7 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 					continue;
 				}
 			} else {
-				NEED_OP(t, 0);
+				NEED_OP(t);
 				do {
 					*op++ = *m_pos++;
 				} while (op < oe);
@@ -201,7 +181,7 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 #endif
 		{
 			unsigned char *oe = op + t;
-			NEED_OP(t, 0);
+			NEED_OP(t);
 			op[0] = m_pos[0];
 			op[1] = m_pos[1];
 			op += 2;
@@ -214,15 +194,15 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 		state = next;
 		t = next;
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-		if (likely(HAVE_IP(6, 0) && HAVE_OP(4, 0))) {
+		if (likely(HAVE_IP(6) && HAVE_OP(4))) {
 			COPY4(op, ip);
 			op += t;
 			ip += t;
 		} else
 #endif
 		{
-			NEED_IP(t, 3);
-			NEED_OP(t, 0);
+			NEED_IP(t + 3);
+			NEED_OP(t);
 			while (t > 0) {
 				*op++ = *ip++;
 				t--;

From 96894152592785638ba49e3a55c5bc218aafc49e Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Sat, 27 Sep 2014 12:31:37 +0200
Subject: [PATCH 0954/1185] lzo: check for length overrun in variable length
 encoding.

commit 72cf90124e87d975d0b2114d930808c58b4c05e4 upstream.

This fix ensures that we never meet an integer overflow while adding
255 while parsing a variable length encoding. It works differently from
commit 206a81c ("lzo: properly check for overruns") because instead of
ensuring that we don't overrun the input, which is tricky to guarantee
due to many assumptions in the code, it simply checks that the cumulated
number of 255 read cannot overflow by bounding this number.

The MAX_255_COUNT is the maximum number of times we can add 255 to a base
count without overflowing an integer. The multiply will overflow when
multiplying 255 by more than MAXINT/255. The sum will overflow earlier
depending on the base count. Since the base count is taken from a u8
and a few bits, it is safe to assume that it will always be lower than
or equal to 2*255, thus we can always prevent any overflow by accepting
two less 255 steps.

This patch also reduces the CPU overhead and actually increases performance
by 1.1% compared to the initial code, while the previous fix costs 3.1%
(measured on x86_64).

The fix needs to be backported to all currently supported stable kernels.

Reported-by: Willem Pinckaers <willem@lekkertech.net>
Cc: "Don A. Bailey" <donb@securitymouse.com>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/lzo/lzo1x_decompress_safe.c | 43 ++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 6 deletions(-)

diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c
index 569985d522d5..a1c387f6afba 100644
--- a/lib/lzo/lzo1x_decompress_safe.c
+++ b/lib/lzo/lzo1x_decompress_safe.c
@@ -25,6 +25,16 @@
 #define NEED_OP(x)      if (!HAVE_OP(x)) goto output_overrun
 #define TEST_LB(m_pos)  if ((m_pos) < out) goto lookbehind_overrun
 
+/* This MAX_255_COUNT is the maximum number of times we can add 255 to a base
+ * count without overflowing an integer. The multiply will overflow when
+ * multiplying 255 by more than MAXINT/255. The sum will overflow earlier
+ * depending on the base count. Since the base count is taken from a u8
+ * and a few bits, it is safe to assume that it will always be lower than
+ * or equal to 2*255, thus we can always prevent any overflow by accepting
+ * two less 255 steps. See Documentation/lzo.txt for more information.
+ */
+#define MAX_255_COUNT      ((((size_t)~0) / 255) - 2)
+
 int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 			  unsigned char *out, size_t *out_len)
 {
@@ -55,12 +65,19 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 		if (t < 16) {
 			if (likely(state == 0)) {
 				if (unlikely(t == 0)) {
+					size_t offset;
+					const unsigned char *ip_last = ip;
+
 					while (unlikely(*ip == 0)) {
-						t += 255;
 						ip++;
 						NEED_IP(1);
 					}
-					t += 15 + *ip++;
+					offset = ip - ip_last;
+					if (unlikely(offset > MAX_255_COUNT))
+						return LZO_E_ERROR;
+
+					offset = (offset << 8) - offset;
+					t += offset + 15 + *ip++;
 				}
 				t += 3;
 copy_literal_run:
@@ -116,12 +133,19 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 		} else if (t >= 32) {
 			t = (t & 31) + (3 - 1);
 			if (unlikely(t == 2)) {
+				size_t offset;
+				const unsigned char *ip_last = ip;
+
 				while (unlikely(*ip == 0)) {
-					t += 255;
 					ip++;
 					NEED_IP(1);
 				}
-				t += 31 + *ip++;
+				offset = ip - ip_last;
+				if (unlikely(offset > MAX_255_COUNT))
+					return LZO_E_ERROR;
+
+				offset = (offset << 8) - offset;
+				t += offset + 31 + *ip++;
 				NEED_IP(2);
 			}
 			m_pos = op - 1;
@@ -134,12 +158,19 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 			m_pos -= (t & 8) << 11;
 			t = (t & 7) + (3 - 1);
 			if (unlikely(t == 2)) {
+				size_t offset;
+				const unsigned char *ip_last = ip;
+
 				while (unlikely(*ip == 0)) {
-					t += 255;
 					ip++;
 					NEED_IP(1);
 				}
-				t += 7 + *ip++;
+				offset = ip - ip_last;
+				if (unlikely(offset > MAX_255_COUNT))
+					return LZO_E_ERROR;
+
+				offset = (offset << 8) - offset;
+				t += offset + 7 + *ip++;
 				NEED_IP(2);
 			}
 			next = get_unaligned_le16(ip);

From 60eefed4786cbefa0f4d206dfcd7e3c87c992ded Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sat, 27 Sep 2014 17:02:26 -0400
Subject: [PATCH 0955/1185] NFSv4: Fix lock recovery when
 CREATE_SESSION/SETCLIENTID_CONFIRM fails

commit a4339b7b686b4acc8b6de2b07d7bacbe3ae44b83 upstream.

If a NFSv4.x server returns NFS4ERR_STALE_CLIENTID in response to a
CREATE_SESSION or SETCLIENTID_CONFIRM in order to tell us that it rebooted
a second time, then the client will currently take this to mean that it must
declare all locks to be stale, and hence ineligible for reboot recovery.

RFC3530 and RFC5661 both suggest that the client should instead rely on the
server to respond to ineligible open share, lock and delegation reclaim
requests with NFS4ERR_NO_GRACE in this situation.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/nfs4state.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2c37442ed936..0e73bff6867f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1755,7 +1755,6 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
 		break;
 	case -NFS4ERR_STALE_CLIENTID:
 		clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
-		nfs4_state_clear_reclaim_reboot(clp);
 		nfs4_state_start_reclaim_reboot(clp);
 		break;
 	case -NFS4ERR_CLID_INUSE:

From bb7105c3d5b128542239f2eff1e399e26959bcc7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sat, 27 Sep 2014 17:41:51 -0400
Subject: [PATCH 0956/1185] NFSv4: fix open/lock state recovery error handling

commit df817ba35736db2d62b07de6f050a4db53492ad8 upstream.

The current open/lock state recovery unfortunately does not handle errors
such as NFS4ERR_CONN_NOT_BOUND_TO_SESSION correctly. Instead of looping,
it just proceeds as if the state manager is finished recovering.
This patch ensures that we loop back, handle higher priority errors
and complete the open/lock state recovery.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/nfs4state.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 0e73bff6867f..d482b86d0e0b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1699,7 +1699,8 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
 			if (status < 0) {
 				set_bit(ops->owner_flag_bit, &sp->so_flags);
 				nfs4_put_state_owner(sp);
-				return nfs4_recovery_handle_error(clp, status);
+				status = nfs4_recovery_handle_error(clp, status);
+				return (status != 0) ? status : -EAGAIN;
 			}
 
 			nfs4_put_state_owner(sp);
@@ -1708,7 +1709,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
 		spin_unlock(&clp->cl_lock);
 	}
 	rcu_read_unlock();
-	return status;
+	return 0;
 }
 
 static int nfs4_check_lease(struct nfs_client *clp)
@@ -2173,14 +2174,11 @@ static void nfs4_state_manager(struct nfs_client *clp)
 			section = "reclaim reboot";
 			status = nfs4_do_reclaim(clp,
 				clp->cl_mvops->reboot_recovery_ops);
-			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
-			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
-				continue;
-			nfs4_state_end_reclaim_reboot(clp);
-			if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state))
+			if (status == -EAGAIN)
 				continue;
 			if (status < 0)
 				goto out_error;
+			nfs4_state_end_reclaim_reboot(clp);
 		}
 
 		/* Now recover expired state... */
@@ -2188,9 +2186,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
 			section = "reclaim nograce";
 			status = nfs4_do_reclaim(clp,
 				clp->cl_mvops->nograce_recovery_ops);
-			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
-			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
-			    test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+			if (status == -EAGAIN)
 				continue;
 			if (status < 0)
 				goto out_error;

From f2368dcc930680343e06848cc09689f7b8e8a0b9 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Mon, 29 Sep 2014 12:31:57 -0400
Subject: [PATCH 0957/1185] NFSv4.1: Fix an NFSv4.1 state renewal regression

commit d1f456b0b9545f1606a54cd17c20775f159bd2ce upstream.

Commit 2f60ea6b8ced ("NFSv4: The NFSv4.0 client must send RENEW calls if it holds a delegation") set the NFS4_RENEW_TIMEOUT flag in nfs4_renew_state, and does
not put an nfs41_proc_async_sequence call, the NFSv4.1 lease renewal heartbeat
call, on the wire to renew the NFSv4.1 state if the flag was not set.

The NFS4_RENEW_TIMEOUT flag is set when "now" is after the last renewal
(cl_last_renewal) plus the lease time divided by 3. This is arbitrary and
sometimes does the following:

In normal operation, the only way a future state renewal call is put on the
wire is via a call to nfs4_schedule_state_renewal, which schedules a
nfs4_renew_state workqueue task. nfs4_renew_state determines if the
NFS4_RENEW_TIMEOUT should be set, and then calls nfs41_proc_async_sequence,
which only gets sent if the NFS4_RENEW_TIMEOUT flag is set.
Then the nfs41_proc_async_sequence rpc_release function schedules
another state renewal via nfs4_schedule_state_renewal.

Without this change we can get into a state where an application stops
accessing the NFSv4.1 share, state renewal calls stop due to the
NFS4_RENEW_TIMEOUT flag _not_ being set. The only way to recover
from this situation is with a clientid re-establishment, once the application
resumes and the server has timed out the lease and so returns
NFS4ERR_BAD_SESSION on the subsequent SEQUENCE operation.

An example application:
open, lock, write a file.

sleep for 6 * lease (could be less)

unlock, close.

In the above example with NFSv4.1 delegations enabled, without this change,
there are no OP_SEQUENCE state renewal calls during the sleep, and the
clientid is recovered due to lease expiration on the close.

This issue does not occur with NFSv4.1 delegations disabled, nor with
NFSv4.0, with or without delegations enabled.

Signed-off-by: Andy Adamson <andros@netapp.com>
Link: http://lkml.kernel.org/r/1411486536-23401-1-git-send-email-andros@netapp.com
Fixes: 2f60ea6b8ced (NFSv4: The NFSv4.0 client must send RENEW calls...)
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/nfs4proc.c   |  2 +-
 fs/nfs/nfs4renewd.c | 12 ++++++++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 3fc87b6f9def..69fc437be661 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6067,7 +6067,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
 	int ret = 0;
 
 	if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
-		return 0;
+		return -EAGAIN;
 	task = _nfs41_proc_sequence(clp, cred, false);
 	if (IS_ERR(task))
 		ret = PTR_ERR(task);
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 1720d32ffa54..e1ba58c3d1ad 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -88,10 +88,18 @@ nfs4_renew_state(struct work_struct *work)
 			}
 			nfs_expire_all_delegations(clp);
 		} else {
+			int ret;
+
 			/* Queue an asynchronous RENEW. */
-			ops->sched_state_renewal(clp, cred, renew_flags);
+			ret = ops->sched_state_renewal(clp, cred, renew_flags);
 			put_rpccred(cred);
-			goto out_exp;
+			switch (ret) {
+			default:
+				goto out_exp;
+			case -EAGAIN:
+			case -ENOMEM:
+				break;
+			}
 		}
 	} else {
 		dprintk("%s: failed to call renewd. Reason: lease not expired \n",

From 75f43e3bf1fc500716b81a5f82a428fdc84c1cf8 Mon Sep 17 00:00:00 2001
From: Oren Givon <oren.givon@intel.com>
Date: Wed, 17 Sep 2014 10:31:56 +0300
Subject: [PATCH 0958/1185] iwlwifi: Add missing PCI IDs for the 7260 series

commit 4f08970f5284dce486f0e2290834aefb2a262189 upstream.

Add 4 missing PCI IDs for the 7260 series.

Signed-off-by: Oren Givon <oren.givon@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/iwlwifi/pcie/drv.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c
index b53e5c3f403b..bb020ad3f76c 100644
--- a/drivers/net/wireless/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/iwlwifi/pcie/drv.c
@@ -269,6 +269,8 @@ static DEFINE_PCI_DEVICE_TABLE(iwl_hw_card_ids) = {
 	{IWL_PCI_DEVICE(0x08B1, 0x4070, iwl7260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x08B1, 0x4072, iwl7260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x08B1, 0x4170, iwl7260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x08B1, 0x4C60, iwl7260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x08B1, 0x4C70, iwl7260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x08B1, 0x4060, iwl7260_2n_cfg)},
 	{IWL_PCI_DEVICE(0x08B1, 0x406A, iwl7260_2n_cfg)},
 	{IWL_PCI_DEVICE(0x08B1, 0x4160, iwl7260_2n_cfg)},
@@ -306,6 +308,8 @@ static DEFINE_PCI_DEVICE_TABLE(iwl_hw_card_ids) = {
 	{IWL_PCI_DEVICE(0x08B1, 0xC770, iwl7260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x08B1, 0xC760, iwl7260_2n_cfg)},
 	{IWL_PCI_DEVICE(0x08B2, 0xC270, iwl7260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x08B1, 0xCC70, iwl7260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x08B1, 0xCC60, iwl7260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x08B2, 0xC272, iwl7260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x08B2, 0xC260, iwl7260_2n_cfg)},
 	{IWL_PCI_DEVICE(0x08B2, 0xC26A, iwl7260_n_cfg)},

From 08037263178349a06d05da0c437b0be54ae05d8b Mon Sep 17 00:00:00 2001
From: Douglas Lehr <dllehr@us.ibm.com>
Date: Thu, 21 Aug 2014 09:26:52 +1000
Subject: [PATCH 0959/1185] PCI: Increase IBM ipr SAS Crocodile BARs to at
 least system page size

commit 9fe373f9997b48fcd6222b95baf4a20c134b587a upstream.

The Crocodile chip occasionally comes up with 4k and 8k BAR sizes.  Due to
an erratum, setting the SR-IOV page size causes the physical function BARs
to expand to the system page size.  Since ppc64 uses 64k pages, when Linux
tries to assign the smaller resource sizes to the now 64k BARs the address
will be truncated and the BARs will overlap.

Force Linux to allocate the resource as a full page, which avoids the
overlap.

[bhelgaas: print expanded resource, too]
Signed-off-by: Douglas Lehr <dllehr@us.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Milton Miller <miltonm@us.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/quirks.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 4510279e28dc..910339c0791f 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -28,6 +28,7 @@
 #include <linux/ioport.h>
 #include <linux/sched.h>
 #include <linux/ktime.h>
+#include <linux/mm.h>
 #include <asm/dma.h>	/* isa_dma_bridge_buggy */
 #include "pci.h"
 
@@ -291,6 +292,25 @@ static void quirk_citrine(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM,	PCI_DEVICE_ID_IBM_CITRINE,	quirk_citrine);
 
+/*  On IBM Crocodile ipr SAS adapters, expand BAR to system page size */
+static void quirk_extend_bar_to_page(struct pci_dev *dev)
+{
+	int i;
+
+	for (i = 0; i < PCI_STD_RESOURCE_END; i++) {
+		struct resource *r = &dev->resource[i];
+
+		if (r->flags & IORESOURCE_MEM && resource_size(r) < PAGE_SIZE) {
+			r->end = PAGE_SIZE - 1;
+			r->start = 0;
+			r->flags |= IORESOURCE_UNSET;
+			dev_info(&dev->dev, "expanded BAR %d to page size: %pR\n",
+				 i, r);
+		}
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, 0x034a, quirk_extend_bar_to_page);
+
 /*
  *  S3 868 and 968 chips report region size equal to 32M, but they decode 64M.
  *  If it's needed, re-allocate the region.

From 359772477cc4c410a6d785a3de51d85cd0b6aeba Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Date: Wed, 27 Aug 2014 14:57:57 +0200
Subject: [PATCH 0960/1185] PCI: Generate uppercase hex for modalias interface
 class

commit 89ec3dcf17fd3fa009ecf8faaba36828dd6bc416 upstream.

Some implementations of modprobe fail to load the driver for a PCI device
automatically because the "interface" part of the modalias from the kernel
is lowercase, and the modalias from file2alias is uppercase.

The "interface" is the low-order byte of the Class Code, defined in PCI
r3.0, Appendix D.  Most interface types defined in the spec do not use
alpha characters, so they won't be affected.  For example, 00h, 01h, 10h,
20h, etc. are unaffected.

Print the "interface" byte of the Class Code in uppercase hex, as we
already do for the Vendor ID, Device ID, Class, etc.

[bhelgaas: changelog]
Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/pci-sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 5b4a9d9cd200..689f3c87ee5c 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -175,7 +175,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
 
-	return sprintf(buf, "pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02x\n",
+	return sprintf(buf, "pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02X\n",
 		       pci_dev->vendor, pci_dev->device,
 		       pci_dev->subsystem_vendor, pci_dev->subsystem_device,
 		       (u8)(pci_dev->class >> 16), (u8)(pci_dev->class >> 8),

From 28330aa4e0dc1232ecfb18c7456a5c456f5c6e97 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Wed, 24 Sep 2014 11:24:54 +0200
Subject: [PATCH 0961/1185] rt2800: correct BBP1_TX_POWER_CTRL mask

commit 01f7feeaf4528bec83798316b3c811701bac5d3e upstream.

Two bits control TX power in the BBP_R1 register. Correct the mask;
otherwise we clear an additional bit in the BBP_R1 register, which can have
an unknown, possibly negative effect.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/rt2x00/rt2800.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/rt2x00/rt2800.h b/drivers/net/wireless/rt2x00/rt2800.h
index a7630d5ec892..a629313dd98a 100644
--- a/drivers/net/wireless/rt2x00/rt2800.h
+++ b/drivers/net/wireless/rt2x00/rt2800.h
@@ -1920,7 +1920,7 @@ struct mac_iveiv_entry {
  * 2 - drop tx power by 12dBm,
  * 3 - increase tx power by 6dBm
  */
-#define BBP1_TX_POWER_CTRL		FIELD8(0x07)
+#define BBP1_TX_POWER_CTRL		FIELD8(0x03)
 #define BBP1_TX_ANTENNA			FIELD8(0x18)
 
 /*

From 90f9a6a427abd19c8d304ecbac034deb39d0737c Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@intel.com>
Date: Fri, 8 Aug 2014 19:07:16 +0200
Subject: [PATCH 0962/1185] Bluetooth: Fix HCI H5 corrupted ack value

commit 4807b51895dce8aa650ebebc51fa4a795ed6b8b8 upstream.

In this expression: seq = (seq - 1) % 8
seq (u8) is implicitly converted to an int in the arithmetic operation.
So if seq value is 0, operation is ((0 - 1) % 8) => (-1 % 8) => -1.
The new seq value is 0xff which is an invalid ACK value, we expect 0x07.
It leads to frequent dropped ACK and retransmission.
Fix this by using '&' binary operator instead of '%'.

Signed-off-by: Loic Poulain <loic.poulain@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/hci_h5.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c
index db0be2fb05fe..db35c542eb20 100644
--- a/drivers/bluetooth/hci_h5.c
+++ b/drivers/bluetooth/hci_h5.c
@@ -237,7 +237,7 @@ static void h5_pkt_cull(struct h5 *h5)
 			break;
 
 		to_remove--;
-		seq = (seq - 1) % 8;
+		seq = (seq - 1) & 0x07;
 	}
 
 	if (seq != h5->rx_ack)

From 41ba8fa07c9aa5c18ba3fd14917b53c4367d3ea7 Mon Sep 17 00:00:00 2001
From: Champion Chen <champion_chen@realsil.com.cn>
Date: Sat, 6 Sep 2014 14:06:08 -0500
Subject: [PATCH 0963/1185] Bluetooth: Fix issue with USB suspend in btusb
 driver

commit 85560c4a828ec9c8573840c9b66487b6ae584768 upstream.

Suspend could fail for some platforms because
btusb_suspend==> btusb_stop_traffic ==> usb_kill_anchored_urbs.

When btusb_bulk_complete returns before system suspend and resubmits
an URB, the system cannot enter suspend state.

Signed-off-by: Champion Chen <champion_chen@realsil.com.cn>
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/btusb.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 45aa8e760124..61a8ec4e5f4d 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -302,6 +302,9 @@ static void btusb_intr_complete(struct urb *urb)
 			BT_ERR("%s corrupted event packet", hdev->name);
 			hdev->stat.err_rx++;
 		}
+	} else if (urb->status == -ENOENT) {
+		/* Avoid suspend failed when usb_kill_urb */
+		return;
 	}
 
 	if (!test_bit(BTUSB_INTR_RUNNING, &data->flags))
@@ -390,6 +393,9 @@ static void btusb_bulk_complete(struct urb *urb)
 			BT_ERR("%s corrupted ACL packet", hdev->name);
 			hdev->stat.err_rx++;
 		}
+	} else if (urb->status == -ENOENT) {
+		/* Avoid suspend failed when usb_kill_urb */
+		return;
 	}
 
 	if (!test_bit(BTUSB_BULK_RUNNING, &data->flags))
@@ -484,6 +490,9 @@ static void btusb_isoc_complete(struct urb *urb)
 				hdev->stat.err_rx++;
 			}
 		}
+	} else if (urb->status == -ENOENT) {
+		/* Avoid suspend failed when usb_kill_urb */
+		return;
 	}
 
 	if (!test_bit(BTUSB_ISOC_RUNNING, &data->flags))

From 2e8fee8128715f3d250f92ef8d12a06c23a15101 Mon Sep 17 00:00:00 2001
From: Junxiao Bi <junxiao.bi@oracle.com>
Date: Thu, 9 Oct 2014 15:28:23 -0700
Subject: [PATCH 0964/1185] mm: clear __GFP_FS when PF_MEMALLOC_NOIO is set

commit 934f3072c17cc8886f4c043b47eeeb1b12f8de33 upstream.

commit 21caf2fc1931 ("mm: teach mm by current context info to not do I/O
during memory allocation") introduces PF_MEMALLOC_NOIO flag to avoid doing
I/O inside memory allocation, __GFP_IO is cleared when this flag is set,
but __GFP_FS implies __GFP_IO, it should also be cleared.  Or it may still
run into I/O, like in superblock shrinker.  And this will make the kernel
run into the deadlock case described in that commit.

See Dave Chinner's comment about io in superblock shrinker:

Filesystem shrinkers do indeed perform IO from the superblock shrinker and
have for years.  Even clean inodes can require IO before they can be freed
- e.g.  on an orphan list, need truncation of post-eof blocks, need to
wait for ordered operations to complete before it can be freed, etc.

IOWs, Ext4, btrfs and XFS all can issue and/or block on arbitrary amounts
of IO in the superblock shrinker context.  XFS, in particular, has been
doing transactions and IO from the VFS inode cache shrinker since it was
first introduced....

Fix this by clearing __GFP_FS in memalloc_noio_flags(), this function has
masked all the gfp_mask that will be passed into fs for the processes
setting PF_MEMALLOC_NOIO in the direct reclaim path.

v1 thread at: https://lkml.org/lkml/2014/9/3/32

Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: joyce.xue <xuejiufei@huawei.com>
Cc: Ming Lei <ming.lei@canonical.com>
Cc: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sched.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8293545ac9b7..f87e9a8d364f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1670,11 +1670,13 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
 #define used_math() tsk_used_math(current)
 
-/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags */
+/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags
+ * __GFP_FS is also cleared as it implies __GFP_IO.
+ */
 static inline gfp_t memalloc_noio_flags(gfp_t flags)
 {
 	if (unlikely(current->flags & PF_MEMALLOC_NOIO))
-		flags &= ~__GFP_IO;
+		flags &= ~(__GFP_IO | __GFP_FS);
 	return flags;
 }
 

From 9345b4559ec8826ec13dd91d95a8cab81748c307 Mon Sep 17 00:00:00 2001
From: Yann Droneaud <ydroneaud@opteya.com>
Date: Thu, 9 Oct 2014 15:24:40 -0700
Subject: [PATCH 0965/1185] fanotify: enable close-on-exec on events' fd when
 requested in fanotify_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 0b37e097a648aa71d4db1ad108001e95b69a2da4 upstream.

According to commit 80af258867648 ("fanotify: groups can specify their
f_flags for new fd"), file descriptors created as part of file access
notification events inherit flags from the event_f_flags argument passed
to syscall fanotify_init(2)[1].

Unfortunately O_CLOEXEC is currently silently ignored.

Indeed, event_f_flags are only given to dentry_open(), which only seems to
care about O_ACCMODE and O_PATH in do_dentry_open(), O_DIRECT in
open_check_o_direct() and O_LARGEFILE in generic_file_open().

It's a pity, since, according to some lookup on various search engines and
http://codesearch.debian.net/, there's already some userspace code which
use O_CLOEXEC:

- in systemd's readahead[2]:

    fanotify_fd = fanotify_init(FAN_CLOEXEC|FAN_NONBLOCK, O_RDONLY|O_LARGEFILE|O_CLOEXEC|O_NOATIME);

- in clsync[3]:

    #define FANOTIFY_EVFLAGS (O_LARGEFILE|O_RDONLY|O_CLOEXEC)

    int fanotify_d = fanotify_init(FANOTIFY_FLAGS, FANOTIFY_EVFLAGS);

- in examples [4] from "Filesystem monitoring in the Linux
  kernel" article[5] by Aleksander Morgado:

    if ((fanotify_fd = fanotify_init (FAN_CLOEXEC,
                                      O_RDONLY | O_CLOEXEC | O_LARGEFILE)) < 0)

Additionally, since commit 48149e9d3a7e ("fanotify: check file flags
passed in fanotify_init"), having O_CLOEXEC as part of fanotify_init()'s
second argument is expressly allowed.

So it seems expected to set close-on-exec flag on the file descriptors if
userspace is allowed to request it with O_CLOEXEC.

But Andrew Morton raised[6] the concern that enabling now close-on-exec
might break existing applications which ask for O_CLOEXEC but expect the
file descriptor to be inherited across exec().

On the other hand, as reported by Mihai Dontu[7], the lack of close-on-exec
on the file descriptor returned as part of file access notify can break
applications due to deadlock.  So close-on-exec is needed for most applications.

More, applications asking for close-on-exec are likely expecting it to be
enabled, relying on O_CLOEXEC being effective.  If not, it might weaken
their security, as noted by Jan Kara[8].

So this patch replaces call to macro get_unused_fd() by a call to function
get_unused_fd_flags() with event_f_flags value as argument.  This way
O_CLOEXEC flag in the second argument of fanotify_init(2) syscall is
interpreted and close-on-exec gets enabled when requested.

[1] http://man7.org/linux/man-pages/man2/fanotify_init.2.html
[2] http://cgit.freedesktop.org/systemd/systemd/tree/src/readahead/readahead-collect.c?id=v208#n294
[3] https://github.com/xaionaro/clsync/blob/v0.2.1/sync.c#L1631
    https://github.com/xaionaro/clsync/blob/v0.2.1/configuration.h#L38
[4] http://www.lanedo.com/~aleksander/fanotify/fanotify-example.c
[5] http://www.lanedo.com/2013/filesystem-monitoring-linux-kernel/
[6] http://lkml.kernel.org/r/20141001153621.65e9258e65a6167bf2e4cb50@linux-foundation.org
[7] http://lkml.kernel.org/r/20141002095046.3715eb69@mdontu-l
[8] http://lkml.kernel.org/r/20141002104410.GB19748@quack.suse.cz

Link: http://lkml.kernel.org/r/cover.1411562410.git.ydroneaud@opteya.com
Signed-off-by: Yann Droneaud <ydroneaud@opteya.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
Tested-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
Cc: Mihai Donțu <mihai.dontu@gmail.com>
Cc: Pádraig Brady <P@draigBrady.com>
Cc: Heinrich Schuchardt <xypron.glpk@gmx.de>
Cc: Jan Kara <jack@suse.cz>
Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Cc: Michael Kerrisk-manpages <mtk.manpages@gmail.com>
Cc: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Cc: Richard Guy Briggs <rgb@redhat.com>
Cc: Eric Paris <eparis@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/notify/fanotify/fanotify_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index f1680cdbd88b..9be6b4163406 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -69,7 +69,7 @@ static int create_fd(struct fsnotify_group *group,
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	client_fd = get_unused_fd();
+	client_fd = get_unused_fd_flags(group->fanotify_data.f_flags);
 	if (client_fd < 0)
 		return client_fd;
 

From 42abc5125d0cd1abba9d21133010dcea1f3a0e8f Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Mon, 13 Oct 2014 15:51:05 -0700
Subject: [PATCH 0966/1185] kernel: add support for gcc 5

commit 71458cfc782eafe4b27656e078d379a34e472adf upstream.

We're missing include/linux/compiler-gcc5.h which is required now
because gcc branched off to v5 in trunk.

Just copy the relevant bits out of include/linux/compiler-gcc4.h,
no new code is added as of now.

This fixes a build error when using gcc 5.

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/compiler-gcc5.h | 66 +++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 include/linux/compiler-gcc5.h

diff --git a/include/linux/compiler-gcc5.h b/include/linux/compiler-gcc5.h
new file mode 100644
index 000000000000..cdd1cc202d51
--- /dev/null
+++ b/include/linux/compiler-gcc5.h
@@ -0,0 +1,66 @@
+#ifndef __LINUX_COMPILER_H
+#error "Please don't include <linux/compiler-gcc5.h> directly, include <linux/compiler.h> instead."
+#endif
+
+#define __used				__attribute__((__used__))
+#define __must_check			__attribute__((warn_unused_result))
+#define __compiler_offsetof(a, b)	__builtin_offsetof(a, b)
+
+/* Mark functions as cold. gcc will assume any path leading to a call
+   to them will be unlikely.  This means a lot of manual unlikely()s
+   are unnecessary now for any paths leading to the usual suspects
+   like BUG(), printk(), panic() etc. [but let's keep them for now for
+   older compilers]
+
+   Early snapshots of gcc 4.3 don't support this and we can't detect this
+   in the preprocessor, but we can live with this because they're unreleased.
+   Maketime probing would be overkill here.
+
+   gcc also has a __attribute__((__hot__)) to move hot functions into
+   a special section, but I don't see any sense in this right now in
+   the kernel context */
+#define __cold			__attribute__((__cold__))
+
+#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
+
+#ifndef __CHECKER__
+# define __compiletime_warning(message) __attribute__((warning(message)))
+# define __compiletime_error(message) __attribute__((error(message)))
+#endif /* __CHECKER__ */
+
+/*
+ * Mark a position in code as unreachable.  This can be used to
+ * suppress control flow warnings after asm blocks that transfer
+ * control elsewhere.
+ *
+ * Early snapshots of gcc 4.5 don't support this and we can't detect
+ * this in the preprocessor, but we can live with this because they're
+ * unreleased.  Really, we need to have autoconf for the kernel.
+ */
+#define unreachable() __builtin_unreachable()
+
+/* Mark a function definition as prohibited from being cloned. */
+#define __noclone	__attribute__((__noclone__))
+
+/*
+ * Tell the optimizer that something else uses this function or variable.
+ */
+#define __visible __attribute__((externally_visible))
+
+/*
+ * GCC 'asm goto' miscompiles certain code sequences:
+ *
+ *   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
+ *
+ * Work it around via a compiler barrier quirk suggested by Jakub Jelinek.
+ * Fixed in GCC 4.8.2 and later versions.
+ *
+ * (asm goto is automatically volatile - the naming reflects this.)
+ */
+#define asm_volatile_goto(x...)	do { asm goto(x); asm (""); } while (0)
+
+#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
+#define __HAVE_BUILTIN_BSWAP32__
+#define __HAVE_BUILTIN_BSWAP64__
+#define __HAVE_BUILTIN_BSWAP16__
+#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */

From 9fa1d75bead1cfe2718d4e81fd039ea4006da745 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 18 Sep 2014 20:08:53 +0300
Subject: [PATCH 0967/1185] spi: dw-mid: terminate ongoing transfers at exit

commit 8e45ef682cb31fda62ed4eeede5d9745a0a1b1e2 upstream.

Do full clean up at exit, means terminate all ongoing DMA transfers.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/spi/spi-dw-mid.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c
index b8ac40c8a22d..0791c92e8c50 100644
--- a/drivers/spi/spi-dw-mid.c
+++ b/drivers/spi/spi-dw-mid.c
@@ -91,7 +91,11 @@ static void mid_spi_dma_exit(struct dw_spi *dws)
 {
 	if (!dws->dma_inited)
 		return;
+
+	dmaengine_terminate_all(dws->txchan);
 	dma_release_channel(dws->txchan);
+
+	dmaengine_terminate_all(dws->rxchan);
 	dma_release_channel(dws->rxchan);
 }
 

From 9f81e4deb5a6807fc2495e61ce080df7de51e3b8 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Tue, 14 Oct 2014 06:55:05 +0100
Subject: [PATCH 0968/1185] arm64: compat: fix compat types affecting struct
 compat_elf_prpsinfo

commit 971a5b6fe634bb7b617d8c5f25b6a3ddbc600194 upstream.

The compat_elf_prpsinfo structure does not match the arch/arm struct
elf_prpsinfo definition. As a result, the NT_PRPSINFO note in a core file
created by an arm64 kernel for an aarch32 (compat) process has the wrong
size, so gdb cannot display the command that caused the process crash.

Fix is to change size of __compat_uid_t, __compat_gid_t so it would
match size of similar fields in arch/arm case.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/compat.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 899af807ef0f..c30a548cee56 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -33,8 +33,8 @@ typedef s32		compat_ssize_t;
 typedef s32		compat_time_t;
 typedef s32		compat_clock_t;
 typedef s32		compat_pid_t;
-typedef u32		__compat_uid_t;
-typedef u32		__compat_gid_t;
+typedef u16		__compat_uid_t;
+typedef u16		__compat_gid_t;
 typedef u16		__compat_uid16_t;
 typedef u16		__compat_gid16_t;
 typedef u32		__compat_uid32_t;

From aff3c48530a9bd58edf42d94684234244c957a6b Mon Sep 17 00:00:00 2001
From: Anatol Pomozov <anatol.pomozov@gmail.com>
Date: Fri, 17 Oct 2014 12:43:34 -0700
Subject: [PATCH 0969/1185] ALSA: pcm: use the same dma mmap codepath both for
 arm and arm64

commit a011e213f3700233ed2a676f1ef0a74a052d7162 upstream.

This avoids the following kernel crash when trying to play back audio on arm64

[  107.497203] [<ffffffc00046b310>] snd_pcm_mmap_data_fault+0x90/0xd4
[  107.503405] [<ffffffc0001541ac>] __do_fault+0xb0/0x498
[  107.508565] [<ffffffc0001576a0>] handle_mm_fault+0x224/0x7b0
[  107.514246] [<ffffffc000092640>] do_page_fault+0x11c/0x310
[  107.519738] [<ffffffc000081100>] do_mem_abort+0x38/0x98

Tested: backported to 3.14 and tried to playback on arm64 machine

Signed-off-by: Anatol Pomozov <anatol.pomozov@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/core/pcm_native.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index f92818155958..175dca44c97e 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -3197,7 +3197,7 @@ static const struct vm_operations_struct snd_pcm_vm_ops_data_fault = {
 
 #ifndef ARCH_HAS_DMA_MMAP_COHERENT
 /* This should be defined / handled globally! */
-#ifdef CONFIG_ARM
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
 #define ARCH_HAS_DMA_MMAP_COHERENT
 #endif
 #endif

From 8205fae0f07db86ca05108a478d5f8b7e5fe2737 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 13 Oct 2014 23:18:02 +0200
Subject: [PATCH 0970/1185] ALSA: emu10k1: Fix deadlock in synth voice lookup

commit 95926035b187cc9fee6fb61385b7da9c28123f74 upstream.

The emu10k1 voice allocator takes voice_lock spinlock.  When there is
no empty stream available, it tries to release a voice used by synth,
and calls get_synth_voice.  The callback function,
snd_emu10k1_synth_get_voice(), however, also takes the voice_lock,
thus it deadlocks.

The fix is simply removing the voice_lock holds in
snd_emu10k1_synth_get_voice(), as this is always called in the
spinlock context.

Reported-and-tested-by: Arthur Marsh <arthur.marsh@internode.on.net>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/emu10k1/emu10k1_callback.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/sound/pci/emu10k1/emu10k1_callback.c b/sound/pci/emu10k1/emu10k1_callback.c
index cae36597aa71..0a34b5f1c475 100644
--- a/sound/pci/emu10k1/emu10k1_callback.c
+++ b/sound/pci/emu10k1/emu10k1_callback.c
@@ -85,6 +85,8 @@ snd_emu10k1_ops_setup(struct snd_emux *emux)
  * get more voice for pcm
  *
  * terminate most inactive voice and give it as a pcm voice.
+ *
+ * voice_lock is already held.
  */
 int
 snd_emu10k1_synth_get_voice(struct snd_emu10k1 *hw)
@@ -92,12 +94,10 @@ snd_emu10k1_synth_get_voice(struct snd_emu10k1 *hw)
 	struct snd_emux *emu;
 	struct snd_emux_voice *vp;
 	struct best_voice best[V_END];
-	unsigned long flags;
 	int i;
 
 	emu = hw->synth;
 
-	spin_lock_irqsave(&emu->voice_lock, flags);
 	lookup_voices(emu, hw, best, 1); /* no OFF voices */
 	for (i = 0; i < V_END; i++) {
 		if (best[i].voice >= 0) {
@@ -113,11 +113,9 @@ snd_emu10k1_synth_get_voice(struct snd_emu10k1 *hw)
 			vp->emu->num_voices--;
 			vp->ch = -1;
 			vp->state = SNDRV_EMUX_ST_OFF;
-			spin_unlock_irqrestore(&emu->voice_lock, flags);
 			return ch;
 		}
 	}
-	spin_unlock_irqrestore(&emu->voice_lock, flags);
 
 	/* not found */
 	return -ENOMEM;

From 4148f813c2ef303eb500a6b9a360783c3217e810 Mon Sep 17 00:00:00 2001
From: Vlad Catoi <vladcatoi@gmail.com>
Date: Sat, 18 Oct 2014 17:45:41 -0500
Subject: [PATCH 0971/1185] ALSA: usb-audio: Add support for Steinberg UR22 USB
 interface

commit f0b127fbfdc8756eba7437ab668f3169280bd358 upstream.

Adding support for Steinberg UR22 USB interface via quirks table patch

See Ubuntu bug report:
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1317244
Also see threads:
http://linux-audio.4202.n7.nabble.com/Support-for-Steinberg-UR22-Yamaha-USB-chipset-0499-1509-tc82888.html#a82917
http://www.steinberg.net/forums/viewtopic.php?t=62290

Tested by at least 4 people judging by the threads.
Did not test MIDI interface, but audio output and capture both are
functional. Built 3.17 kernel with this driver on Ubuntu 14.04 & tested with mpg123
Patch applied to 3.13 Ubuntu kernel works well enough for daily use.

Signed-off-by: Vlad Catoi <vladcatoi@gmail.com>
Acked-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/quirks-table.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 8b75bcf136f6..d5bed1d25713 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -385,6 +385,36 @@ YAMAHA_DEVICE(0x105d, NULL),
 		}
 	}
 },
+{
+	USB_DEVICE(0x0499, 0x1509),
+	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+		/* .vendor_name = "Yamaha", */
+		/* .product_name = "Steinberg UR22", */
+		.ifnum = QUIRK_ANY_INTERFACE,
+		.type = QUIRK_COMPOSITE,
+		.data = (const struct snd_usb_audio_quirk[]) {
+			{
+				.ifnum = 1,
+				.type = QUIRK_AUDIO_STANDARD_INTERFACE
+			},
+			{
+				.ifnum = 2,
+				.type = QUIRK_AUDIO_STANDARD_INTERFACE
+			},
+			{
+				.ifnum = 3,
+				.type = QUIRK_MIDI_YAMAHA
+			},
+			{
+				.ifnum = 4,
+				.type = QUIRK_IGNORE_INTERFACE
+			},
+			{
+				.ifnum = -1
+			}
+		}
+	}
+},
 {
 	USB_DEVICE(0x0499, 0x150a),
 	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {

From 26aca22946dc127ee5f72d031aa6dfe7f1328b44 Mon Sep 17 00:00:00 2001
From: Ludovic Desroches <ludovic.desroches@atmel.com>
Date: Mon, 22 Sep 2014 15:51:33 +0200
Subject: [PATCH 0972/1185] ARM: at91/PMC: don't forget to write PMC_PCDR
 register to disable clocks

commit cfa1950e6c6b72251e80adc736af3c3d2907ab0e upstream.

When introducing support for sama5d3, the write to PMC_PCDR register has
been accidentally removed.

Reported-by: Nathalie Cyrille <nathalie.cyrille@atmel.com>
Signed-off-by: Ludovic Desroches <ludovic.desroches@atmel.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-at91/clock.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c
index da841885d01c..64f9f1045539 100644
--- a/arch/arm/mach-at91/clock.c
+++ b/arch/arm/mach-at91/clock.c
@@ -947,6 +947,7 @@ static int __init at91_clock_reset(void)
 	}
 
 	at91_pmc_write(AT91_PMC_SCDR, scdr);
+	at91_pmc_write(AT91_PMC_PCDR, pcdr);
 	if (cpu_is_sama5d3())
 		at91_pmc_write(AT91_PMC_PCDR1, pcdr1);
 

From f52c88e8e124f72eca2dc700c476b2b38f288f28 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Thu, 24 Jul 2014 17:25:42 +0800
Subject: [PATCH 0973/1185] ecryptfs: avoid to access NULL pointer when write
 metadata in xattr

commit 35425ea2492175fd39f6116481fe98b2b3ddd4ca upstream.

Christopher Head 2014-06-28 05:26:20 UTC described:
"I tried to reproduce this on 3.12.21. Instead, when I do "echo hello > foo"
in an ecryptfs mount with ecryptfs_xattr specified, I get a kernel crash:

BUG: unable to handle kernel NULL pointer dereference at           (null)
IP: [<ffffffff8110eb39>] fsstack_copy_attr_all+0x2/0x61
PGD d7840067 PUD b2c3c067 PMD 0
Oops: 0002 [#1] SMP
Modules linked in: nvidia(PO)
CPU: 3 PID: 3566 Comm: bash Tainted: P           O 3.12.21-gentoo-r1 #2
Hardware name: ASUSTek Computer Inc. G60JX/G60JX, BIOS 206 03/15/2010
task: ffff8801948944c0 ti: ffff8800bad70000 task.ti: ffff8800bad70000
RIP: 0010:[<ffffffff8110eb39>]  [<ffffffff8110eb39>] fsstack_copy_attr_all+0x2/0x61
RSP: 0018:ffff8800bad71c10  EFLAGS: 00010246
RAX: 00000000000181a4 RBX: ffff880198648480 RCX: 0000000000000000
RDX: 0000000000000004 RSI: ffff880172010450 RDI: 0000000000000000
RBP: ffff880198490e40 R08: 0000000000000000 R09: 0000000000000000
R10: ffff880172010450 R11: ffffea0002c51e80 R12: 0000000000002000
R13: 000000000000001a R14: 0000000000000000 R15: ffff880198490e40
FS:  00007ff224caa700(0000) GS:ffff88019fcc0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 00000000bb07f000 CR4: 00000000000007e0
Stack:
ffffffff811826e8 ffff8800a39d8000 0000000000000000 000000000000001a
ffff8800a01d0000 ffff8800a39d8000 ffffffff81185fd5 ffffffff81082c2c
00000001a39d8000 53d0abbc98490e40 0000000000000037 ffff8800a39d8220
Call Trace:
[<ffffffff811826e8>] ? ecryptfs_setxattr+0x40/0x52
[<ffffffff81185fd5>] ? ecryptfs_write_metadata+0x1b3/0x223
[<ffffffff81082c2c>] ? should_resched+0x5/0x23
[<ffffffff8118322b>] ? ecryptfs_initialize_file+0xaf/0xd4
[<ffffffff81183344>] ? ecryptfs_create+0xf4/0x142
[<ffffffff810f8c0d>] ? vfs_create+0x48/0x71
[<ffffffff810f9c86>] ? do_last.isra.68+0x559/0x952
[<ffffffff810f7ce7>] ? link_path_walk+0xbd/0x458
[<ffffffff810fa2a3>] ? path_openat+0x224/0x472
[<ffffffff810fa7bd>] ? do_filp_open+0x2b/0x6f
[<ffffffff81103606>] ? __alloc_fd+0xd6/0xe7
[<ffffffff810ee6ab>] ? do_sys_open+0x65/0xe9
[<ffffffff8157d022>] ? system_call_fastpath+0x16/0x1b
RIP  [<ffffffff8110eb39>] fsstack_copy_attr_all+0x2/0x61
RSP <ffff8800bad71c10>
CR2: 0000000000000000
---[ end trace df9dba5f1ddb8565 ]---"

If we create a file when we mount with ecryptfs_xattr_metadata option, we will
encounter a crash in this path:
->ecryptfs_create
  ->ecryptfs_initialize_file
    ->ecryptfs_write_metadata
      ->ecryptfs_write_metadata_to_xattr
        ->ecryptfs_setxattr
          ->fsstack_copy_attr_all
This is because the dentry->d_inode used in fsstack_copy_attr_all is NULL; it
is only initialized when ecryptfs_initialize_file finishes.

So we should skip copying attr from lower inode when the value of ->d_inode is
invalid.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ecryptfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 5eab400e2590..41baf8b5e0eb 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -1051,7 +1051,7 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 	}
 
 	rc = vfs_setxattr(lower_dentry, name, value, size, flags);
-	if (!rc)
+	if (!rc && dentry->d_inode)
 		fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode);
 out:
 	return rc;

From 816b571ac0e9eb9700df1ebc99702f9ad04e8607 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 30 Oct 2014 09:35:42 -0700
Subject: [PATCH 0974/1185] Linux 3.10.59

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index c27454b8ca3e..7baf27f5cf0f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 10
-SUBLEVEL = 58
+SUBLEVEL = 59
 EXTRAVERSION =
 NAME = TOSSUG Baby Fish
 

From 96b56157b38cffced997ffa71fdd4bf71024fc67 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@linaro.org>
Date: Thu, 30 Oct 2014 18:01:15 +0000
Subject: [PATCH 0975/1185] gator: Version 5.20

Signed-off-by: Jon Medhurst <tixy@linaro.org>
---
 drivers/gator/Kconfig                         |   4 +-
 drivers/gator/Makefile                        |  22 +-
 drivers/gator/gator.h                         |  41 +-
 drivers/gator/gator_annotate.c                |  67 +--
 drivers/gator/gator_annotate_kernel.c         |  30 +-
 drivers/gator/gator_backtrace.c               |  66 +--
 drivers/gator/gator_buffer.c                  |  71 +--
 drivers/gator/gator_buffer_write.c            |  19 +-
 drivers/gator/gator_cookies.c                 |  67 ++-
 drivers/gator/gator_events_armv6.c            |  47 +-
 drivers/gator/gator_events_armv7.c            |  90 ++--
 drivers/gator/gator_events_block.c            |  29 +-
 drivers/gator/gator_events_ccn-504.c          | 346 ------------
 drivers/gator/gator_events_irq.c              |  14 +-
 drivers/gator/gator_events_l2c-310.c          |   6 +-
 drivers/gator/gator_events_mali_4xx.c         | 181 +++----
 drivers/gator/gator_events_mali_common.c      |  21 +-
 drivers/gator/gator_events_mali_common.h      |  17 +-
 ...ali_t6xx.c => gator_events_mali_midgard.c} | 147 +++---
 ...xx_hw.c => gator_events_mali_midgard_hw.c} | 494 ++++++++++--------
 ....c => gator_events_mali_midgard_hw_test.c} |   8 +-
 drivers/gator/gator_events_meminfo.c          | 201 ++++---
 drivers/gator/gator_events_mmapped.c          |   6 +-
 drivers/gator/gator_events_net.c              |  22 +-
 drivers/gator/gator_events_perf_pmu.c         | 207 ++++----
 drivers/gator/gator_events_sched.c            |  14 +-
 drivers/gator/gator_events_scorpion.c         |  81 +--
 drivers/gator/gator_events_threads.c          | 115 ----
 drivers/gator/gator_fs.c                      |   9 +-
 drivers/gator/gator_hrtimer_gator.c           |   8 +-
 drivers/gator/gator_iks.c                     |  22 +-
 drivers/gator/gator_main.c                    | 349 +++++++------
 drivers/gator/gator_marshaling.c              |  75 +--
 drivers/gator/gator_trace_gpu.c               |  88 ++--
 drivers/gator/gator_trace_power.c             |  61 +--
 drivers/gator/gator_trace_sched.c             | 104 ++--
 drivers/gator/mali/mali_dd_gator_api.h        |  40 --
 drivers/gator/mali/mali_kbase_gator_api.h     | 219 ++++++++
 .../mali/mali_mjollnir_profiling_gator_api.h  | 178 +++----
 .../mali/mali_utgard_profiling_gator_api.h    | 196 ++++---
 .../gator/{mali_t6xx.mk => mali_midgard.mk}   |   4 +-
 tools/gator/daemon/Android.mk                 |  17 +-
 tools/gator/daemon/AnnotateListener.cpp       |  69 +++
 tools/gator/daemon/AnnotateListener.h         |  31 ++
 tools/gator/daemon/Application.mk             |   2 +
 tools/gator/daemon/Buffer.cpp                 | 215 ++++----
 tools/gator/daemon/Buffer.h                   |  38 +-
 tools/gator/daemon/CCNDriver.cpp              | 295 +++++++++++
 tools/gator/daemon/CCNDriver.h                |  43 ++
 tools/gator/daemon/CPUFreqDriver.cpp          |  58 ++
 tools/gator/daemon/CPUFreqDriver.h            |  34 ++
 tools/gator/daemon/CapturedXML.cpp            |  17 +-
 tools/gator/daemon/CapturedXML.h              |   4 +-
 tools/gator/daemon/Child.cpp                  |  82 ++-
 tools/gator/daemon/Child.h                    |   4 +-
 tools/gator/daemon/Command.cpp                | 172 ++++++
 tools/gator/daemon/Command.h                  |  14 +
 tools/gator/daemon/Config.h                   |  13 +-
 tools/gator/daemon/DiskIODriver.cpp           | 125 +++++
 tools/gator/daemon/DiskIODriver.h             |  39 ++
 tools/gator/daemon/Driver.cpp                 |  81 +++
 tools/gator/daemon/Driver.h                   |  76 ++-
 tools/gator/daemon/DriverSource.cpp           |  19 +-
 tools/gator/daemon/DynBuf.cpp                 |   2 +-
 tools/gator/daemon/EventsXML.cpp              |   2 +-
 tools/gator/daemon/ExternalSource.cpp         |  87 ++-
 tools/gator/daemon/ExternalSource.h           |   8 +-
 tools/gator/daemon/FSDriver.cpp               | 140 ++---
 tools/gator/daemon/FSDriver.h                 |  19 +-
 tools/gator/daemon/Fifo.cpp                   |   5 +-
 tools/gator/daemon/Fifo.h                     |  10 +-
 tools/gator/daemon/FtraceDriver.cpp           | 118 +++++
 tools/gator/daemon/FtraceDriver.h             |  31 ++
 tools/gator/daemon/FtraceSource.cpp           | 158 ++++++
 tools/gator/daemon/FtraceSource.h             |  43 ++
 tools/gator/daemon/Hwmon.h                    |  45 --
 .../daemon/{Hwmon.cpp => HwmonDriver.cpp}     | 213 ++------
 tools/gator/daemon/HwmonDriver.h              |  31 ++
 tools/gator/daemon/KMod.cpp                   |   9 +
 tools/gator/daemon/KMod.h                     |   7 +-
 tools/gator/daemon/LocalCapture.h             |   4 +-
 tools/gator/daemon/Logging.cpp                |  18 +-
 tools/gator/daemon/Logging.h                  |  12 +-
 tools/gator/daemon/Makefile                   |   5 +-
 tools/gator/daemon/MaliVideoDriver.cpp        | 150 ++----
 tools/gator/daemon/MaliVideoDriver.h          |  17 +-
 tools/gator/daemon/MemInfoDriver.cpp          |  93 ++++
 tools/gator/daemon/MemInfoDriver.h            |  37 ++
 tools/gator/daemon/Monitor.cpp                |  14 +
 tools/gator/daemon/NetDriver.cpp              | 129 +++++
 tools/gator/daemon/NetDriver.h                |  39 ++
 tools/gator/daemon/OlySocket.cpp              |  54 +-
 tools/gator/daemon/OlySocket.h                |   9 +
 tools/gator/daemon/PerfBuffer.cpp             |  65 ++-
 tools/gator/daemon/PerfBuffer.h               |   4 +-
 tools/gator/daemon/PerfDriver.cpp             | 194 +++----
 tools/gator/daemon/PerfDriver.h               |  15 +-
 tools/gator/daemon/PerfGroup.cpp              |  66 ++-
 tools/gator/daemon/PerfGroup.h                |  14 +-
 tools/gator/daemon/PerfSource.cpp             | 306 ++++++++---
 tools/gator/daemon/PerfSource.h               |   3 +-
 tools/gator/daemon/Proc.cpp                   | 137 ++++-
 tools/gator/daemon/Proc.h                     |   6 +-
 tools/gator/daemon/Sender.cpp                 |   7 +-
 tools/gator/daemon/Sender.h                   |   4 +-
 tools/gator/daemon/SessionData.cpp            |  91 +++-
 tools/gator/daemon/SessionData.h              |  44 +-
 tools/gator/daemon/SessionXML.cpp             |  38 +-
 tools/gator/daemon/SessionXML.h               |  12 +-
 tools/gator/daemon/Setup.cpp                  | 232 ++++++++
 tools/gator/daemon/Setup.h                    |  18 +
 tools/gator/daemon/StreamlineSetup.h          |   4 +-
 tools/gator/daemon/UEvent.cpp                 |   3 +-
 tools/gator/daemon/UserSpaceSource.cpp        |  23 +-
 tools/gator/daemon/defaults.xml               |  33 +-
 ...s-Cortex-A12.xml => events-Cortex-A17.xml} |   8 +-
 tools/gator/daemon/events-Filesystem.xml      |  12 +-
 tools/gator/daemon/events-Linux.xml           |  11 +-
 tools/gator/daemon/events-Mali-Midgard.xml    |  46 ++
 tools/gator/daemon/events-Mali-Midgard_hw.xml |  91 ++++
 tools/gator/daemon/events-Mali-T60x_hw.xml    | 108 ++++
 tools/gator/daemon/events-Mali-T62x_hw.xml    | 109 ++++
 tools/gator/daemon/events-Mali-T6xx.xml       |  46 --
 tools/gator/daemon/events-Mali-T6xx_hw.xml    |  91 ----
 tools/gator/daemon/events-Mali-T72x_hw.xml    |  95 ++++
 tools/gator/daemon/events-Mali-T76x_hw.xml    | 108 ++++
 tools/gator/daemon/events-Mali-V500.xml       |  55 +-
 tools/gator/daemon/events-ftrace.xml          |   7 +
 tools/gator/daemon/main.cpp                   |  95 +++-
 tools/gator/daemon/mxml/config.h              |  10 +-
 tools/gator/daemon/mxml/mxml-attr.c           |  19 +-
 tools/gator/daemon/mxml/mxml-entity.c         |  19 +-
 tools/gator/daemon/mxml/mxml-file.c           | 104 ++--
 tools/gator/daemon/mxml/mxml-get.c            |  27 +-
 tools/gator/daemon/mxml/mxml-index.c          |  11 +-
 tools/gator/daemon/mxml/mxml-node.c           |  31 +-
 tools/gator/daemon/mxml/mxml-private.c        |  42 +-
 tools/gator/daemon/mxml/mxml-private.h        |   8 +-
 tools/gator/daemon/mxml/mxml-search.c         |  17 +-
 tools/gator/daemon/mxml/mxml-set.c            |  20 +-
 tools/gator/daemon/mxml/mxml-string.c         |  21 +-
 tools/gator/daemon/mxml/mxml.h                |  17 +-
 142 files changed, 5872 insertions(+), 3418 deletions(-)
 delete mode 100644 drivers/gator/gator_events_ccn-504.c
 rename drivers/gator/{gator_events_mali_t6xx.c => gator_events_mali_midgard.c} (78%)
 rename drivers/gator/{gator_events_mali_t6xx_hw.c => gator_events_mali_midgard_hw.c} (59%)
 rename drivers/gator/{gator_events_mali_t6xx_hw_test.c => gator_events_mali_midgard_hw_test.c} (88%)
 delete mode 100644 drivers/gator/gator_events_threads.c
 delete mode 100644 drivers/gator/mali/mali_dd_gator_api.h
 create mode 100644 drivers/gator/mali/mali_kbase_gator_api.h
 rename drivers/gator/{mali_t6xx.mk => mali_midgard.mk} (91%)
 create mode 100644 tools/gator/daemon/AnnotateListener.cpp
 create mode 100644 tools/gator/daemon/AnnotateListener.h
 create mode 100644 tools/gator/daemon/CCNDriver.cpp
 create mode 100644 tools/gator/daemon/CCNDriver.h
 create mode 100644 tools/gator/daemon/CPUFreqDriver.cpp
 create mode 100644 tools/gator/daemon/CPUFreqDriver.h
 create mode 100644 tools/gator/daemon/Command.cpp
 create mode 100644 tools/gator/daemon/Command.h
 create mode 100644 tools/gator/daemon/DiskIODriver.cpp
 create mode 100644 tools/gator/daemon/DiskIODriver.h
 create mode 100644 tools/gator/daemon/FtraceDriver.cpp
 create mode 100644 tools/gator/daemon/FtraceDriver.h
 create mode 100644 tools/gator/daemon/FtraceSource.cpp
 create mode 100644 tools/gator/daemon/FtraceSource.h
 delete mode 100644 tools/gator/daemon/Hwmon.h
 rename tools/gator/daemon/{Hwmon.cpp => HwmonDriver.cpp} (51%)
 create mode 100644 tools/gator/daemon/HwmonDriver.h
 create mode 100644 tools/gator/daemon/MemInfoDriver.cpp
 create mode 100644 tools/gator/daemon/MemInfoDriver.h
 create mode 100644 tools/gator/daemon/NetDriver.cpp
 create mode 100644 tools/gator/daemon/NetDriver.h
 create mode 100644 tools/gator/daemon/Setup.cpp
 create mode 100644 tools/gator/daemon/Setup.h
 rename tools/gator/daemon/{events-Cortex-A12.xml => events-Cortex-A17.xml} (97%)
 create mode 100644 tools/gator/daemon/events-Mali-Midgard.xml
 create mode 100644 tools/gator/daemon/events-Mali-Midgard_hw.xml
 create mode 100644 tools/gator/daemon/events-Mali-T60x_hw.xml
 create mode 100644 tools/gator/daemon/events-Mali-T62x_hw.xml
 delete mode 100644 tools/gator/daemon/events-Mali-T6xx.xml
 delete mode 100644 tools/gator/daemon/events-Mali-T6xx_hw.xml
 create mode 100644 tools/gator/daemon/events-Mali-T72x_hw.xml
 create mode 100644 tools/gator/daemon/events-Mali-T76x_hw.xml
 create mode 100644 tools/gator/daemon/events-ftrace.xml

diff --git a/drivers/gator/Kconfig b/drivers/gator/Kconfig
index e46ccb9b8064..b2358bbc1293 100644
--- a/drivers/gator/Kconfig
+++ b/drivers/gator/Kconfig
@@ -24,8 +24,8 @@ config GATOR_MALI_4XXMP
 	bool "Mali-400MP or Mali-450MP"
 	select GATOR_WITH_MALI_SUPPORT
 
-config GATOR_MALI_T6XX
-	bool "Mali-T604 or Mali-T658"
+config GATOR_MALI_MIDGARD
+	bool "Mali-T60x, Mali-T62x, Mali-T72x or Mali-T76x"
 	select GATOR_WITH_MALI_SUPPORT
 
 endchoice
diff --git a/drivers/gator/Makefile b/drivers/gator/Makefile
index 2f86823313c6..28d2070b11d5 100644
--- a/drivers/gator/Makefile
+++ b/drivers/gator/Makefile
@@ -14,17 +14,16 @@ gator-y :=	gator_main.o \
 		gator_events_net.o \
 		gator_events_perf_pmu.o \
 		gator_events_sched.o \
-		gator_events_threads.o \
 
 # Convert the old GATOR_WITH_MALI_SUPPORT to the new kernel flags
 ifneq ($(GATOR_WITH_MALI_SUPPORT),)
   CONFIG_GATOR_WITH_MALI_SUPPORT := y
-  ifeq ($(GATOR_WITH_MALI_SUPPORT),MALI_T6xx)
+  ifeq ($(GATOR_WITH_MALI_SUPPORT),MALI_MIDGARD)
     CONFIG_GATOR_MALI_4XXMP := n
-    CONFIG_GATOR_MALI_T6XX := y
+    CONFIG_GATOR_MALI_MIDGARD := y
   else
     CONFIG_GATOR_MALI_4XXMP := y
-    CONFIG_GATOR_MALI_T6XX := n
+    CONFIG_GATOR_MALI_MIDGARD := n
   endif
   EXTRA_CFLAGS += -DMALI_SUPPORT=$(GATOR_WITH_MALI_SUPPORT)
   ifneq ($(GATOR_MALI_INTERFACE_STYLE),)
@@ -33,10 +32,10 @@ ifneq ($(GATOR_WITH_MALI_SUPPORT),)
 endif
 
 ifeq ($(CONFIG_GATOR_WITH_MALI_SUPPORT),y)
-  ifeq ($(CONFIG_GATOR_MALI_T6XX),y)
-    gator-y +=	gator_events_mali_t6xx.o \
-		gator_events_mali_t6xx_hw.o
-    include $(src)/mali_t6xx.mk
+  ifeq ($(CONFIG_GATOR_MALI_MIDGARD),y)
+    gator-y +=	gator_events_mali_midgard.o \
+		gator_events_mali_midgard_hw.o
+    include $(src)/mali_midgard.mk
   else
     gator-y +=	gator_events_mali_4xx.o
   endif
@@ -46,7 +45,7 @@ ifeq ($(CONFIG_GATOR_WITH_MALI_SUPPORT),y)
     ccflags-y += -I$(CONFIG_GATOR_MALI_PATH)
   endif
   ccflags-$(CONFIG_GATOR_MALI_4XXMP) += -DMALI_SUPPORT=MALI_4xx
-  ccflags-$(CONFIG_GATOR_MALI_T6XX) += -DMALI_SUPPORT=MALI_T6xx
+  ccflags-$(CONFIG_GATOR_MALI_MIDGARD) += -DMALI_SUPPORT=MALI_MIDGARD
 endif
 
 # GATOR_TEST controls whether to include (=1) or exclude (=0) test code.
@@ -54,16 +53,15 @@ GATOR_TEST ?= 0
 EXTRA_CFLAGS +=	-DGATOR_TEST=$(GATOR_TEST)
 
 # Should the original or new block_rq_complete API be used?
-OLD_BLOCK_RQ_COMPLETE := $(shell grep -A3 block_rq_complete include/trace/events/block.h | grep nr_bytes > /dev/null; echo $$?)
+OLD_BLOCK_RQ_COMPLETE := $(shell grep -A3 block_rq_complete $(srctree)/include/trace/events/block.h | grep nr_bytes -q; echo $$?)
 EXTRA_CFLAGS += -DOLD_BLOCK_RQ_COMPLETE=$(OLD_BLOCK_RQ_COMPLETE)
 
 gator-$(CONFIG_ARM) +=	gator_events_armv6.o \
 			gator_events_armv7.o \
-			gator_events_ccn-504.o \
 			gator_events_l2c-310.o \
 			gator_events_scorpion.o
 
-gator-$(CONFIG_ARM64) +=	gator_events_ccn-504.o
+gator-$(CONFIG_ARM64) +=
 
 else
 
diff --git a/drivers/gator/gator.h b/drivers/gator/gator.h
index 5ad0254d86a9..5cc73a388c4f 100644
--- a/drivers/gator/gator.h
+++ b/drivers/gator/gator.h
@@ -14,13 +14,13 @@
 #include <linux/mm.h>
 #include <linux/list.h>
 
-#define GATOR_PERF_SUPPORT		LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0)
-#define GATOR_PERF_PMU_SUPPORT  GATOR_PERF_SUPPORT && defined(CONFIG_PERF_EVENTS) && (!(defined(__arm__) || defined(__aarch64__)) || defined(CONFIG_HW_PERF_EVENTS))
+#define GATOR_PERF_SUPPORT      (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0))
+#define GATOR_PERF_PMU_SUPPORT  (GATOR_PERF_SUPPORT && defined(CONFIG_PERF_EVENTS) && (!(defined(__arm__) || defined(__aarch64__)) || defined(CONFIG_HW_PERF_EVENTS)))
 #define GATOR_NO_PERF_SUPPORT   (!(GATOR_PERF_SUPPORT))
-#define GATOR_CPU_FREQ_SUPPORT  (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)) && defined(CONFIG_CPU_FREQ)
+#define GATOR_CPU_FREQ_SUPPORT  ((LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)) && defined(CONFIG_CPU_FREQ))
 #define GATOR_IKS_SUPPORT       defined(CONFIG_BL_SWITCHER)
 
-// cpu ids
+/* cpu ids */
 #define ARM1136     0xb36
 #define ARM1156     0xb56
 #define ARM1176     0xb76
@@ -29,7 +29,6 @@
 #define CORTEX_A7   0xc07
 #define CORTEX_A8   0xc08
 #define CORTEX_A9   0xc09
-#define CORTEX_A12  0xc0d
 #define CORTEX_A15  0xc0f
 #define CORTEX_A17  0xc0e
 #define SCORPION    0x00f
@@ -42,20 +41,20 @@
 #define AARCH64     0xd0f
 #define OTHER       0xfff
 
-// gpu enums
+/* gpu enums */
 #define MALI_4xx     1
-#define MALI_T6xx    2
+#define MALI_MIDGARD 2
 
 #define MAXSIZE_CORE_NAME 32
 
 struct gator_cpu {
 	const int cpuid;
-	// Human readable name
+	/* Human readable name */
 	const char core_name[MAXSIZE_CORE_NAME];
-	// gatorfs event and Perf PMU name
-	const char * const pmnc_name;
-	// compatible from Documentation/devicetree/bindings/arm/cpus.txt
-	const char * const dt_name;
+	/* gatorfs event and Perf PMU name */
+	const char *const pmnc_name;
+	/* compatible from Documentation/devicetree/bindings/arm/cpus.txt */
+	const char *const dt_name;
 	const int pmnc_counters;
 };
 
@@ -98,7 +97,7 @@ int gatorfs_create_ro_ulong(struct super_block *sb, struct dentry *root,
 		extern struct tracepoint *gator_tracepoint_##probe_name; \
 		static void probe_##probe_name(void *data, PARAMS(proto))
 #	define GATOR_REGISTER_TRACE(probe_name) \
-		tracepoint_probe_register(gator_tracepoint_##probe_name, probe_##probe_name, NULL)
+		((gator_tracepoint_##probe_name == NULL) || tracepoint_probe_register(gator_tracepoint_##probe_name, probe_##probe_name, NULL))
 #	define GATOR_UNREGISTER_TRACE(probe_name) \
 		tracepoint_probe_unregister(gator_tracepoint_##probe_name, probe_##probe_name, NULL)
 #endif
@@ -107,15 +106,19 @@ int gatorfs_create_ro_ulong(struct super_block *sb, struct dentry *root,
  * Events
  ******************************************************************************/
 struct gator_interface {
-	void (*shutdown)(void);	// Complementary function to init
+	/* Complementary function to init */
+	void (*shutdown)(void);
 	int (*create_files)(struct super_block *sb, struct dentry *root);
 	int (*start)(void);
-	void (*stop)(void);		// Complementary function to start
+	/* Complementary function to start */
+	void (*stop)(void);
 	int (*online)(int **buffer, bool migrate);
 	int (*offline)(int **buffer, bool migrate);
-	void (*online_dispatch)(int cpu, bool migrate);	// called in process context but may not be running on core 'cpu'
-	void (*offline_dispatch)(int cpu, bool migrate);	// called in process context but may not be running on core 'cpu'
-	int (*read)(int **buffer);
+	/* called in process context but may not be running on core 'cpu' */
+	void (*online_dispatch)(int cpu, bool migrate);
+	/* called in process context but may not be running on core 'cpu' */
+	void (*offline_dispatch)(int cpu, bool migrate);
+	int (*read)(int **buffer, bool sched_switch);
 	int (*read64)(long long **buffer);
 	int (*read_proc)(long long **buffer, struct task_struct *);
 	struct list_head list;
@@ -146,4 +149,4 @@ int pcpu_to_lcpu(const int pcpu);
 #define get_logical_cpu() smp_processor_id()
 #define on_primary_core() (get_logical_cpu() == 0)
 
-#endif // GATOR_H_
+#endif /* GATOR_H_ */
diff --git a/drivers/gator/gator_annotate.c b/drivers/gator/gator_annotate.c
index 7e2c6e5d8715..ff9a3cef7b2e 100644
--- a/drivers/gator/gator_annotate.c
+++ b/drivers/gator/gator_annotate.c
@@ -11,12 +11,12 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/current.h>
 #include <linux/spinlock.h>
 
 static DEFINE_SPINLOCK(annotate_lock);
-static bool collect_annotations = false;
+static bool collect_annotations;
 
 static int annotate_copy(struct file *file, char const __user *buf, size_t count)
 {
@@ -24,10 +24,10 @@ static int annotate_copy(struct file *file, char const __user *buf, size_t count
 	int write = per_cpu(gator_buffer_write, cpu)[ANNOTATE_BUF];
 
 	if (file == NULL) {
-		// copy from kernel
+		/* copy from kernel */
 		memcpy(&per_cpu(gator_buffer, cpu)[ANNOTATE_BUF][write], buf, count);
 	} else {
-		// copy from user space
+		/* copy from user space */
 		if (copy_from_user(&per_cpu(gator_buffer, cpu)[ANNOTATE_BUF][write], buf, count) != 0)
 			return -1;
 	}
@@ -41,70 +41,70 @@ static ssize_t annotate_write(struct file *file, char const __user *buf, size_t
 	int pid, cpu, header_size, available, contiguous, length1, length2, size, count = count_orig & 0x7fffffff;
 	bool interrupt_context;
 
-	if (*offset) {
+	if (*offset)
 		return -EINVAL;
-	}
 
 	interrupt_context = in_interrupt();
-	// Annotations are not supported in interrupt context, but may work if you comment out the the next four lines of code.
-	//   By doing so, annotations in interrupt context can result in deadlocks and lost data.
+	/* Annotations are not supported in interrupt context, but may work
+	 * if you comment out the the next four lines of code. By doing so,
+	 * annotations in interrupt context can result in deadlocks and lost
+	 * data.
+	 */
 	if (interrupt_context) {
-		printk(KERN_WARNING "gator: Annotations are not supported in interrupt context. Edit gator_annotate.c in the gator driver to enable annotations in interrupt context.\n");
+		pr_warning("gator: Annotations are not supported in interrupt context. Edit gator_annotate.c in the gator driver to enable annotations in interrupt context.\n");
 		return -EINVAL;
 	}
 
  retry:
-	// synchronize between cores and with collect_annotations
+	/* synchronize between cores and with collect_annotations */
 	spin_lock(&annotate_lock);
 
 	if (!collect_annotations) {
-		// Not collecting annotations, tell the caller everything was written
+		/* Not collecting annotations, tell the caller everything was written */
 		size = count_orig;
 		goto annotate_write_out;
 	}
 
-	// Annotation only uses a single per-cpu buffer as the data must be in order to the engine
+	/* Annotation only uses a single per-cpu buffer as the data must be in order to the engine */
 	cpu = 0;
 
-	if (current == NULL) {
+	if (current == NULL)
 		pid = 0;
-	} else {
+	else
 		pid = current->pid;
-	}
 
-	// determine total size of the payload
+	/* determine total size of the payload */
 	header_size = MAXSIZE_PACK32 * 3 + MAXSIZE_PACK64;
 	available = buffer_bytes_available(cpu, ANNOTATE_BUF) - header_size;
 	size = count < available ? count : available;
 
 	if (size <= 0) {
-		// Buffer is full, wait until space is available
+		/* Buffer is full, wait until space is available */
 		spin_unlock(&annotate_lock);
 
-		// Drop the annotation as blocking is not allowed in interrupt context
-		if (interrupt_context) {
+		/* Drop the annotation as blocking is not allowed in interrupt context */
+		if (interrupt_context)
 			return -EINVAL;
-		}
 
 		wait_event_interruptible(gator_annotate_wait, buffer_bytes_available(cpu, ANNOTATE_BUF) > header_size || !collect_annotations);
 
-		// Check to see if a signal is pending
-		if (signal_pending(current)) {
+		/* Check to see if a signal is pending */
+		if (signal_pending(current))
 			return -EINTR;
-		}
 
 		goto retry;
 	}
 
-	// synchronize shared variables annotateBuf and annotatePos
+	/* synchronize shared variables annotateBuf and annotatePos */
 	if (per_cpu(gator_buffer, cpu)[ANNOTATE_BUF]) {
 		u64 time = gator_get_time();
+
 		gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, get_physical_cpu());
 		gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, pid);
 		gator_buffer_write_packed_int64(cpu, ANNOTATE_BUF, time);
 		gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, size);
 
-		// determine the sizes to capture, length1 + length2 will equal size
+		/* determine the sizes to capture, length1 + length2 will equal size */
 		contiguous = contiguous_space_available(cpu, ANNOTATE_BUF);
 		if (size < contiguous) {
 			length1 = size;
@@ -124,14 +124,14 @@ static ssize_t annotate_write(struct file *file, char const __user *buf, size_t
 			goto annotate_write_out;
 		}
 
-		// Check and commit; commit is set to occur once buffer is 3/4 full
+		/* Check and commit; commit is set to occur once buffer is 3/4 full */
 		buffer_check(cpu, ANNOTATE_BUF, time);
 	}
 
 annotate_write_out:
 	spin_unlock(&annotate_lock);
 
-	// return the number of bytes written
+	/* return the number of bytes written */
 	return size;
 }
 
@@ -141,18 +141,21 @@ static int annotate_release(struct inode *inode, struct file *file)
 {
 	int cpu = 0;
 
-	// synchronize between cores
+	/* synchronize between cores */
 	spin_lock(&annotate_lock);
 
 	if (per_cpu(gator_buffer, cpu)[ANNOTATE_BUF] && buffer_check_space(cpu, ANNOTATE_BUF, MAXSIZE_PACK64 + 3 * MAXSIZE_PACK32)) {
 		uint32_t pid = current->pid;
+
 		gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, get_physical_cpu());
 		gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, pid);
-		gator_buffer_write_packed_int64(cpu, ANNOTATE_BUF, 0);	// time
-		gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, 0);	// size
+		/* time */
+		gator_buffer_write_packed_int64(cpu, ANNOTATE_BUF, 0);
+		/* size */
+		gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, 0);
 	}
 
-	// Check and commit; commit is set to occur once buffer is 3/4 full
+	/* Check and commit; commit is set to occur once buffer is 3/4 full */
 	buffer_check(cpu, ANNOTATE_BUF, gator_get_time());
 
 	spin_unlock(&annotate_lock);
@@ -178,7 +181,7 @@ static int gator_annotate_start(void)
 
 static void gator_annotate_stop(void)
 {
-	// the spinlock here will ensure that when this function exits, we are not in the middle of an annotation
+	/* the spinlock here will ensure that when this function exits, we are not in the middle of an annotation */
 	spin_lock(&annotate_lock);
 	collect_annotations = false;
 	wake_up(&gator_annotate_wait);
diff --git a/drivers/gator/gator_annotate_kernel.c b/drivers/gator/gator_annotate_kernel.c
index 010806825529..69471f99e5fb 100644
--- a/drivers/gator/gator_annotate_kernel.c
+++ b/drivers/gator/gator_annotate_kernel.c
@@ -19,10 +19,11 @@ static void kannotate_write(const char *ptr, unsigned int size)
 	int retval;
 	int pos = 0;
 	loff_t offset = 0;
+
 	while (pos < size) {
 		retval = annotate_write(NULL, &ptr[pos], size - pos, &offset);
 		if (retval < 0) {
-			printk(KERN_WARNING "gator: kannotate_write failed with return value %d\n", retval);
+			pr_warning("gator: kannotate_write failed with return value %d\n", retval);
 			return;
 		}
 		pos += retval;
@@ -47,6 +48,7 @@ void gator_annotate_channel(int channel, const char *str)
 {
 	const u16 str_size = strlen(str) & 0xffff;
 	char header[8];
+
 	header[0] = ESCAPE_CODE;
 	header[1] = STRING_ANNOTATION;
 	marshal_u32(header + 2, channel);
@@ -54,20 +56,19 @@ void gator_annotate_channel(int channel, const char *str)
 	kannotate_write(header, sizeof(header));
 	kannotate_write(str, str_size);
 }
-
 EXPORT_SYMBOL(gator_annotate_channel);
 
 void gator_annotate(const char *str)
 {
 	gator_annotate_channel(0, str);
 }
-
 EXPORT_SYMBOL(gator_annotate);
 
 void gator_annotate_channel_color(int channel, int color, const char *str)
 {
 	const u16 str_size = (strlen(str) + 4) & 0xffff;
 	char header[12];
+
 	header[0] = ESCAPE_CODE;
 	header[1] = STRING_ANNOTATION;
 	marshal_u32(header + 2, channel);
@@ -76,39 +77,37 @@ void gator_annotate_channel_color(int channel, int color, const char *str)
 	kannotate_write(header, sizeof(header));
 	kannotate_write(str, str_size - 4);
 }
-
 EXPORT_SYMBOL(gator_annotate_channel_color);
 
 void gator_annotate_color(int color, const char *str)
 {
 	gator_annotate_channel_color(0, color, str);
 }
-
 EXPORT_SYMBOL(gator_annotate_color);
 
 void gator_annotate_channel_end(int channel)
 {
 	char header[8];
+
 	header[0] = ESCAPE_CODE;
 	header[1] = STRING_ANNOTATION;
 	marshal_u32(header + 2, channel);
 	marshal_u16(header + 6, 0);
 	kannotate_write(header, sizeof(header));
 }
-
 EXPORT_SYMBOL(gator_annotate_channel_end);
 
 void gator_annotate_end(void)
 {
 	gator_annotate_channel_end(0);
 }
-
 EXPORT_SYMBOL(gator_annotate_end);
 
-void gator_annotate_name_channel(int channel, int group, const char* str)
+void gator_annotate_name_channel(int channel, int group, const char *str)
 {
 	const u16 str_size = strlen(str) & 0xffff;
 	char header[12];
+
 	header[0] = ESCAPE_CODE;
 	header[1] = NAME_CHANNEL_ANNOTATION;
 	marshal_u32(header + 2, channel);
@@ -117,13 +116,13 @@ void gator_annotate_name_channel(int channel, int group, const char* str)
 	kannotate_write(header, sizeof(header));
 	kannotate_write(str, str_size);
 }
-
 EXPORT_SYMBOL(gator_annotate_name_channel);
 
-void gator_annotate_name_group(int group, const char* str)
+void gator_annotate_name_group(int group, const char *str)
 {
 	const u16 str_size = strlen(str) & 0xffff;
 	char header[8];
+
 	header[0] = ESCAPE_CODE;
 	header[1] = NAME_GROUP_ANNOTATION;
 	marshal_u32(header + 2, group);
@@ -131,7 +130,6 @@ void gator_annotate_name_group(int group, const char* str)
 	kannotate_write(header, sizeof(header));
 	kannotate_write(str, str_size);
 }
-
 EXPORT_SYMBOL(gator_annotate_name_group);
 
 void gator_annotate_visual(const char *data, unsigned int length, const char *str)
@@ -139,6 +137,7 @@ void gator_annotate_visual(const char *data, unsigned int length, const char *st
 	const u16 str_size = strlen(str) & 0xffff;
 	char header[4];
 	char header_length[4];
+
 	header[0] = ESCAPE_CODE;
 	header[1] = VISUAL_ANNOTATION;
 	marshal_u16(header + 2, str_size);
@@ -148,49 +147,49 @@ void gator_annotate_visual(const char *data, unsigned int length, const char *st
 	kannotate_write(header_length, sizeof(header_length));
 	kannotate_write(data, length);
 }
-
 EXPORT_SYMBOL(gator_annotate_visual);
 
 void gator_annotate_marker(void)
 {
 	char header[4];
+
 	header[0] = ESCAPE_CODE;
 	header[1] = MARKER_ANNOTATION;
 	marshal_u16(header + 2, 0);
 	kannotate_write(header, sizeof(header));
 }
-
 EXPORT_SYMBOL(gator_annotate_marker);
 
 void gator_annotate_marker_str(const char *str)
 {
 	const u16 str_size = strlen(str) & 0xffff;
 	char header[4];
+
 	header[0] = ESCAPE_CODE;
 	header[1] = MARKER_ANNOTATION;
 	marshal_u16(header + 2, str_size);
 	kannotate_write(header, sizeof(header));
 	kannotate_write(str, str_size);
 }
-
 EXPORT_SYMBOL(gator_annotate_marker_str);
 
 void gator_annotate_marker_color(int color)
 {
 	char header[8];
+
 	header[0] = ESCAPE_CODE;
 	header[1] = MARKER_ANNOTATION;
 	marshal_u16(header + 2, 4);
 	marshal_u32(header + 4, color);
 	kannotate_write(header, sizeof(header));
 }
-
 EXPORT_SYMBOL(gator_annotate_marker_color);
 
 void gator_annotate_marker_color_str(int color, const char *str)
 {
 	const u16 str_size = (strlen(str) + 4) & 0xffff;
 	char header[8];
+
 	header[0] = ESCAPE_CODE;
 	header[1] = MARKER_ANNOTATION;
 	marshal_u16(header + 2, str_size);
@@ -198,5 +197,4 @@ void gator_annotate_marker_color_str(int color, const char *str)
 	kannotate_write(header, sizeof(header));
 	kannotate_write(str, str_size - 4);
 }
-
 EXPORT_SYMBOL(gator_annotate_marker_color_str);
diff --git a/drivers/gator/gator_backtrace.c b/drivers/gator/gator_backtrace.c
index e03c1653c5b5..76c941d009a9 100644
--- a/drivers/gator/gator_backtrace.c
+++ b/drivers/gator/gator_backtrace.c
@@ -14,17 +14,17 @@ struct stack_frame_eabi {
 	union {
 		struct {
 			unsigned long fp;
-			// May be the fp in the case of a leaf function or clang
+			/* May be the fp in the case of a leaf function or clang */
 			unsigned long lr;
-			// If lr is really the fp, lr2 is the corresponding lr
+			/* If lr is really the fp, lr2 is the corresponding lr */
 			unsigned long lr2;
 		};
-		// Used to read 32 bit fp/lr from a 64 bit kernel
+		/* Used to read 32 bit fp/lr from a 64 bit kernel */
 		struct {
 			u32 fp_32;
-			// same as lr above
+			/* same as lr above */
 			u32 lr_32;
-			// same as lr2 above
+			/* same as lr2 above */
 			u32 lr2_32;
 		};
 	};
@@ -35,9 +35,8 @@ static void gator_add_trace(int cpu, unsigned long address)
 	off_t offset = 0;
 	unsigned long cookie = get_address_cookie(cpu, current, address & ~1, &offset);
 
-	if (cookie == NO_COOKIE || cookie == UNRESOLVED_COOKIE) {
+	if (cookie == NO_COOKIE || cookie == UNRESOLVED_COOKIE)
 		offset = address;
-	}
 
 	marshal_backtrace(offset & ~1, cookie, 0);
 }
@@ -54,36 +53,34 @@ static void arm_backtrace_eabi(int cpu, struct pt_regs *const regs, unsigned int
 	unsigned long lr = regs->ARM_lr;
 	const int gcc_frame_offset = sizeof(unsigned long);
 #else
-	// Is userspace aarch32 (32 bit)
+	/* Is userspace aarch32 (32 bit) */
 	const bool is_compat = compat_user_mode(regs);
 	unsigned long fp = (is_compat ? regs->regs[11] : regs->regs[29]);
 	unsigned long sp = (is_compat ? regs->compat_sp : regs->sp);
 	unsigned long lr = (is_compat ? regs->compat_lr : regs->regs[30]);
 	const int gcc_frame_offset = (is_compat ? sizeof(u32) : 0);
 #endif
-	// clang frame offset is always zero
+	/* clang frame offset is always zero */
 	int is_user_mode = user_mode(regs);
 
-	// pc (current function) has already been added
+	/* pc (current function) has already been added */
 
-	if (!is_user_mode) {
+	if (!is_user_mode)
 		return;
-	}
 
-	// Add the lr (parent function)
-	// entry preamble may not have executed
+	/* Add the lr (parent function), entry preamble may not have
+	 * executed
+	 */
 	gator_add_trace(cpu, lr);
 
-	// check fp is valid
-	if (fp == 0 || fp < sp) {
+	/* check fp is valid */
+	if (fp == 0 || fp < sp)
 		return;
-	}
 
-	// Get the current stack frame
+	/* Get the current stack frame */
 	curr = (struct stack_frame_eabi *)(fp - gcc_frame_offset);
-	if ((unsigned long)curr & 3) {
+	if ((unsigned long)curr & 3)
 		return;
-	}
 
 	while (depth-- && curr) {
 		if (!access_ok(VERIFY_READ, curr, sizeof(struct stack_frame_eabi)) ||
@@ -95,13 +92,15 @@ static void arm_backtrace_eabi(int cpu, struct pt_regs *const regs, unsigned int
 		lr = (is_compat ? bufcurr.lr_32 : bufcurr.lr);
 
 #define calc_next(reg) ((reg) - gcc_frame_offset)
-		// Returns true if reg is a valid fp
+		/* Returns true if reg is a valid fp */
 #define validate_next(reg, curr) \
 		((reg) != 0 && (calc_next(reg) & 3) == 0 && (unsigned long)(curr) < calc_next(reg))
 
-		// Try lr from the stack as the fp because gcc leaf functions do not push lr
-		// If gcc_frame_offset is non-zero, the lr will also be the clang fp
-		// This assumes code is at a lower address than the stack
+		/* Try lr from the stack as the fp because gcc leaf functions do
+		 * not push lr. If gcc_frame_offset is non-zero, the lr will also
+		 * be the clang fp. This assumes code is at a lower address than
+		 * the stack
+		 */
 		if (validate_next(lr, curr)) {
 			fp = lr;
 			lr = (is_compat ? bufcurr.lr2_32 : bufcurr.lr2);
@@ -109,11 +108,10 @@ static void arm_backtrace_eabi(int cpu, struct pt_regs *const regs, unsigned int
 
 		gator_add_trace(cpu, lr);
 
-		if (!validate_next(fp, curr)) {
+		if (!validate_next(fp, curr))
 			return;
-		}
 
-		// Move to the next stack frame
+		/* Move to the next stack frame */
 		curr = (struct stack_frame_eabi *)calc_next(fp);
 	}
 #endif
@@ -129,6 +127,7 @@ static int report_trace(struct stackframe *frame, void *d)
 #if defined(MODULE)
 		unsigned int cpu = get_physical_cpu();
 		struct module *mod = __module_address(addr);
+
 		if (mod) {
 			cookie = get_cookie(cpu, current, mod->name, false);
 			addr = addr - (unsigned long)mod->module_core;
@@ -142,13 +141,13 @@ static int report_trace(struct stackframe *frame, void *d)
 }
 #endif
 
-// Uncomment the following line to enable kernel stack unwinding within gator, note it can also be defined from the Makefile
-// #define GATOR_KERNEL_STACK_UNWINDING
+/* Uncomment the following line to enable kernel stack unwinding within gator, note it can also be defined from the Makefile */
+/* #define GATOR_KERNEL_STACK_UNWINDING */
 
 #if (defined(__arm__) || defined(__aarch64__)) && !defined(GATOR_KERNEL_STACK_UNWINDING)
-// Disabled by default
+/* Disabled by default */
 MODULE_PARM_DESC(kernel_stack_unwinding, "Allow kernel stack unwinding.");
-static bool kernel_stack_unwinding = 0;
+static bool kernel_stack_unwinding;
 module_param(kernel_stack_unwinding, bool, 0644);
 #endif
 
@@ -161,6 +160,7 @@ static void kernel_backtrace(int cpu, struct pt_regs *const regs)
 	int depth = (kernel_stack_unwinding ? gator_backtrace_depth : 1);
 #endif
 	struct stackframe frame;
+
 	if (depth == 0)
 		depth = 1;
 #if defined(__arm__)
@@ -196,10 +196,10 @@ static void gator_add_sample(int cpu, struct pt_regs *const regs, u64 time)
 	if (in_kernel) {
 		kernel_backtrace(cpu, regs);
 	} else {
-		// Cookie+PC
+		/* Cookie+PC */
 		gator_add_trace(cpu, PC_REG);
 
-		// Backtrace
+		/* Backtrace */
 		if (gator_backtrace_depth)
 			arm_backtrace_eabi(cpu, regs, gator_backtrace_depth);
 	}
diff --git a/drivers/gator/gator_buffer.c b/drivers/gator/gator_buffer.c
index dfbc97d80221..910d5aa15066 100644
--- a/drivers/gator/gator_buffer.c
+++ b/drivers/gator/gator_buffer.c
@@ -10,55 +10,65 @@
 static void marshal_frame(int cpu, int buftype)
 {
 	int frame;
+	bool write_cpu;
 
-	if (!per_cpu(gator_buffer, cpu)[buftype]) {
+	if (!per_cpu(gator_buffer, cpu)[buftype])
 		return;
-	}
 
 	switch (buftype) {
 	case SUMMARY_BUF:
+		write_cpu = false;
 		frame = FRAME_SUMMARY;
 		break;
 	case BACKTRACE_BUF:
+		write_cpu = true;
 		frame = FRAME_BACKTRACE;
 		break;
 	case NAME_BUF:
+		write_cpu = true;
 		frame = FRAME_NAME;
 		break;
 	case COUNTER_BUF:
+		write_cpu = false;
 		frame = FRAME_COUNTER;
 		break;
 	case BLOCK_COUNTER_BUF:
+		write_cpu = true;
 		frame = FRAME_BLOCK_COUNTER;
 		break;
 	case ANNOTATE_BUF:
+		write_cpu = false;
 		frame = FRAME_ANNOTATE;
 		break;
 	case SCHED_TRACE_BUF:
+		write_cpu = true;
 		frame = FRAME_SCHED_TRACE;
 		break;
 	case IDLE_BUF:
+		write_cpu = false;
 		frame = FRAME_IDLE;
 		break;
 	case ACTIVITY_BUF:
+		write_cpu = false;
 		frame = FRAME_ACTIVITY;
 		break;
 	default:
+		write_cpu = false;
 		frame = -1;
 		break;
 	}
 
-	// add response type
-	if (gator_response_type > 0) {
+	/* add response type */
+	if (gator_response_type > 0)
 		gator_buffer_write_packed_int(cpu, buftype, gator_response_type);
-	}
 
-	// leave space for 4-byte unpacked length
+	/* leave space for 4-byte unpacked length */
 	per_cpu(gator_buffer_write, cpu)[buftype] = (per_cpu(gator_buffer_write, cpu)[buftype] + sizeof(s32)) & gator_buffer_mask[buftype];
 
-	// add frame type and core number
+	/* add frame type and core number */
 	gator_buffer_write_packed_int(cpu, buftype, frame);
-	gator_buffer_write_packed_int(cpu, buftype, cpu);
+	if (write_cpu)
+		gator_buffer_write_packed_int(cpu, buftype, cpu);
 }
 
 static int buffer_bytes_available(int cpu, int buftype)
@@ -66,19 +76,17 @@ static int buffer_bytes_available(int cpu, int buftype)
 	int remaining, filled;
 
 	filled = per_cpu(gator_buffer_write, cpu)[buftype] - per_cpu(gator_buffer_read, cpu)[buftype];
-	if (filled < 0) {
+	if (filled < 0)
 		filled += gator_buffer_size[buftype];
-	}
 
 	remaining = gator_buffer_size[buftype] - filled;
 
-	if (per_cpu(buffer_space_available, cpu)[buftype]) {
-		// Give some extra room; also allows space to insert the overflow error packet
+	if (per_cpu(buffer_space_available, cpu)[buftype])
+		/* Give some extra room; also allows space to insert the overflow error packet */
 		remaining -= 200;
-	} else {
-		// Hysteresis, prevents multiple overflow messages
+	else
+		/* Hysteresis, prevents multiple overflow messages */
 		remaining -= 2000;
-	}
 
 	return remaining;
 }
@@ -87,11 +95,10 @@ static bool buffer_check_space(int cpu, int buftype, int bytes)
 {
 	int remaining = buffer_bytes_available(cpu, buftype);
 
-	if (remaining < bytes) {
+	if (remaining < bytes)
 		per_cpu(buffer_space_available, cpu)[buftype] = false;
-	} else {
+	else
 		per_cpu(buffer_space_available, cpu)[buftype] = true;
-	}
 
 	return per_cpu(buffer_space_available, cpu)[buftype];
 }
@@ -100,10 +107,10 @@ static int contiguous_space_available(int cpu, int buftype)
 {
 	int remaining = buffer_bytes_available(cpu, buftype);
 	int contiguous = gator_buffer_size[buftype] - per_cpu(gator_buffer_write, cpu)[buftype];
+
 	if (remaining < contiguous)
 		return remaining;
-	else
-		return contiguous;
+	return contiguous;
 }
 
 static void gator_commit_buffer(int cpu, int buftype, u64 time)
@@ -114,41 +121,38 @@ static void gator_commit_buffer(int cpu, int buftype, u64 time)
 	if (!per_cpu(gator_buffer, cpu)[buftype])
 		return;
 
-	// post-populate the length, which does not include the response type length nor the length itself, i.e. only the length of the payload
+	/* post-populate the length, which does not include the response type length nor the length itself, i.e. only the length of the payload */
 	local_irq_save(flags);
 	type_length = gator_response_type ? 1 : 0;
 	commit = per_cpu(gator_buffer_commit, cpu)[buftype];
 	length = per_cpu(gator_buffer_write, cpu)[buftype] - commit;
-	if (length < 0) {
+	if (length < 0)
 		length += gator_buffer_size[buftype];
-	}
 	length = length - type_length - sizeof(s32);
 
 	if (length <= FRAME_HEADER_SIZE) {
-		// Nothing to write, only the frame header is present
+		/* Nothing to write, only the frame header is present */
 		local_irq_restore(flags);
 		return;
 	}
 
-	for (byte = 0; byte < sizeof(s32); byte++) {
+	for (byte = 0; byte < sizeof(s32); byte++)
 		per_cpu(gator_buffer, cpu)[buftype][(commit + type_length + byte) & gator_buffer_mask[buftype]] = (length >> byte * 8) & 0xFF;
-	}
 
 	per_cpu(gator_buffer_commit, cpu)[buftype] = per_cpu(gator_buffer_write, cpu)[buftype];
 
 	if (gator_live_rate > 0) {
-		while (time > per_cpu(gator_buffer_commit_time, cpu)) {
+		while (time > per_cpu(gator_buffer_commit_time, cpu))
 			per_cpu(gator_buffer_commit_time, cpu) += gator_live_rate;
-		}
 	}
 
 	marshal_frame(cpu, buftype);
 	local_irq_restore(flags);
 
-	// had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater
+	/* had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater */
 	if (per_cpu(in_scheduler_context, cpu)) {
 #ifndef CONFIG_PREEMPT_RT_FULL
-		// mod_timer can not be used in interrupt context in RT-Preempt full
+		/* mod_timer can not be used in interrupt context in RT-Preempt full */
 		mod_timer(&gator_buffer_wake_up_timer, jiffies + 1);
 #endif
 	} else {
@@ -159,10 +163,9 @@ static void gator_commit_buffer(int cpu, int buftype, u64 time)
 static void buffer_check(int cpu, int buftype, u64 time)
 {
 	int filled = per_cpu(gator_buffer_write, cpu)[buftype] - per_cpu(gator_buffer_commit, cpu)[buftype];
-	if (filled < 0) {
+
+	if (filled < 0)
 		filled += gator_buffer_size[buftype];
-	}
-	if (filled >= ((gator_buffer_size[buftype] * 3) / 4)) {
+	if (filled >= ((gator_buffer_size[buftype] * 3) / 4))
 		gator_commit_buffer(cpu, buftype, time);
-	}
 }
diff --git a/drivers/gator/gator_buffer_write.c b/drivers/gator/gator_buffer_write.c
index b621ba93ee5e..654ec606cfad 100644
--- a/drivers/gator/gator_buffer_write.c
+++ b/drivers/gator/gator_buffer_write.c
@@ -14,16 +14,17 @@ static void gator_buffer_write_packed_int(int cpu, int buftype, int x)
 	char *buffer = per_cpu(gator_buffer, cpu)[buftype];
 	int packedBytes = 0;
 	int more = true;
+
 	while (more) {
-		// low order 7 bits of x
+		/* low order 7 bits of x */
 		char b = x & 0x7f;
+
 		x >>= 7;
 
-		if ((x == 0 && (b & 0x40) == 0) || (x == -1 && (b & 0x40) != 0)) {
+		if ((x == 0 && (b & 0x40) == 0) || (x == -1 && (b & 0x40) != 0))
 			more = false;
-		} else {
+		else
 			b |= 0x80;
-		}
 
 		buffer[(write + packedBytes) & mask] = b;
 		packedBytes++;
@@ -39,16 +40,17 @@ static void gator_buffer_write_packed_int64(int cpu, int buftype, long long x)
 	char *buffer = per_cpu(gator_buffer, cpu)[buftype];
 	int packedBytes = 0;
 	int more = true;
+
 	while (more) {
-		// low order 7 bits of x
+		/* low order 7 bits of x */
 		char b = x & 0x7f;
+
 		x >>= 7;
 
-		if ((x == 0 && (b & 0x40) == 0) || (x == -1 && (b & 0x40) != 0)) {
+		if ((x == 0 && (b & 0x40) == 0) || (x == -1 && (b & 0x40) != 0))
 			more = false;
-		} else {
+		else
 			b |= 0x80;
-		}
 
 		buffer[(write + packedBytes) & mask] = b;
 		packedBytes++;
@@ -75,6 +77,7 @@ static void gator_buffer_write_bytes(int cpu, int buftype, const char *x, int le
 static void gator_buffer_write_string(int cpu, int buftype, const char *x)
 {
 	int len = strlen(x);
+
 	gator_buffer_write_packed_int(cpu, buftype, len);
 	gator_buffer_write_bytes(cpu, buftype, x, len);
 }
diff --git a/drivers/gator/gator_cookies.c b/drivers/gator/gator_cookies.c
index 5c7d842070e0..c43cce815226 100644
--- a/drivers/gator/gator_cookies.c
+++ b/drivers/gator/gator_cookies.c
@@ -7,8 +7,10 @@
  *
  */
 
-#define COOKIEMAP_ENTRIES	1024	/* must be power of 2 */
-#define TRANSLATE_BUFFER_SIZE 512  // must be a power of 2 - 512/4 = 128 entries
+/* must be power of 2 */
+#define COOKIEMAP_ENTRIES	1024
+/* must be a power of 2 - 512/4 = 128 entries */
+#define TRANSLATE_BUFFER_SIZE 512
 #define TRANSLATE_TEXT_SIZE		256
 #define MAX_COLLISIONS		2
 
@@ -38,6 +40,7 @@ static uint32_t cookiemap_code(uint64_t value64)
 {
 	uint32_t value = (uint32_t)((value64 >> 32) + value64);
 	uint32_t cookiecode = (value >> 24) & 0xff;
+
 	cookiecode = cookiecode * 31 + ((value >> 16) & 0xff);
 	cookiecode = cookiecode * 31 + ((value >> 8) & 0xff);
 	cookiecode = cookiecode * 31 + ((value >> 0) & 0xff);
@@ -52,9 +55,8 @@ static uint32_t gator_chksum_crc32(const char *data)
 	int i, length = strlen(data);
 
 	crc = 0xFFFFFFFF;
-	for (i = 0; i < length; i++) {
+	for (i = 0; i < length; i++)
 		crc = ((crc >> 8) & 0x00FFFFFF) ^ gator_crc32_table[(crc ^ *block++) & 0xFF];
-	}
 
 	return (crc ^ 0xFFFFFFFF);
 }
@@ -72,11 +74,12 @@ static uint32_t cookiemap_exists(uint64_t key)
 	uint64_t *keys = &(per_cpu(cookie_keys, cpu)[cookiecode]);
 	uint32_t *values = &(per_cpu(cookie_values, cpu)[cookiecode]);
 
-	// Can be called from interrupt handler or from work queue
+	/* Can be called from interrupt handler or from work queue */
 	local_irq_save(flags);
 	for (x = 0; x < MAX_COLLISIONS; x++) {
 		if (keys[x] == key) {
 			uint32_t value = values[x];
+
 			for (; x > 0; x--) {
 				keys[x] = keys[x - 1];
 				values[x] = values[x - 1];
@@ -126,7 +129,7 @@ static void translate_buffer_write_args(int cpu, struct task_struct *task, const
 	write = per_cpu(translate_buffer_write, cpu);
 	next_write = (write + 1) & translate_buffer_mask;
 
-	// At least one entry must always remain available as when read == write, the queue is empty not full
+	/* At least one entry must always remain available as when read == write, the queue is empty not full */
 	if (next_write != per_cpu(translate_buffer_read, cpu)) {
 		args = &per_cpu(translate_buffer, cpu)[write];
 		args->task = task;
@@ -178,11 +181,11 @@ static void wq_cookie_handler(struct work_struct *unused)
 
 static void app_process_wake_up_handler(unsigned long unused_data)
 {
-	// had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater
+	/* had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater */
 	schedule_work(&cookie_work);
 }
 
-// Retrieve full name from proc/pid/cmdline for java processes on Android
+/* Retrieve full name from proc/pid/cmdline for java processes on Android */
 static int translate_app_process(const char **text, int cpu, struct task_struct *task, bool from_wq)
 {
 	void *maddr;
@@ -195,12 +198,16 @@ static int translate_app_process(const char **text, int cpu, struct task_struct
 	char *buf = per_cpu(translate_text, cpu);
 
 #ifndef CONFIG_PREEMPT_RT_FULL
-	// Push work into a work queue if in atomic context as the kernel functions below might sleep
-	// Rely on the in_interrupt variable rather than in_irq() or in_interrupt() kernel functions, as the value of these functions seems
-	//   inconsistent during a context switch between android/linux versions
+	/* Push work into a work queue if in atomic context as the kernel
+	 * functions below might sleep. Rely on the in_interrupt variable
+	 * rather than in_irq() or in_interrupt() kernel functions, as the
+	 * value of these functions seems inconsistent during a context
+	 * switch between android/linux versions
+	 */
 	if (!from_wq) {
-		// Check if already in buffer
+		/* Check if already in buffer */
 		int pos = per_cpu(translate_buffer_read, cpu);
+
 		while (pos != per_cpu(translate_buffer_write, cpu)) {
 			if (per_cpu(translate_buffer, cpu)[pos].task == task)
 				goto out;
@@ -209,7 +216,7 @@ static int translate_app_process(const char **text, int cpu, struct task_struct
 
 		translate_buffer_write_args(cpu, task, *text);
 
-		// Not safe to call in RT-Preempt full in schedule switch context
+		/* Not safe to call in RT-Preempt full in schedule switch context */
 		mod_timer(&app_process_wake_up_timer, jiffies + 1);
 		goto out;
 	}
@@ -239,7 +246,8 @@ static int translate_app_process(const char **text, int cpu, struct task_struct
 
 		copy_from_user_page(page_vma, page, addr, buf, maddr + offset, bytes);
 
-		kunmap(page);	// release page allocated by get_user_pages()
+		/* release page allocated by get_user_pages() */
+		kunmap(page);
 		page_cache_release(page);
 
 		len -= bytes;
@@ -250,7 +258,7 @@ static int translate_app_process(const char **text, int cpu, struct task_struct
 		retval = 1;
 	}
 
-	// On app_process startup, /proc/pid/cmdline is initially "zygote" then "<pre-initialized>" but changes after an initial startup period
+	/* On app_process startup, /proc/pid/cmdline is initially "zygote" then "<pre-initialized>" but changes after an initial startup period */
 	if (strcmp(*text, "zygote") == 0 || strcmp(*text, "<pre-initialized>") == 0)
 		retval = 0;
 
@@ -262,6 +270,8 @@ static int translate_app_process(const char **text, int cpu, struct task_struct
 	return retval;
 }
 
+static const char APP_PROCESS[] = "app_process";
+
 static uint32_t get_cookie(int cpu, struct task_struct *task, const char *text, bool from_wq)
 {
 	unsigned long flags, cookie;
@@ -271,16 +281,16 @@ static uint32_t get_cookie(int cpu, struct task_struct *task, const char *text,
 	key = (key << 32) | (uint32_t)task->tgid;
 
 	cookie = cookiemap_exists(key);
-	if (cookie) {
+	if (cookie)
 		return cookie;
-	}
 
-	if (strcmp(text, "app_process") == 0) {
+	/* On 64-bit android app_process can be app_process32 or app_process64 */
+	if (strncmp(text, APP_PROCESS, sizeof(APP_PROCESS) - 1) == 0) {
 		if (!translate_app_process(&text, cpu, task, from_wq))
 			return UNRESOLVED_COOKIE;
 	}
 
-	// Can be called from interrupt handler or from work queue or from scheduler trace
+	/* Can be called from interrupt handler or from work queue or from scheduler trace */
 	local_irq_save(flags);
 
 	cookie = UNRESOLVED_COOKIE;
@@ -300,7 +310,7 @@ static int get_exec_cookie(int cpu, struct task_struct *task)
 	struct mm_struct *mm = task->mm;
 	const char *text;
 
-	// kernel threads have no address space
+	/* kernel threads have no address space */
 	if (!mm)
 		return NO_COOKIE;
 
@@ -355,7 +365,7 @@ static int cookies_initialize(void)
 		per_cpu(cookie_next_key, cpu) = nr_cpu_ids + cpu;
 
 		size = COOKIEMAP_ENTRIES * MAX_COLLISIONS * sizeof(uint64_t);
-		per_cpu(cookie_keys, cpu) = (uint64_t *)kmalloc(size, GFP_KERNEL);
+		per_cpu(cookie_keys, cpu) = kmalloc(size, GFP_KERNEL);
 		if (!per_cpu(cookie_keys, cpu)) {
 			err = -ENOMEM;
 			goto cookie_setup_error;
@@ -363,14 +373,14 @@ static int cookies_initialize(void)
 		memset(per_cpu(cookie_keys, cpu), 0, size);
 
 		size = COOKIEMAP_ENTRIES * MAX_COLLISIONS * sizeof(uint32_t);
-		per_cpu(cookie_values, cpu) = (uint32_t *)kmalloc(size, GFP_KERNEL);
+		per_cpu(cookie_values, cpu) = kmalloc(size, GFP_KERNEL);
 		if (!per_cpu(cookie_values, cpu)) {
 			err = -ENOMEM;
 			goto cookie_setup_error;
 		}
 		memset(per_cpu(cookie_values, cpu), 0, size);
 
-		per_cpu(translate_buffer, cpu) = (struct cookie_args *)kmalloc(TRANSLATE_BUFFER_SIZE, GFP_KERNEL);
+		per_cpu(translate_buffer, cpu) = kmalloc(TRANSLATE_BUFFER_SIZE, GFP_KERNEL);
 		if (!per_cpu(translate_buffer, cpu)) {
 			err = -ENOMEM;
 			goto cookie_setup_error;
@@ -379,16 +389,16 @@ static int cookies_initialize(void)
 		per_cpu(translate_buffer_write, cpu) = 0;
 		per_cpu(translate_buffer_read, cpu) = 0;
 
-		per_cpu(translate_text, cpu) = (char *)kmalloc(TRANSLATE_TEXT_SIZE, GFP_KERNEL);
+		per_cpu(translate_text, cpu) = kmalloc(TRANSLATE_TEXT_SIZE, GFP_KERNEL);
 		if (!per_cpu(translate_text, cpu)) {
 			err = -ENOMEM;
 			goto cookie_setup_error;
 		}
 	}
 
-	// build CRC32 table
+	/* build CRC32 table */
 	poly = 0x04c11db7;
-	gator_crc32_table = (uint32_t *)kmalloc(256 * sizeof(uint32_t), GFP_KERNEL);
+	gator_crc32_table = kmalloc(256 * sizeof(*gator_crc32_table), GFP_KERNEL);
 	if (!gator_crc32_table) {
 		err = -ENOMEM;
 		goto cookie_setup_error;
@@ -396,11 +406,10 @@ static int cookies_initialize(void)
 	for (i = 0; i < 256; i++) {
 		crc = i;
 		for (j = 8; j > 0; j--) {
-			if (crc & 1) {
+			if (crc & 1)
 				crc = (crc >> 1) ^ poly;
-			} else {
+			else
 				crc >>= 1;
-			}
 		}
 		gator_crc32_table[i] = crc;
 	}
diff --git a/drivers/gator/gator_events_armv6.c b/drivers/gator/gator_events_armv6.c
index 353645622306..a157a0013302 100644
--- a/drivers/gator/gator_events_armv6.c
+++ b/drivers/gator/gator_events_armv6.c
@@ -8,7 +8,7 @@
 
 #include "gator.h"
 
-// gator_events_perf_pmu.c is used if perf is supported
+/* gator_events_perf_pmu.c is used if perf is supported */
 #if GATOR_NO_PERF_SUPPORT
 
 static const char *pmnc_name;
@@ -28,7 +28,7 @@ static const char *pmnc_name;
 #define CCNT 2
 #define CNTMAX	(CCNT+1)
 
-static int pmnc_counters = 0;
+static int pmnc_counters;
 static unsigned long pmnc_enabled[CNTMAX];
 static unsigned long pmnc_event[CNTMAX];
 static unsigned long pmnc_key[CNTMAX];
@@ -45,6 +45,7 @@ static inline void armv6_pmnc_write(u32 val)
 static inline u32 armv6_pmnc_read(void)
 {
 	u32 val;
+
 	asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r" (val));
 	return val;
 }
@@ -52,6 +53,7 @@ static inline u32 armv6_pmnc_read(void)
 static void armv6_pmnc_reset_counter(unsigned int cnt)
 {
 	u32 val = 0;
+
 	switch (cnt) {
 	case CCNT:
 		asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r" (val));
@@ -74,20 +76,18 @@ int gator_events_armv6_create_files(struct super_block *sb, struct dentry *root)
 
 	for (i = PMN0; i <= CCNT; i++) {
 		char buf[40];
-		if (i == CCNT) {
-			snprintf(buf, sizeof buf, "ARM_%s_ccnt", pmnc_name);
-		} else {
-			snprintf(buf, sizeof buf, "ARM_%s_cnt%d", pmnc_name, i);
-		}
+
+		if (i == CCNT)
+			snprintf(buf, sizeof(buf), "ARM_%s_ccnt", pmnc_name);
+		else
+			snprintf(buf, sizeof(buf), "ARM_%s_cnt%d", pmnc_name, i);
 		dir = gatorfs_mkdir(sb, root, buf);
-		if (!dir) {
+		if (!dir)
 			return -1;
-		}
 		gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[i]);
 		gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[i]);
-		if (i != CCNT) {
+		if (i != CCNT)
 			gatorfs_create_ulong(sb, dir, "event", &pmnc_event[i]);
-		}
 	}
 
 	return 0;
@@ -98,9 +98,8 @@ static int gator_events_armv6_online(int **buffer, bool migrate)
 	unsigned int cnt, len = 0, cpu = smp_processor_id();
 	u32 pmnc;
 
-	if (armv6_pmnc_read() & PMCR_E) {
+	if (armv6_pmnc_read() & PMCR_E)
 		armv6_pmnc_write(armv6_pmnc_read() & ~PMCR_E);
-	}
 
 	/* initialize PMNC, reset overflow, D bit, C bit and P bit. */
 	armv6_pmnc_write(PMCR_OFL_PMN0 | PMCR_OFL_PMN1 | PMCR_OFL_CCNT |
@@ -115,19 +114,18 @@ static int gator_events_armv6_online(int **buffer, bool migrate)
 
 		event = pmnc_event[cnt] & 255;
 
-		// Set event (if destined for PMNx counters)
-		if (cnt == PMN0) {
+		/* Set event (if destined for PMNx counters) */
+		if (cnt == PMN0)
 			pmnc |= event << 20;
-		} else if (cnt == PMN1) {
+		else if (cnt == PMN1)
 			pmnc |= event << 12;
-		}
 
-		// Reset counter
+		/* Reset counter */
 		armv6_pmnc_reset_counter(cnt);
 	}
 	armv6_pmnc_write(pmnc | PMCR_E);
 
-	// return zero values, no need to read as the counters were just reset
+	/* return zero values, no need to read as the counters were just reset */
 	for (cnt = PMN0; cnt <= CCNT; cnt++) {
 		if (pmnc_enabled[cnt]) {
 			per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
@@ -146,9 +144,8 @@ static int gator_events_armv6_offline(int **buffer, bool migrate)
 	unsigned int cnt;
 
 	armv6_pmnc_write(armv6_pmnc_read() & ~PMCR_E);
-	for (cnt = PMN0; cnt <= CCNT; cnt++) {
+	for (cnt = PMN0; cnt <= CCNT; cnt++)
 		armv6_pmnc_reset_counter(cnt);
-	}
 
 	return 0;
 }
@@ -163,19 +160,19 @@ static void gator_events_armv6_stop(void)
 	}
 }
 
-static int gator_events_armv6_read(int **buffer)
+static int gator_events_armv6_read(int **buffer, bool sched_switch)
 {
 	int cnt, len = 0;
 	int cpu = smp_processor_id();
 
-	// a context switch may occur before the online hotplug event, thus need to check that the pmu is enabled
-	if (!(armv6_pmnc_read() & PMCR_E)) {
+	/* a context switch may occur before the online hotplug event, thus need to check that the pmu is enabled */
+	if (!(armv6_pmnc_read() & PMCR_E))
 		return 0;
-	}
 
 	for (cnt = PMN0; cnt <= CCNT; cnt++) {
 		if (pmnc_enabled[cnt]) {
 			u32 value = 0;
+
 			switch (cnt) {
 			case CCNT:
 				asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r" (value));
diff --git a/drivers/gator/gator_events_armv7.c b/drivers/gator/gator_events_armv7.c
index bd8a9ba24e99..09c94220114c 100644
--- a/drivers/gator/gator_events_armv7.c
+++ b/drivers/gator/gator_events_armv7.c
@@ -15,16 +15,16 @@
 
 #include "gator.h"
 
-// gator_events_perf_pmu.c is used if perf is supported
+/* gator_events_perf_pmu.c is used if perf is supported */
 #if GATOR_NO_PERF_SUPPORT
 
-// Per-CPU PMNC: config reg
+/* Per-CPU PMNC: config reg */
 #define PMNC_E		(1 << 0)	/* Enable all counters */
 #define PMNC_P		(1 << 1)	/* Reset all counters */
 #define PMNC_C		(1 << 2)	/* Cycle counter reset */
 #define	PMNC_MASK	0x3f	/* Mask for writable bits */
 
-// ccnt reg
+/* ccnt reg */
 #define CCNT_REG	(1 << 31)
 
 #define CCNT		0
@@ -49,6 +49,7 @@ inline void armv7_pmnc_write(u32 val)
 inline u32 armv7_pmnc_read(void)
 {
 	u32 val;
+
 	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
 	return val;
 }
@@ -61,10 +62,10 @@ inline u32 armv7_ccnt_read(u32 reset_value)
 	u32 val;
 
 	local_irq_save(flags);
-	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (den));	// disable
-	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));	// read
-	asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (newval));	// new value
-	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (den));	// enable
+	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (den));	/* disable */
+	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));	/* read */
+	asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (newval));	/* new value */
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (den));	/* enable */
 	local_irq_restore(flags);
 
 	return val;
@@ -79,11 +80,11 @@ inline u32 armv7_cntn_read(unsigned int cnt, u32 reset_value)
 	u32 oldval;
 
 	local_irq_save(flags);
-	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (den));	// disable
-	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (sel));	// select
-	asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (oldval));	// read
-	asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (newval));	// new value
-	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (den));	// enable
+	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (den));	/* disable */
+	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (sel));	/* select */
+	asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (oldval));	/* read */
+	asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (newval));	/* new value */
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (den));	/* enable */
 	local_irq_restore(flags);
 
 	return oldval;
@@ -92,13 +93,15 @@ inline u32 armv7_cntn_read(unsigned int cnt, u32 reset_value)
 static inline void armv7_pmnc_disable_interrupt(unsigned int cnt)
 {
 	u32 val = cnt ? (1 << (cnt - CNT0)) : (1 << 31);
+
 	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
 }
 
 inline u32 armv7_pmnc_reset_interrupt(void)
 {
-	// Get and reset overflow status flags
+	/* Get and reset overflow status flags */
 	u32 flags;
+
 	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (flags));
 	flags &= 0x8000003f;
 	asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (flags));
@@ -108,6 +111,7 @@ inline u32 armv7_pmnc_reset_interrupt(void)
 static inline u32 armv7_pmnc_enable_counter(unsigned int cnt)
 {
 	u32 val = cnt ? (1 << (cnt - CNT0)) : CCNT_REG;
+
 	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
 	return cnt;
 }
@@ -115,6 +119,7 @@ static inline u32 armv7_pmnc_enable_counter(unsigned int cnt)
 static inline u32 armv7_pmnc_disable_counter(unsigned int cnt)
 {
 	u32 val = cnt ? (1 << (cnt - CNT0)) : CCNT_REG;
+
 	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
 	return cnt;
 }
@@ -122,15 +127,15 @@ static inline u32 armv7_pmnc_disable_counter(unsigned int cnt)
 static inline int armv7_pmnc_select_counter(unsigned int cnt)
 {
 	u32 val = (cnt - CNT0);
+
 	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
 	return cnt;
 }
 
 static inline void armv7_pmnc_write_evtsel(unsigned int cnt, u32 val)
 {
-	if (armv7_pmnc_select_counter(cnt) == cnt) {
+	if (armv7_pmnc_select_counter(cnt) == cnt)
 		asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
-	}
 }
 
 static int gator_events_armv7_create_files(struct super_block *sb, struct dentry *root)
@@ -140,20 +145,18 @@ static int gator_events_armv7_create_files(struct super_block *sb, struct dentry
 
 	for (i = 0; i < pmnc_counters; i++) {
 		char buf[40];
-		if (i == 0) {
-			snprintf(buf, sizeof buf, "%s_ccnt", pmnc_name);
-		} else {
-			snprintf(buf, sizeof buf, "%s_cnt%d", pmnc_name, i - 1);
-		}
+
+		if (i == 0)
+			snprintf(buf, sizeof(buf), "%s_ccnt", pmnc_name);
+		else
+			snprintf(buf, sizeof(buf), "%s_cnt%d", pmnc_name, i - 1);
 		dir = gatorfs_mkdir(sb, root, buf);
-		if (!dir) {
+		if (!dir)
 			return -1;
-		}
 		gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[i]);
 		gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[i]);
-		if (i > 0) {
+		if (i > 0)
 			gatorfs_create_ulong(sb, dir, "event", &pmnc_event[i]);
-		}
 	}
 
 	return 0;
@@ -163,14 +166,13 @@ static int gator_events_armv7_online(int **buffer, bool migrate)
 {
 	unsigned int cnt, len = 0, cpu = smp_processor_id();
 
-	if (armv7_pmnc_read() & PMNC_E) {
+	if (armv7_pmnc_read() & PMNC_E)
 		armv7_pmnc_write(armv7_pmnc_read() & ~PMNC_E);
-	}
 
-	// Initialize & Reset PMNC: C bit and P bit
+	/* Initialize & Reset PMNC: C bit and P bit */
 	armv7_pmnc_write(PMNC_P | PMNC_C);
 
-	// Reset overflow flags
+	/* Reset overflow flags */
 	armv7_pmnc_reset_interrupt();
 
 	for (cnt = CCNT; cnt < CNTMAX; cnt++) {
@@ -179,28 +181,28 @@ static int gator_events_armv7_online(int **buffer, bool migrate)
 		if (!pmnc_enabled[cnt])
 			continue;
 
-		// Disable counter
+		/* Disable counter */
 		armv7_pmnc_disable_counter(cnt);
 
 		event = pmnc_event[cnt] & 255;
 
-		// Set event (if destined for PMNx counters), we don't need to set the event if it's a cycle count
+		/* Set event (if destined for PMNx counters), we don't need to set the event if it's a cycle count */
 		if (cnt != CCNT)
 			armv7_pmnc_write_evtsel(cnt, event);
 
 		armv7_pmnc_disable_interrupt(cnt);
 
-		// Reset counter
+		/* Reset counter */
 		cnt ? armv7_cntn_read(cnt, 0) : armv7_ccnt_read(0);
 
-		// Enable counter
+		/* Enable counter */
 		armv7_pmnc_enable_counter(cnt);
 	}
 
-	// enable
+	/* enable */
 	armv7_pmnc_write(armv7_pmnc_read() | PMNC_E);
 
-	// return zero values, no need to read as the counters were just reset
+	/* return zero values, no need to read as the counters were just reset */
 	for (cnt = 0; cnt < pmnc_counters; cnt++) {
 		if (pmnc_enabled[cnt]) {
 			per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
@@ -216,7 +218,7 @@ static int gator_events_armv7_online(int **buffer, bool migrate)
 
 static int gator_events_armv7_offline(int **buffer, bool migrate)
 {
-	// disable all counters, including PMCCNTR; overflow IRQs will not be signaled
+	/* disable all counters, including PMCCNTR; overflow IRQs will not be signaled */
 	armv7_pmnc_write(armv7_pmnc_read() & ~PMNC_E);
 
 	return 0;
@@ -232,24 +234,23 @@ static void gator_events_armv7_stop(void)
 	}
 }
 
-static int gator_events_armv7_read(int **buffer)
+static int gator_events_armv7_read(int **buffer, bool sched_switch)
 {
 	int cnt, len = 0;
 	int cpu = smp_processor_id();
 
-	// a context switch may occur before the online hotplug event, thus need to check that the pmu is enabled
-	if (!(armv7_pmnc_read() & PMNC_E)) {
+	/* a context switch may occur before the online hotplug event, so we need to check that the PMU is enabled */
+	if (!(armv7_pmnc_read() & PMNC_E))
 		return 0;
-	}
 
 	for (cnt = 0; cnt < pmnc_counters; cnt++) {
 		if (pmnc_enabled[cnt]) {
 			int value;
-			if (cnt == CCNT) {
+
+			if (cnt == CCNT)
 				value = armv7_ccnt_read(0);
-			} else {
+			else
 				value = armv7_cntn_read(cnt, 0);
-			}
 			per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
 			per_cpu(perfCnt, cpu)[len++] = value;
 		}
@@ -290,17 +291,16 @@ int gator_events_armv7_init(void)
 		pmnc_name = "ARMv7_Cortex_A9";
 		pmnc_counters = 6;
 		break;
-	// ARM Cortex A12 is not supported by version of Linux before 3.0
 	case CORTEX_A15:
 		pmnc_name = "ARMv7_Cortex_A15";
 		pmnc_counters = 6;
 		break;
-	// ARM Cortex A17 is not supported by version of Linux before 3.0
+	/* ARM Cortex A17 is not supported by versions of Linux before 3.0 */
 	default:
 		return -1;
 	}
 
-	pmnc_counters++;	// CNT[n] + CCNT
+	pmnc_counters++;	/* CNT[n] + CCNT */
 
 	for (cnt = CCNT; cnt < CNTMAX; cnt++) {
 		pmnc_enabled[cnt] = 0;
diff --git a/drivers/gator/gator_events_block.c b/drivers/gator/gator_events_block.c
index 03eed4fb9ebb..a352a54afa02 100644
--- a/drivers/gator/gator_events_block.c
+++ b/drivers/gator/gator_events_block.c
@@ -28,7 +28,7 @@ static ulong block_rq_rd_key;
 static atomic_t blockCnt[BLOCK_TOTAL];
 static int blockGet[BLOCK_TOTAL * 4];
 
-// Tracepoint changed in 3.15 backported to older kernels. The Makefile tries to autodetect the correct value, but if it fails change the #if below
+/* The tracepoint changed in 3.15 and was backported to older kernels. The Makefile tries to autodetect the correct value, but if that fails, change the #if below */
 #if OLD_BLOCK_RQ_COMPLETE
 GATOR_DEFINE_PROBE(block_rq_complete, TP_PROTO(struct request_queue *q, struct request *rq))
 #else
@@ -52,13 +52,11 @@ GATOR_DEFINE_PROBE(block_rq_complete, TP_PROTO(struct request_queue *q, struct r
 		return;
 
 	if (write) {
-		if (block_rq_wr_enabled) {
+		if (block_rq_wr_enabled)
 			atomic_add(size, &blockCnt[BLOCK_RQ_WR]);
-		}
 	} else {
-		if (block_rq_rd_enabled) {
+		if (block_rq_rd_enabled)
 			atomic_add(size, &blockCnt[BLOCK_RQ_RD]);
-		}
 	}
 }
 
@@ -68,17 +66,15 @@ static int gator_events_block_create_files(struct super_block *sb, struct dentry
 
 	/* block_complete_wr */
 	dir = gatorfs_mkdir(sb, root, "Linux_block_rq_wr");
-	if (!dir) {
+	if (!dir)
 		return -1;
-	}
 	gatorfs_create_ulong(sb, dir, "enabled", &block_rq_wr_enabled);
 	gatorfs_create_ro_ulong(sb, dir, "key", &block_rq_wr_key);
 
 	/* block_complete_rd */
 	dir = gatorfs_mkdir(sb, root, "Linux_block_rq_rd");
-	if (!dir) {
+	if (!dir)
 		return -1;
-	}
 	gatorfs_create_ulong(sb, dir, "enabled", &block_rq_rd_enabled);
 	gatorfs_create_ro_ulong(sb, dir, "key", &block_rq_rd_key);
 
@@ -87,7 +83,7 @@ static int gator_events_block_create_files(struct super_block *sb, struct dentry
 
 static int gator_events_block_start(void)
 {
-	// register tracepoints
+	/* register tracepoints */
 	if (block_rq_wr_enabled || block_rq_rd_enabled)
 		if (GATOR_REGISTER_TRACE(block_rq_complete))
 			goto fail_block_rq_exit;
@@ -95,7 +91,7 @@ static int gator_events_block_start(void)
 
 	return 0;
 
-	// unregister tracepoints on error
+	/* unregister tracepoints on error */
 fail_block_rq_exit:
 	pr_err("gator: block event tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
 
@@ -112,19 +108,19 @@ static void gator_events_block_stop(void)
 	block_rq_rd_enabled = 0;
 }
 
-static int gator_events_block_read(int **buffer)
+static int gator_events_block_read(int **buffer, bool sched_switch)
 {
 	int len, value, data = 0;
 
-	if (!on_primary_core()) {
+	if (!on_primary_core())
 		return 0;
-	}
 
 	len = 0;
 	if (block_rq_wr_enabled && (value = atomic_read(&blockCnt[BLOCK_RQ_WR])) > 0) {
 		atomic_sub(value, &blockCnt[BLOCK_RQ_WR]);
 		blockGet[len++] = block_rq_wr_key;
-		blockGet[len++] = 0;	// indicates to Streamline that value bytes were written now, not since the last message
+		/* Indicates to Streamline that value bytes were written now, not since the last message */
+		blockGet[len++] = 0;
 		blockGet[len++] = block_rq_wr_key;
 		blockGet[len++] = value;
 		data += value;
@@ -132,7 +128,8 @@ static int gator_events_block_read(int **buffer)
 	if (block_rq_rd_enabled && (value = atomic_read(&blockCnt[BLOCK_RQ_RD])) > 0) {
 		atomic_sub(value, &blockCnt[BLOCK_RQ_RD]);
 		blockGet[len++] = block_rq_rd_key;
-		blockGet[len++] = 0;	// indicates to Streamline that value bytes were read now, not since the last message
+		/* Indicates to Streamline that value bytes were read now, not since the last message */
+		blockGet[len++] = 0;
 		blockGet[len++] = block_rq_rd_key;
 		blockGet[len++] = value;
 		data += value;
diff --git a/drivers/gator/gator_events_ccn-504.c b/drivers/gator/gator_events_ccn-504.c
deleted file mode 100644
index 024ffc2856aa..000000000000
--- a/drivers/gator/gator_events_ccn-504.c
+++ /dev/null
@@ -1,346 +0,0 @@
-/**
- * Copyright (C) ARM Limited 2013-2014. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/io.h>
-#include <linux/module.h>
-
-#include "gator.h"
-
-#define NUM_REGIONS 256
-#define REGION_SIZE (64*1024)
-#define REGION_DEBUG 1
-#define REGION_XP 64
-#define NUM_XPS 11
-
-// DT (Debug) region
-#define PMEVCNTSR0    0x0150
-#define PMCCNTRSR     0x0190
-#define PMCR          0x01A8
-#define PMSR          0x01B0
-#define PMSR_REQ      0x01B8
-#define PMSR_CLR      0x01C0
-
-// XP region
-#define DT_CONFIG     0x0300
-#define DT_CONTROL    0x0370
-
-// Multiple
-#define PMU_EVENT_SEL 0x0600
-#define OLY_ID        0xFF00
-
-#define CCNT 4
-#define CNTMAX (CCNT + 1)
-
-#define get_pmu_event_id(event) (((event) >> 0) & 0xFF)
-#define get_node_type(event) (((event) >> 8) & 0xFF)
-#define get_region(event) (((event) >> 16) & 0xFF)
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
-
-// From kernel/params.c
-#define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn)      	\
-	int param_set_##name(const char *val, struct kernel_param *kp)	\
-	{								\
-		tmptype l;						\
-		int ret;						\
-									\
-		if (!val) return -EINVAL;				\
-		ret = strtolfn(val, 0, &l);				\
-		if (ret == -EINVAL || ((type)l != l))			\
-			return -EINVAL;					\
-		*((type *)kp->arg) = l;					\
-		return 0;						\
-	}								\
-	int param_get_##name(char *buffer, struct kernel_param *kp)	\
-	{								\
-		return sprintf(buffer, format, *((type *)kp->arg));	\
-	}
-
-#else
-
-// From kernel/params.c
-#define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn)      	\
-	int param_set_##name(const char *val, const struct kernel_param *kp) \
-	{								\
-		tmptype l;						\
-		int ret;						\
-									\
-		ret = strtolfn(val, 0, &l);				\
-		if (ret < 0 || ((type)l != l))				\
-			return ret < 0 ? ret : -EINVAL;			\
-		*((type *)kp->arg) = l;					\
-		return 0;						\
-	}								\
-	int param_get_##name(char *buffer, const struct kernel_param *kp) \
-	{								\
-		return scnprintf(buffer, PAGE_SIZE, format,		\
-				*((type *)kp->arg));			\
-	}								\
-	struct kernel_param_ops param_ops_##name = {			\
-		.set = param_set_##name,				\
-		.get = param_get_##name,				\
-	};								\
-	EXPORT_SYMBOL(param_set_##name);				\
-	EXPORT_SYMBOL(param_get_##name);				\
-	EXPORT_SYMBOL(param_ops_##name)
-
-#endif
-
-STANDARD_PARAM_DEF(u64, u64, "%llu", u64, strict_strtoull);
-
-// From include/linux/moduleparam.h
-#define param_check_u64(name, p) __param_check(name, p, u64)
-
-MODULE_PARM_DESC(ccn504_addr, "CCN-504 physical base address");
-static u64 ccn504_addr = 0;
-module_param(ccn504_addr, u64, 0444);
-
-static void __iomem *gator_events_ccn504_base;
-static bool gator_events_ccn504_global_enabled;
-static unsigned long gator_events_ccn504_enabled[CNTMAX];
-static unsigned long gator_events_ccn504_event[CNTMAX];
-static unsigned long gator_events_ccn504_key[CNTMAX];
-static int gator_events_ccn504_buffer[2*CNTMAX];
-static int gator_events_ccn504_prev[CNTMAX];
-
-static void gator_events_ccn504_create_shutdown(void)
-{
-	if (gator_events_ccn504_base != NULL) {
-		iounmap(gator_events_ccn504_base);
-	}
-}
-
-static int gator_events_ccn504_create_files(struct super_block *sb, struct dentry *root)
-{
-	struct dentry *dir;
-	int i;
-	char buf[32];
-
-	for (i = 0; i < CNTMAX; ++i) {
-		if (i == CCNT) {
-			snprintf(buf, sizeof(buf), "CCN-504_ccnt");
-		} else {
-			snprintf(buf, sizeof(buf), "CCN-504_cnt%i", i);
-		}
-		dir = gatorfs_mkdir(sb, root, buf);
-		if (!dir) {
-			return -1;
-		}
-
-		gatorfs_create_ulong(sb, dir, "enabled", &gator_events_ccn504_enabled[i]);
-		if (i != CCNT) {
-			gatorfs_create_ulong(sb, dir, "event", &gator_events_ccn504_event[i]);
-		}
-		gatorfs_create_ro_ulong(sb, dir, "key", &gator_events_ccn504_key[i]);
-	}
-
-	return 0;
-}
-
-static void gator_events_ccn504_set_dt_config(int xp_node_id, int event_num, int value)
-{
-	u32 dt_config;
-
-	dt_config = readl(gator_events_ccn504_base + (REGION_XP + xp_node_id)*REGION_SIZE + DT_CONFIG);
-	dt_config |= (value + event_num) << (4*event_num);
-	writel(dt_config, gator_events_ccn504_base + (REGION_XP + xp_node_id)*REGION_SIZE + DT_CONFIG);
-}
-
-static int gator_events_ccn504_start(void)
-{
-	int i;
-
-	gator_events_ccn504_global_enabled = 0;
-	for (i = 0; i < CNTMAX; ++i) {
-		if (gator_events_ccn504_enabled[i]) {
-			gator_events_ccn504_global_enabled = 1;
-			break;
-		}
-	}
-
-	if (!gator_events_ccn504_global_enabled) {
-		return 0;
-	}
-
-	memset(&gator_events_ccn504_prev, 0x80, sizeof(gator_events_ccn504_prev));
-
-	// Disable INTREQ on overflow
-	// [6] ovfl_intr_en = 0
-	// perhaps set to 1?
-	// [5] cntr_rst = 0
-	// No register paring
-	// [4:1] cntcfg = 0
-	// Enable PMU features
-	// [0] pmu_en = 1
-	writel(0x1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMCR);
-
-	// Configure the XPs
-	for (i = 0; i < NUM_XPS; ++i) {
-		int dt_control;
-
-		// Pass on all events
-		writel(0, gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONFIG);
-
-		// Enable PMU capability
-		// [0] dt_enable = 1
-		dt_control = readl(gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONTROL);
-		dt_control |= 0x1;
-		writel(dt_control, gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONTROL);
-	}
-
-	// Assume no other pmu_event_sel registers are set
-
-	// cycle counter does not need to be enabled
-	for (i = 0; i < CCNT; ++i) {
-		int pmu_event_id;
-		int node_type;
-		int region;
-		u32 pmu_event_sel;
-		u32 oly_id_whole;
-		u32 oly_id;
-		u32 node_id;
-
-		if (!gator_events_ccn504_enabled[i]) {
-			continue;
-		}
-
-		pmu_event_id = get_pmu_event_id(gator_events_ccn504_event[i]);
-		node_type = get_node_type(gator_events_ccn504_event[i]);
-		region = get_region(gator_events_ccn504_event[i]);
-
-		// Verify the node_type
-		oly_id_whole = readl(gator_events_ccn504_base + region*REGION_SIZE + OLY_ID);
-		oly_id = oly_id_whole & 0x1F;
-		node_id = (oly_id_whole >> 8) & 0x7F;
-		if ((oly_id != node_type) ||
-				((node_type == 0x16) && ((oly_id != 0x14) && (oly_id != 0x15) && (oly_id != 0x16) && (oly_id != 0x18) && (oly_id != 0x19) && (oly_id != 0x1A)))) {
-			printk(KERN_ERR "gator: oly_id is 0x%x expected 0x%x\n", oly_id, node_type);
-			return -1;
-		}
-
-		// Set the control register
-		pmu_event_sel = readl(gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL);
-		switch (node_type) {
-		case 0x08: // XP
-			pmu_event_sel |= pmu_event_id << (7*i);
-			gator_events_ccn504_set_dt_config(node_id, i, 0x4);
-			break;
-		case 0x04: // HN-F
-		case 0x16: // RN-I
-		case 0x10: // SBAS
-			pmu_event_sel |= pmu_event_id << (4*i);
-			gator_events_ccn504_set_dt_config(node_id/2, i, (node_id & 1) == 0 ? 0x8 : 0xC);
-			break;
-		}
-		writel(pmu_event_sel, gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL);
-	}
-
-	return 0;
-}
-
-static void gator_events_ccn504_stop(void)
-{
-	int i;
-
-	if (!gator_events_ccn504_global_enabled) {
-		return;
-	}
-
-	// cycle counter does not need to be disabled
-	for (i = 0; i < CCNT; ++i) {
-		int region;
-
-		if (!gator_events_ccn504_enabled[i]) {
-			continue;
-		}
-
-		region = get_region(gator_events_ccn504_event[i]);
-
-		writel(0, gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL);
-	}
-
-	// Clear dt_config
-	for (i = 0; i < NUM_XPS; ++i) {
-		writel(0, gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONFIG);
-	}
-}
-
-static int gator_events_ccn504_read(int **buffer)
-{
-	int i;
-	int len = 0;
-	int value;
-
-	if (!on_primary_core() || !gator_events_ccn504_global_enabled) {
-		return 0;
-	}
-
-	// Verify the pmsr register is zero
-	while (readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR) != 0);
-
-	// Request a PMU snapshot
-	writel(1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR_REQ);
-
-	// Wait for the snapshot
-	while (readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR) == 0);
-
-	// Read the shadow registers
-	for (i = 0; i < CNTMAX; ++i) {
-		if (!gator_events_ccn504_enabled[i]) {
-			continue;
-		}
-
-		value = readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + (i == CCNT ? PMCCNTRSR : PMEVCNTSR0 + 8*i));
-		if (gator_events_ccn504_prev[i] != 0x80808080) {
-			gator_events_ccn504_buffer[len++] = gator_events_ccn504_key[i];
-			gator_events_ccn504_buffer[len++] = value - gator_events_ccn504_prev[i];
-		}
-		gator_events_ccn504_prev[i] = value;
-
-		// Are the counters registers cleared when read? Is that what the cntr_rst bit on the pmcr register does?
-	}
-
-	// Clear the PMU snapshot status
-	writel(1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR_CLR);
-
-	if (buffer)
-		*buffer = gator_events_ccn504_buffer;
-
-	return len;
-}
-
-static struct gator_interface gator_events_ccn504_interface = {
-	.shutdown = gator_events_ccn504_create_shutdown,
-	.create_files = gator_events_ccn504_create_files,
-	.start = gator_events_ccn504_start,
-	.stop = gator_events_ccn504_stop,
-	.read = gator_events_ccn504_read,
-};
-
-int gator_events_ccn504_init(void)
-{
-	int i;
-
-	if (ccn504_addr == 0) {
-		return -1;
-	}
-
-	gator_events_ccn504_base = ioremap(ccn504_addr, NUM_REGIONS*REGION_SIZE);
-	if (gator_events_ccn504_base == NULL) {
-		printk(KERN_ERR "gator: ioremap returned NULL\n");
-		return -1;
-	}
-
-	for (i = 0; i < CNTMAX; ++i) {
-		gator_events_ccn504_enabled[i] = 0;
-		gator_events_ccn504_event[i] = 0;
-		gator_events_ccn504_key[i] = gator_events_get_key();
-	}
-
-	return gator_events_install(&gator_events_ccn504_interface);
-}
diff --git a/drivers/gator/gator_events_irq.c b/drivers/gator/gator_events_irq.c
index facbdd62325e..5221aac581b3 100644
--- a/drivers/gator/gator_events_irq.c
+++ b/drivers/gator/gator_events_irq.c
@@ -42,17 +42,15 @@ static int gator_events_irq_create_files(struct super_block *sb, struct dentry *
 
 	/* irq */
 	dir = gatorfs_mkdir(sb, root, "Linux_irq_irq");
-	if (!dir) {
+	if (!dir)
 		return -1;
-	}
 	gatorfs_create_ulong(sb, dir, "enabled", &hardirq_enabled);
 	gatorfs_create_ro_ulong(sb, dir, "key", &hardirq_key);
 
 	/* soft irq */
 	dir = gatorfs_mkdir(sb, root, "Linux_irq_softirq");
-	if (!dir) {
+	if (!dir)
 		return -1;
-	}
 	gatorfs_create_ulong(sb, dir, "enabled", &softirq_enabled);
 	gatorfs_create_ro_ulong(sb, dir, "key", &softirq_key);
 
@@ -63,7 +61,7 @@ static int gator_events_irq_online(int **buffer, bool migrate)
 {
 	int len = 0, cpu = get_physical_cpu();
 
-	// synchronization with the irq_exit functions is not necessary as the values are being reset
+	/* synchronization with the irq_exit functions is not necessary as the values are being reset */
 	if (hardirq_enabled) {
 		atomic_set(&per_cpu(irqCnt, cpu)[HARDIRQ], 0);
 		per_cpu(irqGet, cpu)[len++] = hardirq_key;
@@ -84,7 +82,7 @@ static int gator_events_irq_online(int **buffer, bool migrate)
 
 static int gator_events_irq_start(void)
 {
-	// register tracepoints
+	/* register tracepoints */
 	if (hardirq_enabled)
 		if (GATOR_REGISTER_TRACE(irq_handler_exit))
 			goto fail_hardirq_exit;
@@ -95,7 +93,7 @@ static int gator_events_irq_start(void)
 
 	return 0;
 
-	// unregister tracepoints on error
+	/* unregister tracepoints on error */
 fail_softirq_exit:
 	if (hardirq_enabled)
 		GATOR_UNREGISTER_TRACE(irq_handler_exit);
@@ -117,7 +115,7 @@ static void gator_events_irq_stop(void)
 	softirq_enabled = 0;
 }
 
-static int gator_events_irq_read(int **buffer)
+static int gator_events_irq_read(int **buffer, bool sched_switch)
 {
 	int len, value;
 	int cpu = get_physical_cpu();
diff --git a/drivers/gator/gator_events_l2c-310.c b/drivers/gator/gator_events_l2c-310.c
index 553f9707bdbf..73aaac32327e 100644
--- a/drivers/gator/gator_events_l2c-310.c
+++ b/drivers/gator/gator_events_l2c-310.c
@@ -91,7 +91,7 @@ static void gator_events_l2c310_stop(void)
 	writel(0, l2c310_base + L2X0_EVENT_CNT_CTRL);
 }
 
-static int gator_events_l2c310_read(int **buffer)
+static int gator_events_l2c310_read(int **buffer, bool sched_switch)
 {
 	static const unsigned long l2x0_event_cntx_val[L2C310_COUNTERS_NUM] = {
 		L2X0_EVENT_CNT0_VAL,
@@ -149,8 +149,8 @@ static void __iomem *gator_events_l2c310_probe(void)
 		0xa0412000,
 #endif
 #if defined(CONFIG_ARCH_VEXPRESS)
-		0x1e00a000, // A9x4 core tile (HBI-0191)
-		0x2c0f0000, // New memory map tiles
+		0x1e00a000, /* A9x4 core tile (HBI-0191) */
+		0x2c0f0000, /* New memory map tiles */
 #endif
 	};
 	int i;
diff --git a/drivers/gator/gator_events_mali_4xx.c b/drivers/gator/gator_events_mali_4xx.c
index 9e1c7064bd73..9cf43fe2c29b 100644
--- a/drivers/gator/gator_events_mali_4xx.c
+++ b/drivers/gator/gator_events_mali_4xx.c
@@ -36,7 +36,7 @@
 #elif GATOR_MALI_INTERFACE_STYLE == 2
 #error GATOR_MALI_INTERFACE_STYLE 2 is obsolete
 #elif GATOR_MALI_INTERFACE_STYLE >= 3
-// Valid GATOR_MALI_INTERFACE_STYLE
+/* Valid GATOR_MALI_INTERFACE_STYLE */
 #else
 #error Unknown GATOR_MALI_INTERFACE_STYLE option.
 #endif
@@ -54,7 +54,7 @@
 #error MALI_SUPPORT set to an invalid device code: expecting MALI_4xx
 #endif
 
-static const char mali_name[] = "Mali-4xx";
+static const char mali_name[] = "4xx";
 
 /* gatorfs variables for counter enable state,
  * the event the counter should count and the
@@ -73,8 +73,8 @@ static u32 *counter_address[NUMBER_OF_EVENTS];
 /* An array used to return the data we recorded
  * as key,value pairs hence the *2
  */
-static unsigned long counter_dump[NUMBER_OF_EVENTS * 2];
-static unsigned long counter_prev[NUMBER_OF_EVENTS];
+static int counter_dump[NUMBER_OF_EVENTS * 2];
+static int counter_prev[NUMBER_OF_EVENTS];
 static bool prev_set[NUMBER_OF_EVENTS];
 
 /* Note whether tracepoints have been registered */
@@ -89,8 +89,8 @@ static unsigned int n_vp_cores = MAX_NUM_VP_CORES;
 static unsigned int n_l2_cores = MAX_NUM_L2_CACHE_CORES;
 static unsigned int n_fp_cores = MAX_NUM_FP_CORES;
 
-extern mali_counter mali_activity[2];
-static const char* const mali_activity_names[] = {
+extern struct mali_counter mali_activity[2];
+static const char *const mali_activity_names[] = {
 	"fragment",
 	"vertex",
 };
@@ -112,36 +112,11 @@ static inline int is_hw_counter(unsigned int event_id)
 	return (event_id >= FIRST_HW_COUNTER && event_id <= LAST_HW_COUNTER);
 }
 
-/*
- * These are provided for utgard compatibility.
- */
-typedef void _mali_profiling_get_mali_version_type(struct _mali_profiling_mali_version *values);
-typedef u32 _mali_profiling_get_l2_counters_type(_mali_profiling_l2_counter_values *values);
-
-/* Probe for continuously sampled counter */
-#if 0				//WE_DONT_CURRENTLY_USE_THIS_SO_SUPPRESS_WARNING
-GATOR_DEFINE_PROBE(mali_sample_address, TP_PROTO(unsigned int event_id, u32 *addr))
-{
-	/* Turning on too many pr_debug statements in frequently called functions
-	 * can cause stability and/or performance problems
-	 */
-	//pr_debug("gator: mali_sample_address %d %d\n", event_id, addr);
-	if (event_id >= ACTIVITY_VP && event_id <= COUNTER_FP3_C1) {
-		counter_address[event_id] = addr;
-	}
-}
-#endif
-
 /* Probe for hardware counter events */
 GATOR_DEFINE_PROBE(mali_hw_counter, TP_PROTO(unsigned int event_id, unsigned int value))
 {
-	/* Turning on too many pr_debug statements in frequently called functions
-	 * can cause stability and/or performance problems
-	 */
-	//pr_debug("gator: mali_hw_counter %d %d\n", event_id, value);
-	if (is_hw_counter(event_id)) {
+	if (is_hw_counter(event_id))
 		counter_data[event_id] = value;
-	}
 }
 
 GATOR_DEFINE_PROBE(mali_sw_counters, TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters))
@@ -150,9 +125,8 @@ GATOR_DEFINE_PROBE(mali_sw_counters, TP_PROTO(pid_t pid, pid_t tid, void *surfac
 
 	/* Copy over the values for those counters which are enabled. */
 	for (i = FIRST_SW_COUNTER; i <= LAST_SW_COUNTER; i++) {
-		if (counter_enabled[i]) {
+		if (counter_enabled[i])
 			counter_data[i] = (u32)(counters[i - FIRST_SW_COUNTER]);
-		}
 	}
 }
 
@@ -172,13 +146,11 @@ static int create_fs_entry(struct super_block *sb, struct dentry *root, const ch
 
 	dir = gatorfs_mkdir(sb, root, name);
 
-	if (!dir) {
+	if (!dir)
 		return -1;
-	}
 
-	if (create_event_item) {
+	if (create_event_item)
 		gatorfs_create_ulong(sb, dir, "event", &counter_event[event]);
-	}
 
 	gatorfs_create_ulong(sb, dir, "enabled", &counter_enabled[event]);
 	gatorfs_create_ro_ulong(sb, dir, "key", &counter_key[event]);
@@ -192,7 +164,7 @@ static int create_fs_entry(struct super_block *sb, struct dentry *root, const ch
  */
 static void initialise_version_info(void)
 {
-	_mali_profiling_get_mali_version_type *mali_profiling_get_mali_version_symbol;
+	void (*mali_profiling_get_mali_version_symbol)(struct _mali_profiling_mali_version *values);
 
 	mali_profiling_get_mali_version_symbol = symbol_get(_mali_profiling_get_mali_version);
 
@@ -214,8 +186,8 @@ static void initialise_version_info(void)
 		/* Release the function - we're done with it. */
 		symbol_put(_mali_profiling_get_mali_version);
 	} else {
-		printk("gator: mali online _mali_profiling_get_mali_version symbol not found\n");
-		printk("gator:  check your Mali DDK version versus the GATOR_MALI_INTERFACE_STYLE setting\n");
+		pr_err("gator: mali online _mali_profiling_get_mali_version symbol not found\n");
+		pr_err("gator:  check your Mali DDK version versus the GATOR_MALI_INTERFACE_STYLE setting\n");
 	}
 }
 #endif
@@ -242,26 +214,24 @@ static int create_files(struct super_block *sb, struct dentry *root)
 	mali_activity[0].cores = n_fp_cores;
 	mali_activity[1].cores = n_vp_cores;
 	for (event = 0; event < ARRAY_SIZE(mali_activity); event++) {
-		if (gator_mali_create_file_system(mali_name, mali_activity_names[event], sb, root, &mali_activity[event], NULL) != 0) {
+		if (gator_mali_create_file_system(mali_name, mali_activity_names[event], sb, root, &mali_activity[event], NULL) != 0)
 			return -1;
-		}
 	}
 
 	/* Vertex processor counters */
 	for (core_id = 0; core_id < n_vp_cores; core_id++) {
 		int activity_counter_id = ACTIVITY_VP_0;
-		snprintf(buf, sizeof buf, "ARM_%s_VP_%d_active", mali_name, core_id);
-		if (create_fs_entry(sb, root, buf, activity_counter_id, 0) != 0) {
+
+		snprintf(buf, sizeof(buf), "ARM_Mali-%s_VP_%d_active", mali_name, core_id);
+		if (create_fs_entry(sb, root, buf, activity_counter_id, 0) != 0)
 			return -1;
-		}
 
 		for (counter_number = 0; counter_number < 2; counter_number++) {
 			int counter_id = COUNTER_VP_0_C0 + (2 * core_id) + counter_number;
 
-			snprintf(buf, sizeof buf, "ARM_%s_VP_%d_cnt%d", mali_name, core_id, counter_number);
-			if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) {
+			snprintf(buf, sizeof(buf), "ARM_Mali-%s_VP_%d_cnt%d", mali_name, core_id, counter_number);
+			if (create_fs_entry(sb, root, buf, counter_id, 1) != 0)
 				return -1;
-			}
 		}
 	}
 
@@ -269,18 +239,16 @@ static int create_files(struct super_block *sb, struct dentry *root)
 	for (core_id = 0; core_id < n_fp_cores; core_id++) {
 		int activity_counter_id = ACTIVITY_FP_0 + core_id;
 
-		snprintf(buf, sizeof buf, "ARM_%s_FP_%d_active", mali_name, core_id);
-		if (create_fs_entry(sb, root, buf, activity_counter_id, 0) != 0) {
+		snprintf(buf, sizeof(buf), "ARM_Mali-%s_FP_%d_active", mali_name, core_id);
+		if (create_fs_entry(sb, root, buf, activity_counter_id, 0) != 0)
 			return -1;
-		}
 
 		for (counter_number = 0; counter_number < 2; counter_number++) {
 			int counter_id = COUNTER_FP_0_C0 + (2 * core_id) + counter_number;
 
-			snprintf(buf, sizeof buf, "ARM_%s_FP_%d_cnt%d", mali_name, core_id, counter_number);
-			if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) {
+			snprintf(buf, sizeof(buf), "ARM_Mali-%s_FP_%d_cnt%d", mali_name, core_id, counter_number);
+			if (create_fs_entry(sb, root, buf, counter_id, 1) != 0)
 				return -1;
-			}
 		}
 	}
 
@@ -289,38 +257,33 @@ static int create_files(struct super_block *sb, struct dentry *root)
 		for (counter_number = 0; counter_number < 2; counter_number++) {
 			int counter_id = COUNTER_L2_0_C0 + (2 * core_id) + counter_number;
 
-			snprintf(buf, sizeof buf, "ARM_%s_L2_%d_cnt%d", mali_name, core_id, counter_number);
-			if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) {
+			snprintf(buf, sizeof(buf), "ARM_Mali-%s_L2_%d_cnt%d", mali_name, core_id, counter_number);
+			if (create_fs_entry(sb, root, buf, counter_id, 1) != 0)
 				return -1;
-			}
 		}
 	}
 
 	/* Now set up the software counter entries */
 	for (event = FIRST_SW_COUNTER; event <= LAST_SW_COUNTER; event++) {
-		snprintf(buf, sizeof(buf), "ARM_%s_SW_%d", mali_name, event - FIRST_SW_COUNTER);
+		snprintf(buf, sizeof(buf), "ARM_Mali-%s_SW_%d", mali_name, event - FIRST_SW_COUNTER);
 
-		if (create_fs_entry(sb, root, buf, event, 0) != 0) {
+		if (create_fs_entry(sb, root, buf, event, 0) != 0)
 			return -1;
-		}
 	}
 
 	/* Now set up the special counter entries */
-	snprintf(buf, sizeof(buf), "ARM_%s_Filmstrip_cnt0", mali_name);
-	if (create_fs_entry(sb, root, buf, COUNTER_FILMSTRIP, 1) != 0) {
+	snprintf(buf, sizeof(buf), "ARM_Mali-%s_Filmstrip_cnt0", mali_name);
+	if (create_fs_entry(sb, root, buf, COUNTER_FILMSTRIP, 1) != 0)
 		return -1;
-	}
 
 #ifdef DVFS_REPORTED_BY_DDK
-	snprintf(buf, sizeof(buf), "ARM_%s_Frequency", mali_name);
-	if (create_fs_entry(sb, root, buf, COUNTER_FREQUENCY, 1) != 0) {
+	snprintf(buf, sizeof(buf), "ARM_Mali-%s_Frequency", mali_name);
+	if (create_fs_entry(sb, root, buf, COUNTER_FREQUENCY, 1) != 0)
 		return -1;
-	}
 
-	snprintf(buf, sizeof(buf), "ARM_%s_Voltage", mali_name);
-	if (create_fs_entry(sb, root, buf, COUNTER_VOLTAGE, 1) != 0) {
+	snprintf(buf, sizeof(buf), "ARM_Mali-%s_Voltage", mali_name);
+	if (create_fs_entry(sb, root, buf, COUNTER_VOLTAGE, 1) != 0)
 		return -1;
-	}
 #endif
 
 	return 0;
@@ -330,8 +293,8 @@ static int create_files(struct super_block *sb, struct dentry *root)
  * Local store for the get_counters entry point into the DDK.
  * This is stored here since it is used very regularly.
  */
-static mali_profiling_get_counters_type *mali_get_counters = NULL;
-static _mali_profiling_get_l2_counters_type *mali_get_l2_counters = NULL;
+static void (*mali_get_counters)(unsigned int *, unsigned int *, unsigned int *, unsigned int *);
+static u32 (*mali_get_l2_counters)(struct _mali_profiling_l2_counter_values *values);
 
 /*
  * Examine list of counters between two index limits and determine if any one is enabled.
@@ -342,9 +305,8 @@ static int is_any_counter_enabled(unsigned int first_counter, unsigned int last_
 	unsigned int i;
 
 	for (i = first_counter; i <= last_counter; i++) {
-		if (counter_enabled[i]) {
+		if (counter_enabled[i])
 			return 1;	/* At least one counter is enabled */
-		}
 	}
 
 	return 0;		/* No s/w counters enabled */
@@ -366,16 +328,15 @@ static void init_counters(unsigned int from_counter, unsigned int to_counter)
 		pr_debug("gator: mali online _mali_profiling_set_event symbol @ %p\n", mali_set_hw_event);
 
 		for (counter_id = from_counter; counter_id <= to_counter; counter_id++) {
-			if (counter_enabled[counter_id]) {
+			if (counter_enabled[counter_id])
 				mali_set_hw_event(counter_id, counter_event[counter_id]);
-			} else {
+			else
 				mali_set_hw_event(counter_id, 0xFFFFFFFF);
-			}
 		}
 
 		symbol_put(_mali_profiling_set_event);
 	} else {
-		printk("gator: mali online _mali_profiling_set_event symbol not found\n");
+		pr_err("gator: mali online _mali_profiling_set_event symbol not found\n");
 	}
 }
 
@@ -407,27 +368,23 @@ static void mali_counter_initialize(void)
 
 		symbol_put(_mali_profiling_control);
 	} else {
-		printk("gator: mali online _mali_profiling_control symbol not found\n");
+		pr_err("gator: mali online _mali_profiling_control symbol not found\n");
 	}
 
 	mali_get_counters = symbol_get(_mali_profiling_get_counters);
-	if (mali_get_counters) {
+	if (mali_get_counters)
 		pr_debug("gator: mali online _mali_profiling_get_counters symbol @ %p\n", mali_get_counters);
-
-	} else {
-		pr_debug("gator WARNING: mali _mali_profiling_get_counters symbol not defined");
-	}
+	else
+		pr_debug("gator WARNING: mali _mali_profiling_get_counters symbol not defined\n");
 
 	mali_get_l2_counters = symbol_get(_mali_profiling_get_l2_counters);
-	if (mali_get_l2_counters) {
+	if (mali_get_l2_counters)
 		pr_debug("gator: mali online _mali_profiling_get_l2_counters symbol @ %p\n", mali_get_l2_counters);
-
-	} else {
-		pr_debug("gator WARNING: mali _mali_profiling_get_l2_counters symbol not defined");
-	}
+	else
+		pr_debug("gator WARNING: mali _mali_profiling_get_l2_counters symbol not defined\n");
 
 	if (!mali_get_counters && !mali_get_l2_counters) {
-		pr_debug("gator: WARNING: no L2 counters available");
+		pr_debug("gator: WARNING: no L2 counters available\n");
 		n_l2_cores = 0;
 	}
 
@@ -449,13 +406,12 @@ static void mali_counter_deinitialize(void)
 		int i;
 
 		pr_debug("gator: mali offline _mali_profiling_set_event symbol @ %p\n", mali_set_hw_event);
-		for (i = FIRST_HW_COUNTER; i <= LAST_HW_COUNTER; i++) {
+		for (i = FIRST_HW_COUNTER; i <= LAST_HW_COUNTER; i++)
 			mali_set_hw_event(i, 0xFFFFFFFF);
-		}
 
 		symbol_put(_mali_profiling_set_event);
 	} else {
-		printk("gator: mali offline _mali_profiling_set_event symbol not found\n");
+		pr_err("gator: mali offline _mali_profiling_set_event symbol not found\n");
 	}
 
 	/* Generic control interface for Mali DDK. */
@@ -471,29 +427,27 @@ static void mali_counter_deinitialize(void)
 
 		symbol_put(_mali_profiling_control);
 	} else {
-		printk("gator: mali offline _mali_profiling_control symbol not found\n");
+		pr_err("gator: mali offline _mali_profiling_control symbol not found\n");
 	}
 
-	if (mali_get_counters) {
+	if (mali_get_counters)
 		symbol_put(_mali_profiling_get_counters);
-	}
 
-	if (mali_get_l2_counters) {
+	if (mali_get_l2_counters)
 		symbol_put(_mali_profiling_get_l2_counters);
-	}
 }
 
 static int start(void)
 {
-	// register tracepoints
+	/* register tracepoints */
 	if (GATOR_REGISTER_TRACE(mali_hw_counter)) {
-		printk("gator: mali_hw_counter tracepoint failed to activate\n");
+		pr_err("gator: mali_hw_counter tracepoint failed to activate\n");
 		return -1;
 	}
 
 	/* For Mali drivers with built-in support. */
 	if (GATOR_REGISTER_TRACE(mali_sw_counters)) {
-		printk("gator: mali_sw_counters tracepoint failed to activate\n");
+		pr_err("gator: mali_sw_counters tracepoint failed to activate\n");
 		return -1;
 	}
 
@@ -543,17 +497,17 @@ static void dump_counters(unsigned int from_counter, unsigned int to_counter, un
 	}
 }
 
-static int read(int **buffer)
+static int read(int **buffer, bool sched_switch)
 {
 	int len = 0;
 
 	if (!on_primary_core())
 		return 0;
 
-	// Read the L2 C0 and C1 here.
+	/* Read the L2 C0 and C1 here. */
 	if (n_l2_cores > 0 && is_any_counter_enabled(COUNTER_L2_0_C0, COUNTER_L2_0_C0 + (2 * n_l2_cores))) {
 		unsigned int unavailable_l2_caches = 0;
-		_mali_profiling_l2_counter_values cache_values;
+		struct _mali_profiling_l2_counter_values cache_values;
 		unsigned int cache_id;
 		struct _mali_profiling_core_counters *per_core;
 
@@ -572,25 +526,24 @@ static int read(int **buffer)
 			unsigned int counter_id_0 = COUNTER_L2_0_C0 + (2 * cache_id);
 			unsigned int counter_id_1 = counter_id_0 + 1;
 
-			if ((1 << cache_id) & unavailable_l2_caches) {
+			if ((1 << cache_id) & unavailable_l2_caches)
 				continue; /* This cache is unavailable (powered-off, possibly). */
-			}
 
 			per_core = &cache_values.cores[cache_id];
 
 			if (counter_enabled[counter_id_0] && prev_set[counter_id_0]) {
-				// Calculate and save src0's counter val0
+				/* Calculate and save src0's counter val0 */
 				counter_dump[len++] = counter_key[counter_id_0];
 				counter_dump[len++] = per_core->value0 - counter_prev[counter_id_0];
 			}
 
 			if (counter_enabled[counter_id_1] && prev_set[counter_id_1]) {
-				// Calculate and save src1's counter val1
+				/* Calculate and save src1's counter val1 */
 				counter_dump[len++] = counter_key[counter_id_1];
 				counter_dump[len++] = per_core->value1 - counter_prev[counter_id_1];
 			}
 
-			// Save the previous values for the counters.
+			/* Save the previous values for the counters. */
 			counter_prev[counter_id_0] = per_core->value0;
 			prev_set[counter_id_0] = true;
 			counter_prev[counter_id_1] = per_core->value1;
@@ -608,8 +561,9 @@ static int read(int **buffer)
 	{
 		int cnt;
 		/*
-		 * Add in the voltage and frequency counters if enabled.  Note that, since these are
-		 * actually passed as events, the counter value should not be cleared.
+		 * Add in the voltage and frequency counters if enabled. Note
+		 * that, since these are actually passed as events, the counter
+		 * value should not be cleared.
 		 */
 		cnt = COUNTER_FREQUENCY;
 		if (counter_enabled[cnt]) {
@@ -625,9 +579,8 @@ static int read(int **buffer)
 	}
 #endif
 
-	if (buffer) {
-		*buffer = (int *)counter_dump;
-	}
+	if (buffer)
+		*buffer = counter_dump;
 
 	return len;
 }
diff --git a/drivers/gator/gator_events_mali_common.c b/drivers/gator/gator_events_mali_common.c
index 4f2cce4ce67b..1af87d649afe 100644
--- a/drivers/gator/gator_events_mali_common.c
+++ b/drivers/gator/gator_events_mali_common.c
@@ -8,7 +8,7 @@
  */
 #include "gator_events_mali_common.h"
 
-extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, mali_counter *counter, unsigned long *event)
+extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, struct mali_counter *counter, unsigned long *event)
 {
 	int err;
 	char buf[255];
@@ -17,36 +17,39 @@ extern int gator_mali_create_file_system(const char *mali_name, const char *even
 	/* If the counter name is empty ignore it */
 	if (strlen(event_name) != 0) {
 		/* Set up the filesystem entry for this event. */
-		snprintf(buf, sizeof(buf), "ARM_%s_%s", mali_name, event_name);
+		if (mali_name == NULL)
+			snprintf(buf, sizeof(buf), "ARM_Mali-%s", event_name);
+		else
+			snprintf(buf, sizeof(buf), "ARM_Mali-%s_%s", mali_name, event_name);
 
 		dir = gatorfs_mkdir(sb, root, buf);
 
 		if (dir == NULL) {
-			pr_debug("gator: %s: error creating file system for: %s (%s)", mali_name, event_name, buf);
+			pr_debug("gator: %s: error creating file system for: %s (%s)\n", mali_name, event_name, buf);
 			return -1;
 		}
 
 		err = gatorfs_create_ulong(sb, dir, "enabled", &counter->enabled);
 		if (err != 0) {
-			pr_debug("gator: %s: error calling gatorfs_create_ulong for: %s (%s)", mali_name, event_name, buf);
+			pr_debug("gator: %s: error calling gatorfs_create_ulong for: %s (%s)\n", mali_name, event_name, buf);
 			return -1;
 		}
 		err = gatorfs_create_ro_ulong(sb, dir, "key", &counter->key);
 		if (err != 0) {
-			pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)", mali_name, event_name, buf);
+			pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)\n", mali_name, event_name, buf);
 			return -1;
 		}
 		if (counter->cores != -1) {
 			err = gatorfs_create_ro_ulong(sb, dir, "cores", &counter->cores);
 			if (err != 0) {
-				pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)", mali_name, event_name, buf);
+				pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)\n", mali_name, event_name, buf);
 				return -1;
 			}
 		}
 		if (event != NULL) {
 			err = gatorfs_create_ulong(sb, dir, "event", event);
 			if (err != 0) {
-				pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)", mali_name, event_name, buf);
+				pr_debug("gator: %s: error calling gatorfs_create_ulong for: %s (%s)\n", mali_name, event_name, buf);
 				return -1;
 			}
 		}
@@ -55,12 +58,12 @@ extern int gator_mali_create_file_system(const char *mali_name, const char *even
 	return 0;
 }
 
-extern void gator_mali_initialise_counters(mali_counter counters[], unsigned int n_counters)
+extern void gator_mali_initialise_counters(struct mali_counter counters[], unsigned int n_counters)
 {
 	unsigned int cnt;
 
 	for (cnt = 0; cnt < n_counters; cnt++) {
-		mali_counter *counter = &counters[cnt];
+		struct mali_counter *counter = &counters[cnt];
 
 		counter->key = gator_events_get_key();
 		counter->enabled = 0;
diff --git a/drivers/gator/gator_events_mali_common.h b/drivers/gator/gator_events_mali_common.h
index 91d871bc915a..e7082e62fe88 100644
--- a/drivers/gator/gator_events_mali_common.h
+++ b/drivers/gator/gator_events_mali_common.h
@@ -16,7 +16,7 @@
 #include <linux/time.h>
 #include <linux/math64.h>
 #include <linux/slab.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
 /* Ensure that MALI_SUPPORT has been defined to something. */
 #ifndef MALI_SUPPORT
@@ -30,21 +30,20 @@
 /*
  * Runtime state information for a counter.
  */
-typedef struct {
-	// 'key' (a unique id set by gatord and returned by gator.ko)
+struct mali_counter {
+	/* 'key' (a unique id set by gatord and returned by gator.ko) */
 	unsigned long key;
-	// counter enable state
+	/* counter enable state */
 	unsigned long enabled;
-	// for activity counters, the number of cores, otherwise -1
+	/* for activity counters, the number of cores, otherwise -1 */
 	unsigned long cores;
-} mali_counter;
+};
 
 /*
  * Mali-4xx
  */
 typedef int mali_profiling_set_event_type(unsigned int, int);
 typedef void mali_profiling_control_type(unsigned int, unsigned int);
-typedef void mali_profiling_get_counters_type(unsigned int *, unsigned int *, unsigned int *, unsigned int *);
 
 /*
  * Driver entry points for functions called directly by gator.
@@ -65,7 +64,7 @@ extern void _mali_profiling_get_counters(unsigned int *, unsigned int *, unsigne
  *
  * @return 0 if entry point was created, non-zero if not.
  */
-extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, mali_counter *counter, unsigned long *event);
+extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, struct mali_counter *counter, unsigned long *event);
 
 /**
  * Initializes the counter array.
@@ -73,6 +72,6 @@ extern int gator_mali_create_file_system(const char *mali_name, const char *even
  * @param keys The array of counters
  * @param n_counters The number of entries in each of the arrays.
  */
-extern void gator_mali_initialise_counters(mali_counter counters[], unsigned int n_counters);
+extern void gator_mali_initialise_counters(struct mali_counter counters[], unsigned int n_counters);
 
 #endif /* GATOR_EVENTS_MALI_COMMON_H  */
diff --git a/drivers/gator/gator_events_mali_t6xx.c b/drivers/gator/gator_events_mali_midgard.c
similarity index 78%
rename from drivers/gator/gator_events_mali_t6xx.c
rename to drivers/gator/gator_events_mali_midgard.c
index e56ba84aefb8..0aec906d7ae5 100644
--- a/drivers/gator/gator_events_mali_t6xx.c
+++ b/drivers/gator/gator_events_mali_midgard.c
@@ -13,7 +13,7 @@
 #include <linux/time.h>
 #include <linux/math64.h>
 #include <linux/slab.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
 #ifdef MALI_DIR_MIDGARD
 /* New DDK Directory structure with kernel/drivers/gpu/arm/midgard*/
@@ -28,13 +28,13 @@
 /*
  * Check that the MALI_SUPPORT define is set to one of the allowable device codes.
  */
-#if (MALI_SUPPORT != MALI_T6xx)
-#error MALI_SUPPORT set to an invalid device code: expecting MALI_T6xx
+#if (MALI_SUPPORT != MALI_MIDGARD)
+#error MALI_SUPPORT set to an invalid device code: expecting MALI_MIDGARD
 #endif
 
-static const char mali_name[] = "Mali-T6xx";
+static const char mali_name[] = "Midgard";
 
-/* Counters for Mali-T6xx:
+/* Counters for Mali-Midgard:
  *
  *  - Timeline events
  *    They are tracepoints, but instead of reporting a number they report a START/STOP event.
@@ -49,7 +49,7 @@ static const char mali_name[] = "Mali-T6xx";
  */
 
 /* Timeline (start/stop) activity */
-static const char *timeline_event_names[] = {
+static const char *const timeline_event_names[] = {
 	"PM_SHADER_0",
 	"PM_SHADER_1",
 	"PM_SHADER_2",
@@ -88,7 +88,7 @@ enum {
 #define NUM_PM_SHADER (8)
 
 /* Software Counters */
-static const char *software_counter_names[] = {
+static const char *const software_counter_names[] = {
 	"MMU_PAGE_FAULT_0",
 	"MMU_PAGE_FAULT_1",
 	"MMU_PAGE_FAULT_2",
@@ -103,7 +103,7 @@ enum {
 };
 
 /* Software Counters */
-static const char *accumulators_names[] = {
+static const char *const accumulators_names[] = {
 	"TOTAL_ALLOC_PAGES"
 };
 
@@ -123,17 +123,18 @@ enum {
 /*
  * gatorfs variables for counter enable state
  */
-static mali_counter counters[NUMBER_OF_EVENTS];
+static struct mali_counter counters[NUMBER_OF_EVENTS];
 static unsigned long filmstrip_event;
 
 /* An array used to return the data we recorded
  * as key,value pairs hence the *2
  */
-static unsigned long counter_dump[NUMBER_OF_EVENTS * 2];
+static int counter_dump[NUMBER_OF_EVENTS * 2];
 
 /*
- * Array holding counter start times (in ns) for each counter.  A zero here
- * indicates that the activity monitored by this counter is not running.
+ * Array holding counter start times (in ns) for each counter. A zero
+ * here indicates that the activity monitored by this counter is not
+ * running.
  */
 static struct timespec timeline_event_starttime[NUMBER_OF_TIMELINE_EVENTS];
 
@@ -156,6 +157,7 @@ static struct timespec prev_timestamp;
 static inline long get_duration_us(const struct timespec *start, const struct timespec *end)
 {
 	long event_duration_us = (end->tv_nsec - start->tv_nsec) / 1000;
+
 	event_duration_us += (end->tv_sec - start->tv_sec) * 1000000;
 
 	return event_duration_us;
@@ -172,9 +174,8 @@ static void record_timeline_event(unsigned int timeline_index, unsigned int type
 		getnstimeofday(&event_timestamp);
 
 		/* Remember the start time if the activity is not already started */
-		if (event_start->tv_sec == 0) {
+		if (event_start->tv_sec == 0)
 			*event_start = event_timestamp;	/* Structure copy */
-		}
 		break;
 
 	case ACTIVITY_STOP:
@@ -208,9 +209,9 @@ GATOR_DEFINE_PROBE(mali_pm_status, TP_PROTO(unsigned int event_id, unsigned long
 #define L2_PRESENT_LO           0x120	/* (RO) Level 2 cache present bitmap, low word */
 #define BIT_AT(value, pos) ((value >> pos) & 1)
 
-	static unsigned long long previous_shader_bitmask = 0;
-	static unsigned long long previous_tiler_bitmask = 0;
-	static unsigned long long previous_l2_bitmask = 0;
+	static unsigned long long previous_shader_bitmask;
+	static unsigned long long previous_tiler_bitmask;
+	static unsigned long long previous_l2_bitmask;
 
 	switch (event_id) {
 	case SHADER_PRESENT_LO:
@@ -219,9 +220,8 @@ GATOR_DEFINE_PROBE(mali_pm_status, TP_PROTO(unsigned int event_id, unsigned long
 			int pos;
 
 			for (pos = 0; pos < NUM_PM_SHADER; ++pos) {
-				if (BIT_AT(changed_bitmask, pos)) {
+				if (BIT_AT(changed_bitmask, pos))
 					record_timeline_event(PM_SHADER_0 + pos, BIT_AT(value, pos) ? ACTIVITY_START : ACTIVITY_STOP);
-				}
 			}
 
 			previous_shader_bitmask = value;
@@ -232,9 +232,8 @@ GATOR_DEFINE_PROBE(mali_pm_status, TP_PROTO(unsigned int event_id, unsigned long
 		{
 			unsigned long long changed = previous_tiler_bitmask ^ value;
 
-			if (BIT_AT(changed, 0)) {
+			if (BIT_AT(changed, 0))
 				record_timeline_event(PM_TILER_0, BIT_AT(value, 0) ? ACTIVITY_START : ACTIVITY_STOP);
-			}
 
 			previous_tiler_bitmask = value;
 			break;
@@ -244,12 +243,10 @@ GATOR_DEFINE_PROBE(mali_pm_status, TP_PROTO(unsigned int event_id, unsigned long
 		{
 			unsigned long long changed = previous_l2_bitmask ^ value;
 
-			if (BIT_AT(changed, 0)) {
+			if (BIT_AT(changed, 0))
 				record_timeline_event(PM_L2_0, BIT_AT(value, 0) ? ACTIVITY_START : ACTIVITY_STOP);
-			}
-			if (BIT_AT(changed, 4)) {
+			if (BIT_AT(changed, 4))
 				record_timeline_event(PM_L2_1, BIT_AT(value, 4) ? ACTIVITY_START : ACTIVITY_STOP);
-			}
 
 			previous_l2_bitmask = value;
 			break;
@@ -297,31 +294,27 @@ static int create_files(struct super_block *sb, struct dentry *root)
 	mali_profiling_control_type *mali_control;
 
 	for (event = FIRST_TIMELINE_EVENT; event < FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS; event++) {
-		if (gator_mali_create_file_system(mali_name, timeline_event_names[counter_index], sb, root, &counters[event], NULL) != 0) {
+		if (gator_mali_create_file_system(mali_name, timeline_event_names[counter_index], sb, root, &counters[event], NULL) != 0)
 			return -1;
-		}
 		counter_index++;
 	}
 	counter_index = 0;
 	for (event = FIRST_SOFTWARE_COUNTER; event < FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS; event++) {
-		if (gator_mali_create_file_system(mali_name, software_counter_names[counter_index], sb, root, &counters[event], NULL) != 0) {
+		if (gator_mali_create_file_system(mali_name, software_counter_names[counter_index], sb, root, &counters[event], NULL) != 0)
 			return -1;
-		}
 		counter_index++;
 	}
 	counter_index = 0;
 	for (event = FIRST_ACCUMULATOR; event < FIRST_ACCUMULATOR + NUMBER_OF_ACCUMULATORS; event++) {
-		if (gator_mali_create_file_system(mali_name, accumulators_names[counter_index], sb, root, &counters[event], NULL) != 0) {
+		if (gator_mali_create_file_system(mali_name, accumulators_names[counter_index], sb, root, &counters[event], NULL) != 0)
 			return -1;
-		}
 		counter_index++;
 	}
 
 	mali_control = symbol_get(_mali_profiling_control);
 	if (mali_control) {
-		if (gator_mali_create_file_system(mali_name, "Filmstrip_cnt0", sb, root, &counters[FILMSTRIP], &filmstrip_event) != 0) {
+		if (gator_mali_create_file_system(mali_name, "Filmstrip_cnt0", sb, root, &counters[FILMSTRIP], &filmstrip_event) != 0)
 			return -1;
-		}
 		symbol_put(_mali_profiling_control);
 	}
 
@@ -331,36 +324,36 @@ static int create_files(struct super_block *sb, struct dentry *root)
 static int register_tracepoints(void)
 {
 	if (GATOR_REGISTER_TRACE(mali_pm_status)) {
-		pr_debug("gator: Mali-T6xx: mali_pm_status tracepoint failed to activate\n");
+		pr_debug("gator: Mali-Midgard: mali_pm_status tracepoint failed to activate\n");
 		return 0;
 	}
 
 	if (GATOR_REGISTER_TRACE(mali_page_fault_insert_pages)) {
-		pr_debug("gator: Mali-T6xx: mali_page_fault_insert_pages tracepoint failed to activate\n");
+		pr_debug("gator: Mali-Midgard: mali_page_fault_insert_pages tracepoint failed to activate\n");
 		return 0;
 	}
 
 	if (GATOR_REGISTER_TRACE(mali_mmu_as_in_use)) {
-		pr_debug("gator: Mali-T6xx: mali_mmu_as_in_use tracepoint failed to activate\n");
+		pr_debug("gator: Mali-Midgard: mali_mmu_as_in_use tracepoint failed to activate\n");
 		return 0;
 	}
 
 	if (GATOR_REGISTER_TRACE(mali_mmu_as_released)) {
-		pr_debug("gator: Mali-T6xx: mali_mmu_as_released tracepoint failed to activate\n");
+		pr_debug("gator: Mali-Midgard: mali_mmu_as_released tracepoint failed to activate\n");
 		return 0;
 	}
 
 	if (GATOR_REGISTER_TRACE(mali_total_alloc_pages_change)) {
-		pr_debug("gator: Mali-T6xx: mali_total_alloc_pages_change tracepoint failed to activate\n");
+		pr_debug("gator: Mali-Midgard: mali_total_alloc_pages_change tracepoint failed to activate\n");
 		return 0;
 	}
 
-	pr_debug("gator: Mali-T6xx: start\n");
-	pr_debug("gator: Mali-T6xx: mali_pm_status probe is at %p\n", &probe_mali_pm_status);
-	pr_debug("gator: Mali-T6xx: mali_page_fault_insert_pages probe is at %p\n", &probe_mali_page_fault_insert_pages);
-	pr_debug("gator: Mali-T6xx: mali_mmu_as_in_use probe is at %p\n", &probe_mali_mmu_as_in_use);
-	pr_debug("gator: Mali-T6xx: mali_mmu_as_released probe is at %p\n", &probe_mali_mmu_as_released);
-	pr_debug("gator: Mali-T6xx: mali_total_alloc_pages_change probe is at %p\n", &probe_mali_total_alloc_pages_change);
+	pr_debug("gator: Mali-Midgard: start\n");
+	pr_debug("gator: Mali-Midgard: mali_pm_status probe is at %p\n", &probe_mali_pm_status);
+	pr_debug("gator: Mali-Midgard: mali_page_fault_insert_pages probe is at %p\n", &probe_mali_page_fault_insert_pages);
+	pr_debug("gator: Mali-Midgard: mali_mmu_as_in_use probe is at %p\n", &probe_mali_mmu_as_in_use);
+	pr_debug("gator: Mali-Midgard: mali_mmu_as_released probe is at %p\n", &probe_mali_mmu_as_released);
+	pr_debug("gator: Mali-Midgard: mali_total_alloc_pages_change probe is at %p\n", &probe_mali_total_alloc_pages_change);
 
 	return 1;
 }
@@ -376,18 +369,15 @@ static int start(void)
 		timeline_data[cnt] = 0;
 	}
 
-	for (cnt = 0; cnt < NUMBER_OF_SOFTWARE_COUNTERS; cnt++) {
+	for (cnt = 0; cnt < NUMBER_OF_SOFTWARE_COUNTERS; cnt++)
 		sw_counter_data[cnt] = 0;
-	}
 
-	for (cnt = 0; cnt < NUMBER_OF_ACCUMULATORS; cnt++) {
+	for (cnt = 0; cnt < NUMBER_OF_ACCUMULATORS; cnt++)
 		accumulators_data[cnt] = 0;
-	}
 
 	/* Register tracepoints */
-	if (register_tracepoints() == 0) {
+	if (register_tracepoints() == 0)
 		return -1;
-	}
 
 	/* Generic control interface for Mali DDK. */
 	mali_control = symbol_get(_mali_profiling_control);
@@ -410,7 +400,7 @@ static int start(void)
 
 		symbol_put(_mali_profiling_control);
 	} else {
-		printk("gator: mali online _mali_profiling_control symbol not found\n");
+		pr_err("gator: mali online _mali_profiling_control symbol not found\n");
 	}
 
 	/*
@@ -427,26 +417,26 @@ static void stop(void)
 {
 	mali_profiling_control_type *mali_control;
 
-	pr_debug("gator: Mali-T6xx: stop\n");
+	pr_debug("gator: Mali-Midgard: stop\n");
 
 	/*
 	 * It is safe to unregister traces even if they were not successfully
 	 * registered, so no need to check.
 	 */
 	GATOR_UNREGISTER_TRACE(mali_pm_status);
-	pr_debug("gator: Mali-T6xx: mali_pm_status tracepoint deactivated\n");
+	pr_debug("gator: Mali-Midgard: mali_pm_status tracepoint deactivated\n");
 
 	GATOR_UNREGISTER_TRACE(mali_page_fault_insert_pages);
-	pr_debug("gator: Mali-T6xx: mali_page_fault_insert_pages tracepoint deactivated\n");
+	pr_debug("gator: Mali-Midgard: mali_page_fault_insert_pages tracepoint deactivated\n");
 
 	GATOR_UNREGISTER_TRACE(mali_mmu_as_in_use);
-	pr_debug("gator: Mali-T6xx: mali_mmu_as_in_use tracepoint deactivated\n");
+	pr_debug("gator: Mali-Midgard: mali_mmu_as_in_use tracepoint deactivated\n");
 
 	GATOR_UNREGISTER_TRACE(mali_mmu_as_released);
-	pr_debug("gator: Mali-T6xx: mali_mmu_as_released tracepoint deactivated\n");
+	pr_debug("gator: Mali-Midgard: mali_mmu_as_released tracepoint deactivated\n");
 
 	GATOR_UNREGISTER_TRACE(mali_total_alloc_pages_change);
-	pr_debug("gator: Mali-T6xx: mali_total_alloc_pages_change tracepoint deactivated\n");
+	pr_debug("gator: Mali-Midgard: mali_total_alloc_pages_change tracepoint deactivated\n");
 
 	/* Generic control interface for Mali DDK. */
 	mali_control = symbol_get(_mali_profiling_control);
@@ -457,20 +447,19 @@ static void stop(void)
 
 		symbol_put(_mali_profiling_control);
 	} else {
-		printk("gator: mali offline _mali_profiling_control symbol not found\n");
+		pr_err("gator: mali offline _mali_profiling_control symbol not found\n");
 	}
 }
 
-static int read(int **buffer)
+static int read(int **buffer, bool sched_switch)
 {
 	int cnt;
 	int len = 0;
 	long sample_interval_us = 0;
 	struct timespec read_timestamp;
 
-	if (!on_primary_core()) {
+	if (!on_primary_core())
 		return 0;
-	}
 
 	/* Get the start of this sample period. */
 	getnstimeofday(&read_timestamp);
@@ -479,9 +468,8 @@ static int read(int **buffer)
 	 * Calculate the sample interval if the previous sample time is valid.
 	 * We use tv_sec since it will not be 0.
 	 */
-	if (prev_timestamp.tv_sec != 0) {
+	if (prev_timestamp.tv_sec != 0)
 		sample_interval_us = get_duration_us(&prev_timestamp, &read_timestamp);
-	}
 
 	/* Structure copy. Update the previous timestamp. */
 	prev_timestamp = read_timestamp;
@@ -490,15 +478,19 @@ static int read(int **buffer)
 	 * Report the timeline counters (ACTIVITY_START/STOP)
 	 */
 	for (cnt = FIRST_TIMELINE_EVENT; cnt < (FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS); cnt++) {
-		mali_counter *counter = &counters[cnt];
+		struct mali_counter *counter = &counters[cnt];
+
 		if (counter->enabled) {
 			const int index = cnt - FIRST_TIMELINE_EVENT;
 			unsigned int value;
 
-			/* If the activity is still running, reset its start time to the start of this sample period
-			 * to correct the count.  Add the time up to the end of the sample onto the count. */
+			/* If the activity is still running, reset its start time to the
+			 * start of this sample period to correct the count. Add the
+			 * time up to the end of the sample onto the count.
+			 */
 			if (timeline_event_starttime[index].tv_sec != 0) {
 				const long event_duration = get_duration_us(&timeline_event_starttime[index], &read_timestamp);
+
 				timeline_data[index] += event_duration;
 				timeline_event_starttime[index] = read_timestamp;	/* Activity is still running. */
 			}
@@ -507,7 +499,7 @@ static int read(int **buffer)
 				/* Convert the counter to a percent-of-sample value */
 				value = (timeline_data[index] * 100) / sample_interval_us;
 			} else {
-				pr_debug("gator: Mali-T6xx: setting value to zero\n");
+				pr_debug("gator: Mali-Midgard: setting value to zero\n");
 				value = 0;
 			}
 
@@ -521,9 +513,11 @@ static int read(int **buffer)
 
 	/* Report the software counters */
 	for (cnt = FIRST_SOFTWARE_COUNTER; cnt < (FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS); cnt++) {
-		const mali_counter *counter = &counters[cnt];
+		const struct mali_counter *counter = &counters[cnt];
+
 		if (counter->enabled) {
 			const int index = cnt - FIRST_SOFTWARE_COUNTER;
+
 			counter_dump[len++] = counter->key;
 			counter_dump[len++] = sw_counter_data[index];
 			/* Set the value to zero for the next time */
@@ -533,9 +527,11 @@ static int read(int **buffer)
 
 	/* Report the accumulators */
 	for (cnt = FIRST_ACCUMULATOR; cnt < (FIRST_ACCUMULATOR + NUMBER_OF_ACCUMULATORS); cnt++) {
-		const mali_counter *counter = &counters[cnt];
+		const struct mali_counter *counter = &counters[cnt];
+
 		if (counter->enabled) {
 			const int index = cnt - FIRST_ACCUMULATOR;
+
 			counter_dump[len++] = counter->key;
 			counter_dump[len++] = accumulators_data[index];
 			/* Do not zero the accumulator */
@@ -543,25 +539,24 @@ static int read(int **buffer)
 	}
 
 	/* Update the buffer */
-	if (buffer) {
-		*buffer = (int *)counter_dump;
-	}
+	if (buffer)
+		*buffer = counter_dump;
 
 	return len;
 }
 
-static struct gator_interface gator_events_mali_t6xx_interface = {
+static struct gator_interface gator_events_mali_midgard_interface = {
 	.create_files = create_files,
 	.start = start,
 	.stop = stop,
 	.read = read
 };
 
-extern int gator_events_mali_t6xx_init(void)
+extern int gator_events_mali_midgard_init(void)
 {
-	pr_debug("gator: Mali-T6xx: sw_counters init\n");
+	pr_debug("gator: Mali-Midgard: sw_counters init\n");
 
 	gator_mali_initialise_counters(counters, NUMBER_OF_EVENTS);
 
-	return gator_events_install(&gator_events_mali_t6xx_interface);
+	return gator_events_install(&gator_events_mali_midgard_interface);
 }
diff --git a/drivers/gator/gator_events_mali_t6xx_hw.c b/drivers/gator/gator_events_mali_midgard_hw.c
similarity index 59%
rename from drivers/gator/gator_events_mali_t6xx_hw.c
rename to drivers/gator/gator_events_mali_midgard_hw.c
index 3a072bb6ac06..c8065da56815 100644
--- a/drivers/gator/gator_events_mali_t6xx_hw.c
+++ b/drivers/gator/gator_events_mali_midgard_hw.c
@@ -13,19 +13,19 @@
 #include <linux/time.h>
 #include <linux/math64.h>
 #include <linux/slab.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
-/* Mali T6xx DDK includes */
+/* Mali Midgard DDK includes */
 #if defined(MALI_SIMPLE_API)
 /* Header with wrapper functions to kbase structures and functions */
-#include "mali/mali_dd_gator_api.h"
+#include "mali/mali_kbase_gator_api.h"
 #elif defined(MALI_DIR_MIDGARD)
-/* New DDK Directory structure with kernel/drivers/gpu/arm/midgard*/
+/* New DDK Directory structure with kernel/drivers/gpu/arm/midgard */
 #include "mali_linux_trace.h"
 #include "mali_kbase.h"
 #include "mali_kbase_mem_linux.h"
 #else
-/* Old DDK Directory structure with kernel/drivers/gpu/arm/t6xx*/
+/* Old DDK Directory structure with kernel/drivers/gpu/arm/t6xx */
 #include "linux/mali_linux_trace.h"
 #include "kbase/src/common/mali_kbase.h"
 #include "kbase/src/linux/mali_kbase_mem_linux.h"
@@ -37,76 +37,63 @@
 #endif
 
 #if (MALI_DDK_GATOR_API_VERSION != 1) && (MALI_DDK_GATOR_API_VERSION != 2) && (MALI_DDK_GATOR_API_VERSION != 3)
-#error MALI_DDK_GATOR_API_VERSION is invalid (must be 1 for r1/r2 DDK, or 2 for r3 DDK, or 3 for r? DDK).
+#error MALI_DDK_GATOR_API_VERSION is invalid (must be 1 for r1/r2 DDK, or 2 for r3/r4 DDK, or 3 for r5 and later DDK).
 #endif
 
 #include "gator_events_mali_common.h"
 
 /*
- * Mali-T6xx
+ * Mali-Midgard
  */
 #if MALI_DDK_GATOR_API_VERSION == 3
-typedef uint32_t kbase_dd_instr_hwcnt_dump_irq_type(struct mali_dd_hwcnt_handles *);
-typedef uint32_t kbase_dd_instr_hwcnt_dump_complete_type(struct mali_dd_hwcnt_handles *, uint32_t *);
-typedef struct mali_dd_hwcnt_handles* mali_dd_hwcnt_init_type(struct mali_dd_hwcnt_info *);
-typedef void mali_dd_hwcnt_clear_type(struct mali_dd_hwcnt_info *, struct mali_dd_hwcnt_handles *);
-
-static kbase_dd_instr_hwcnt_dump_irq_type *kbase_dd_instr_hwcnt_dump_irq_symbol;
-static kbase_dd_instr_hwcnt_dump_complete_type *kbase_dd_instr_hwcnt_dump_complete_symbol;
-static mali_dd_hwcnt_init_type *mali_dd_hwcnt_init_symbol;
-static mali_dd_hwcnt_clear_type *mali_dd_hwcnt_clear_symbol;
+static uint32_t (*kbase_gator_instr_hwcnt_dump_irq_symbol)(struct kbase_gator_hwcnt_handles *);
+static uint32_t (*kbase_gator_instr_hwcnt_dump_complete_symbol)(struct kbase_gator_hwcnt_handles *, uint32_t *const);
+static struct kbase_gator_hwcnt_handles *(*kbase_gator_hwcnt_init_symbol)(struct kbase_gator_hwcnt_info *);
+static void (*kbase_gator_hwcnt_term_symbol)(struct kbase_gator_hwcnt_info *, struct kbase_gator_hwcnt_handles *);
 
 #else
-typedef struct kbase_device *kbase_find_device_type(int);
-typedef struct kbase_context *kbase_create_context_type(struct kbase_device *);
-typedef void kbase_destroy_context_type(struct kbase_context *);
+static struct kbase_device *(*kbase_find_device_symbol)(int);
+static struct kbase_context *(*kbase_create_context_symbol)(struct kbase_device *);
+static void (*kbase_destroy_context_symbol)(struct kbase_context *);
 
 #if MALI_DDK_GATOR_API_VERSION == 1
-typedef void *kbase_va_alloc_type(struct kbase_context *, u32);
-typedef void kbase_va_free_type(struct kbase_context *, void *);
+static void *(*kbase_va_alloc_symbol)(struct kbase_context *, u32);
+static void (*kbase_va_free_symbol)(struct kbase_context *, void *);
 #elif MALI_DDK_GATOR_API_VERSION == 2
-typedef void *kbase_va_alloc_type(struct kbase_context *, u32, kbase_hwc_dma_mapping * handle);
-typedef void kbase_va_free_type(struct kbase_context *, kbase_hwc_dma_mapping * handle);
+static void *(*kbase_va_alloc_symbol)(struct kbase_context *, u32, struct kbase_hwc_dma_mapping *);
+static void (*kbase_va_free_symbol)(struct kbase_context *, struct kbase_hwc_dma_mapping *);
 #endif
 
-typedef mali_error kbase_instr_hwcnt_enable_type(struct kbase_context *, struct kbase_uk_hwcnt_setup *);
-typedef mali_error kbase_instr_hwcnt_disable_type(struct kbase_context *);
-typedef mali_error kbase_instr_hwcnt_clear_type(struct kbase_context *);
-typedef mali_error kbase_instr_hwcnt_dump_irq_type(struct kbase_context *);
-typedef mali_bool kbase_instr_hwcnt_dump_complete_type(struct kbase_context *, mali_bool *);
+static mali_error (*kbase_instr_hwcnt_enable_symbol)(struct kbase_context *, struct kbase_uk_hwcnt_setup *);
+static mali_error (*kbase_instr_hwcnt_disable_symbol)(struct kbase_context *);
+static mali_error (*kbase_instr_hwcnt_clear_symbol)(struct kbase_context *);
+static mali_error (*kbase_instr_hwcnt_dump_irq_symbol)(struct kbase_context *);
+static mali_bool (*kbase_instr_hwcnt_dump_complete_symbol)(struct kbase_context *, mali_bool *);
 
-static kbase_find_device_type *kbase_find_device_symbol;
-static kbase_create_context_type *kbase_create_context_symbol;
-static kbase_va_alloc_type *kbase_va_alloc_symbol;
-static kbase_instr_hwcnt_enable_type *kbase_instr_hwcnt_enable_symbol;
-static kbase_instr_hwcnt_clear_type *kbase_instr_hwcnt_clear_symbol;
-static kbase_instr_hwcnt_dump_irq_type *kbase_instr_hwcnt_dump_irq_symbol;
-static kbase_instr_hwcnt_dump_complete_type *kbase_instr_hwcnt_dump_complete_symbol;
-static kbase_instr_hwcnt_disable_type *kbase_instr_hwcnt_disable_symbol;
-static kbase_va_free_type *kbase_va_free_symbol;
-static kbase_destroy_context_type *kbase_destroy_context_symbol;
+static long shader_present_low;
 #endif
 
-static long shader_present_low = 0;
-
 /** The interval between reads, in ns.
  *
- * Earlier we introduced
- * a 'hold off for 1ms after last read' to resolve MIDBASE-2178 and MALINE-724.
- * However, the 1ms hold off is too long if no context switches occur as there is a race
- * between this value and the tick of the read clock in gator which is also 1ms. If we 'miss' the
- * current read, the counter values are effectively 'spread' over 2ms and the values seen are half
- * what they should be (since Streamline averages over sample time). In the presence of context switches
- * this spread can vary and markedly affect the counters.  Currently there is no 'proper' solution to
- * this, but empirically we have found that reducing the minimum read interval to 950us causes the
- * counts to be much more stable.
+ * Earlier we introduced a 'hold off for 1ms after last read' to
+ * resolve MIDBASE-2178 and MALINE-724. However, the 1ms hold off is
+ * too long if no context switches occur as there is a race between
+ * this value and the tick of the read clock in gator which is also
+ * 1ms. If we 'miss' the current read, the counter values are
+ * effectively 'spread' over 2ms and the values seen are half what
+ * they should be (since Streamline averages over sample time). In the
+ * presence of context switches this spread can vary and markedly
+ * affect the counters. Currently there is no 'proper' solution to
+ * this, but empirically we have found that reducing the minimum read
+ * interval to 950us causes the counts to be much more stable.
  */
 static const int READ_INTERVAL_NSEC = 950000;
 
 #if GATOR_TEST
-#include "gator_events_mali_t6xx_hw_test.c"
+#include "gator_events_mali_midgard_hw_test.c"
 #endif
 
+#if MALI_DDK_GATOR_API_VERSION != 3
 /* Blocks for HW counters */
 enum {
 	JM_BLOCK = 0,
@@ -114,12 +101,12 @@ enum {
 	SHADER_BLOCK,
 	MMU_BLOCK
 };
+#endif
 
-static const char mali_name[] = "Mali-T6xx";
+static const char *mali_name;
 
-/* Counters for Mali-T6xx:
+/* Counters for Mali-Midgard:
  *
- *  - HW counters, 4 blocks
  *    For HW counters we need strings to create /dev/gator/events files.
  *    Enums are not needed because the position of the HW name in the array is the same
  *    of the corresponding value in the received block of memory.
@@ -128,6 +115,13 @@ static const char mali_name[] = "Mali-T6xx";
  */
 
 /* Hardware Counters */
+#if MALI_DDK_GATOR_API_VERSION == 3
+
+static const char *const *hardware_counter_names;
+static int number_of_hardware_counters;
+
+#else
+
 static const char *const hardware_counter_names[] = {
 	/* Job Manager */
 	"",
@@ -394,17 +388,19 @@ static const char *const hardware_counter_names[] = {
 	"L2_REPLAY_FULL"
 };
 
-#define NUMBER_OF_HARDWARE_COUNTERS (sizeof(hardware_counter_names) / sizeof(hardware_counter_names[0]))
+static const int number_of_hardware_counters = ARRAY_SIZE(hardware_counter_names);
+
+#endif
 
 #define GET_HW_BLOCK(c) (((c) >> 6) & 0x3)
 #define GET_COUNTER_OFFSET(c) ((c) & 0x3f)
 
 #if MALI_DDK_GATOR_API_VERSION == 3
 /* Opaque handles for kbase_context and kbase_hwc_dma_mapping */
-static struct mali_dd_hwcnt_handles *handles;
+static struct kbase_gator_hwcnt_handles *handles;
 
 /* Information about hardware counters */
-static struct mali_dd_hwcnt_info *in_out_info;
+static struct kbase_gator_hwcnt_info *in_out_info;
 
 #else
 /* Memory to dump hardware counters into */
@@ -412,56 +408,58 @@ static void *kernel_dump_buffer;
 
 #if MALI_DDK_GATOR_API_VERSION == 2
 /* DMA state used to manage lifetime of the buffer */
-kbase_hwc_dma_mapping kernel_dump_buffer_handle;
+struct kbase_hwc_dma_mapping kernel_dump_buffer_handle;
 #endif
 
 /* kbase context and device */
-static struct kbase_context *kbcontext = NULL;
-static struct kbase_device *kbdevice = NULL;
-#endif
-
-static volatile bool kbase_device_busy = false;
-static unsigned int num_hardware_counters_enabled;
+static struct kbase_context *kbcontext;
+static struct kbase_device *kbdevice;
 
 /*
- * gatorfs variables for counter enable state
+ * The following function has no external prototype in older DDK
+ * revisions. When the DDK is updated then this should be removed.
  */
-static mali_counter counters[NUMBER_OF_HARDWARE_COUNTERS];
+struct kbase_device *kbase_find_device(int minor);
+#endif
 
-/* An array used to return the data we recorded
- * as key,value pairs hence the *2
- */
-static unsigned long counter_dump[NUMBER_OF_HARDWARE_COUNTERS * 2];
+static volatile bool kbase_device_busy;
+static unsigned int num_hardware_counters_enabled;
 
-extern mali_counter mali_activity[3];
-static const char* const mali_activity_names[] = {
+/* gatorfs variables for counter enable state */
+static struct mali_counter *counters;
+
+/* An array used to return the data we recorded as key,value pairs */
+static int *counter_dump;
+
+extern struct mali_counter mali_activity[3];
+
+static const char *const mali_activity_names[] = {
 	"fragment",
 	"vertex",
 	"opencl",
 };
 
 #define SYMBOL_GET(FUNCTION, ERROR_COUNT) \
-	if(FUNCTION ## _symbol) \
-	{ \
-		printk("gator: mali " #FUNCTION " symbol was already registered\n"); \
-		(ERROR_COUNT)++; \
-	} \
-	else \
-	{ \
-		FUNCTION ## _symbol = symbol_get(FUNCTION); \
-		if(! FUNCTION ## _symbol) \
-		{ \
-			printk("gator: mali online " #FUNCTION " symbol not found\n"); \
+	do { \
+		if (FUNCTION ## _symbol) { \
+			pr_err("gator: mali " #FUNCTION " symbol was already registered\n"); \
 			(ERROR_COUNT)++; \
+		} else { \
+			FUNCTION ## _symbol = symbol_get(FUNCTION); \
+			if (!FUNCTION ## _symbol) { \
+				pr_err("gator: mali online " #FUNCTION " symbol not found\n"); \
+				(ERROR_COUNT)++; \
+			} \
 		} \
-	}
+	} while (0)
 
 #define SYMBOL_CLEANUP(FUNCTION) \
-	if(FUNCTION ## _symbol) \
-	{ \
-		symbol_put(FUNCTION); \
-		FUNCTION ## _symbol = NULL; \
-	}
+	do { \
+		if (FUNCTION ## _symbol) { \
+			symbol_put(FUNCTION); \
+			FUNCTION ## _symbol = NULL; \
+		} \
+	} while (0)
 
 /**
  * Execute symbol_get for all the Mali symbols and check for success.
@@ -471,10 +469,10 @@ static int init_symbols(void)
 {
 	int error_count = 0;
 #if MALI_DDK_GATOR_API_VERSION == 3
-	SYMBOL_GET(kbase_dd_instr_hwcnt_dump_irq, error_count);
-	SYMBOL_GET(kbase_dd_instr_hwcnt_dump_complete, error_count);
-	SYMBOL_GET(mali_dd_hwcnt_init, error_count);
-	SYMBOL_GET(mali_dd_hwcnt_clear, error_count);
+	SYMBOL_GET(kbase_gator_instr_hwcnt_dump_irq, error_count);
+	SYMBOL_GET(kbase_gator_instr_hwcnt_dump_complete, error_count);
+	SYMBOL_GET(kbase_gator_hwcnt_init, error_count);
+	SYMBOL_GET(kbase_gator_hwcnt_term, error_count);
 #else
 	SYMBOL_GET(kbase_find_device, error_count);
 	SYMBOL_GET(kbase_create_context, error_count);
@@ -497,10 +495,10 @@ static int init_symbols(void)
 static void clean_symbols(void)
 {
 #if MALI_DDK_GATOR_API_VERSION == 3
-	SYMBOL_CLEANUP(kbase_dd_instr_hwcnt_dump_irq);
-	SYMBOL_CLEANUP(kbase_dd_instr_hwcnt_dump_complete);
-	SYMBOL_CLEANUP(mali_dd_hwcnt_init);
-	SYMBOL_CLEANUP(mali_dd_hwcnt_clear);
+	SYMBOL_CLEANUP(kbase_gator_instr_hwcnt_dump_irq);
+	SYMBOL_CLEANUP(kbase_gator_instr_hwcnt_dump_complete);
+	SYMBOL_CLEANUP(kbase_gator_hwcnt_init);
+	SYMBOL_CLEANUP(kbase_gator_hwcnt_term);
 #else
 	SYMBOL_CLEANUP(kbase_find_device);
 	SYMBOL_CLEANUP(kbase_create_context);
@@ -526,14 +524,12 @@ static void clean_symbols(void)
 static int is_read_scheduled(const struct timespec *current_time, u32 *prev_time_s, s32 *next_read_time_ns)
 {
 	/* If the current ns count rolls over a second, roll the next read time too. */
-	if (current_time->tv_sec != *prev_time_s) {
+	if (current_time->tv_sec != *prev_time_s)
 		*next_read_time_ns = *next_read_time_ns - NSEC_PER_SEC;
-	}
 
 	/* Abort the read if the next read time has not arrived. */
-	if (current_time->tv_nsec < *next_read_time_ns) {
+	if (current_time->tv_nsec < *next_read_time_ns)
 		return 0;
-	}
 
 	/* Set the next read some fixed time after this one, and update the read timestamp. */
 	*next_read_time_ns = current_time->tv_nsec + READ_INTERVAL_NSEC;
@@ -544,7 +540,7 @@ static int is_read_scheduled(const struct timespec *current_time, u32 *prev_time
 
 static int start(void)
 {
-#if MALI_DDK_GATOR_API_VERSION < 3
+#if MALI_DDK_GATOR_API_VERSION != 3
 	struct kbase_uk_hwcnt_setup setup;
 	unsigned long long shadersPresent = 0;
 	u16 bitmask[] = { 0, 0, 0, 0 };
@@ -552,37 +548,27 @@ static int start(void)
 #endif
 	int cnt;
 
+#if MALI_DDK_GATOR_API_VERSION == 3
 	/* Setup HW counters */
 	num_hardware_counters_enabled = 0;
 
-	if (NUMBER_OF_HARDWARE_COUNTERS != 256) {
-		pr_debug("Unexpected number of hardware counters defined: expecting 256, got %d\n", NUMBER_OF_HARDWARE_COUNTERS);
-	}
-
-#if MALI_DDK_GATOR_API_VERSION == 3
-	/* Declare and initialise mali_dd_hwcnt_info structure */
-	in_out_info = kmalloc(sizeof(struct mali_dd_hwcnt_info), GFP_KERNEL);
-	for (cnt = 0; cnt < 4; cnt++){
+	/* Declare and initialise kbase_gator_hwcnt_info structure */
+	in_out_info = kmalloc(sizeof(*in_out_info), GFP_KERNEL);
+	for (cnt = 0; cnt < ARRAY_SIZE(in_out_info->bitmask); cnt++)
 		in_out_info->bitmask[cnt] = 0;
-	}
-#endif
+
 	/* Calculate enable bitmasks based on counters_enabled array */
-	for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) {
-		const mali_counter *counter = &counters[cnt];
-		if (counter->enabled) {
+	for (cnt = 0; cnt < number_of_hardware_counters; cnt++) {
+		if (counters[cnt].enabled) {
 			int block = GET_HW_BLOCK(cnt);
 			int enable_bit = GET_COUNTER_OFFSET(cnt) / 4;
-#if MALI_DDK_GATOR_API_VERSION == 3
+
 			in_out_info->bitmask[block] |= (1 << enable_bit);
-#else
-			bitmask[block] |= (1 << enable_bit);
-#endif
-			pr_debug("gator: Mali-T6xx: hardware counter %s selected [%d]\n", hardware_counter_names[cnt], cnt);
+			pr_debug("gator: Mali-Midgard: hardware counter %s selected [%d]\n", hardware_counter_names[cnt], cnt);
 			num_hardware_counters_enabled++;
 		}
 	}
 
-#if MALI_DDK_GATOR_API_VERSION == 3
 	/* Create a kbase context for HW counters */
 	if (num_hardware_counters_enabled > 0) {
 		if (init_symbols() > 0) {
@@ -591,20 +577,33 @@ static int start(void)
 			return 0;
 		}
 
-		handles = mali_dd_hwcnt_init_symbol(in_out_info);
+		handles = kbase_gator_hwcnt_init_symbol(in_out_info);
 
-		if(handles == NULL) {
+		if (handles == NULL)
 			goto out;
-		}
-
-		/* See if we can get the number of shader cores */
-		shader_present_low = (unsigned long)in_out_info->shader_present_bitmap;
 
 		kbase_device_busy = false;
 	}
 
 	return 0;
 #else
+	/* Setup HW counters */
+	num_hardware_counters_enabled = 0;
+
+	/* Calculate enable bitmasks based on counters_enabled array */
+	for (cnt = 0; cnt < number_of_hardware_counters; cnt++) {
+		const struct mali_counter *counter = &counters[cnt];
+
+		if (counter->enabled) {
+			int block = GET_HW_BLOCK(cnt);
+			int enable_bit = GET_COUNTER_OFFSET(cnt) / 4;
+
+			bitmask[block] |= (1 << enable_bit);
+			pr_debug("gator: Mali-Midgard: hardware counter %s selected [%d]\n", hardware_counter_names[cnt], cnt);
+			num_hardware_counters_enabled++;
+		}
+	}
+
 	/* Create a kbase context for HW counters */
 	if (num_hardware_counters_enabled > 0) {
 		if (init_symbols() > 0) {
@@ -617,18 +616,17 @@ static int start(void)
 
 		/* If we already got a context, fail */
 		if (kbcontext) {
-			pr_debug("gator: Mali-T6xx: error context already present\n");
+			pr_debug("gator: Mali-Midgard: error context already present\n");
 			goto out;
 		}
 
 		/* kbcontext will only be valid after all the Mali symbols are loaded successfully */
 		kbcontext = kbase_create_context_symbol(kbdevice);
 		if (!kbcontext) {
-			pr_debug("gator: Mali-T6xx: error creating kbase context\n");
+			pr_debug("gator: Mali-Midgard: error creating kbase context\n");
 			goto out;
 		}
 
-
 		/* See if we can get the number of shader cores */
 		shadersPresent = kbdevice->shader_present_bitmap;
 		shader_present_low = (unsigned long)shadersPresent;
@@ -639,8 +637,8 @@ static int start(void)
 		 *             * number of blocks (always 8 for midgard)
 		 *             * number of counters per block (always 64 for midgard)
 		 *             * number of bytes per counter (always 4 in midgard)
-		 * For a Mali-T6xx with a single core group = 1 * 8 * 64 * 4 = 2048
-		 * For a Mali-T6xx with a dual core group   = 2 * 8 * 64 * 4 = 4096
+		 * For a Mali-Midgard with a single core group = 1 * 8 * 64 * 4 = 2048
+		 * For a Mali-Midgard with a dual core group   = 2 * 8 * 64 * 4 = 4096
 		 */
 #if MALI_DDK_GATOR_API_VERSION == 1
 		kernel_dump_buffer = kbase_va_alloc_symbol(kbcontext, 4096);
@@ -648,7 +646,7 @@ static int start(void)
 		kernel_dump_buffer = kbase_va_alloc_symbol(kbcontext, 4096, &kernel_dump_buffer_handle);
 #endif
 		if (!kernel_dump_buffer) {
-			pr_debug("gator: Mali-T6xx: error trying to allocate va\n");
+			pr_debug("gator: Mali-Midgard: error trying to allocate va\n");
 			goto destroy_context;
 		}
 
@@ -663,12 +661,12 @@ static int start(void)
 		/* Use kbase API to enable hardware counters and provide dump buffer */
 		err = kbase_instr_hwcnt_enable_symbol(kbcontext, &setup);
 		if (err != MALI_ERROR_NONE) {
-			pr_debug("gator: Mali-T6xx: can't setup hardware counters\n");
+			pr_debug("gator: Mali-Midgard: can't setup hardware counters\n");
 			goto free_buffer;
 		}
-		pr_debug("gator: Mali-T6xx: hardware counters enabled\n");
+		pr_debug("gator: Mali-Midgard: hardware counters enabled\n");
 		kbase_instr_hwcnt_clear_symbol(kbcontext);
-		pr_debug("gator: Mali-T6xx: hardware counters cleared \n");
+		pr_debug("gator: Mali-Midgard: hardware counters cleared\n");
 
 		kbase_device_busy = false;
 	}
@@ -695,17 +693,16 @@ static void stop(void)
 {
 	unsigned int cnt;
 #if MALI_DDK_GATOR_API_VERSION == 3
-	struct mali_dd_hwcnt_handles *temp_hand;
+	struct kbase_gator_hwcnt_handles *temp_hand;
 #else
 	struct kbase_context *temp_kbcontext;
 #endif
 
-	pr_debug("gator: Mali-T6xx: stop\n");
+	pr_debug("gator: Mali-Midgard: stop\n");
 
 	/* Set all counters as disabled */
-	for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) {
+	for (cnt = 0; cnt < number_of_hardware_counters; cnt++)
 		counters[cnt].enabled = 0;
-	}
 
 	/* Destroy the context for HW counters */
 #if MALI_DDK_GATOR_API_VERSION == 3
@@ -717,7 +714,7 @@ static void stop(void)
 		temp_hand = handles;
 		handles = NULL;
 
-		mali_dd_hwcnt_clear_symbol(in_out_info, temp_hand);
+		kbase_gator_hwcnt_term_symbol(in_out_info, temp_hand);
 
 		kfree(in_out_info);
 
@@ -741,60 +738,117 @@ static void stop(void)
 		kbase_destroy_context_symbol(temp_kbcontext);
 #endif
 
-		pr_debug("gator: Mali-T6xx: hardware counters stopped\n");
+		pr_debug("gator: Mali-Midgard: hardware counters stopped\n");
 
 		clean_symbols();
 	}
 }
 
-static int read(int **buffer)
+/*
+ * Append one enabled counter to counter_dump as a key,value pair starting
+ * at index len. Shader-block counters are averaged over the shader cores.
+ * Returns the number of ints written (always 2).
+ */
+static int read_counter(const int cnt, const int len, const struct mali_counter *counter)
+{
+	const int block = GET_HW_BLOCK(cnt);
+	const int counter_offset = GET_COUNTER_OFFSET(cnt);
+	int shader_core_count = 0;
+	u32 value = 0;
+	int i;
+
+#if MALI_DDK_GATOR_API_VERSION == 3
+	const char *block_base_address = (char *)in_out_info->kernel_dump_buffer;
+
+	for (i = 0; i < in_out_info->nr_hwc_blocks; i++) {
+		if (block == in_out_info->hwc_layout[i]) {
+			value += *((u32 *)(block_base_address + (0x100 * i)) + counter_offset);
+			if (block == SHADER_BLOCK)
+				++shader_core_count;
+		}
+	}
+
+	if (shader_core_count > 1)
+		value /= shader_core_count;
+#else
+	/* Byte offset of each hardware block within kernel_dump_buffer */
+	static const unsigned int vithar_blocks[] = {
+		0x700,	/* VITHAR_JOB_MANAGER,     Block 0 */
+		0x400,	/* VITHAR_TILER,           Block 1 */
+		0x000,	/* VITHAR_SHADER_CORE,     Block 2 */
+		0x500	/* VITHAR_MEMORY_SYSTEM,   Block 3 */
+	};
+	const char *block_base_address = (char *)kernel_dump_buffer + vithar_blocks[block];
+
+	if (block == SHADER_BLOCK) {
+		/* Cores 0-3 are 0x100 apart from the base; cores 4-7 repeat that at +0x800 */
+		for (i = 0; i < 8; i++) {
+			if ((shader_present_low >> i) & 1) {
+				value += *((u32 *)(block_base_address + (0x100 * (i & 3)) + ((i & 4) ? 0x800 : 0)) + counter_offset);
+				shader_core_count++;
+			}
+		}
+
+		/* Divide the total by the number of cores to produce an average */
+		if (shader_core_count != 0)
+			value /= shader_core_count;
+	} else {
+		value = *((u32 *)block_base_address + counter_offset);
+	}
+#endif
+
+	counter_dump[len + 0] = counter->key;
+	counter_dump[len + 1] = value;
+
+	return 2;
+}
+
+static int read(int **buffer, bool sched_switch)
 {
 	int cnt;
 	int len = 0;
-	u32 value = 0;
 	uint32_t success;
 
 	struct timespec current_time;
-	static u32 prev_time_s = 0;
-	static s32 next_read_time_ns = 0;
+	static u32 prev_time_s;
+	static s32 next_read_time_ns;
 
-	if (!on_primary_core()) {
+	if (!on_primary_core() || sched_switch)
 		return 0;
-	}
 
 	getnstimeofday(&current_time);
 
 	/*
-	 * Discard reads unless a respectable time has passed.  This reduces the load on the GPU without sacrificing
-	 * accuracy on the Streamline display.
+	 * Discard reads unless a respectable time has passed. This
+	 * reduces the load on the GPU without sacrificing accuracy on
+	 * the Streamline display.
 	 */
-	if (!is_read_scheduled(&current_time, &prev_time_s, &next_read_time_ns)) {
+	if (!is_read_scheduled(&current_time, &prev_time_s, &next_read_time_ns))
 		return 0;
-	}
 
 	/*
 	 * Report the HW counters
 	 * Only process hardware counters if at least one of the hardware counters is enabled.
 	 */
 	if (num_hardware_counters_enabled > 0) {
+#if MALI_DDK_GATOR_API_VERSION != 3
 		const unsigned int vithar_blocks[] = {
 			0x700,	/* VITHAR_JOB_MANAGER,     Block 0 */
 			0x400,	/* VITHAR_TILER,           Block 1 */
 			0x000,	/* VITHAR_SHADER_CORE,     Block 2 */
 			0x500	/* VITHAR_MEMORY_SYSTEM,   Block 3 */
 		};
+#endif
 
 #if MALI_DDK_GATOR_API_VERSION == 3
-		if (!handles) {
+		if (!handles)
 			return -1;
-		}
 
 		/* Mali symbols can be called safely since a kbcontext is valid */
-		if (kbase_dd_instr_hwcnt_dump_complete_symbol(handles, &success) == MALI_TRUE) {
+		if (kbase_gator_instr_hwcnt_dump_complete_symbol(handles, &success) == MALI_TRUE) {
 #else
-		if (!kbcontext) {
+		if (!kbcontext)
 			return -1;
-		}
 
 		/* Mali symbols can be called safely since a kbcontext is valid */
 		if (kbase_instr_hwcnt_dump_complete_symbol(kbcontext, &success) == MALI_TRUE) {
@@ -803,49 +857,11 @@ static int read(int **buffer)
 
 			if (success == MALI_TRUE) {
 				/* Cycle through hardware counters and accumulate totals */
-				for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) {
-					const mali_counter *counter = &counters[cnt];
-					if (counter->enabled) {
-						const int block = GET_HW_BLOCK(cnt);
-						const int counter_offset = GET_COUNTER_OFFSET(cnt);
+				for (cnt = 0; cnt < number_of_hardware_counters; cnt++) {
+					const struct mali_counter *counter = &counters[cnt];
 
-#if MALI_DDK_GATOR_API_VERSION == 3
-						const char* block_base_address = (char*)in_out_info->kernel_dump_buffer + vithar_blocks[block];
-#else
-						const char* block_base_address = (char*)kernel_dump_buffer + vithar_blocks[block];
-#endif
-
-						/* If counter belongs to shader block need to take into account all cores */
-						if (block == SHADER_BLOCK) {
-							int i = 0;
-							int shader_core_count = 0;
-							value = 0;
-
-							for (i = 0; i < 4; i++) {
-								if ((shader_present_low >> i) & 1) {
-									value += *((u32*) (block_base_address + (0x100 * i)) + counter_offset);
-									shader_core_count++;
-								}
-							}
-
-							for (i = 0; i < 4; i++) {
-								if((shader_present_low >> (i+4)) & 1) {
-									value += *((u32*)(block_base_address + (0x100 * i) + 0x800) + counter_offset);
-									shader_core_count++;
-								}
-							}
-
-							/* Need to total by number of cores to produce an average */
-							if (shader_core_count != 0) {
-								value /= shader_core_count;
-							}
-						} else {
-							value = *((u32*)block_base_address + counter_offset);
-						}
-
-						counter_dump[len++] = counter->key;
-						counter_dump[len++] = value;
-					}
+					if (counter->enabled)
+						len += read_counter(cnt, len, counter);
 				}
 			}
 		}
@@ -853,7 +869,7 @@ static int read(int **buffer)
 		if (!kbase_device_busy) {
 			kbase_device_busy = true;
 #if MALI_DDK_GATOR_API_VERSION == 3
-			kbase_dd_instr_hwcnt_dump_irq_symbol(handles);
+			kbase_gator_instr_hwcnt_dump_irq_symbol(handles);
 #else
 			kbase_instr_hwcnt_dump_irq_symbol(kbcontext);
 #endif
@@ -861,9 +877,8 @@ static int read(int **buffer)
 	}
 
 	/* Update the buffer */
-	if (buffer) {
-		*buffer = (int *)counter_dump;
-	}
+	if (buffer)
+		*buffer = counter_dump;
 
 	return len;
 }
@@ -874,40 +889,89 @@ static int create_files(struct super_block *sb, struct dentry *root)
 	/*
 	 * Create the filesystem for all events
 	 */
-	int counter_index = 0;
-
 	for (event = 0; event < ARRAY_SIZE(mali_activity); event++) {
-		if (gator_mali_create_file_system(mali_name, mali_activity_names[event], sb, root, &mali_activity[event], NULL) != 0) {
+		if (gator_mali_create_file_system("Midgard", mali_activity_names[event], sb, root, &mali_activity[event], NULL) != 0)
 			return -1;
-		}
 	}
 
-	for (event = 0; event < NUMBER_OF_HARDWARE_COUNTERS; event++) {
-		if (gator_mali_create_file_system(mali_name, hardware_counter_names[counter_index], sb, root, &counters[event], NULL) != 0)
+	for (event = 0; event < number_of_hardware_counters; event++) {
+		if (gator_mali_create_file_system(mali_name, hardware_counter_names[event], sb, root, &counters[event], NULL) != 0)
 			return -1;
-		counter_index++;
 	}
 
 	return 0;
 }
 
-static struct gator_interface gator_events_mali_t6xx_interface = {
+static void shutdown(void)
+{
+#if MALI_DDK_GATOR_API_VERSION == 3
+	void (*kbase_gator_hwcnt_term_names_symbol)(void) = NULL;
+	int error_count = 0;
+#endif
+
+	kfree(counters);
+	kfree(counter_dump);
+
+#if MALI_DDK_GATOR_API_VERSION == 3
+	SYMBOL_GET(kbase_gator_hwcnt_term_names, error_count);
+
+	number_of_hardware_counters = -1;
+	hardware_counter_names = NULL;
+	if (kbase_gator_hwcnt_term_names_symbol != NULL) {
+		kbase_gator_hwcnt_term_names_symbol();
+		pr_err("Released symbols\n");
+	}
+
+	SYMBOL_CLEANUP(kbase_gator_hwcnt_term_names);
+#endif
+}
+
+static struct gator_interface gator_events_mali_midgard_interface = {
+	.shutdown = shutdown,
 	.create_files = create_files,
 	.start = start,
 	.stop = stop,
 	.read = read
 };
 
-int gator_events_mali_t6xx_hw_init(void)
+int gator_events_mali_midgard_hw_init(void)
 {
-	pr_debug("gator: Mali-T6xx: sw_counters init\n");
+#if MALI_DDK_GATOR_API_VERSION == 3
+	const char *const *(*kbase_gator_hwcnt_init_names_symbol)(uint32_t *) = NULL;
+	int error_count = 0;
+#endif
+
+	pr_debug("gator: Mali-Midgard: sw_counters init\n");
 
 #if GATOR_TEST
 	test_all_is_read_scheduled();
 #endif
 
-	gator_mali_initialise_counters(mali_activity, ARRAY_SIZE(mali_activity));
-	gator_mali_initialise_counters(counters, NUMBER_OF_HARDWARE_COUNTERS);
+#if MALI_DDK_GATOR_API_VERSION == 3
+	SYMBOL_GET(kbase_gator_hwcnt_init_names, error_count);
+	if (error_count > 0) {
+		SYMBOL_CLEANUP(kbase_gator_hwcnt_init_names);
+		return 1;
+	}
 
-	return gator_events_install(&gator_events_mali_t6xx_interface);
+	number_of_hardware_counters = -1;
+	hardware_counter_names = kbase_gator_hwcnt_init_names_symbol(&number_of_hardware_counters);
+
+	SYMBOL_CLEANUP(kbase_gator_hwcnt_init_names);
+
+	if ((hardware_counter_names == NULL) || (number_of_hardware_counters <= 0)) {
+		pr_err("gator: Error reading hardware counters names: got %d names\n", number_of_hardware_counters);
+		return -1;
+	}
+#else
+	mali_name = "Midgard";
+#endif
+
+	counters = kmalloc(sizeof(*counters)*number_of_hardware_counters, GFP_KERNEL);
+	counter_dump = kmalloc(sizeof(*counter_dump)*number_of_hardware_counters*2, GFP_KERNEL);
+
+	gator_mali_initialise_counters(mali_activity, ARRAY_SIZE(mali_activity));
+	gator_mali_initialise_counters(counters, number_of_hardware_counters);
+
+	return gator_events_install(&gator_events_mali_midgard_interface);
 }
diff --git a/drivers/gator/gator_events_mali_t6xx_hw_test.c b/drivers/gator/gator_events_mali_midgard_hw_test.c
similarity index 88%
rename from drivers/gator/gator_events_mali_t6xx_hw_test.c
rename to drivers/gator/gator_events_mali_midgard_hw_test.c
index ba6553f3540f..31a91e1c72b2 100644
--- a/drivers/gator/gator_events_mali_t6xx_hw_test.c
+++ b/drivers/gator/gator_events_mali_midgard_hw_test.c
@@ -23,12 +23,12 @@ static int test_is_read_scheduled(u32 s, u32 ns, u32 prev_s, s32 next_ns, int ex
 	current_time.tv_nsec = ns;
 
 	if (is_read_scheduled(&current_time, &prev_time_s, &next_read_time_ns) != expected_result) {
-		printk("Failed do_read(%u, %u, %u, %d): expected %d\n", s, ns, prev_s, next_ns, expected_result);
+		pr_err("Failed do_read(%u, %u, %u, %d): expected %d\n", s, ns, prev_s, next_ns, expected_result);
 		return 0;
 	}
 
 	if (next_read_time_ns != expected_next_ns) {
-		printk("Failed: next_read_ns expected=%d, actual=%d\n", expected_next_ns, next_read_time_ns);
+		pr_err("Failed: next_read_ns expected=%d, actual=%d\n", expected_next_ns, next_read_time_ns);
 		return 0;
 	}
 
@@ -40,7 +40,7 @@ static void test_all_is_read_scheduled(void)
 	const int HIGHEST_NS = 999999999;
 	int n_tests_passed = 0;
 
-	printk("gator: running tests on %s\n", __FILE__);
+	pr_err("gator: running tests on %s\n", __FILE__);
 
 	n_tests_passed += test_is_read_scheduled(0, 0, 0, 0, 1, READ_INTERVAL_NSEC);	/* Null time */
 	n_tests_passed += test_is_read_scheduled(100, 1000, 0, 0, 1, READ_INTERVAL_NSEC + 1000);	/* Initial values */
@@ -51,5 +51,5 @@ static void test_all_is_read_scheduled(void)
 
 	n_tests_passed += test_is_read_scheduled(101, 600, 100, HIGHEST_NS + 500, 1, 600 + READ_INTERVAL_NSEC);
 
-	printk("gator: %d tests passed\n", n_tests_passed);
+	pr_err("gator: %d tests passed\n", n_tests_passed);
 }
diff --git a/drivers/gator/gator_events_meminfo.c b/drivers/gator/gator_events_meminfo.c
index c633dfdce306..c625ac5af9cd 100644
--- a/drivers/gator/gator_events_meminfo.c
+++ b/drivers/gator/gator_events_meminfo.c
@@ -16,6 +16,8 @@
 #include <linux/workqueue.h>
 #include <trace/events/kmem.h>
 
+#define USE_THREAD defined(CONFIG_PREEMPT_RT_FULL)
+
 enum {
 	MEMINFO_MEMFREE,
 	MEMINFO_MEMUSED,
@@ -48,7 +50,7 @@ static bool meminfo_global_enabled;
 static ulong meminfo_enabled[MEMINFO_TOTAL];
 static ulong meminfo_keys[MEMINFO_TOTAL];
 static long long meminfo_buffer[2 * (MEMINFO_TOTAL + 2)];
-static int meminfo_length = 0;
+static int meminfo_length;
 static bool new_data_avail;
 
 static bool proc_global_enabled;
@@ -56,22 +58,44 @@ static ulong proc_enabled[PROC_COUNT];
 static ulong proc_keys[PROC_COUNT];
 static DEFINE_PER_CPU(long long, proc_buffer[2 * (PROC_COUNT + 3)]);
 
+#if USE_THREAD
+
 static int gator_meminfo_func(void *data);
 static bool gator_meminfo_run;
-// Initialize semaphore unlocked to initialize memory values
+/* Initialize semaphore unlocked to initialize memory values */
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
 static DECLARE_MUTEX(gator_meminfo_sem);
 #else
 static DEFINE_SEMAPHORE(gator_meminfo_sem);
 #endif
 
+static void notify(void)
+{
+	up(&gator_meminfo_sem);
+}
+
+#else
+
+static unsigned int mem_event;
+static void wq_sched_handler(struct work_struct *wsptr);
+DECLARE_WORK(work, wq_sched_handler);
+static struct timer_list meminfo_wake_up_timer;
+static void meminfo_wake_up_handler(unsigned long unused_data);
+
+static void notify(void)
+{
+	mem_event++;
+}
+
+#endif
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
 GATOR_DEFINE_PROBE(mm_page_free_direct, TP_PROTO(struct page *page, unsigned int order))
 #else
 GATOR_DEFINE_PROBE(mm_page_free, TP_PROTO(struct page *page, unsigned int order))
 #endif
 {
-	up(&gator_meminfo_sem);
+	notify();
 }
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
@@ -80,12 +104,12 @@ GATOR_DEFINE_PROBE(mm_pagevec_free, TP_PROTO(struct page *page, int cold))
 GATOR_DEFINE_PROBE(mm_page_free_batched, TP_PROTO(struct page *page, int cold))
 #endif
 {
-	up(&gator_meminfo_sem);
+	notify();
 }
 
 GATOR_DEFINE_PROBE(mm_page_alloc, TP_PROTO(struct page *page, unsigned int order, gfp_t gfp_flags, int migratetype))
 {
-	up(&gator_meminfo_sem);
+	notify();
 }
 
 static int gator_events_meminfo_create_files(struct super_block *sb, struct dentry *root)
@@ -95,18 +119,16 @@ static int gator_events_meminfo_create_files(struct super_block *sb, struct dent
 
 	for (i = 0; i < MEMINFO_TOTAL; i++) {
 		dir = gatorfs_mkdir(sb, root, meminfo_names[i]);
-		if (!dir) {
+		if (!dir)
 			return -1;
-		}
 		gatorfs_create_ulong(sb, dir, "enabled", &meminfo_enabled[i]);
 		gatorfs_create_ro_ulong(sb, dir, "key", &meminfo_keys[i]);
 	}
 
 	for (i = 0; i < PROC_COUNT; ++i) {
 		dir = gatorfs_mkdir(sb, root, proc_names[i]);
-		if (!dir) {
+		if (!dir)
 			return -1;
-		}
 		gatorfs_create_ulong(sb, dir, "enabled", &proc_enabled[i]);
 		gatorfs_create_ro_ulong(sb, dir, "key", &proc_keys[i]);
 	}
@@ -134,9 +156,8 @@ static int gator_events_meminfo_start(void)
 			break;
 		}
 	}
-	if (meminfo_enabled[MEMINFO_MEMUSED]) {
+	if (meminfo_enabled[MEMINFO_MEMUSED])
 		proc_global_enabled = 1;
-	}
 
 	if (meminfo_global_enabled == 0)
 		return 0;
@@ -156,16 +177,22 @@ static int gator_events_meminfo_start(void)
 	if (GATOR_REGISTER_TRACE(mm_page_alloc))
 		goto mm_page_alloc_exit;
 
-	// Start worker thread
+#if USE_THREAD
+	/* Start worker thread */
 	gator_meminfo_run = true;
-	// Since the mutex starts unlocked, memory values will be initialized
+	/* Since the mutex starts unlocked, memory values will be initialized */
 	if (IS_ERR(kthread_run(gator_meminfo_func, NULL, "gator_meminfo")))
 		goto kthread_run_exit;
+#else
+	setup_timer(&meminfo_wake_up_timer, meminfo_wake_up_handler, 0);
+#endif
 
 	return 0;
 
+#if USE_THREAD
 kthread_run_exit:
 	GATOR_UNREGISTER_TRACE(mm_page_alloc);
+#endif
 mm_page_alloc_exit:
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
 	GATOR_UNREGISTER_TRACE(mm_pagevec_free);
@@ -194,75 +221,111 @@ static void gator_events_meminfo_stop(void)
 #endif
 		GATOR_UNREGISTER_TRACE(mm_page_alloc);
 
-		// Stop worker thread
+#if USE_THREAD
+		/* Stop worker thread */
 		gator_meminfo_run = false;
 		up(&gator_meminfo_sem);
+#else
+		del_timer_sync(&meminfo_wake_up_timer);
+#endif
 	}
 }
 
-// Must be run in process context as the kernel function si_meminfo() can sleep
-static int gator_meminfo_func(void *data)
+static void do_read(void)
 {
 	struct sysinfo info;
 	int i, len;
 	unsigned long long value;
 
-	for (;;) {
-		if (down_killable(&gator_meminfo_sem)) {
-			break;
-		}
+	meminfo_length = len = 0;
 
-		// Eat up any pending events
-		while (!down_trylock(&gator_meminfo_sem));
-
-		if (!gator_meminfo_run) {
-			break;
-		}
-
-		meminfo_length = len = 0;
-
-		si_meminfo(&info);
-		for (i = 0; i < MEMINFO_TOTAL; i++) {
-			if (meminfo_enabled[i]) {
-				switch (i) {
-				case MEMINFO_MEMFREE:
-					value = info.freeram * PAGE_SIZE;
-					break;
-				case MEMINFO_MEMUSED:
-					// pid -1 means system wide
-					meminfo_buffer[len++] = 1;
-					meminfo_buffer[len++] = -1;
-					// Emit value
-					meminfo_buffer[len++] = meminfo_keys[MEMINFO_MEMUSED];
-					meminfo_buffer[len++] = (info.totalram - info.freeram) * PAGE_SIZE;
-					// Clear pid
-					meminfo_buffer[len++] = 1;
-					meminfo_buffer[len++] = 0;
-					continue;
-				case MEMINFO_BUFFERRAM:
-					value = info.bufferram * PAGE_SIZE;
-					break;
-				default:
-					value = 0;
-					break;
-				}
-				meminfo_buffer[len++] = meminfo_keys[i];
-				meminfo_buffer[len++] = value;
+	si_meminfo(&info);
+	for (i = 0; i < MEMINFO_TOTAL; i++) {
+		if (meminfo_enabled[i]) {
+			switch (i) {
+			case MEMINFO_MEMFREE:
+				value = info.freeram * PAGE_SIZE;
+				break;
+			case MEMINFO_MEMUSED:
+				/* pid -1 means system wide */
+				meminfo_buffer[len++] = 1;
+				meminfo_buffer[len++] = -1;
+				/* Emit value */
+				meminfo_buffer[len++] = meminfo_keys[MEMINFO_MEMUSED];
+				meminfo_buffer[len++] = (info.totalram - info.freeram) * PAGE_SIZE;
+				/* Clear pid */
+				meminfo_buffer[len++] = 1;
+				meminfo_buffer[len++] = 0;
+				continue;
+			case MEMINFO_BUFFERRAM:
+				value = info.bufferram * PAGE_SIZE;
+				break;
+			default:
+				value = 0;
+				break;
 			}
+			meminfo_buffer[len++] = meminfo_keys[i];
+			meminfo_buffer[len++] = value;
 		}
+	}
 
-		meminfo_length = len;
-		new_data_avail = true;
+	meminfo_length = len;
+	new_data_avail = true;
+}
+
+#if USE_THREAD
+
+static int gator_meminfo_func(void *data)
+{
+	for (;;) {
+		if (down_killable(&gator_meminfo_sem))
+			break;
+
+		/* Eat up any pending events */
+		while (!down_trylock(&gator_meminfo_sem))
+			;
+
+		if (!gator_meminfo_run)
+			break;
+
+		do_read();
 	}
 
 	return 0;
 }
 
+#else
+
+/* Must be run in process context as the kernel function si_meminfo() can sleep */
+static void wq_sched_handler(struct work_struct *wsptr)
+{
+	do_read();
+}
+
+static void meminfo_wake_up_handler(unsigned long unused_data)
+{
+	/* had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater */
+	schedule_work(&work);
+}
+
+#endif
+
 static int gator_events_meminfo_read(long long **buffer)
 {
+#if !USE_THREAD
+	static unsigned int last_mem_event;
+#endif
+
 	if (!on_primary_core() || !meminfo_global_enabled)
 		return 0;
 
+#if !USE_THREAD
+	if (last_mem_event != mem_event) {
+		last_mem_event = mem_event;
+		mod_timer(&meminfo_wake_up_timer, jiffies + 1);
+	}
+#endif
+
 	if (!new_data_avail)
 		return 0;
 
@@ -280,6 +343,7 @@ static inline unsigned long gator_get_mm_counter(struct mm_struct *mm, int membe
 {
 #ifdef SPLIT_RSS_COUNTING
 	long val = atomic_long_read(&mm->rss_stat.count[member]);
+
 	if (val < 0)
 		val = 0;
 	return (unsigned long)val;
@@ -306,22 +370,19 @@ static int gator_events_meminfo_read_proc(long long **buffer, struct task_struct
 	int cpu = get_physical_cpu();
 	long long *buf = per_cpu(proc_buffer, cpu);
 
-	if (!proc_global_enabled) {
+	if (!proc_global_enabled)
 		return 0;
-	}
 
-	// Collect the memory stats of the process instead of the thread
-	if (task->group_leader != NULL) {
+	/* Collect the memory stats of the process instead of the thread */
+	if (task->group_leader != NULL)
 		task = task->group_leader;
-	}
 
-	// get_task_mm/mmput is not needed in this context because the task and it's mm are required as part of the sched_switch
+	/* get_task_mm/mmput is not needed in this context because the task and its mm are required as part of the sched_switch */
 	mm = task->mm;
-	if (mm == NULL) {
+	if (mm == NULL)
 		return 0;
-	}
 
-	// Derived from task_statm in fs/proc/task_mmu.c
+	/* Derived from task_statm in fs/proc/task_mmu.c */
 	if (meminfo_enabled[MEMINFO_MEMUSED] || proc_enabled[PROC_SHARE]) {
 		share = get_mm_counter(mm,
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32) && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)
@@ -332,7 +393,7 @@ static int gator_events_meminfo_read_proc(long long **buffer, struct task_struct
 							   );
 	}
 
-	// key of 1 indicates a pid
+	/* key of 1 indicates a pid */
 	buf[len++] = 1;
 	buf[len++] = task->pid;
 
@@ -366,12 +427,12 @@ static int gator_events_meminfo_read_proc(long long **buffer, struct task_struct
 									   MM_ANONPAGES
 #endif
 									   );
-		// Send resident for this pid
+		/* Send resident for this pid */
 		buf[len++] = meminfo_keys[MEMINFO_MEMUSED];
 		buf[len++] = value * PAGE_SIZE;
 	}
 
-	// Clear pid
+	/* Clear pid */
 	buf[len++] = 1;
 	buf[len++] = 0;
 
diff --git a/drivers/gator/gator_events_mmapped.c b/drivers/gator/gator_events_mmapped.c
index 5bc01c42c3a2..6b2af995ed41 100644
--- a/drivers/gator/gator_events_mmapped.c
+++ b/drivers/gator/gator_events_mmapped.c
@@ -103,7 +103,7 @@ static int mmapped_simulate(int counter, int delta_in_us)
 	switch (counter) {
 	case 0:		/* sort-of-sine */
 		{
-			static int t = 0;
+			static int t;
 			int x;
 
 			t += delta_in_us;
@@ -140,7 +140,7 @@ static int mmapped_simulate(int counter, int delta_in_us)
 		break;
 	case 2:		/* PWM signal */
 		{
-			static int dc, x, t = 0;
+			static int dc, x, t;
 
 			t += delta_in_us;
 			if (t > 1000000)
@@ -157,7 +157,7 @@ static int mmapped_simulate(int counter, int delta_in_us)
 	return result;
 }
 
-static int gator_events_mmapped_read(int **buffer)
+static int gator_events_mmapped_read(int **buffer, bool sched_switch)
 {
 	int i;
 	int len = 0;
diff --git a/drivers/gator/gator_events_net.c b/drivers/gator/gator_events_net.c
index 11c10e375511..d21b4db7b77c 100644
--- a/drivers/gator/gator_events_net.c
+++ b/drivers/gator/gator_events_net.c
@@ -25,7 +25,7 @@ static int netGet[TOTALNET * 4];
 
 static struct timer_list net_wake_up_timer;
 
-// Must be run in process context as the kernel function dev_get_stats() can sleep
+/* Must be run in process context as the kernel function dev_get_stats() can sleep */
 static void get_network_stats(struct work_struct *wsptr)
 {
 	int rx = 0, tx = 0;
@@ -49,7 +49,7 @@ DECLARE_WORK(wq_get_stats, get_network_stats);
 
 static void net_wake_up_handler(unsigned long unused_data)
 {
-	// had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater
+	/* had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater */
 	schedule_work(&wq_get_stats);
 }
 
@@ -73,21 +73,19 @@ static void calculate_delta(int *rx, int *tx)
 
 static int gator_events_net_create_files(struct super_block *sb, struct dentry *root)
 {
-	// Network counters are not currently supported in RT-Preempt full because mod_timer is used
+	/* Network counters are not currently supported in RT-Preempt full because mod_timer is used */
 #ifndef CONFIG_PREEMPT_RT_FULL
 	struct dentry *dir;
 
 	dir = gatorfs_mkdir(sb, root, "Linux_net_rx");
-	if (!dir) {
+	if (!dir)
 		return -1;
-	}
 	gatorfs_create_ulong(sb, dir, "enabled", &netrx_enabled);
 	gatorfs_create_ro_ulong(sb, dir, "key", &netrx_key);
 
 	dir = gatorfs_mkdir(sb, root, "Linux_net_tx");
-	if (!dir) {
+	if (!dir)
 		return -1;
-	}
 	gatorfs_create_ulong(sb, dir, "enabled", &nettx_enabled);
 	gatorfs_create_ro_ulong(sb, dir, "key", &nettx_key);
 #endif
@@ -115,10 +113,10 @@ static void gator_events_net_stop(void)
 	nettx_enabled = 0;
 }
 
-static int gator_events_net_read(int **buffer)
+static int gator_events_net_read(int **buffer, bool sched_switch)
 {
 	int len, rx_delta, tx_delta;
-	static int last_rx_delta = 0, last_tx_delta = 0;
+	static int last_rx_delta, last_tx_delta;
 
 	if (!on_primary_core())
 		return 0;
@@ -134,7 +132,8 @@ static int gator_events_net_read(int **buffer)
 	if (netrx_enabled && last_rx_delta != rx_delta) {
 		last_rx_delta = rx_delta;
 		netGet[len++] = netrx_key;
-		netGet[len++] = 0;	// indicates to Streamline that rx_delta bytes were transmitted now, not since the last message
+		/* indicates to Streamline that rx_delta bytes were received now, not since the last message */
+		netGet[len++] = 0;
 		netGet[len++] = netrx_key;
 		netGet[len++] = rx_delta;
 	}
@@ -142,7 +141,8 @@ static int gator_events_net_read(int **buffer)
 	if (nettx_enabled && last_tx_delta != tx_delta) {
 		last_tx_delta = tx_delta;
 		netGet[len++] = nettx_key;
-		netGet[len++] = 0;	// indicates to Streamline that tx_delta bytes were transmitted now, not since the last message
+		/* indicates to Streamline that tx_delta bytes were transmitted now, not since the last message */
+		netGet[len++] = 0;
 		netGet[len++] = nettx_key;
 		netGet[len++] = tx_delta;
 	}
diff --git a/drivers/gator/gator_events_perf_pmu.c b/drivers/gator/gator_events_perf_pmu.c
index 06bbad5b10c3..47cf278e508b 100644
--- a/drivers/gator/gator_events_perf_pmu.c
+++ b/drivers/gator/gator_events_perf_pmu.c
@@ -8,7 +8,7 @@
 
 #include "gator.h"
 
-// gator_events_armvX.c is used for Linux 2.6.x
+/* gator_events_armvX.c is used for Linux 2.6.x */
 #if GATOR_PERF_PMU_SUPPORT
 
 #include <linux/io.h>
@@ -20,39 +20,41 @@
 
 extern bool event_based_sampling;
 
-// Maximum number of per-core counters - currently reserves enough space for two full hardware PMUs for big.LITTLE
+/* Maximum number of per-core counters - currently reserves enough space for two full hardware PMUs for big.LITTLE */
 #define CNTMAX 16
 #define CCI_400 4
-// Maximum number of uncore counters
-// + 1 for the cci-400 cycles counter
-#define UCCNT (CCI_400 + 1)
+#define CCN_5XX 8
+/* Maximum number of uncore counters */
+/* + 1 for the cci-400 cycles counter */
+/* + 1 for the CCN-5xx cycles counter */
+#define UCCNT (CCI_400 + 1 + CCN_5XX + 1)
 
-// Default to 0 if unable to probe the revision which was the previous behavior
+/* Default to 0 if unable to probe the revision which was the previous behavior */
 #define DEFAULT_CCI_REVISION 0
 
-// A gator_attr is needed for every counter
+/* A gator_attr is needed for every counter */
 struct gator_attr {
-	// Set once in gator_events_perf_pmu_*_init - the name of the event in the gatorfs
+	/* Set once in gator_events_perf_pmu_*_init - the name of the event in the gatorfs */
 	char name[40];
-	// Exposed in gatorfs - set by gatord to enable this counter
+	/* Exposed in gatorfs - set by gatord to enable this counter */
 	unsigned long enabled;
-	// Set once in gator_events_perf_pmu_*_init - the perf type to use, see perf_type_id in the perf_event.h header file.
+	/* Set once in gator_events_perf_pmu_*_init - the perf type to use, see perf_type_id in the perf_event.h header file. */
 	unsigned long type;
-	// Exposed in gatorfs - set by gatord to select the event to collect
+	/* Exposed in gatorfs - set by gatord to select the event to collect */
 	unsigned long event;
-	// Exposed in gatorfs - set by gatord with the sample period to use and enable EBS for this counter
+	/* Exposed in gatorfs - set by gatord with the sample period to use and enable EBS for this counter */
 	unsigned long count;
-	// Exposed as read only in gatorfs - set once in __attr_init as the key to use in the APC data
+	/* Exposed as read only in gatorfs - set once in __attr_init as the key to use in the APC data */
 	unsigned long key;
 };
 
-// Per-core counter attributes
+/* Per-core counter attributes */
 static struct gator_attr attrs[CNTMAX];
-// Number of initialized per-core counters
+/* Number of initialized per-core counters */
 static int attr_count;
-// Uncore counter attributes
+/* Uncore counter attributes */
 static struct gator_attr uc_attrs[UCCNT];
-// Number of initialized uncore counters
+/* Number of initialized uncore counters */
 static int uc_attr_count;
 
 struct gator_event {
@@ -74,13 +76,11 @@ static int __create_files(struct super_block *sb, struct dentry *root, struct ga
 {
 	struct dentry *dir;
 
-	if (attr->name[0] == '\0') {
+	if (attr->name[0] == '\0')
 		return 0;
-	}
 	dir = gatorfs_mkdir(sb, root, attr->name);
-	if (!dir) {
+	if (!dir)
 		return -1;
-	}
 	gatorfs_create_ulong(sb, dir, "enabled", &attr->enabled);
 	gatorfs_create_ulong(sb, dir, "count", &attr->count);
 	gatorfs_create_ro_ulong(sb, dir, "key", &attr->key);
@@ -94,15 +94,13 @@ static int gator_events_perf_pmu_create_files(struct super_block *sb, struct den
 	int cnt;
 
 	for (cnt = 0; cnt < attr_count; cnt++) {
-		if (__create_files(sb, root, &attrs[cnt]) != 0) {
+		if (__create_files(sb, root, &attrs[cnt]) != 0)
 			return -1;
-		}
 	}
 
 	for (cnt = 0; cnt < uc_attr_count; cnt++) {
-		if (__create_files(sb, root, &uc_attrs[cnt]) != 0) {
+		if (__create_files(sb, root, &uc_attrs[cnt]) != 0)
 			return -1;
-		}
 	}
 
 	return 0;
@@ -123,14 +121,14 @@ static void dummy_handler(struct perf_event *event, int unused, struct perf_samp
 static void dummy_handler(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs)
 #endif
 {
-// Required as perf_event_create_kernel_counter() requires an overflow handler, even though all we do is poll
+	/* Required as perf_event_create_kernel_counter() requires an overflow handler, even though all we do is poll */
 }
 
-static int gator_events_perf_pmu_read(int **buffer);
+static int gator_events_perf_pmu_read(int **buffer, bool sched_switch);
 
 static int gator_events_perf_pmu_online(int **buffer, bool migrate)
 {
-	return gator_events_perf_pmu_read(buffer);
+	return gator_events_perf_pmu_read(buffer, false);
 }
 
 static void __online_dispatch(int cpu, bool migrate, struct gator_attr *const attr, struct gator_event *const event)
@@ -139,15 +137,13 @@ static void __online_dispatch(int cpu, bool migrate, struct gator_attr *const at
 
 	event->zero = true;
 
-	if (event->pevent != NULL || event->pevent_attr == 0 || migrate) {
+	if (event->pevent != NULL || event->pevent_attr == 0 || migrate)
 		return;
-	}
 
-	if (attr->count > 0) {
+	if (attr->count > 0)
 		handler = ebs_overflow_handler;
-	} else {
+	else
 		handler = dummy_handler;
-	}
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
 	event->pevent = perf_event_create_kernel_counter(event->pevent_attr, cpu, 0, handler);
@@ -174,14 +170,12 @@ static void gator_events_perf_pmu_online_dispatch(int cpu, bool migrate)
 
 	cpu = pcpu_to_lcpu(cpu);
 
-	for (cnt = 0; cnt < attr_count; cnt++) {
+	for (cnt = 0; cnt < attr_count; cnt++)
 		__online_dispatch(cpu, migrate, &attrs[cnt], &per_cpu(events, cpu)[cnt]);
-	}
 
 	if (cpu == 0) {
-		for (cnt = 0; cnt < uc_attr_count; cnt++) {
+		for (cnt = 0; cnt < uc_attr_count; cnt++)
 			__online_dispatch(cpu, migrate, &uc_attrs[cnt], &uc_events[cnt]);
-		}
 	}
 }
 
@@ -194,28 +188,24 @@ static void __offline_dispatch(int cpu, struct gator_event *const event)
 		event->pevent = NULL;
 	}
 
-	if (pe) {
+	if (pe)
 		perf_event_release_kernel(pe);
-	}
 }
 
 static void gator_events_perf_pmu_offline_dispatch(int cpu, bool migrate)
 {
 	int cnt;
 
-	if (migrate) {
+	if (migrate)
 		return;
-	}
 	cpu = pcpu_to_lcpu(cpu);
 
-	for (cnt = 0; cnt < attr_count; cnt++) {
+	for (cnt = 0; cnt < attr_count; cnt++)
 		__offline_dispatch(cpu, &per_cpu(events, cpu)[cnt]);
-	}
 
 	if (cpu == 0) {
-		for (cnt = 0; cnt < uc_attr_count; cnt++) {
+		for (cnt = 0; cnt < uc_attr_count; cnt++)
 			__offline_dispatch(cpu, &uc_events[cnt]);
-		}
 	}
 }
 
@@ -225,7 +215,7 @@ static int __check_ebs(struct gator_attr *const attr)
 		if (!event_based_sampling) {
 			event_based_sampling = true;
 		} else {
-			printk(KERN_WARNING "gator: Only one ebs counter is allowed\n");
+			pr_warning("gator: Only one ebs counter is allowed\n");
 			return -1;
 		}
 	}
@@ -238,9 +228,9 @@ static int __start(struct gator_attr *const attr, struct gator_event *const even
 	u32 size = sizeof(struct perf_event_attr);
 
 	event->pevent = NULL;
-	if (!attr->enabled) {	// Skip disabled counters
+	/* Skip disabled counters */
+	if (!attr->enabled)
 		return 0;
-	}
 
 	event->prev = 0;
 	event->curr = 0;
@@ -267,29 +257,25 @@ static int gator_events_perf_pmu_start(void)
 
 	event_based_sampling = false;
 	for (cnt = 0; cnt < attr_count; cnt++) {
-		if (__check_ebs(&attrs[cnt]) != 0) {
+		if (__check_ebs(&attrs[cnt]) != 0)
 			return -1;
-		}
 	}
 
 	for (cnt = 0; cnt < uc_attr_count; cnt++) {
-		if (__check_ebs(&uc_attrs[cnt]) != 0) {
+		if (__check_ebs(&uc_attrs[cnt]) != 0)
 			return -1;
-		}
 	}
 
 	for_each_present_cpu(cpu) {
 		for (cnt = 0; cnt < attr_count; cnt++) {
-			if (__start(&attrs[cnt], &per_cpu(events, cpu)[cnt]) != 0) {
+			if (__start(&attrs[cnt], &per_cpu(events, cpu)[cnt]) != 0)
 				return -1;
-			}
 		}
 	}
 
 	for (cnt = 0; cnt < uc_attr_count; cnt++) {
-		if (__start(&uc_attrs[cnt], &uc_events[cnt]) != 0) {
+		if (__start(&uc_attrs[cnt], &uc_events[cnt]) != 0)
 			return -1;
-		}
 	}
 
 	return 0;
@@ -297,10 +283,8 @@ static int gator_events_perf_pmu_start(void)
 
 static void __event_stop(struct gator_event *const event)
 {
-	if (event->pevent_attr) {
-		kfree(event->pevent_attr);
-		event->pevent_attr = NULL;
-	}
+	kfree(event->pevent_attr);
+	event->pevent_attr = NULL;
 }
 
 static void __attr_stop(struct gator_attr *const attr)
@@ -315,29 +299,25 @@ static void gator_events_perf_pmu_stop(void)
 	unsigned int cnt, cpu;
 
 	for_each_present_cpu(cpu) {
-		for (cnt = 0; cnt < attr_count; cnt++) {
+		for (cnt = 0; cnt < attr_count; cnt++)
 			__event_stop(&per_cpu(events, cpu)[cnt]);
-		}
 	}
 
-	for (cnt = 0; cnt < uc_attr_count; cnt++) {
+	for (cnt = 0; cnt < uc_attr_count; cnt++)
 		__event_stop(&uc_events[cnt]);
-	}
 
-	for (cnt = 0; cnt < attr_count; cnt++) {
+	for (cnt = 0; cnt < attr_count; cnt++)
 		__attr_stop(&attrs[cnt]);
-	}
 
-	for (cnt = 0; cnt < uc_attr_count; cnt++) {
+	for (cnt = 0; cnt < uc_attr_count; cnt++)
 		__attr_stop(&uc_attrs[cnt]);
-	}
 }
 
 static void __read(int *const len, int cpu, struct gator_attr *const attr, struct gator_event *const event)
 {
 	int delta;
-
 	struct perf_event *const ev = event->pevent;
+
 	if (ev != NULL && ev->state == PERF_EVENT_STATE_ACTIVE) {
 		/* After creating the perf counter in __online_dispatch, there
 		 * is a race condition between gator_events_perf_pmu_online and
@@ -361,33 +341,29 @@ static void __read(int *const len, int cpu, struct gator_attr *const attr, struc
 				event->prev_delta = delta;
 				event->prev = event->curr;
 				per_cpu(perf_cnt, cpu)[(*len)++] = attr->key;
-				if (delta < 0) {
+				if (delta < 0)
 					delta *= -1;
-				}
 				per_cpu(perf_cnt, cpu)[(*len)++] = delta;
 			}
 		}
 	}
 }
 
-static int gator_events_perf_pmu_read(int **buffer)
+static int gator_events_perf_pmu_read(int **buffer, bool sched_switch)
 {
 	int cnt, len = 0;
 	const int cpu = get_logical_cpu();
 
-	for (cnt = 0; cnt < attr_count; cnt++) {
+	for (cnt = 0; cnt < attr_count; cnt++)
 		__read(&len, cpu, &attrs[cnt], &per_cpu(events, cpu)[cnt]);
-	}
 
 	if (cpu == 0) {
-		for (cnt = 0; cnt < uc_attr_count; cnt++) {
+		for (cnt = 0; cnt < uc_attr_count; cnt++)
 			__read(&len, cpu, &uc_attrs[cnt], &uc_events[cnt]);
-		}
 	}
 
-	if (buffer) {
+	if (buffer)
 		*buffer = per_cpu(perf_cnt, cpu);
-	}
 
 	return len;
 }
@@ -428,23 +404,20 @@ static int probe_cci_revision(void)
 	int ret = DEFAULT_CCI_REVISION;
 
 	np = of_find_matching_node(NULL, arm_cci_matches);
-	if (!np) {
+	if (!np)
 		return ret;
-	}
 
-	if (of_address_to_resource(np, 0, &res)) {
+	if (of_address_to_resource(np, 0, &res))
 		goto node_put;
-	}
 
 	cci_ctrl_base = ioremap(res.start, resource_size(&res));
 
 	rev = (readl_relaxed(cci_ctrl_base + 0xfe8) >> 4) & 0xf;
 
-	if (rev <= 4) {
+	if (rev <= 4)
 		ret = 0;
-	} else if (rev <= 6) {
+	else if (rev <= 6)
 		ret = 1;
-	}
 
 	iounmap(cci_ctrl_base);
 
@@ -463,9 +436,24 @@ static int probe_cci_revision(void)
 
 #endif
 
-static void gator_events_perf_pmu_cci_init(const int type)
+static void gator_events_perf_pmu_uncore_init(const char *const name, const int type, const int count)
 {
 	int cnt;
+
+	snprintf(uc_attrs[uc_attr_count].name, sizeof(uc_attrs[uc_attr_count].name), "%s_ccnt", name);
+	uc_attrs[uc_attr_count].type = type;
+	++uc_attr_count;
+
+	for (cnt = 0; cnt < count; ++cnt, ++uc_attr_count) {
+		struct gator_attr *const attr = &uc_attrs[uc_attr_count];
+
+		snprintf(attr->name, sizeof(attr->name), "%s_cnt%d", name, cnt);
+		attr->type = type;
+	}
+}
+
+static void gator_events_perf_pmu_cci_init(const int type)
+{
 	const char *cci_name;
 
 	switch (probe_cci_revision()) {
@@ -480,15 +468,7 @@ static void gator_events_perf_pmu_cci_init(const int type)
 		return;
 	}
 
-	snprintf(uc_attrs[uc_attr_count].name, sizeof(uc_attrs[uc_attr_count].name), "%s_ccnt", cci_name);
-	uc_attrs[uc_attr_count].type = type;
-	++uc_attr_count;
-
-	for (cnt = 0; cnt < CCI_400; ++cnt, ++uc_attr_count) {
-		struct gator_attr *const attr = &uc_attrs[uc_attr_count];
-		snprintf(attr->name, sizeof(attr->name), "%s_cnt%d", cci_name, cnt);
-		attr->type = type;
-	}
+	gator_events_perf_pmu_uncore_init(cci_name, type, CCI_400);
 }
 
 static void gator_events_perf_pmu_cpu_init(const struct gator_cpu *const gator_cpu, const int type)
@@ -501,6 +481,7 @@ static void gator_events_perf_pmu_cpu_init(const struct gator_cpu *const gator_c
 
 	for (cnt = 0; cnt < gator_cpu->pmnc_counters; ++cnt, ++attr_count) {
 		struct gator_attr *const attr = &attrs[attr_count];
+
 		snprintf(attr->name, sizeof(attr->name), "%s_cnt%d", gator_cpu->pmnc_name, cnt);
 		attr->type = type;
 	}
@@ -516,12 +497,10 @@ int gator_events_perf_pmu_init(void)
 	int cnt;
 	bool found_cpu = false;
 
-	for (cnt = 0; cnt < CNTMAX; cnt++) {
+	for (cnt = 0; cnt < CNTMAX; cnt++)
 		__attr_init(&attrs[cnt]);
-	}
-	for (cnt = 0; cnt < UCCNT; cnt++) {
+	for (cnt = 0; cnt < UCCNT; cnt++)
 		__attr_init(&uc_attrs[cnt]);
-	}
 
 	memset(&pea, 0, sizeof(pea));
 	pea.size = sizeof(pea);
@@ -531,7 +510,7 @@ int gator_events_perf_pmu_init(void)
 	for (type = PERF_TYPE_MAX; type < 0x20; ++type) {
 		pea.type = type;
 
-		// A particular PMU may work on some but not all cores, so try on each core
+		/* A particular PMU may work on some but not all cores, so try on each core */
 		pe = NULL;
 		for_each_present_cpu(cpu) {
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
@@ -539,23 +518,31 @@ int gator_events_perf_pmu_init(void)
 #else
 			pe = perf_event_create_kernel_counter(&pea, cpu, 0, dummy_handler, 0);
 #endif
-			if (!IS_ERR(pe)) {
+			if (!IS_ERR(pe))
 				break;
-			}
 		}
-		// Assume that valid PMUs are contiguous
+		/* Assume that valid PMUs are contiguous */
 		if (IS_ERR(pe)) {
-			break;
+			pea.config = 0xff00;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+			pe = perf_event_create_kernel_counter(&pea, 0, 0, dummy_handler);
+#else
+			pe = perf_event_create_kernel_counter(&pea, 0, 0, dummy_handler, 0);
+#endif
+			if (IS_ERR(pe))
+				break;
 		}
 
 		if (pe->pmu != NULL && type == pe->pmu->type) {
 			if (strcmp("CCI", pe->pmu->name) == 0 || strcmp("CCI_400", pe->pmu->name) == 0 || strcmp("CCI_400-r1", pe->pmu->name) == 0) {
 				gator_events_perf_pmu_cci_init(type);
+			} else if (strcmp("ccn", pe->pmu->name) == 0) {
+				gator_events_perf_pmu_uncore_init("ARM_CCN_5XX", type, CCN_5XX);
 			} else if ((gator_cpu = gator_find_cpu_by_pmu_name(pe->pmu->name)) != NULL) {
 				found_cpu = true;
 				gator_events_perf_pmu_cpu_init(gator_cpu, type);
 			}
-			// Initialize gator_attrs for dynamic PMUs here
+			/* Initialize gator_attrs for dynamic PMUs here */
 		}
 
 		perf_event_release_kernel(pe);
@@ -563,21 +550,21 @@ int gator_events_perf_pmu_init(void)
 
 	if (!found_cpu) {
 		const struct gator_cpu *const gator_cpu = gator_find_cpu_by_cpuid(gator_cpuid());
-		if (gator_cpu == NULL) {
+
+		if (gator_cpu == NULL)
 			return -1;
-		}
 		gator_events_perf_pmu_cpu_init(gator_cpu, PERF_TYPE_RAW);
 	}
 
-	// Initialize gator_attrs for non-dynamic PMUs here
+	/* Initialize gator_attrs for non-dynamic PMUs here */
 
 	if (attr_count > CNTMAX) {
-		printk(KERN_ERR "gator: Too many perf counters\n");
+		pr_err("gator: Too many perf counters\n");
 		return -1;
 	}
 
 	if (uc_attr_count > UCCNT) {
-		printk(KERN_ERR "gator: Too many perf uncore counters\n");
+		pr_err("gator: Too many perf uncore counters\n");
 		return -1;
 	}
 
diff --git a/drivers/gator/gator_events_sched.c b/drivers/gator/gator_events_sched.c
index 9e3915830182..637107d6af1d 100644
--- a/drivers/gator/gator_events_sched.c
+++ b/drivers/gator/gator_events_sched.c
@@ -26,8 +26,9 @@ GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_
 {
 	unsigned long flags;
 
-	// disable interrupts to synchronize with gator_events_sched_read()
-	// spinlocks not needed since percpu buffers are used
+	/* disable interrupts to synchronize with gator_events_sched_read()
+	 * spinlocks not needed since percpu buffers are used
+	 */
 	local_irq_save(flags);
 	per_cpu(schedCnt, get_physical_cpu())[SCHED_SWITCH]++;
 	local_irq_restore(flags);
@@ -39,9 +40,8 @@ static int gator_events_sched_create_files(struct super_block *sb, struct dentry
 
 	/* switch */
 	dir = gatorfs_mkdir(sb, root, "Linux_sched_switch");
-	if (!dir) {
+	if (!dir)
 		return -1;
-	}
 	gatorfs_create_ulong(sb, dir, "enabled", &sched_switch_enabled);
 	gatorfs_create_ro_ulong(sb, dir, "key", &sched_switch_key);
 
@@ -50,7 +50,7 @@ static int gator_events_sched_create_files(struct super_block *sb, struct dentry
 
 static int gator_events_sched_start(void)
 {
-	// register tracepoints
+	/* register tracepoints */
 	if (sched_switch_enabled)
 		if (GATOR_REGISTER_TRACE(sched_switch))
 			goto sched_switch_exit;
@@ -58,7 +58,7 @@ static int gator_events_sched_start(void)
 
 	return 0;
 
-	// unregister tracepoints on error
+	/* unregister tracepoints on error */
 sched_switch_exit:
 	pr_err("gator: scheduler event tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
 
@@ -74,7 +74,7 @@ static void gator_events_sched_stop(void)
 	sched_switch_enabled = 0;
 }
 
-static int gator_events_sched_read(int **buffer)
+static int gator_events_sched_read(int **buffer, bool sched_switch)
 {
 	unsigned long flags;
 	int len, value;
diff --git a/drivers/gator/gator_events_scorpion.c b/drivers/gator/gator_events_scorpion.c
index 2e5be8d50e9d..49219362db09 100644
--- a/drivers/gator/gator_events_scorpion.c
+++ b/drivers/gator/gator_events_scorpion.c
@@ -8,13 +8,13 @@
 
 #include "gator.h"
 
-// gator_events_perf_pmu.c is used if perf is supported
+/* gator_events_perf_pmu.c is used if perf is supported */
 #if GATOR_NO_PERF_SUPPORT
 
 static const char *pmnc_name;
 static int pmnc_counters;
 
-// Per-CPU PMNC: config reg
+/* Per-CPU PMNC: config reg */
 #define PMNC_E		(1 << 0)	/* Enable all counters */
 #define PMNC_P		(1 << 1)	/* Reset all counters */
 #define PMNC_C		(1 << 2)	/* Cycle counter reset */
@@ -23,7 +23,7 @@ static int pmnc_counters;
 #define PMNC_DP		(1 << 5)	/* Disable CCNT if non-invasive debug */
 #define	PMNC_MASK	0x3f	/* Mask for writable bits */
 
-// ccnt reg
+/* ccnt reg */
 #define CCNT_REG	(1 << 31)
 
 #define CCNT		0
@@ -243,6 +243,7 @@ static inline void scorpion_pmnc_write(u32 val)
 static inline u32 scorpion_pmnc_read(void)
 {
 	u32 val;
+
 	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
 	return val;
 }
@@ -250,6 +251,7 @@ static inline u32 scorpion_pmnc_read(void)
 static inline u32 scorpion_ccnt_read(void)
 {
 	u32 val;
+
 	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
 	return val;
 }
@@ -257,6 +259,7 @@ static inline u32 scorpion_ccnt_read(void)
 static inline u32 scorpion_cntn_read(void)
 {
 	u32 val;
+
 	asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
 	return val;
 }
@@ -317,6 +320,7 @@ static inline int scorpion_pmnc_select_counter(unsigned int cnt)
 static u32 scorpion_read_lpm0(void)
 {
 	u32 val;
+
 	asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val));
 	return val;
 }
@@ -329,6 +333,7 @@ static void scorpion_write_lpm0(u32 val)
 static u32 scorpion_read_lpm1(void)
 {
 	u32 val;
+
 	asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val));
 	return val;
 }
@@ -341,6 +346,7 @@ static void scorpion_write_lpm1(u32 val)
 static u32 scorpion_read_lpm2(void)
 {
 	u32 val;
+
 	asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val));
 	return val;
 }
@@ -353,6 +359,7 @@ static void scorpion_write_lpm2(u32 val)
 static u32 scorpion_read_l2lpm(void)
 {
 	u32 val;
+
 	asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val));
 	return val;
 }
@@ -365,6 +372,7 @@ static void scorpion_write_l2lpm(u32 val)
 static u32 scorpion_read_vlpm(void)
 {
 	u32 val;
+
 	asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val));
 	return val;
 }
@@ -375,7 +383,7 @@ static void scorpion_write_vlpm(u32 val)
 }
 
 struct scorpion_access_funcs {
-	u32(*read)(void);
+	u32 (*read)(void);
 	void (*write)(u32);
 };
 
@@ -420,17 +428,17 @@ static u32 scorpion_get_columnmask(u32 setval)
 {
 	if (setval & COLMN0MASK)
 		return 0xffffff00;
-	else if (setval & COLMN1MASK)
+	if (setval & COLMN1MASK)
 		return 0xffff00ff;
-	else if (setval & COLMN2MASK)
+	if (setval & COLMN2MASK)
 		return 0xff00ffff;
-	else
-		return 0x80ffffff;
+	return 0x80ffffff;
 }
 
 static void scorpion_evt_setup(u32 gr, u32 setval)
 {
 	u32 val;
+
 	if (gr == 4)
 		scorpion_pre_vlpm();
 	val = scorpion_get_columnmask(setval) & scor_func[gr].read();
@@ -443,6 +451,7 @@ static void scorpion_evt_setup(u32 gr, u32 setval)
 static int get_scorpion_evtinfo(unsigned int evt_type, struct scorp_evt *evtinfo)
 {
 	u32 idx;
+
 	if ((evt_type < 0x4c) || (evt_type >= MSM_MAX_EVT))
 		return 0;
 	idx = evt_type - 0x4c;
@@ -463,7 +472,7 @@ static inline void scorpion_pmnc_write_evtsel(unsigned int cnt, u32 val)
 		} else {
 			u32 zero = 0;
 			struct scorp_evt evtinfo;
-			// extract evtinfo.grp and evtinfo.tevt_type_act from val
+			/* extract evtinfo.grp and evtinfo.tevt_type_act from val */
 			if (get_scorpion_evtinfo(val, &evtinfo) == 0)
 				return;
 			asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (evtinfo.evt_type_act));
@@ -505,20 +514,18 @@ static int gator_events_scorpion_create_files(struct super_block *sb, struct den
 
 	for (i = 0; i < pmnc_counters; i++) {
 		char buf[40];
-		if (i == 0) {
-			snprintf(buf, sizeof buf, "%s_ccnt", pmnc_name);
-		} else {
-			snprintf(buf, sizeof buf, "%s_cnt%d", pmnc_name, i - 1);
-		}
+
+		if (i == 0)
+			snprintf(buf, sizeof(buf), "%s_ccnt", pmnc_name);
+		else
+			snprintf(buf, sizeof(buf), "%s_cnt%d", pmnc_name, i - 1);
 		dir = gatorfs_mkdir(sb, root, buf);
-		if (!dir) {
+		if (!dir)
 			return -1;
-		}
 		gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[i]);
 		gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[i]);
-		if (i > 0) {
+		if (i > 0)
 			gatorfs_create_ulong(sb, dir, "event", &pmnc_event[i]);
-		}
 	}
 
 	return 0;
@@ -528,9 +535,8 @@ static int gator_events_scorpion_online(int **buffer, bool migrate)
 {
 	unsigned int cnt, len = 0, cpu = smp_processor_id();
 
-	if (scorpion_pmnc_read() & PMNC_E) {
+	if (scorpion_pmnc_read() & PMNC_E)
 		scorpion_pmnc_write(scorpion_pmnc_read() & ~PMNC_E);
-	}
 
 	/* Initialize & Reset PMNC: C bit and P bit */
 	scorpion_pmnc_write(PMNC_P | PMNC_C);
@@ -541,33 +547,32 @@ static int gator_events_scorpion_online(int **buffer, bool migrate)
 		if (!pmnc_enabled[cnt])
 			continue;
 
-		// disable counter
+		/* disable counter */
 		scorpion_pmnc_disable_counter(cnt);
 
 		event = pmnc_event[cnt] & 255;
 
-		// Set event (if destined for PMNx counters), We don't need to set the event if it's a cycle count
+		/* Set event (if destined for PMNx counters); we don't need to set the event if it's a cycle count */
 		if (cnt != CCNT)
 			scorpion_pmnc_write_evtsel(cnt, event);
 
-		// reset counter
+		/* reset counter */
 		scorpion_pmnc_reset_counter(cnt);
 
-		// Enable counter, do not enable interrupt for this counter
+		/* Enable counter, do not enable interrupt for this counter */
 		scorpion_pmnc_enable_counter(cnt);
 	}
 
-	// enable
+	/* enable */
 	scorpion_pmnc_write(scorpion_pmnc_read() | PMNC_E);
 
-	// read the counters and toss the invalid data, return zero instead
+	/* read the counters and toss the invalid data, return zero instead */
 	for (cnt = 0; cnt < pmnc_counters; cnt++) {
 		if (pmnc_enabled[cnt]) {
-			if (cnt == CCNT) {
+			if (cnt == CCNT)
 				scorpion_ccnt_read();
-			} else if (scorpion_pmnc_select_counter(cnt) == cnt) {
+			else if (scorpion_pmnc_select_counter(cnt) == cnt)
 				scorpion_cntn_read();
-			}
 			scorpion_pmnc_reset_counter(cnt);
 
 			per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
@@ -597,26 +602,25 @@ static void gator_events_scorpion_stop(void)
 	}
 }
 
-static int gator_events_scorpion_read(int **buffer)
+static int gator_events_scorpion_read(int **buffer, bool sched_switch)
 {
 	int cnt, len = 0;
 	int cpu = smp_processor_id();
 
-	// a context switch may occur before the online hotplug event, thus need to check that the pmu is enabled
-	if (!(scorpion_pmnc_read() & PMNC_E)) {
+	/* a context switch may occur before the online hotplug event, thus need to check that the pmu is enabled */
+	if (!(scorpion_pmnc_read() & PMNC_E))
 		return 0;
-	}
 
 	for (cnt = 0; cnt < pmnc_counters; cnt++) {
 		if (pmnc_enabled[cnt]) {
 			int value;
-			if (cnt == CCNT) {
+
+			if (cnt == CCNT)
 				value = scorpion_ccnt_read();
-			} else if (scorpion_pmnc_select_counter(cnt) == cnt) {
+			else if (scorpion_pmnc_select_counter(cnt) == cnt)
 				value = scorpion_cntn_read();
-			} else {
+			else
 				value = 0;
-			}
 			scorpion_pmnc_reset_counter(cnt);
 
 			per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
@@ -655,7 +659,8 @@ int gator_events_scorpion_init(void)
 		return -1;
 	}
 
-	pmnc_counters++;	// CNT[n] + CCNT
+	/* CNT[n] + CCNT */
+	pmnc_counters++;
 
 	for (cnt = CCNT; cnt < CNTMAX; cnt++) {
 		pmnc_enabled[cnt] = 0;
diff --git a/drivers/gator/gator_events_threads.c b/drivers/gator/gator_events_threads.c
deleted file mode 100644
index 9de85862fe6c..000000000000
--- a/drivers/gator/gator_events_threads.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Sample activity provider
- *
- * Copyright (C) ARM Limited 2014. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * See gator_events_mmapped.c for additional directions and
- * troubleshooting.
- *
- * For this sample to work these entries must be present in the
- * events.xml file. So create an events-threads.xml in the gator
- * daemon source directory with the following contents and rebuild
- * gatord:
- *
- * <category name="threads">
- *   <event counter="Linux_threads" title="Linux" name="Threads" class="activity" activity1="odd" activity_color1="0x000000ff" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" description="Linux syscall activity"/>
- * </category>
- */
-
-#include <trace/events/sched.h>
-
-#include "gator.h"
-
-static ulong threads_enabled;
-static ulong threads_key;
-static ulong threads_cores;
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
-GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next))
-#else
-GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_struct *next))
-#endif
-{
-	int cpu = get_physical_cpu();
-	int pid = next->pid;
-	if (pid == 0) {
-		// idle
-		gator_marshal_activity_switch(cpu, threads_key, 0, 0);
-	} else if (pid & 1) {
-		// odd
-		gator_marshal_activity_switch(cpu, threads_key, 1, pid);
-	} else {
-		// even
-		//gator_marshal_activity_switch(cpu, threads_key, 2, current->pid);
-		// Multiple activities are not yet supported so emit idle
-		gator_marshal_activity_switch(cpu, threads_key, 0, 0);
-	}
-}
-
-// Adds Linux_threads directory and enabled, key, and cores files to /dev/gator/events
-static int gator_events_threads_create_files(struct super_block *sb, struct dentry *root)
-{
-	struct dentry *dir;
-
-	dir = gatorfs_mkdir(sb, root, "Linux_threads");
-	if (!dir) {
-		return -1;
-	}
-	gatorfs_create_ulong(sb, dir, "enabled", &threads_enabled);
-	gatorfs_create_ro_ulong(sb, dir, "key", &threads_key);
-	// Number of cores associated with this activity
-	gatorfs_create_ro_ulong(sb, dir, "cores", &threads_cores);
-
-	return 0;
-}
-
-static int gator_events_threads_start(void)
-{
-	int cpu;
-
-	if (threads_enabled) {
-		preempt_disable();
-		for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
-			gator_marshal_activity_switch(cpu, threads_key, 0, 0);
-		}
-		preempt_enable();
-
-		if (GATOR_REGISTER_TRACE(sched_switch)) {
-			goto fail_sched_switch;
-		}
-	}
-
-	return 0;
-
-fail_sched_switch:
-	return -1;
-}
-
-static void gator_events_threads_stop(void)
-{
-	if (threads_enabled) {
-		GATOR_UNREGISTER_TRACE(sched_switch);
-	}
-
-	threads_enabled = 0;
-}
-
-static struct gator_interface gator_events_threads_interface = {
-	.create_files = gator_events_threads_create_files,
-	.start = gator_events_threads_start,
-	.stop = gator_events_threads_stop,
-};
-
-// Must not be static. Ensure that this init function is added to GATOR_EVENTS_LIST in gator_main.c
-int __init gator_events_threads_init(void)
-{
-	threads_enabled = 0;
-	threads_key = gator_events_get_key();
-	threads_cores = nr_cpu_ids;
-
-	return gator_events_install(&gator_events_threads_interface);
-}
diff --git a/drivers/gator/gator_fs.c b/drivers/gator/gator_fs.c
index 166cfe7d681d..d8fb357b9eda 100644
--- a/drivers/gator/gator_fs.c
+++ b/drivers/gator/gator_fs.c
@@ -14,7 +14,7 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define gatorfs_MAGIC 0x24051020
 #define TMPBUFSIZE 50
@@ -43,6 +43,7 @@ static ssize_t gatorfs_ulong_to_user(unsigned long val, char __user *buf, size_t
 {
 	char tmpbuf[TMPBUFSIZE];
 	size_t maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%lu\n", val);
+
 	if (maxlen > TMPBUFSIZE)
 		maxlen = TMPBUFSIZE;
 	return simple_read_from_buffer(buf, count, offset, tmpbuf, maxlen);
@@ -52,6 +53,7 @@ static ssize_t gatorfs_u64_to_user(u64 val, char __user *buf, size_t count, loff
 {
 	char tmpbuf[TMPBUFSIZE];
 	size_t maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%llu\n", val);
+
 	if (maxlen > TMPBUFSIZE)
 		maxlen = TMPBUFSIZE;
 	return simple_read_from_buffer(buf, count, offset, tmpbuf, maxlen);
@@ -104,12 +106,14 @@ static int gatorfs_u64_from_user(u64 *val, char const __user *buf, size_t count)
 static ssize_t ulong_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset)
 {
 	unsigned long *val = file->private_data;
+
 	return gatorfs_ulong_to_user(*val, buf, count, offset);
 }
 
 static ssize_t u64_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset)
 {
 	u64 *val = file->private_data;
+
 	return gatorfs_u64_to_user(*val, buf, count, offset);
 }
 
@@ -231,7 +235,7 @@ int gatorfs_create_ro_ulong(struct super_block *sb, struct dentry *root,
 }
 
 static int gatorfs_create_ro_u64(struct super_block *sb, struct dentry *root,
-				 char const *name, u64 * val)
+				 char const *name, u64 *val)
 {
 	struct dentry *d =
 	    __gatorfs_create_file(sb, root, name, &u64_ro_fops, 0444);
@@ -245,6 +249,7 @@ static int gatorfs_create_ro_u64(struct super_block *sb, struct dentry *root,
 static ssize_t atomic_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset)
 {
 	atomic_t *val = file->private_data;
+
 	return gatorfs_ulong_to_user(atomic_read(val), buf, count, offset);
 }
 
diff --git a/drivers/gator/gator_hrtimer_gator.c b/drivers/gator/gator_hrtimer_gator.c
index 76584554b00f..c1525e10a8da 100644
--- a/drivers/gator/gator_hrtimer_gator.c
+++ b/drivers/gator/gator_hrtimer_gator.c
@@ -18,6 +18,7 @@ static void gator_hrtimer_offline(void);
 static enum hrtimer_restart gator_hrtimer_notify(struct hrtimer *hrtimer)
 {
 	int cpu = get_logical_cpu();
+
 	hrtimer_forward(hrtimer, per_cpu(hrtimer_expire, cpu), profiling_interval);
 	per_cpu(hrtimer_expire, cpu) = ktime_add(per_cpu(hrtimer_expire, cpu), profiling_interval);
 	(*callback)();
@@ -64,12 +65,11 @@ static int gator_hrtimer_init(int interval, void (*func)(void))
 		per_cpu(hrtimer_is_active, cpu) = 0;
 	}
 
-	// calculate profiling interval
-	if (interval > 0) {
+	/* calculate profiling interval */
+	if (interval > 0)
 		profiling_interval = ns_to_ktime(1000000000UL / interval);
-	} else {
+	else
 		profiling_interval.tv64 = 0;
-	}
 
 	return 0;
 }
diff --git a/drivers/gator/gator_iks.c b/drivers/gator/gator_iks.c
index 9180b874457a..fb78c10fd987 100644
--- a/drivers/gator/gator_iks.c
+++ b/drivers/gator/gator_iks.c
@@ -16,7 +16,7 @@
 
 static bool map_cpuids;
 static int mpidr_cpuids[NR_CPUS];
-static const struct gator_cpu * mpidr_cpus[NR_CPUS];
+static const struct gator_cpu *mpidr_cpus[NR_CPUS];
 static int __lcpu_to_pcpu[NR_CPUS];
 
 static const struct gator_cpu *gator_find_cpu_by_dt_name(const char *const name)
@@ -25,9 +25,9 @@ static const struct gator_cpu *gator_find_cpu_by_dt_name(const char *const name)
 
 	for (i = 0; gator_cpus[i].cpuid != 0; ++i) {
 		const struct gator_cpu *const gator_cpu = &gator_cpus[i];
-		if (gator_cpu->dt_name != NULL && strcmp(gator_cpu->dt_name, name) == 0) {
+
+		if (gator_cpu->dt_name != NULL && strcmp(gator_cpu->dt_name, name) == 0)
 			return gator_cpu;
-		}
 	}
 
 	return NULL;
@@ -41,7 +41,7 @@ static void calc_first_cluster_size(void)
 	struct device_node *cn = NULL;
 	int mpidr_cpuids_count = 0;
 
-	// Zero is a valid cpuid, so initialize the array to 0xff's
+	/* Zero is a valid cpuid, so initialize the array to 0xff's */
 	memset(&mpidr_cpuids, 0xff, sizeof(mpidr_cpuids));
 	memset(&mpidr_cpus, 0, sizeof(mpidr_cpus));
 
@@ -70,10 +70,10 @@ static void calc_first_cluster_size(void)
 static int linearize_mpidr(int mpidr)
 {
 	int i;
+
 	for (i = 0; i < nr_cpu_ids; ++i) {
-		if (mpidr_cpuids[i] == mpidr) {
+		if (mpidr_cpuids[i] == mpidr)
 			return i;
-		}
 	}
 
 	BUG();
@@ -113,6 +113,7 @@ static void gator_update_cpu_mapping(u32 cpu_hwid)
 {
 	int lcpu = smp_processor_id();
 	int pcpu = linearize_mpidr(cpu_hwid & MPIDR_HWID_BITMASK);
+
 	BUG_ON(lcpu >= nr_cpu_ids || lcpu < 0);
 	BUG_ON(pcpu >= nr_cpu_ids || pcpu < 0);
 	__lcpu_to_pcpu[lcpu] = pcpu;
@@ -132,7 +133,7 @@ GATOR_DEFINE_PROBE(cpu_migrate_finish, TP_PROTO(u64 timestamp, u32 cpu_hwid))
 
 	gator_update_cpu_mapping(cpu_hwid);
 
-	// get_physical_cpu must be called after gator_update_cpu_mapping
+	/* get_physical_cpu must be called after gator_update_cpu_mapping */
 	cpu = get_physical_cpu();
 	gator_timer_online_dispatch(cpu, true);
 	gator_timer_online((void *)1);
@@ -146,12 +147,11 @@ GATOR_DEFINE_PROBE(cpu_migrate_current, TP_PROTO(u64 timestamp, u32 cpu_hwid))
 static void gator_send_iks_core_names(void)
 {
 	int cpu;
-	// Send the cpu names
+	/* Send the cpu names */
 	preempt_disable();
 	for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
-		if (mpidr_cpus[cpu] != NULL) {
+		if (mpidr_cpus[cpu] != NULL)
 			gator_send_core_name(cpu, mpidr_cpus[cpu]->cpuid);
-		}
 	}
 	preempt_enable();
 }
@@ -170,7 +170,7 @@ static int gator_migrate_start(void)
 	if (retval == 0)
 		retval = GATOR_REGISTER_TRACE(cpu_migrate_current);
 	if (retval == 0) {
-		// Initialize the logical to physical cpu mapping
+		/* Initialize the logical to physical cpu mapping */
 		memset(&__lcpu_to_pcpu, 0xff, sizeof(__lcpu_to_pcpu));
 		bL_switcher_trace_trigger();
 	}
diff --git a/drivers/gator/gator_main.c b/drivers/gator/gator_main.c
index 0d867f22364f..30bf60d95286 100644
--- a/drivers/gator/gator_main.c
+++ b/drivers/gator/gator_main.c
@@ -7,8 +7,8 @@
  *
  */
 
-// This version must match the gator daemon version
-#define PROTOCOL_VERSION 19
+/* This version must match the gator daemon version */
+#define PROTOCOL_VERSION 20
 static unsigned long gator_protocol_version = PROTOCOL_VERSION;
 
 #include <linux/slab.h>
@@ -25,7 +25,7 @@ static unsigned long gator_protocol_version = PROTOCOL_VERSION;
 #include <linux/utsname.h>
 #include <linux/kthread.h>
 #include <asm/stacktrace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "gator.h"
 
@@ -67,11 +67,11 @@ static unsigned long gator_protocol_version = PROTOCOL_VERSION;
 #define SUMMARY_BUFFER_SIZE       (1*1024)
 #define BACKTRACE_BUFFER_SIZE     (128*1024)
 #define NAME_BUFFER_SIZE          (64*1024)
-#define COUNTER_BUFFER_SIZE       (64*1024)	// counters have the core as part of the data and the core value in the frame header may be discarded
+#define COUNTER_BUFFER_SIZE       (64*1024)	/* counters have the core as part of the data and the core value in the frame header may be discarded */
 #define BLOCK_COUNTER_BUFFER_SIZE (128*1024)
-#define ANNOTATE_BUFFER_SIZE      (128*1024)	// annotate counters have the core as part of the data and the core value in the frame header may be discarded
+#define ANNOTATE_BUFFER_SIZE      (128*1024)	/* annotate counters have the core as part of the data and the core value in the frame header may be discarded */
 #define SCHED_TRACE_BUFFER_SIZE   (128*1024)
-#define IDLE_BUFFER_SIZE          (32*1024)	// idle counters have the core as part of the data and the core value in the frame header may be discarded
+#define IDLE_BUFFER_SIZE          (32*1024)	/* idle counters have the core as part of the data and the core value in the frame header may be discarded */
 #define ACTIVITY_BUFFER_SIZE      (128*1024)
 
 #define NO_COOKIE      0U
@@ -89,24 +89,24 @@ static unsigned long gator_protocol_version = PROTOCOL_VERSION;
 
 #define MESSAGE_END_BACKTRACE 1
 
-// Name Frame Messages
+/* Name Frame Messages */
 #define MESSAGE_COOKIE      1
 #define MESSAGE_THREAD_NAME 2
 #define MESSAGE_LINK        4
 
-// Scheduler Trace Frame Messages
+/* Scheduler Trace Frame Messages */
 #define MESSAGE_SCHED_SWITCH 1
 #define MESSAGE_SCHED_EXIT   2
 
-// Idle Frame Messages
+/* Idle Frame Messages */
 #define MESSAGE_IDLE_ENTER 1
 #define MESSAGE_IDLE_EXIT  2
 
-// Summary Frame Messages
+/* Summary Frame Messages */
 #define MESSAGE_SUMMARY   1
 #define MESSAGE_CORE_NAME 3
 
-// Activity Frame Messages
+/* Activity Frame Messages */
 #define MESSAGE_SWITCH 2
 #define MESSAGE_EXIT   3
 
@@ -140,14 +140,15 @@ enum {
  * Globals
  ******************************************************************************/
 static unsigned long gator_cpu_cores;
-// Size of the largest buffer. Effectively constant, set in gator_op_create_files
+/* Size of the largest buffer. Effectively constant, set in gator_op_create_files */
 static unsigned long userspace_buffer_size;
 static unsigned long gator_backtrace_depth;
-// How often to commit the buffers for live in nanoseconds
+/* How often to commit the buffers for live in nanoseconds */
 static u64 gator_live_rate;
 
 static unsigned long gator_started;
 static u64 gator_monotonic_started;
+static u64 gator_sync_time;
 static u64 gator_hibernate_time;
 static unsigned long gator_buffer_opened;
 static unsigned long gator_timer_count;
@@ -161,7 +162,7 @@ static DECLARE_WAIT_QUEUE_HEAD(gator_buffer_wait);
 static DECLARE_WAIT_QUEUE_HEAD(gator_annotate_wait);
 static struct timer_list gator_buffer_wake_up_timer;
 static bool gator_buffer_wake_run;
-// Initialize semaphore unlocked to initialize memory values
+/* Initialize semaphore unlocked to initialize memory values */
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
 static DECLARE_MUTEX(gator_buffer_wake_sem);
 #else
@@ -183,33 +184,43 @@ static DEFINE_PER_CPU(bool, in_scheduler_context);
  * Prototypes
  ******************************************************************************/
 static u64 gator_get_time(void);
+static void gator_emit_perf_time(u64 time);
 static void gator_op_create_files(struct super_block *sb, struct dentry *root);
 
-// gator_buffer is protected by being per_cpu and by having IRQs disabled when writing to it.
-// Most marshal_* calls take care of this except for marshal_cookie*, marshal_backtrace* and marshal_frame where the caller is responsible for doing so.
-// No synchronization is needed with the backtrace buffer as it is per cpu and is only used from the hrtimer.
-// The annotate_lock must be held when using the annotation buffer as it is not per cpu.
-// collect_counters which is the sole writer to the block counter frame is additionally protected by the per cpu collecting flag
+/* gator_buffer is protected by being per_cpu and by having IRQs
+ * disabled when writing to it. Most marshal_* calls take care of this
+ * except for marshal_cookie*, marshal_backtrace* and marshal_frame
+ * where the caller is responsible for doing so. No synchronization is
+ * needed with the backtrace buffer as it is per cpu and is only used
+ * from the hrtimer. The annotate_lock must be held when using the
+ * annotation buffer as it is not per cpu. collect_counters which is
+ * the sole writer to the block counter frame is additionally
+ * protected by the per cpu collecting flag.
+ */
 
-// Size of the buffer, must be a power of 2. Effectively constant, set in gator_op_setup.
+/* Size of the buffer, must be a power of 2. Effectively constant, set in gator_op_setup. */
 static uint32_t gator_buffer_size[NUM_GATOR_BUFS];
-// gator_buffer_size - 1, bitwise and with pos to get offset into the array. Effectively constant, set in gator_op_setup.
+/* gator_buffer_size - 1, bitwise and with pos to get offset into the array. Effectively constant, set in gator_op_setup. */
 static uint32_t gator_buffer_mask[NUM_GATOR_BUFS];
-// Read position in the buffer. Initialized to zero in gator_op_setup and incremented after bytes are read by userspace in userspace_buffer_read
+/* Read position in the buffer. Initialized to zero in gator_op_setup and incremented after bytes are read by userspace in userspace_buffer_read */
 static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], gator_buffer_read);
-// Write position in the buffer. Initialized to zero in gator_op_setup and incremented after bytes are written to the buffer
+/* Write position in the buffer. Initialized to zero in gator_op_setup and incremented after bytes are written to the buffer */
 static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], gator_buffer_write);
-// Commit position in the buffer. Initialized to zero in gator_op_setup and incremented after a frame is ready to be read by userspace
+/* Commit position in the buffer. Initialized to zero in gator_op_setup and incremented after a frame is ready to be read by userspace */
 static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], gator_buffer_commit);
-// If set to false, decreases the number of bytes returned by buffer_bytes_available. Set in buffer_check_space if no space is remaining. Initialized to true in gator_op_setup
-// This means that if we run out of space, continue to report that no space is available until bytes are read by userspace
+/* If set to false, decreases the number of bytes returned by
+ * buffer_bytes_available. Set in buffer_check_space if no space is
+ * remaining. Initialized to true in gator_op_setup. This means that
+ * if we run out of space, we continue to report that no space is
+ * available until bytes are read by userspace.
+ */
 static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], buffer_space_available);
-// The buffer. Allocated in gator_op_setup
+/* The buffer. Allocated in gator_op_setup */
 static DEFINE_PER_CPU(char *[NUM_GATOR_BUFS], gator_buffer);
-// The time after which the buffer should be committed for live display
+/* The time after which the buffer should be committed for live display */
 static DEFINE_PER_CPU(u64, gator_buffer_commit_time);
 
-// List of all gator events - new events must be added to this list
+/* List of all gator events - new events must be added to this list */
 #define GATOR_EVENTS_LIST \
 	GATOR_EVENT(gator_events_armv6_init) \
 	GATOR_EVENT(gator_events_armv7_init) \
@@ -218,15 +229,14 @@ static DEFINE_PER_CPU(u64, gator_buffer_commit_time);
 	GATOR_EVENT(gator_events_irq_init) \
 	GATOR_EVENT(gator_events_l2c310_init) \
 	GATOR_EVENT(gator_events_mali_init) \
-	GATOR_EVENT(gator_events_mali_t6xx_hw_init) \
-	GATOR_EVENT(gator_events_mali_t6xx_init) \
+	GATOR_EVENT(gator_events_mali_midgard_hw_init) \
+	GATOR_EVENT(gator_events_mali_midgard_init) \
 	GATOR_EVENT(gator_events_meminfo_init) \
 	GATOR_EVENT(gator_events_mmapped_init) \
 	GATOR_EVENT(gator_events_net_init) \
 	GATOR_EVENT(gator_events_perf_pmu_init) \
 	GATOR_EVENT(gator_events_sched_init) \
 	GATOR_EVENT(gator_events_scorpion_init) \
-	GATOR_EVENT(gator_events_threads_init) \
 
 #define GATOR_EVENT(EVENT_INIT) __weak int EVENT_INIT(void);
 GATOR_EVENTS_LIST
@@ -314,13 +324,6 @@ static const struct gator_cpu gator_cpus[] = {
 		.dt_name = "arm,cortex-a9",
 		.pmnc_counters = 6,
 	},
-	{
-		.cpuid = CORTEX_A12,
-		.core_name = "Cortex-A12",
-		.pmnc_name = "ARMv7_Cortex_A12",
-		.dt_name = "arm,cortex-a12",
-		.pmnc_counters = 6,
-	},
 	{
 		.cpuid = CORTEX_A15,
 		.core_name = "Cortex-A15",
@@ -400,23 +403,32 @@ const struct gator_cpu *gator_find_cpu_by_cpuid(const u32 cpuid)
 
 	for (i = 0; gator_cpus[i].cpuid != 0; ++i) {
 		const struct gator_cpu *const gator_cpu = &gator_cpus[i];
-		if (gator_cpu->cpuid == cpuid) {
+
+		if (gator_cpu->cpuid == cpuid)
 			return gator_cpu;
-		}
 	}
 
 	return NULL;
 }
 
+static const char OLD_PMU_PREFIX[] = "ARMv7 Cortex-";
+static const char NEW_PMU_PREFIX[] = "ARMv7_Cortex_";
+
 const struct gator_cpu *gator_find_cpu_by_pmu_name(const char *const name)
 {
 	int i;
 
 	for (i = 0; gator_cpus[i].cpuid != 0; ++i) {
 		const struct gator_cpu *const gator_cpu = &gator_cpus[i];
-		if (gator_cpu->pmnc_name != NULL && strcmp(gator_cpu->pmnc_name, name) == 0) {
+
+		if (gator_cpu->pmnc_name != NULL &&
+		    /* Do the names match exactly? */
+		    (strcasecmp(gator_cpu->pmnc_name, name) == 0 ||
+		     /* Do these names match but have the old vs new prefix? */
+		     ((strncasecmp(name, OLD_PMU_PREFIX, sizeof(OLD_PMU_PREFIX) - 1) == 0 &&
+		       strncasecmp(gator_cpu->pmnc_name, NEW_PMU_PREFIX, sizeof(NEW_PMU_PREFIX) - 1) == 0 &&
+		       strcasecmp(name + sizeof(OLD_PMU_PREFIX) - 1, gator_cpu->pmnc_name + sizeof(NEW_PMU_PREFIX) - 1) == 0))))
 			return gator_cpu;
-		}
 	}
 
 	return NULL;
@@ -445,16 +457,15 @@ static void gator_buffer_wake_up(unsigned long data)
 static int gator_buffer_wake_func(void *data)
 {
 	for (;;) {
-		if (down_killable(&gator_buffer_wake_sem)) {
+		if (down_killable(&gator_buffer_wake_sem))
 			break;
-		}
 
-		// Eat up any pending events
-		while (!down_trylock(&gator_buffer_wake_sem));
+		/* Eat up any pending events */
+		while (!down_trylock(&gator_buffer_wake_sem))
+			;
 
-		if (!gator_buffer_wake_run) {
+		if (!gator_buffer_wake_run)
 			break;
-		}
 
 		gator_buffer_wake_up(0);
 	}
@@ -468,6 +479,7 @@ static int gator_buffer_wake_func(void *data)
 static bool buffer_commit_ready(int *cpu, int *buftype)
 {
 	int cpu_x, x;
+
 	for_each_present_cpu(cpu_x) {
 		for (x = 0; x < NUM_GATOR_BUFS; x++)
 			if (per_cpu(gator_buffer_commit, cpu_x)[x] != per_cpu(gator_buffer_read, cpu_x)[x]) {
@@ -487,6 +499,7 @@ static bool buffer_commit_ready(int *cpu, int *buftype)
 static void gator_timer_interrupt(void)
 {
 	struct pt_regs *const regs = get_irq_regs();
+
 	gator_backtrace_handler(regs);
 }
 
@@ -495,15 +508,14 @@ void gator_backtrace_handler(struct pt_regs *const regs)
 	u64 time = gator_get_time();
 	int cpu = get_physical_cpu();
 
-	// Output backtrace
+	/* Output backtrace */
 	gator_add_sample(cpu, regs, time);
 
-	// Collect counters
-	if (!per_cpu(collecting, cpu)) {
-		collect_counters(time, NULL);
-	}
+	/* Collect counters */
+	if (!per_cpu(collecting, cpu))
+		collect_counters(time, current, false);
 
-	// No buffer flushing occurs during sched switch for RT-Preempt full. The block counter frame will be flushed by collect_counters, but the sched buffer needs to be explicitly flushed
+	/* No buffer flushing occurs during sched switch for RT-Preempt full. The block counter frame will be flushed by collect_counters, but the sched buffer needs to be explicitly flushed */
 #ifdef CONFIG_PREEMPT_RT_FULL
 	buffer_check(cpu, SCHED_TRACE_BUF, time);
 #endif
@@ -511,7 +523,7 @@ void gator_backtrace_handler(struct pt_regs *const regs)
 
 static int gator_running;
 
-// This function runs in interrupt context and on the appropriate core
+/* This function runs in interrupt context and on the appropriate core */
 static void gator_timer_offline(void *migrate)
 {
 	struct gator_interface *gi;
@@ -522,11 +534,10 @@ static void gator_timer_offline(void *migrate)
 	gator_trace_sched_offline();
 	gator_trace_power_offline();
 
-	if (!migrate) {
+	if (!migrate)
 		gator_hrtimer_offline();
-	}
 
-	// Offline any events and output counters
+	/* Offline any events and output counters */
 	time = gator_get_time();
 	if (marshal_event_header(time)) {
 		list_for_each_entry(gi, &gator_events, list) {
@@ -535,24 +546,23 @@ static void gator_timer_offline(void *migrate)
 				marshal_event(len, buffer);
 			}
 		}
-		// Only check after writing all counters so that time and corresponding counters appear in the same frame
+		/* Only check after writing all counters so that time and corresponding counters appear in the same frame */
 		buffer_check(cpu, BLOCK_COUNTER_BUF, time);
 	}
 
-	// Flush all buffers on this core
+	/* Flush all buffers on this core */
 	for (i = 0; i < NUM_GATOR_BUFS; i++)
 		gator_commit_buffer(cpu, i, time);
 }
 
-// This function runs in interrupt context and may be running on a core other than core 'cpu'
+/* This function runs in interrupt context and may be running on a core other than core 'cpu' */
 static void gator_timer_offline_dispatch(int cpu, bool migrate)
 {
 	struct gator_interface *gi;
 
 	list_for_each_entry(gi, &gator_events, list) {
-		if (gi->offline_dispatch) {
+		if (gi->offline_dispatch)
 			gi->offline_dispatch(cpu, migrate);
-		}
 	}
 }
 
@@ -579,16 +589,15 @@ static void gator_send_core_name(const int cpu, const u32 cpuid)
 		const char *core_name = NULL;
 		char core_name_buf[32];
 
-		// Save off this cpuid
+		/* Save off this cpuid */
 		gator_cpuids[cpu] = cpuid;
 		if (gator_cpu != NULL) {
 			core_name = gator_cpu->core_name;
 		} else {
-			if (cpuid == -1) {
+			if (cpuid == -1)
 				snprintf(core_name_buf, sizeof(core_name_buf), "Unknown");
-			} else {
+			else
 				snprintf(core_name_buf, sizeof(core_name_buf), "Unknown (0x%.3x)", cpuid);
-			}
 			core_name = core_name_buf;
 		}
 
@@ -598,12 +607,12 @@ static void gator_send_core_name(const int cpu, const u32 cpuid)
 #endif
 }
 
-static void gator_read_cpuid(void * arg)
+static void gator_read_cpuid(void *arg)
 {
 	gator_cpuids[get_physical_cpu()] = gator_cpuid();
 }
 
-// This function runs in interrupt context and on the appropriate core
+/* This function runs in interrupt context and on the appropriate core */
 static void gator_timer_online(void *migrate)
 {
 	struct gator_interface *gi;
@@ -611,12 +620,12 @@ static void gator_timer_online(void *migrate)
 	int *buffer;
 	u64 time;
 
-	// Send what is currently running on this core
+	/* Send what is currently running on this core */
 	marshal_sched_trace_switch(current->pid, 0);
 
 	gator_trace_power_online();
 
-	// online any events and output counters
+	/* online any events and output counters */
 	time = gator_get_time();
 	if (marshal_event_header(time)) {
 		list_for_each_entry(gi, &gator_events, list) {
@@ -625,26 +634,24 @@ static void gator_timer_online(void *migrate)
 				marshal_event(len, buffer);
 			}
 		}
-		// Only check after writing all counters so that time and corresponding counters appear in the same frame
+		/* Only check after writing all counters so that time and corresponding counters appear in the same frame */
 		buffer_check(cpu, BLOCK_COUNTER_BUF, time);
 	}
 
-	if (!migrate) {
+	if (!migrate)
 		gator_hrtimer_online();
-	}
 
 	gator_send_core_name(cpu, gator_cpuid());
 }
 
-// This function runs in interrupt context and may be running on a core other than core 'cpu'
+/* This function runs in interrupt context and may be running on a core other than core 'cpu' */
 static void gator_timer_online_dispatch(int cpu, bool migrate)
 {
 	struct gator_interface *gi;
 
 	list_for_each_entry(gi, &gator_events, list) {
-		if (gi->online_dispatch) {
+		if (gi->online_dispatch)
 			gi->online_dispatch(cpu, migrate);
-		}
 	}
 }
 
@@ -661,15 +668,14 @@ static int gator_timer_start(unsigned long sample_rate)
 
 	gator_running = 1;
 
-	// event based sampling trumps hr timer based sampling
-	if (event_based_sampling) {
+	/* event based sampling trumps hr timer based sampling */
+	if (event_based_sampling)
 		sample_rate = 0;
-	}
 
 	if (gator_hrtimer_init(sample_rate, gator_timer_interrupt) == -1)
 		return -1;
 
-	// Send off the previously saved cpuids
+	/* Send off the previously saved cpuids */
 	for_each_present_cpu(cpu) {
 		preempt_disable();
 		gator_send_core_name(cpu, gator_cpuids[cpu]);
@@ -693,21 +699,24 @@ static u64 gator_get_time(void)
 	u64 delta;
 	int cpu = smp_processor_id();
 
-	// Match clock_gettime(CLOCK_MONOTONIC_RAW, &ts) from userspace
+	/* Match clock_gettime(CLOCK_MONOTONIC_RAW, &ts) from userspace */
 	getrawmonotonic(&ts);
 	timestamp = timespec_to_ns(&ts);
 
-	// getrawmonotonic is not monotonic on all systems. Detect and attempt to correct these cases.
-	// up to 0.5ms delta has been seen on some systems, which can skew Streamline data when viewing at high resolution.
-	// This doesn't work well with interrupts, but that it's OK - the real concern is to catch big jumps in time
+	/* getrawmonotonic is not monotonic on all systems. Detect and
+	 * attempt to correct these cases. Up to 0.5ms delta has been seen
+	 * on some systems, which can skew Streamline data when viewing at
+	 * high resolution. This doesn't work well with interrupts, but
+	 * that's OK - the real concern is to catch big jumps in time.
+	 */
 	prev_timestamp = per_cpu(last_timestamp, cpu);
 	if (prev_timestamp <= timestamp) {
 		per_cpu(last_timestamp, cpu) = timestamp;
 	} else {
 		delta = prev_timestamp - timestamp;
-		// Log the error once
+		/* Log the error once */
 		if (!printed_monotonic_warning && delta > 500000) {
-			printk(KERN_ERR "%s: getrawmonotonic is not monotonic  cpu: %i  delta: %lli\nSkew in Streamline data may be present at the fine zoom levels\n", __FUNCTION__, cpu, delta);
+			pr_err("%s: getrawmonotonic is not monotonic  cpu: %i  delta: %lli\nSkew in Streamline data may be present at the fine zoom levels\n", __func__, cpu, delta);
 			printed_monotonic_warning = true;
 		}
 		timestamp = prev_timestamp;
@@ -716,6 +725,19 @@ static u64 gator_get_time(void)
 	return timestamp - gator_monotonic_started;
 }
 
+static void gator_emit_perf_time(u64 time)	/* time: current gator timestamp */
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)	/* body compiled out on older kernels */
+	if (time >= gator_sync_time) {	/* sync threshold reached */
+		int cpu = get_physical_cpu();
+
+		marshal_event_single64(0, -1, local_clock());	/* marshal the current local_clock() value */
+		gator_sync_time += NSEC_PER_SEC;	/* move the threshold forward by one second */
+		gator_commit_buffer(cpu, COUNTER_BUF, time);	/* commit COUNTER_BUF for this cpu */
+	}
+#endif
+}
+
 /******************************************************************************
  * cpu hotplug and pm notifiers
  ******************************************************************************/
@@ -743,8 +765,10 @@ static struct notifier_block __refdata gator_hotcpu_notifier = {
 	.notifier_call = gator_hotcpu_notify,
 };
 
-// n.b. calling "on_each_cpu" only runs on those that are online
-// Registered linux events are not disabled, so their counters will continue to collect
+/* n.b. calling "on_each_cpu" only runs on those CPUs that are online.
+ * Registered linux events are not disabled, so their counters will
+ * continue to collect.
+ */
 static int gator_pm_notify(struct notifier_block *nb, unsigned long event, void *dummy)
 {
 	int cpu;
@@ -760,13 +784,13 @@ static int gator_pm_notify(struct notifier_block *nb, unsigned long event, void
 			gator_timer_offline_dispatch(lcpu_to_pcpu(cpu), false);
 		}
 
-		// Record the wallclock hibernate time
+		/* Record the wallclock hibernate time */
 		getnstimeofday(&ts);
 		gator_hibernate_time = timespec_to_ns(&ts) - gator_get_time();
 		break;
 	case PM_POST_HIBERNATION:
 	case PM_POST_SUSPEND:
-		// Adjust gator_monotonic_started for the time spent sleeping, as gator_get_time does not account for it
+		/* Adjust gator_monotonic_started for the time spent sleeping, as gator_get_time does not account for it */
 		if (gator_hibernate_time > 0) {
 			getnstimeofday(&ts);
 			gator_monotonic_started += gator_hibernate_time + gator_get_time() - timespec_to_ns(&ts);
@@ -792,6 +816,7 @@ static struct notifier_block gator_pm_notifier = {
 static int gator_notifier_start(void)
 {
 	int retval;
+
 	retval = register_hotcpu_notifier(&gator_hotcpu_notifier);
 	if (retval == 0)
 		retval = register_pm_notifier(&gator_pm_notifier);
@@ -812,28 +837,37 @@ static void gator_summary(void)
 	u64 timestamp, uptime;
 	struct timespec ts;
 	char uname_buf[512];
-	void (*m2b)(struct timespec *ts);
 
 	snprintf(uname_buf, sizeof(uname_buf), "%s %s %s %s %s GNU/Linux", utsname()->sysname, utsname()->nodename, utsname()->release, utsname()->version, utsname()->machine);
 
 	getnstimeofday(&ts);
 	timestamp = timespec_to_ns(&ts);
 
-	do_posix_clock_monotonic_gettime(&ts);
-	// monotonic_to_bootbased is not defined for some versions of Android
-	m2b = symbol_get(monotonic_to_bootbased);
-	if (m2b) {
-		m2b(&ts);
+	/* Similar to reading /proc/uptime from fs/proc/uptime.c, calculate uptime */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0)
+	{
+		void (*m2b)(struct timespec *ts);
+
+		do_posix_clock_monotonic_gettime(&ts);
+		/* monotonic_to_bootbased is not defined for some versions of Android */
+		m2b = symbol_get(monotonic_to_bootbased);
+		if (m2b)
+			m2b(&ts);
 	}
+#else
+	get_monotonic_boottime(&ts);
+#endif
 	uptime = timespec_to_ns(&ts);
 
-	// Disable preemption as gator_get_time calls smp_processor_id to verify time is monotonic
+	/* Disable preemption as gator_get_time calls smp_processor_id to verify time is monotonic */
 	preempt_disable();
-	// Set monotonic_started to zero as gator_get_time is uptime minus monotonic_started
+	/* Set monotonic_started to zero as gator_get_time is uptime minus monotonic_started */
 	gator_monotonic_started = 0;
 	gator_monotonic_started = gator_get_time();
 
 	marshal_summary(timestamp, uptime, gator_monotonic_started, uname_buf);
+	gator_sync_time = 0;
+	gator_emit_perf_time(gator_monotonic_started);	
 	preempt_enable();
 }
 
@@ -846,12 +880,14 @@ int gator_events_install(struct gator_interface *interface)
 
 int gator_events_get_key(void)
 {
-	// key 0 is reserved as a timestamp
-	// key 1 is reserved as the marker for thread specific counters
-	// Odd keys are assigned by the driver, even keys by the daemon
+	/* key 0 is reserved as a timestamp. key 1 is reserved as the marker
+	 * for thread specific counters. key 2 is reserved as the marker for
+	 * core. Odd keys are assigned by the driver, even keys by the
+	 * daemon.
+	 */
 	static int key = 3;
-
 	const int ret = key;
+
 	key += 2;
 	return ret;
 }
@@ -862,7 +898,7 @@ static int gator_init(void)
 
 	calc_first_cluster_size();
 
-	// events sources
+	/* events sources */
 	for (i = 0; i < ARRAY_SIZE(gator_events_list); i++)
 		if (gator_events_list[i])
 			gator_events_list[i]();
@@ -888,26 +924,25 @@ static int gator_start(void)
 	struct gator_interface *gi;
 
 	gator_buffer_wake_run = true;
-	if (IS_ERR(gator_buffer_wake_thread = kthread_run(gator_buffer_wake_func, NULL, "gator_bwake"))) {
+	gator_buffer_wake_thread = kthread_run(gator_buffer_wake_func, NULL, "gator_bwake");
+	if (IS_ERR(gator_buffer_wake_thread))
 		goto bwake_failure;
-	}
 
 	if (gator_migrate_start())
 		goto migrate_failure;
 
-	// Initialize the buffer with the frame type and core
+	/* Initialize the buffer with the frame type and core */
 	for_each_present_cpu(cpu) {
-		for (i = 0; i < NUM_GATOR_BUFS; i++) {
+		for (i = 0; i < NUM_GATOR_BUFS; i++)
 			marshal_frame(cpu, i);
-		}
 		per_cpu(last_timestamp, cpu) = 0;
 	}
 	printed_monotonic_warning = false;
 
-	// Capture the start time
+	/* Capture the start time */
 	gator_summary();
 
-	// start all events
+	/* start all events */
 	list_for_each_entry(gi, &gator_events, list) {
 		if (gi->start && gi->start() != 0) {
 			struct list_head *ptr = gi->list.prev;
@@ -924,7 +959,7 @@ static int gator_start(void)
 		}
 	}
 
-	// cookies shall be initialized before trace_sched_start() and gator_timer_start()
+	/* cookies shall be initialized before trace_sched_start() and gator_timer_start() */
 	if (cookies_initialize())
 		goto cookies_failure;
 	if (gator_annotate_start())
@@ -955,7 +990,7 @@ static int gator_start(void)
 annotate_failure:
 	cookies_release();
 cookies_failure:
-	// stop all events
+	/* stop all events */
 	list_for_each_entry(gi, &gator_events, list)
 		if (gi->stop)
 			gi->stop();
@@ -979,11 +1014,11 @@ static void gator_stop(void)
 	gator_trace_power_stop();
 	gator_trace_gpu_stop();
 
-	// stop all interrupt callback reads before tearing down other interfaces
-	gator_notifier_stop();	// should be called before gator_timer_stop to avoid re-enabling the hrtimer after it has been offlined
+	/* stop all interrupt callback reads before tearing down other interfaces */
+	gator_notifier_stop();	/* should be called before gator_timer_stop to avoid re-enabling the hrtimer after it has been offlined */
 	gator_timer_stop();
 
-	// stop all events
+	/* stop all events */
 	list_for_each_entry(gi, &gator_events, list)
 		if (gi->stop)
 			gi->stop();
@@ -1033,9 +1068,9 @@ static int gator_op_setup(void)
 	gator_buffer_size[ACTIVITY_BUF] = ACTIVITY_BUFFER_SIZE;
 	gator_buffer_mask[ACTIVITY_BUF] = ACTIVITY_BUFFER_SIZE - 1;
 
-	// Initialize percpu per buffer variables
+	/* Initialize percpu per buffer variables */
 	for (i = 0; i < NUM_GATOR_BUFS; i++) {
-		// Verify buffers are a power of 2
+		/* Verify buffers are a power of 2 */
 		if (gator_buffer_size[i] & (gator_buffer_size[i] - 1)) {
 			err = -ENOEXEC;
 			goto setup_error;
@@ -1048,7 +1083,7 @@ static int gator_op_setup(void)
 			per_cpu(buffer_space_available, cpu)[i] = true;
 			per_cpu(gator_buffer_commit_time, cpu) = gator_live_rate;
 
-			// Annotation is a special case that only uses a single buffer
+			/* Annotation is a special case that only uses a single buffer */
 			if (cpu > 0 && i == ANNOTATE_BUF) {
 				per_cpu(gator_buffer, cpu)[i] = NULL;
 				continue;
@@ -1188,7 +1223,8 @@ static int userspace_buffer_open(struct inode *inode, struct file *file)
 	if (test_and_set_bit_lock(0, &gator_buffer_opened))
 		return -EBUSY;
 
-	if ((err = gator_op_setup()))
+	err = gator_op_setup();
+	if (err)
 		goto fail;
 
 	/* NB: the actual start happens from userspace
@@ -1218,22 +1254,20 @@ static ssize_t userspace_buffer_read(struct file *file, char __user *buf, size_t
 	int cpu, buftype;
 	int written = 0;
 
-	// ensure there is enough space for a whole frame
-	if (count < userspace_buffer_size || *offset) {
+	/* ensure there is enough space for a whole frame */
+	if (count < userspace_buffer_size || *offset)
 		return -EINVAL;
-	}
 
-	// sleep until the condition is true or a signal is received
-	// the condition is checked each time gator_buffer_wait is woken up
+	/* sleep until the condition is true or a signal is received; the
+	 * condition is checked each time gator_buffer_wait is woken up
+	 */
 	wait_event_interruptible(gator_buffer_wait, buffer_commit_ready(&cpu, &buftype) || !gator_started);
 
-	if (signal_pending(current)) {
+	if (signal_pending(current))
 		return -EINTR;
-	}
 
-	if (buftype == -1 || cpu == -1) {
+	if (buftype == -1 || cpu == -1)
 		return 0;
-	}
 
 	mutex_lock(&gator_buffer_mutex);
 
@@ -1241,12 +1275,11 @@ static ssize_t userspace_buffer_read(struct file *file, char __user *buf, size_t
 		read = per_cpu(gator_buffer_read, cpu)[buftype];
 		commit = per_cpu(gator_buffer_commit, cpu)[buftype];
 
-		// May happen if the buffer is freed during pending reads.
-		if (!per_cpu(gator_buffer, cpu)[buftype]) {
+		/* May happen if the buffer is freed during pending reads. */
+		if (!per_cpu(gator_buffer, cpu)[buftype])
 			break;
-		}
 
-		// determine the size of two halves
+		/* determine the size of two halves */
 		length1 = commit - read;
 		length2 = 0;
 		buffer1 = &(per_cpu(gator_buffer, cpu)[buftype][read]);
@@ -1256,32 +1289,28 @@ static ssize_t userspace_buffer_read(struct file *file, char __user *buf, size_t
 			length2 = commit;
 		}
 
-		if (length1 + length2 > count - written) {
+		if (length1 + length2 > count - written)
 			break;
-		}
 
-		// start, middle or end
-		if (length1 > 0 && copy_to_user(&buf[written], buffer1, length1)) {
+		/* start, middle or end */
+		if (length1 > 0 && copy_to_user(&buf[written], buffer1, length1))
 			break;
-		}
 
-		// possible wrap around
-		if (length2 > 0 && copy_to_user(&buf[written + length1], buffer2, length2)) {
+		/* possible wrap around */
+		if (length2 > 0 && copy_to_user(&buf[written + length1], buffer2, length2))
 			break;
-		}
 
 		per_cpu(gator_buffer_read, cpu)[buftype] = commit;
 		written += length1 + length2;
 
-		// Wake up annotate_write if more space is available
-		if (buftype == ANNOTATE_BUF) {
+		/* Wake up annotate_write if more space is available */
+		if (buftype == ANNOTATE_BUF)
 			wake_up(&gator_annotate_wait);
-		}
 	} while (buffer_commit_ready(&cpu, &buftype));
 
 	mutex_unlock(&gator_buffer_mutex);
 
-	// kick just in case we've lost an SMP event
+	/* kick just in case we've lost an SMP event */
 	wake_up(&gator_buffer_wait);
 
 	return written > 0 ? written : -EFAULT;
@@ -1348,19 +1377,19 @@ static void gator_op_create_files(struct super_block *sb, struct dentry *root)
 	gatorfs_create_ro_u64(sb, root, "started", &gator_monotonic_started);
 	gatorfs_create_u64(sb, root, "live_rate", &gator_live_rate);
 
-	// Annotate interface
+	/* Annotate interface */
 	gator_annotate_create_files(sb, root);
 
-	// Linux Events
+	/* Linux Events */
 	dir = gatorfs_mkdir(sb, root, "events");
 	list_for_each_entry(gi, &gator_events, list)
 		if (gi->create_files)
 			gi->create_files(sb, dir);
 
-	// Sched Events
+	/* Sched Events */
 	sched_trace_create_files(sb, dir);
 
-	// Power interface
+	/* Power interface */
 	gator_trace_power_create_files(sb, dir);
 }
 
@@ -1396,19 +1425,22 @@ static void gator_op_create_files(struct super_block *sb, struct dentry *root)
 	GATOR_HANDLE_TRACEPOINT(sched_process_free); \
 	GATOR_HANDLE_TRACEPOINT(sched_switch); \
 	GATOR_HANDLE_TRACEPOINT(softirq_exit); \
+	GATOR_HANDLE_TRACEPOINT(task_rename); \
 
 #define GATOR_HANDLE_TRACEPOINT(probe_name) \
 	struct tracepoint *gator_tracepoint_##probe_name
 GATOR_TRACEPOINTS;
 #undef GATOR_HANDLE_TRACEPOINT
 
-static void gator_fct(struct tracepoint *tp, void *priv)
+static void gator_save_tracepoint(struct tracepoint *tp, void *priv)
 {
 #define GATOR_HANDLE_TRACEPOINT(probe_name) \
-	if (strcmp(tp->name, #probe_name) == 0) { \
-		gator_tracepoint_##probe_name = tp; \
-		return; \
-	}
+	do { \
+		if (strcmp(tp->name, #probe_name) == 0) { \
+			gator_tracepoint_##probe_name = tp; \
+			return; \
+		} \
+	} while (0)
 GATOR_TRACEPOINTS;
 #undef GATOR_HANDLE_TRACEPOINT
 }
@@ -1421,11 +1453,10 @@ GATOR_TRACEPOINTS;
 
 static int __init gator_module_init(void)
 {
-	for_each_kernel_tracepoint(gator_fct, NULL);
+	for_each_kernel_tracepoint(gator_save_tracepoint, NULL);
 
-	if (gatorfs_register()) {
+	if (gatorfs_register())
 		return -1;
-	}
 
 	if (gator_init()) {
 		gatorfs_unregister();
@@ -1434,7 +1465,7 @@ static int __init gator_module_init(void)
 
 	setup_timer(&gator_buffer_wake_up_timer, gator_buffer_wake_up, 0);
 
-	// Initialize the list of cpuids
+	/* Initialize the list of cpuids */
 	memset(gator_cpuids, -1, sizeof(gator_cpuids));
 	on_each_cpu(gator_read_cpuid, NULL, 1);
 
diff --git a/drivers/gator/gator_marshaling.c b/drivers/gator/gator_marshaling.c
index 97b4ae6f9d4d..0d1167643642 100644
--- a/drivers/gator/gator_marshaling.c
+++ b/drivers/gator/gator_marshaling.c
@@ -23,7 +23,7 @@
 #include "gator_events_mali_common.h"
 #endif
 
-static void marshal_summary(long long timestamp, long long uptime, long long monotonic_delta, const char * uname)
+static void marshal_summary(long long timestamp, long long uptime, long long monotonic_delta, const char *uname)
 {
 	unsigned long flags;
 	int cpu = 0;
@@ -40,19 +40,27 @@ static void marshal_summary(long long timestamp, long long uptime, long long mon
 	gator_buffer_write_string(cpu, SUMMARY_BUF, "iks");
 	gator_buffer_write_string(cpu, SUMMARY_BUF, "");
 #endif
-	// Let Streamline know which GPU is used so that it can label the GPU Activity appropriately. This is a temporary fix, to be improved in a future release.
+#ifdef CONFIG_PREEMPT_RTB
+	gator_buffer_write_string(cpu, SUMMARY_BUF, "preempt_rtb");
+	gator_buffer_write_string(cpu, SUMMARY_BUF, "");
+#endif
+#ifdef CONFIG_PREEMPT_RT_FULL
+	gator_buffer_write_string(cpu, SUMMARY_BUF, "preempt_rt_full");
+	gator_buffer_write_string(cpu, SUMMARY_BUF, "");
+#endif
+	/* Let Streamline know which GPU is used so that it can label the GPU Activity appropriately. This is a temporary fix, to be improved in a future release. */
 #ifdef MALI_SUPPORT
 	gator_buffer_write_string(cpu, SUMMARY_BUF, "mali_type");
 #if (MALI_SUPPORT == MALI_4xx)
 	gator_buffer_write_string(cpu, SUMMARY_BUF, "4xx");
-#elif (MALI_SUPPORT == MALI_T6xx)
+#elif (MALI_SUPPORT == MALI_MIDGARD)
 	gator_buffer_write_string(cpu, SUMMARY_BUF, "6xx");
 #else
 	gator_buffer_write_string(cpu, SUMMARY_BUF, "unknown");
 #endif
 #endif
 	gator_buffer_write_string(cpu, SUMMARY_BUF, "");
-	// Commit the buffer now so it can be one of the first frames read by Streamline
+	/* Commit the buffer now so it can be one of the first frames read by Streamline */
 	local_irq_restore(flags);
 	gator_commit_buffer(cpu, SUMMARY_BUF, gator_get_time());
 }
@@ -60,13 +68,14 @@ static void marshal_summary(long long timestamp, long long uptime, long long mon
 static bool marshal_cookie_header(const char *text)
 {
 	int cpu = get_physical_cpu();
+
 	return buffer_check_space(cpu, NAME_BUF, strlen(text) + 3 * MAXSIZE_PACK32);
 }
 
 static void marshal_cookie(int cookie, const char *text)
 {
 	int cpu = get_physical_cpu();
-	// buffer_check_space already called by marshal_cookie_header
+	/* buffer_check_space already called by marshal_cookie_header */
 	gator_buffer_write_packed_int(cpu, NAME_BUF, MESSAGE_COOKIE);
 	gator_buffer_write_packed_int(cpu, NAME_BUF, cookie);
 	gator_buffer_write_string(cpu, NAME_BUF, text);
@@ -77,6 +86,7 @@ static void marshal_thread_name(int pid, char *name)
 {
 	unsigned long flags, cpu;
 	u64 time;
+
 	local_irq_save(flags);
 	cpu = get_physical_cpu();
 	time = gator_get_time();
@@ -105,15 +115,16 @@ static void marshal_link(int cookie, int tgid, int pid)
 		gator_buffer_write_packed_int(cpu, NAME_BUF, pid);
 	}
 	local_irq_restore(flags);
-	// Check and commit; commit is set to occur once buffer is 3/4 full
+	/* Check and commit; commit is set to occur once buffer is 3/4 full */
 	buffer_check(cpu, NAME_BUF, time);
 }
 
 static bool marshal_backtrace_header(int exec_cookie, int tgid, int pid, u64 time)
 {
 	int cpu = get_physical_cpu();
+
 	if (!buffer_check_space(cpu, BACKTRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32 + gator_backtrace_depth * 2 * MAXSIZE_PACK32)) {
-		// Check and commit; commit is set to occur once buffer is 3/4 full
+		/* Check and commit; commit is set to occur once buffer is 3/4 full */
 		buffer_check(cpu, BACKTRACE_BUF, time);
 
 		return false;
@@ -130,9 +141,9 @@ static bool marshal_backtrace_header(int exec_cookie, int tgid, int pid, u64 tim
 static void marshal_backtrace(unsigned long address, int cookie, int in_kernel)
 {
 	int cpu = get_physical_cpu();
-	if (cookie == 0 && !in_kernel) {
+
+	if (cookie == 0 && !in_kernel)
 		cookie = UNRESOLVED_COOKIE;
-	}
 	gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, cookie);
 	gator_buffer_write_packed_int64(cpu, BACKTRACE_BUF, address);
 }
@@ -140,9 +151,10 @@ static void marshal_backtrace(unsigned long address, int cookie, int in_kernel)
 static void marshal_backtrace_footer(u64 time)
 {
 	int cpu = get_physical_cpu();
+
 	gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, MESSAGE_END_BACKTRACE);
 
-	// Check and commit; commit is set to occur once buffer is 3/4 full
+	/* Check and commit; commit is set to occur once buffer is 3/4 full */
 	buffer_check(cpu, BACKTRACE_BUF, time);
 }
 
@@ -153,7 +165,7 @@ static bool marshal_event_header(u64 time)
 
 	local_irq_save(flags);
 	if (buffer_check_space(cpu, BLOCK_COUNTER_BUF, MAXSIZE_PACK32 + MAXSIZE_PACK64)) {
-		gator_buffer_write_packed_int(cpu, BLOCK_COUNTER_BUF, 0);	// key of zero indicates a timestamp
+		gator_buffer_write_packed_int(cpu, BLOCK_COUNTER_BUF, 0);	/* key of zero indicates a timestamp */
 		gator_buffer_write_packed_int64(cpu, BLOCK_COUNTER_BUF, time);
 		retval = true;
 	}
@@ -169,18 +181,17 @@ static void marshal_event(int len, int *buffer)
 	if (len <= 0)
 		return;
 
-	// length must be even since all data is a (key, value) pair
+	/* length must be even since all data is a (key, value) pair */
 	if (len & 0x1) {
-		pr_err("gator: invalid counter data detected and discarded");
+		pr_err("gator: invalid counter data detected and discarded\n");
 		return;
 	}
 
-	// events must be written in key,value pairs
+	/* events must be written in key,value pairs */
 	local_irq_save(flags);
 	for (i = 0; i < len; i += 2) {
-		if (!buffer_check_space(cpu, BLOCK_COUNTER_BUF, 2 * MAXSIZE_PACK32)) {
+		if (!buffer_check_space(cpu, BLOCK_COUNTER_BUF, 2 * MAXSIZE_PACK32))
 			break;
-		}
 		gator_buffer_write_packed_int(cpu, BLOCK_COUNTER_BUF, buffer[i]);
 		gator_buffer_write_packed_int(cpu, BLOCK_COUNTER_BUF, buffer[i + 1]);
 	}
@@ -194,26 +205,24 @@ static void marshal_event64(int len, long long *buffer64)
 	if (len <= 0)
 		return;
 
-	// length must be even since all data is a (key, value) pair
+	/* length must be even since all data is a (key, value) pair */
 	if (len & 0x1) {
-		pr_err("gator: invalid counter data detected and discarded");
+		pr_err("gator: invalid counter data detected and discarded\n");
 		return;
 	}
 
-	// events must be written in key,value pairs
+	/* events must be written in key,value pairs */
 	local_irq_save(flags);
 	for (i = 0; i < len; i += 2) {
-		if (!buffer_check_space(cpu, BLOCK_COUNTER_BUF, 2 * MAXSIZE_PACK64)) {
+		if (!buffer_check_space(cpu, BLOCK_COUNTER_BUF, 2 * MAXSIZE_PACK64))
 			break;
-		}
 		gator_buffer_write_packed_int64(cpu, BLOCK_COUNTER_BUF, buffer64[i]);
 		gator_buffer_write_packed_int64(cpu, BLOCK_COUNTER_BUF, buffer64[i + 1]);
 	}
 	local_irq_restore(flags);
 }
 
-#if GATOR_CPU_FREQ_SUPPORT
-static void marshal_event_single(int core, int key, int value)
+static void __maybe_unused marshal_event_single(int core, int key, int value)
 {
 	unsigned long flags, cpu;
 	u64 time;
@@ -228,11 +237,11 @@ static void marshal_event_single(int core, int key, int value)
 		gator_buffer_write_packed_int(cpu, COUNTER_BUF, value);
 	}
 	local_irq_restore(flags);
-	// Check and commit; commit is set to occur once buffer is 3/4 full
+	/* Check and commit; commit is set to occur once buffer is 3/4 full */
 	buffer_check(cpu, COUNTER_BUF, time);
 }
 
-static void marshal_event_single64(int core, int key, long long value)
+static void __maybe_unused marshal_event_single64(int core, int key, long long value)
 {
 	unsigned long flags, cpu;
 	u64 time;
@@ -247,10 +256,9 @@ static void marshal_event_single64(int core, int key, long long value)
 		gator_buffer_write_packed_int64(cpu, COUNTER_BUF, value);
 	}
 	local_irq_restore(flags);
-	// Check and commit; commit is set to occur once buffer is 3/4 full
+	/* Check and commit; commit is set to occur once buffer is 3/4 full */
 	buffer_check(cpu, COUNTER_BUF, time);
 }
-#endif
 
 static void marshal_sched_trace_switch(int pid, int state)
 {
@@ -269,7 +277,7 @@ static void marshal_sched_trace_switch(int pid, int state)
 		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, state);
 	}
 	local_irq_restore(flags);
-	// Check and commit; commit is set to occur once buffer is 3/4 full
+	/* Check and commit; commit is set to occur once buffer is 3/4 full */
 	buffer_check(cpu, SCHED_TRACE_BUF, time);
 }
 
@@ -289,7 +297,7 @@ static void marshal_sched_trace_exit(int tgid, int pid)
 		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, pid);
 	}
 	local_irq_restore(flags);
-	// Check and commit; commit is set to occur once buffer is 3/4 full
+	/* Check and commit; commit is set to occur once buffer is 3/4 full */
 	buffer_check(cpu, SCHED_TRACE_BUF, time);
 }
 
@@ -308,7 +316,7 @@ static void marshal_idle(int core, int state)
 		gator_buffer_write_packed_int(cpu, IDLE_BUF, core);
 	}
 	local_irq_restore(flags);
-	// Check and commit; commit is set to occur once buffer is 3/4 full
+	/* Check and commit; commit is set to occur once buffer is 3/4 full */
 	buffer_check(cpu, IDLE_BUF, time);
 }
 #endif
@@ -318,6 +326,7 @@ static void marshal_core_name(const int core, const int cpuid, const char *name)
 {
 	int cpu = get_physical_cpu();
 	unsigned long flags;
+
 	local_irq_save(flags);
 	if (buffer_check_space(cpu, SUMMARY_BUF, MAXSIZE_PACK32 + MAXSIZE_CORE_NAME)) {
 		gator_buffer_write_packed_int(cpu, SUMMARY_BUF, MESSAGE_CORE_NAME);
@@ -325,7 +334,7 @@ static void marshal_core_name(const int core, const int cpuid, const char *name)
 		gator_buffer_write_packed_int(cpu, SUMMARY_BUF, cpuid);
 		gator_buffer_write_string(cpu, SUMMARY_BUF, name);
 	}
-	// Commit core names now so that they can show up in live
+	/* Commit core names now so that they can show up in live */
 	local_irq_restore(flags);
 	gator_commit_buffer(cpu, SUMMARY_BUF, gator_get_time());
 }
@@ -351,12 +360,12 @@ static void marshal_activity_switch(int core, int key, int activity, int pid, in
 		gator_buffer_write_packed_int(cpu, ACTIVITY_BUF, state);
 	}
 	local_irq_restore(flags);
-	// Check and commit; commit is set to occur once buffer is 3/4 full
+	/* Check and commit; commit is set to occur once buffer is 3/4 full */
 	buffer_check(cpu, ACTIVITY_BUF, time);
 }
 
 void gator_marshal_activity_switch(int core, int key, int activity, int pid)
 {
-	// state is reserved for cpu use only
+	/* state is reserved for cpu use only */
 	marshal_activity_switch(core, key, activity, pid, 0);
 }
diff --git a/drivers/gator/gator_trace_gpu.c b/drivers/gator/gator_trace_gpu.c
index a8b9e7d61ece..5de9152e365a 100644
--- a/drivers/gator/gator_trace_gpu.c
+++ b/drivers/gator/gator_trace_gpu.c
@@ -58,11 +58,12 @@ struct mali_activity {
 static struct mali_activity mali_activities[NUMBER_OF_GPU_UNITS*NUMBER_OF_GPU_CORES];
 static DEFINE_SPINLOCK(mali_activities_lock);
 
-/* Only one event should be running on a unit and core at a time (ie, a start
- * event can only be followed by a stop and vice versa), but because the kernel
- * only knows when a job is enqueued and not started, it is possible for a
- * start1, start2, stop1, stop2. Change it back into start1, stop1, start2,
- * stop2 by queueing up start2 and releasing it when stop1 is received.
+/* Only one event should be running on a unit and core at a time (ie,
+ * a start event can only be followed by a stop and vice versa), but
+ * because the kernel only knows when a job is enqueued and not
+ * started, it is possible for a start1, start2, stop1, stop2. Change
+ * it back into start1, stop1, start2, stop2 by queueing up start2 and
+ * releasing it when stop1 is received.
  */
 
 static int mali_activity_index(int core, int key)
@@ -70,9 +71,8 @@ static int mali_activity_index(int core, int key)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(mali_activities); ++i) {
-		if ((mali_activities[i].core == core) && (mali_activities[i].key == key)) {
+		if ((mali_activities[i].core == core) && (mali_activities[i].key == key))
 			break;
-		}
 		if ((mali_activities[i].core == 0) && (mali_activities[i].key == 0)) {
 			mali_activities[i].core = core;
 			mali_activities[i].key = key;
@@ -101,9 +101,8 @@ static void mali_activity_enqueue(int core, int key, int activity, int pid)
 	}
 	spin_unlock(&mali_activities_lock);
 
-	if (!count) {
+	if (!count)
 		gator_marshal_activity_switch(core, key, activity, pid);
-	}
 }
 
 static void mali_activity_stop(int core, int key)
@@ -129,12 +128,11 @@ static void mali_activity_stop(int core, int key)
 	spin_unlock(&mali_activities_lock);
 
 	gator_marshal_activity_switch(core, key, 0, 0);
-	if (count) {
+	if (count)
 		gator_marshal_activity_switch(core, key, last_activity, last_pid);
-	}
 }
 
-void mali_activity_clear(mali_counter mali_activity[], size_t mali_activity_size)
+void mali_activity_clear(struct mali_counter mali_activity[], size_t mali_activity_size)
 {
 	int activity;
 	int cores;
@@ -142,12 +140,13 @@ void mali_activity_clear(mali_counter mali_activity[], size_t mali_activity_size
 
 	for (activity = 0; activity < mali_activity_size; ++activity) {
 		cores = mali_activity[activity].cores;
-		if (cores < 0) {
+		if (cores < 0)
 			cores = 1;
-		}
 		for (core = 0; core < cores; ++core) {
 			if (mali_activity[activity].enabled) {
+				preempt_disable();
 				gator_marshal_activity_switch(core, mali_activity[activity].key, 0, 0);
+				preempt_enable();
 			}
 		}
 	}
@@ -155,7 +154,7 @@ void mali_activity_clear(mali_counter mali_activity[], size_t mali_activity_size
 
 #endif
 
-#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx)
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_MIDGARD)
 #include "gator_events_mali_4xx.h"
 
 /*
@@ -183,40 +182,36 @@ enum {
 	EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE = 1,
 };
 
-mali_counter mali_activity[2];
+struct mali_counter mali_activity[2];
 
 GATOR_DEFINE_PROBE(mali_timeline_event, TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1, unsigned int d2, unsigned int d3, unsigned int d4))
 {
 	unsigned int component, state;
 
-	// do as much work as possible before disabling interrupts
-	component = (event_id >> 16) & 0xFF;	// component is an 8-bit field
-	state = (event_id >> 24) & 0xF;	// state is a 4-bit field
+	/* do as much work as possible before disabling interrupts */
+	component = (event_id >> 16) & 0xFF;	/* component is an 8-bit field */
+	state = (event_id >> 24) & 0xF;	/* state is a 4-bit field */
 
 	switch (state) {
 	case EVENT_TYPE_START:
 		if (component == EVENT_CHANNEL_VP0) {
 			/* tgid = d0; pid = d1; */
-			if (mali_activity[1].enabled) {
+			if (mali_activity[1].enabled)
 				mali_activity_enqueue(0, mali_activity[1].key, 1, d1);
-			}
 		} else if (component >= EVENT_CHANNEL_FP0 && component <= EVENT_CHANNEL_FP7) {
 			/* tgid = d0; pid = d1; */
-			if (mali_activity[0].enabled) {
+			if (mali_activity[0].enabled)
 				mali_activity_enqueue(component - EVENT_CHANNEL_FP0, mali_activity[0].key, 1, d1);
-			}
 		}
 		break;
 
 	case EVENT_TYPE_STOP:
 		if (component == EVENT_CHANNEL_VP0) {
-			if (mali_activity[1].enabled) {
+			if (mali_activity[1].enabled)
 				mali_activity_stop(0, mali_activity[1].key);
-			}
 		} else if (component >= EVENT_CHANNEL_FP0 && component <= EVENT_CHANNEL_FP7) {
-			if (mali_activity[0].enabled) {
+			if (mali_activity[0].enabled)
 				mali_activity_stop(component - EVENT_CHANNEL_FP0, mali_activity[0].key);
-			}
 		}
 		break;
 
@@ -224,9 +219,8 @@ GATOR_DEFINE_PROBE(mali_timeline_event, TP_PROTO(unsigned int event_id, unsigned
 		if (component == EVENT_CHANNEL_GPU) {
 			unsigned int reason = (event_id & 0xffff);
 
-			if (reason == EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE) {
+			if (reason == EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE)
 				gator_events_mali_log_dvfs_event(d0, d1);
-			}
 		}
 		break;
 
@@ -236,9 +230,9 @@ GATOR_DEFINE_PROBE(mali_timeline_event, TP_PROTO(unsigned int event_id, unsigned
 }
 #endif
 
-#if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_T6xx)
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_MIDGARD)
 
-mali_counter mali_activity[3];
+struct mali_counter mali_activity[3];
 
 #if defined(MALI_JOB_SLOTS_EVENT_CHANGED)
 GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid, unsigned char job_id))
@@ -251,8 +245,8 @@ GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigne
 	unsigned char job_id = 0;
 #endif
 
-	component = (event_id >> 16) & 0xFF;	// component is an 8-bit field
-	state = (event_id >> 24) & 0xF;	// state is a 4-bit field
+	component = (event_id >> 16) & 0xFF;	/* component is an 8-bit field */
+	state = (event_id >> 24) & 0xF;	/* state is a 4-bit field */
 
 	switch (component) {
 	case 0:
@@ -271,15 +265,13 @@ GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigne
 	if (unit != GPU_UNIT_NONE) {
 		switch (state) {
 		case EVENT_TYPE_START:
-			if (mali_activity[component].enabled) {
+			if (mali_activity[component].enabled)
 				mali_activity_enqueue(0, mali_activity[component].key, 1, (pid != 0 ? pid : tgid));
-			}
 			break;
 		case EVENT_TYPE_STOP:
-		default: // Some jobs can be soft-stopped, so ensure that this terminates the activity trace.
-			if (mali_activity[component].enabled) {
+		default: /* Some jobs can be soft-stopped, so ensure that this terminates the activity trace. */
+			if (mali_activity[component].enabled)
 				mali_activity_stop(0, mali_activity[component].key);
-			}
 			break;
 		}
 	}
@@ -298,18 +290,16 @@ static int gator_trace_gpu_start(void)
 #endif
 	mali_timeline_trace_registered = mali_job_slots_trace_registered = 0;
 
-#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx)
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_MIDGARD)
 	mali_activity_clear(mali_activity, ARRAY_SIZE(mali_activity));
-	if (!GATOR_REGISTER_TRACE(mali_timeline_event)) {
+	if (!GATOR_REGISTER_TRACE(mali_timeline_event))
 		mali_timeline_trace_registered = 1;
-	}
 #endif
 
-#if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_T6xx)
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_MIDGARD)
 	mali_activity_clear(mali_activity, ARRAY_SIZE(mali_activity));
-	if (!GATOR_REGISTER_TRACE(mali_job_slots_event)) {
+	if (!GATOR_REGISTER_TRACE(mali_job_slots_event))
 		mali_job_slots_trace_registered = 1;
-	}
 #endif
 
 	return 0;
@@ -317,16 +307,14 @@ static int gator_trace_gpu_start(void)
 
 static void gator_trace_gpu_stop(void)
 {
-#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx)
-	if (mali_timeline_trace_registered) {
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_MIDGARD)
+	if (mali_timeline_trace_registered)
 		GATOR_UNREGISTER_TRACE(mali_timeline_event);
-	}
 #endif
 
-#if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_T6xx)
-	if (mali_job_slots_trace_registered) {
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_MIDGARD)
+	if (mali_job_slots_trace_registered)
 		GATOR_UNREGISTER_TRACE(mali_job_slots_event);
-	}
 #endif
 
 	mali_timeline_trace_registered = mali_job_slots_trace_registered = 0;
diff --git a/drivers/gator/gator_trace_power.c b/drivers/gator/gator_trace_power.c
index f2754b1c2b56..46e04b29a187 100644
--- a/drivers/gator/gator_trace_power.c
+++ b/drivers/gator/gator_trace_power.c
@@ -22,18 +22,20 @@
 
 #endif
 
-// cpu_frequency and cpu_idle trace points were introduced in Linux kernel v2.6.38
-// the now deprecated power_frequency trace point was available prior to 2.6.38, but only for x86
+/* cpu_frequency and cpu_idle trace points were introduced in Linux
+ * kernel v2.6.38; the now deprecated power_frequency trace point was
+ * available prior to 2.6.38, but only for x86
+ */
 #if GATOR_CPU_FREQ_SUPPORT
 enum {
 	POWER_CPU_FREQ,
-	POWER_CPU_IDLE,
 	POWER_TOTAL
 };
 
 static DEFINE_PER_CPU(ulong, idle_prev_state);
 static ulong power_cpu_enabled[POWER_TOTAL];
 static ulong power_cpu_key[POWER_TOTAL];
+static ulong power_cpu_cores;
 
 static int gator_trace_power_create_files(struct super_block *sb, struct dentry *root)
 {
@@ -41,8 +43,9 @@ static int gator_trace_power_create_files(struct super_block *sb, struct dentry
 	int cpu;
 	bool found_nonzero_freq = false;
 
-	// Even if CONFIG_CPU_FREQ is defined, it still may not be used. Check
-	// for non-zero values from cpufreq_quick_get
+	/* Even if CONFIG_CPU_FREQ is defined, it still may not be
+	 * used. Check for non-zero values from cpufreq_quick_get
+	 */
 	for_each_online_cpu(cpu) {
 		if (cpufreq_quick_get(cpu) > 0) {
 			found_nonzero_freq = true;
@@ -51,27 +54,18 @@ static int gator_trace_power_create_files(struct super_block *sb, struct dentry
 	}
 
 	if (found_nonzero_freq) {
-		// cpu_frequency
+		/* cpu_frequency */
 		dir = gatorfs_mkdir(sb, root, "Linux_power_cpu_freq");
-		if (!dir) {
+		if (!dir)
 			return -1;
-		}
 		gatorfs_create_ulong(sb, dir, "enabled", &power_cpu_enabled[POWER_CPU_FREQ]);
 		gatorfs_create_ro_ulong(sb, dir, "key", &power_cpu_key[POWER_CPU_FREQ]);
 	}
 
-	// cpu_idle
-	dir = gatorfs_mkdir(sb, root, "Linux_power_cpu_idle");
-	if (!dir) {
-		return -1;
-	}
-	gatorfs_create_ulong(sb, dir, "enabled", &power_cpu_enabled[POWER_CPU_IDLE]);
-	gatorfs_create_ro_ulong(sb, dir, "key", &power_cpu_key[POWER_CPU_IDLE]);
-
 	return 0;
 }
 
-// 'cpu' may not equal smp_processor_id(), i.e. may not be running on the core that is having the freq/idle state change
+/* 'cpu' may not equal smp_processor_id(), i.e. may not be running on the core that is having the freq/idle state change */
 GATOR_DEFINE_PROBE(cpu_frequency, TP_PROTO(unsigned int frequency, unsigned int cpu))
 {
 	cpu = lcpu_to_pcpu(cpu);
@@ -82,56 +76,50 @@ GATOR_DEFINE_PROBE(cpu_idle, TP_PROTO(unsigned int state, unsigned int cpu))
 {
 	cpu = lcpu_to_pcpu(cpu);
 
-	if (state == per_cpu(idle_prev_state, cpu)) {
+	if (state == per_cpu(idle_prev_state, cpu))
 		return;
-	}
 
 	if (implements_wfi()) {
 		if (state == PWR_EVENT_EXIT) {
-			// transition from wfi to non-wfi
+			/* transition from wfi to non-wfi */
 			marshal_idle(cpu, MESSAGE_IDLE_EXIT);
 		} else {
-			// transition from non-wfi to wfi
+			/* transition from non-wfi to wfi */
 			marshal_idle(cpu, MESSAGE_IDLE_ENTER);
 		}
 	}
 
 	per_cpu(idle_prev_state, cpu) = state;
-
-	if (power_cpu_enabled[POWER_CPU_IDLE]) {
-		// Increment state so that no negative numbers are sent
-		marshal_event_single(cpu, power_cpu_key[POWER_CPU_IDLE], state + 1);
-	}
 }
 
 static void gator_trace_power_online(void)
 {
 	int pcpu = get_physical_cpu();
 	int lcpu = get_logical_cpu();
-	if (power_cpu_enabled[POWER_CPU_FREQ]) {
+
+	if (power_cpu_enabled[POWER_CPU_FREQ])
 		marshal_event_single64(pcpu, power_cpu_key[POWER_CPU_FREQ], cpufreq_quick_get(lcpu) * 1000L);
-	}
 }
 
 static void gator_trace_power_offline(void)
 {
-	// Set frequency to zero on an offline
+	/* Set frequency to zero on an offline */
 	int cpu = get_physical_cpu();
-	if (power_cpu_enabled[POWER_CPU_FREQ]) {
+
+	if (power_cpu_enabled[POWER_CPU_FREQ])
 		marshal_event_single(cpu, power_cpu_key[POWER_CPU_FREQ], 0);
-	}
 }
 
 static int gator_trace_power_start(void)
 {
 	int cpu;
 
-	// register tracepoints
+	/* register tracepoints */
 	if (power_cpu_enabled[POWER_CPU_FREQ])
 		if (GATOR_REGISTER_TRACE(cpu_frequency))
 			goto fail_cpu_frequency_exit;
 
-	// Always register for cpu:idle for detecting WFI, independent of power_cpu_enabled[POWER_CPU_IDLE]
+	/* Always register for cpu_idle for detecting WFI */
 	if (GATOR_REGISTER_TRACE(cpu_idle))
 		goto fail_cpu_idle_exit;
 	pr_debug("gator: registered power event tracepoints\n");
@@ -142,7 +130,7 @@ static int gator_trace_power_start(void)
 
 	return 0;
 
-	// unregister tracepoints on error
+	/* unregister tracepoints on error */
 fail_cpu_idle_exit:
 	if (power_cpu_enabled[POWER_CPU_FREQ])
 		GATOR_UNREGISTER_TRACE(cpu_frequency);
@@ -161,14 +149,15 @@ static void gator_trace_power_stop(void)
 	GATOR_UNREGISTER_TRACE(cpu_idle);
 	pr_debug("gator: unregistered power event tracepoints\n");
 
-	for (i = 0; i < POWER_TOTAL; i++) {
+	for (i = 0; i < POWER_TOTAL; i++)
 		power_cpu_enabled[i] = 0;
-	}
 }
 
 static void gator_trace_power_init(void)
 {
 	int i;
+
+	power_cpu_cores = nr_cpu_ids;
 	for (i = 0; i < POWER_TOTAL; i++) {
 		power_cpu_enabled[i] = 0;
 		power_cpu_key[i] = gator_events_get_key();
diff --git a/drivers/gator/gator_trace_sched.c b/drivers/gator/gator_trace_sched.c
index 655008628933..6d7cbd7348e1 100644
--- a/drivers/gator/gator_trace_sched.c
+++ b/drivers/gator/gator_trace_sched.c
@@ -8,6 +8,10 @@
  */
 
 #include <trace/events/sched.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+#include <trace/events/task.h>
+#endif
+
 #include "gator.h"
 
 #define TASK_MAP_ENTRIES		1024	/* must be power of 2 */
@@ -23,8 +27,10 @@ enum {
 static DEFINE_PER_CPU(uint64_t *, taskname_keys);
 static DEFINE_PER_CPU(int, collecting);
 
-// this array is never read as the cpu wait charts are derived counters
-// the files are needed, nonetheless, to show that these counters are available
+/* this array is never read as the cpu wait charts are derived
+ * counters; the files are needed, nonetheless, to show that these
+ * counters are available
+ */
 static ulong cpu_wait_enabled[CPU_WAIT_TOTAL];
 static ulong sched_cpu_key[CPU_WAIT_TOTAL];
 
@@ -32,26 +38,24 @@ static int sched_trace_create_files(struct super_block *sb, struct dentry *root)
 {
 	struct dentry *dir;
 
-	// CPU Wait - Contention
+	/* CPU Wait - Contention */
 	dir = gatorfs_mkdir(sb, root, "Linux_cpu_wait_contention");
-	if (!dir) {
+	if (!dir)
 		return -1;
-	}
 	gatorfs_create_ulong(sb, dir, "enabled", &cpu_wait_enabled[STATE_CONTENTION]);
 	gatorfs_create_ro_ulong(sb, dir, "key", &sched_cpu_key[STATE_CONTENTION]);
 
-	// CPU Wait - I/O
+	/* CPU Wait - I/O */
 	dir = gatorfs_mkdir(sb, root, "Linux_cpu_wait_io");
-	if (!dir) {
+	if (!dir)
 		return -1;
-	}
 	gatorfs_create_ulong(sb, dir, "enabled", &cpu_wait_enabled[STATE_WAIT_ON_IO]);
 	gatorfs_create_ro_ulong(sb, dir, "key", &sched_cpu_key[STATE_WAIT_ON_IO]);
 
 	return 0;
 }
 
-static void emit_pid_name(struct task_struct *task)
+static void emit_pid_name(const char *comm, struct task_struct *task)
 {
 	bool found = false;
 	char taskcomm[TASK_COMM_LEN + 3];
@@ -59,10 +63,10 @@ static void emit_pid_name(struct task_struct *task)
 	uint64_t *keys = &(per_cpu(taskname_keys, cpu)[(task->pid & 0xFF) * TASK_MAX_COLLISIONS]);
 	uint64_t value;
 
-	value = gator_chksum_crc32(task->comm);
+	value = gator_chksum_crc32(comm);
 	value = (value << 32) | (uint32_t)task->pid;
 
-	// determine if the thread name was emitted already
+	/* determine if the thread name was emitted already */
 	for (x = 0; x < TASK_MAX_COLLISIONS; x++) {
 		if (keys[x] == value) {
 			found = true;
@@ -71,17 +75,18 @@ static void emit_pid_name(struct task_struct *task)
 	}
 
 	if (!found) {
-		// shift values, new value always in front
+		/* shift values, new value always in front */
 		uint64_t oldv, newv = value;
+
 		for (x = 0; x < TASK_MAX_COLLISIONS; x++) {
 			oldv = keys[x];
 			keys[x] = newv;
 			newv = oldv;
 		}
 
-		// emit pid names, cannot use get_task_comm, as it's not exported on all kernel versions
-		if (strlcpy(taskcomm, task->comm, TASK_COMM_LEN) == TASK_COMM_LEN - 1) {
-			// append ellipses if task->comm has length of TASK_COMM_LEN - 1
+		/* emit pid names, cannot use get_task_comm, as it's not exported on all kernel versions */
+		if (strlcpy(taskcomm, comm, TASK_COMM_LEN) == TASK_COMM_LEN - 1) {
+			/* append ellipses if comm has length of TASK_COMM_LEN - 1 */
 			strcat(taskcomm, "...");
 		}
 
@@ -89,7 +94,7 @@ static void emit_pid_name(struct task_struct *task)
 	}
 }
 
-static void collect_counters(u64 time, struct task_struct *task)
+static void collect_counters(u64 time, struct task_struct *task, bool sched_switch)
 {
 	int *buffer, len, cpu = get_physical_cpu();
 	long long *buffer64;
@@ -98,7 +103,7 @@ static void collect_counters(u64 time, struct task_struct *task)
 	if (marshal_event_header(time)) {
 		list_for_each_entry(gi, &gator_events, list) {
 			if (gi->read) {
-				len = gi->read(&buffer);
+				len = gi->read(&buffer, sched_switch);
 				marshal_event(len, buffer);
 			} else if (gi->read64) {
 				len = gi->read64(&buffer64);
@@ -109,22 +114,26 @@ static void collect_counters(u64 time, struct task_struct *task)
 				marshal_event64(len, buffer64);
 			}
 		}
-		// Only check after writing all counters so that time and corresponding counters appear in the same frame
+		if (cpu == 0)
+			gator_emit_perf_time(time);
+		/* Only check after writing all counters so that time and corresponding counters appear in the same frame */
 		buffer_check(cpu, BLOCK_COUNTER_BUF, time);
 
-		// Commit buffers on timeout
+		/* Commit buffers on timeout */
 		if (gator_live_rate > 0 && time >= per_cpu(gator_buffer_commit_time, cpu)) {
 			static const int buftypes[] = { NAME_BUF, COUNTER_BUF, BLOCK_COUNTER_BUF, SCHED_TRACE_BUF, ACTIVITY_BUF };
 			int i;
 
-			for (i = 0; i < ARRAY_SIZE(buftypes); ++i) {
+			for (i = 0; i < ARRAY_SIZE(buftypes); ++i)
 				gator_commit_buffer(cpu, buftypes[i], time);
-			}
 
-			// spinlocks are noops on uniprocessor machines and mutexes do not work in sched_switch context in
-			// RT-Preempt full, so disable proactive flushing of the annotate frame on uniprocessor machines.
+			/* spinlocks are noops on uniprocessor machines and mutexes do
+			 * not work in sched_switch context in RT-Preempt full, so
+			 * disable proactive flushing of the annotate frame on
+			 * uniprocessor machines.
+			 */
 #ifdef CONFIG_SMP
-			// Try to preemptively flush the annotate buffer to reduce the chance of the buffer being full
+			/* Try to preemptively flush the annotate buffer to reduce the chance of the buffer being full */
 			if (on_primary_core() && spin_trylock(&annotate_lock)) {
 				gator_commit_buffer(0, ANNOTATE_BUF, time);
 				spin_unlock(&annotate_lock);
@@ -134,7 +143,7 @@ static void collect_counters(u64 time, struct task_struct *task)
 	}
 }
 
-// special case used during a suspend of the system
+/* special case used during a suspend of the system */
 static void trace_sched_insert_idle(void)
 {
 	marshal_sched_trace_switch(0, 0);
@@ -146,7 +155,7 @@ static void gator_trace_emit_link(struct task_struct *p)
 	int cpu = get_physical_cpu();
 
 	cookie = get_exec_cookie(cpu, p);
-	emit_pid_name(p);
+	emit_pid_name(p->comm, p);
 
 	marshal_link(cookie, p->tgid, p->pid);
 }
@@ -161,6 +170,15 @@ GATOR_DEFINE_PROBE(sched_process_exec, TP_PROTO(struct task_struct *p, pid_t old
 {
 	gator_trace_emit_link(p);
 }
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0)
+GATOR_DEFINE_PROBE(task_rename, TP_PROTO(struct task_struct *task, char *comm))
+#else
+GATOR_DEFINE_PROBE(task_rename, TP_PROTO(struct task_struct *task, const char *comm))
+#endif
+{
+	emit_pid_name(comm, task);
+}
 #endif
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
@@ -174,17 +192,16 @@ GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_
 
 	per_cpu(in_scheduler_context, cpu) = true;
 
-	// do as much work as possible before disabling interrupts
-	if (prev->state == TASK_RUNNING) {
+	/* do as much work as possible before disabling interrupts */
+	if (prev->state == TASK_RUNNING)
 		state = STATE_CONTENTION;
-	} else if (prev->in_iowait) {
+	else if (prev->in_iowait)
 		state = STATE_WAIT_ON_IO;
-	} else {
+	else
 		state = STATE_WAIT_ON_OTHER;
-	}
 
 	per_cpu(collecting, cpu) = 1;
-	collect_counters(gator_get_time(), prev);
+	collect_counters(gator_get_time(), prev, true);
 	per_cpu(collecting, cpu) = 0;
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
@@ -202,18 +219,20 @@ GATOR_DEFINE_PROBE(sched_process_free, TP_PROTO(struct task_struct *p))
 
 static void do_nothing(void *info)
 {
-	// Intentionally do nothing
+	/* Intentionally do nothing */
 	(void)info;
 }
 
 static int register_scheduler_tracepoints(void)
 {
-	// register tracepoints
+	/* register tracepoints */
 	if (GATOR_REGISTER_TRACE(sched_process_fork))
 		goto fail_sched_process_fork;
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
 	if (GATOR_REGISTER_TRACE(sched_process_exec))
 		goto fail_sched_process_exec;
+	if (GATOR_REGISTER_TRACE(task_rename))
+		goto fail_task_rename;
 #endif
 	if (GATOR_REGISTER_TRACE(sched_switch))
 		goto fail_sched_switch;
@@ -221,21 +240,24 @@ static int register_scheduler_tracepoints(void)
 		goto fail_sched_process_free;
 	pr_debug("gator: registered tracepoints\n");
 
-	// Now that the scheduler tracepoint is registered, force a context switch
-	// on all cpus to capture what is currently running.
+	/* Now that the scheduler tracepoint is registered, force a context
+	 * switch on all cpus to capture what is currently running.
+	 */
 	on_each_cpu(do_nothing, NULL, 0);
 
 	return 0;
 
-	// unregister tracepoints on error
+	/* unregister tracepoints on error */
 fail_sched_process_free:
 	GATOR_UNREGISTER_TRACE(sched_switch);
 fail_sched_switch:
-	GATOR_UNREGISTER_TRACE(sched_process_fork);
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
-fail_sched_process_exec:
+	GATOR_UNREGISTER_TRACE(task_rename);
+fail_task_rename:
 	GATOR_UNREGISTER_TRACE(sched_process_exec);
+fail_sched_process_exec:
 #endif
+	GATOR_UNREGISTER_TRACE(sched_process_fork);
 fail_sched_process_fork:
 	pr_err("gator: tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
 
@@ -247,6 +269,7 @@ static void unregister_scheduler_tracepoints(void)
 	GATOR_UNREGISTER_TRACE(sched_process_fork);
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
 	GATOR_UNREGISTER_TRACE(sched_process_exec);
+	GATOR_UNREGISTER_TRACE(task_rename);
 #endif
 	GATOR_UNREGISTER_TRACE(sched_switch);
 	GATOR_UNREGISTER_TRACE(sched_process_free);
@@ -271,7 +294,7 @@ static int gator_trace_sched_start(void)
 
 	for_each_present_cpu(cpu) {
 		size = TASK_MAP_ENTRIES * TASK_MAX_COLLISIONS * sizeof(uint64_t);
-		per_cpu(taskname_keys, cpu) = (uint64_t *)kmalloc(size, GFP_KERNEL);
+		per_cpu(taskname_keys, cpu) = kmalloc(size, GFP_KERNEL);
 		if (!per_cpu(taskname_keys, cpu))
 			return -1;
 		memset(per_cpu(taskname_keys, cpu), 0, size);
@@ -290,6 +313,7 @@ static void gator_trace_sched_offline(void)
 static void gator_trace_sched_init(void)
 {
 	int i;
+
 	for (i = 0; i < CPU_WAIT_TOTAL; i++) {
 		cpu_wait_enabled[i] = 0;
 		sched_cpu_key[i] = gator_events_get_key();
diff --git a/drivers/gator/mali/mali_dd_gator_api.h b/drivers/gator/mali/mali_dd_gator_api.h
deleted file mode 100644
index 104b34f2d72a..000000000000
--- a/drivers/gator/mali/mali_dd_gator_api.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#if !defined(MALI_DDK_GATOR_API_VERSION)
-	#define MALI_DDK_GATOR_API_VERSION 3
-#endif
-#if !defined(MALI_TRUE)
-	#define MALI_TRUE                ((unsigned int)1)
-#endif
-
-#if !defined(MALI_FALSE)
-	#define MALI_FALSE               ((unsigned int)0)
-#endif
-
-struct mali_dd_hwcnt_info {
-
-	/* Passed from Gator to kbase */
-	//u32 in_mali_dd_hwcnt_version;
-	unsigned short int bitmask[4];
-
-	/* Passed from kbase to Gator */
-
-	/* ptr to counter dump memory */
-	void *kernel_dump_buffer;
-
-	/* size of counter dump memory */
-	unsigned int size;
-
-	unsigned int gpu_id;
-
-	unsigned int nr_cores;
-
-	unsigned int nr_core_groups;
-
-	/* The cached present bitmaps - these are the same as the corresponding hardware registers*/
-	unsigned long int shader_present_bitmap;
-};
-
-struct mali_dd_hwcnt_handles;
-extern struct mali_dd_hwcnt_handles* mali_dd_hwcnt_init(struct mali_dd_hwcnt_info *in_out_info);
-extern void mali_dd_hwcnt_clear(struct mali_dd_hwcnt_info *in_out_info, struct mali_dd_hwcnt_handles *opaque_handles);
-extern unsigned int kbase_dd_instr_hwcnt_dump_complete(struct mali_dd_hwcnt_handles *opaque_handles, unsigned int * const success);
-extern unsigned int kbase_dd_instr_hwcnt_dump_irq(struct mali_dd_hwcnt_handles *opaque_handles);
diff --git a/drivers/gator/mali/mali_kbase_gator_api.h b/drivers/gator/mali/mali_kbase_gator_api.h
new file mode 100644
index 000000000000..5ed069797e36
--- /dev/null
+++ b/drivers/gator/mali/mali_kbase_gator_api.h
@@ -0,0 +1,219 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef _KBASE_GATOR_API_H_
+#define _KBASE_GATOR_API_H_
+
+/**
+ * @brief This file describes the API used by Gator to collect hardware counters data from a Mali device.
+ */
+
+/* This define is used by the gator kernel module compile to select which DDK
+ * API calling convention to use. If not defined (legacy DDK) gator assumes
+ * version 1. The version to DDK release mapping is:
+ *     Version 1 API: DDK versions r1px, r2px
+ *     Version 2 API: DDK versions r3px, r4px
+ *     Version 3 API: DDK version r5p0 and newer
+ *
+ * API Usage
+ * =========
+ *
+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
+ * names for the GPU present in this device.
+ *
+ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
+ * the counters you want enabled. The enables can all be set for simplicity in
+ * most use cases, but disabling some will let you minimize bandwidth impact.
+ *
+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
+ * counter context. On successful return the DDK will have populated the
+ * structure with a variety of useful information.
+ *
+ * 4] Call kbase_gator_instr_hwcnt_dump_irq() to queue a non-blocking request
+ * for a counter dump. If this returns a non-zero value the request has been
+ * queued, otherwise the driver has been unable to do so (typically because
+ * another user of the instrumentation exists concurrently).
+ *
+ * 5] Call kbase_gator_instr_hwcnt_dump_complete() to test whether the
+ * previously requested dump has been successful. If this returns non-zero the
+ * counter dump has resolved, but the value of *success must also be tested as
+ * the dump may not have been successful. If it returns zero the counter dump
+ * was abandoned due to the device being busy (typically because another
+ * user of the instrumentation exists concurrently).
+ *
+ * 6] Process the counters stored in the buffer pointed to by ...
+ *
+ *        kbase_gator_hwcnt_info->kernel_dump_buffer
+ *
+ *    In pseudo code you can find all of the counters via this approach:
+ *
+ *
+ *        hwcnt_info # pointer to kbase_gator_hwcnt_info structure
+ *        hwcnt_name # pointer to name list
+ *
+ *        u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
+ *
+ *        # Iterate over each 64-counter block in this GPU configuration
+ *        for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
+ *            hwc_type type = hwcnt_info->hwc_layout[i];
+ *
+ *            # Skip reserved type blocks - they contain no counters at all
+ *            if( type == RESERVED_BLOCK ) {
+ *                continue;
+ *            }
+ *
+ *            size_t name_offset = type * 64;
+ *            size_t data_offset = i * 64;
+ *
+ *            # Iterate over the names of the counters in this block type
+ *            for( j = 0; j < 64; j++) {
+ *                const char * name = hwcnt_name[name_offset+j];
+ *
+ *                # Skip empty name strings - there is no counter here
+ *                if( name[0] == '\0' ) {
+ *                    continue;
+ *                }
+ *
+ *                u32 data = hwcnt_data[data_offset+j];
+ *
+ *                printk( "COUNTER: %s DATA: %u\n", name, data );
+ *            }
+ *        }
+ *
+ *
+ *     Note that in most implementations you typically want to either SUM or
+ *     AVERAGE multiple instances of the same counter if, for example, you have
+ *     multiple shader cores or multiple L2 caches. The most sensible view for
+ *     analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
+ *     counters.
+ *
+ * 7] Goto 4, repeating until you want to stop collecting counters.
+ *
+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term().
+ *
+ * 9] Release the name table resources by calling kbase_gator_hwcnt_term_names().
+ *    This function must only be called if init_names() returned a non-NULL value.
+ **/
+
+#define MALI_DDK_GATOR_API_VERSION 3
+
+#if !defined(MALI_TRUE)
+	#define MALI_TRUE                ((uint32_t)1)
+#endif
+
+#if !defined(MALI_FALSE)
+	#define MALI_FALSE               ((uint32_t)0)
+#endif
+
+enum hwc_type {
+	JM_BLOCK = 0,
+	TILER_BLOCK,
+	SHADER_BLOCK,
+	MMU_L2_BLOCK,
+	RESERVED_BLOCK
+};
+
+struct kbase_gator_hwcnt_info {
+
+	/* Passed from Gator to kbase */
+
+	/* the bitmask of enabled hardware counters for each counter block */
+	uint16_t bitmask[4];
+
+	/* Passed from kbase to Gator */
+
+	/* ptr to counter dump memory */
+	void *kernel_dump_buffer;
+
+	/* size of counter dump memory */
+	uint32_t size;
+
+	/* the ID of the Mali device */
+	uint32_t gpu_id;
+
+	/* the number of shader cores in the GPU */
+	uint32_t nr_cores;
+
+	/* the number of core groups */
+	uint32_t nr_core_groups;
+
+	/* the memory layout of the performance counters */
+	enum hwc_type *hwc_layout;
+
+	/* the total number of hardware counter blocks */
+	uint32_t nr_hwc_blocks;
+};
+
+/**
+ * @brief Opaque block of Mali data which Gator needs to return to the API later.
+ */
+struct kbase_gator_hwcnt_handles;
+
+/**
+ * @brief Initialize the resources Gator needs for performance profiling.
+ *
+ * @param in_out_info   A pointer to a structure containing the enabled counters passed from Gator and all the Mali
+ *                      specific information that will be returned to Gator. On entry Gator must have populated the
+ *                      'bitmask' field with the counters it wishes to enable for each class of counter block.
+ *                      Each entry in the array corresponds to a single counter class based on the "hwc_type"
+ *                      enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
+ *                      the first 4 counters in the block, and so on). See the GPU counter array as returned by
+ *                      kbase_gator_hwcnt_init_names() for the index values of each counter for the current GPU.
+ *
+ * @return              Pointer to an opaque handle block on success, NULL on error.
+ */
+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
+
+/**
+ * @brief Free all resources once Gator has finished using performance counters.
+ *
+ * @param in_out_info       A pointer to a structure containing the enabled counters passed from Gator and all the
+ *                          Mali specific information that will be returned to Gator.
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ */
+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief Poll whether a counter dump is successful.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ * @param[out] success      Non-zero on success, zero on failure.
+ *
+ * @return                  Zero if the dump is still pending, non-zero if the dump has completed. Note that a
+ *                          completed dump may not have dumped successfully, so the caller must test for both
+ *                          a completed and successful dump before processing counters.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
+
+/**
+ * @brief Request the generation of a new counter dump.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ *
+ * @return                  Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief This function is used to fetch the names table based on the Mali device in use.
+ *
+ * @param[out] total_number_of_counters The total number of counter short names in the Mali device's list.
+ *
+ * @return                              Pointer to an array of strings of length *total_number_of_counters.
+ */
+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_number_of_counters);
+
+/**
+ * @brief This function is used to terminate the use of the names table.
+ *
+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
+ */
+extern void kbase_gator_hwcnt_term_names(void);
+
+#endif
diff --git a/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h b/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h
index ff00d90cee78..2bc0b037eee6 100644
--- a/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h
+++ b/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h
@@ -23,83 +23,82 @@ extern "C"
 #define MAX_NUM_VP_CORES            (1)
 #define MAX_NUM_L2_CACHE_CORES      (1)
 
-enum counters
-{
-    /* Timeline activity */
-    ACTIVITY_VP_0 = 0,
-    ACTIVITY_FP_0,
-    ACTIVITY_FP_1,
-    ACTIVITY_FP_2,
-    ACTIVITY_FP_3,
+enum counters {
+	/* Timeline activity */
+	ACTIVITY_VP_0 = 0,
+	ACTIVITY_FP_0,
+	ACTIVITY_FP_1,
+	ACTIVITY_FP_2,
+	ACTIVITY_FP_3,
 
-    /* L2 cache counters */
-    COUNTER_L2_0_C0,
-    COUNTER_L2_0_C1,
+	/* L2 cache counters */
+	COUNTER_L2_0_C0,
+	COUNTER_L2_0_C1,
 
-    /* Vertex processor counters */
-    COUNTER_VP_0_C0,
-    COUNTER_VP_0_C1,
+	/* Vertex processor counters */
+	COUNTER_VP_0_C0,
+	COUNTER_VP_0_C1,
 
-    /* Fragment processor counters */
-    COUNTER_FP_0_C0,
-    COUNTER_FP_0_C1,
-    COUNTER_FP_1_C0,
-    COUNTER_FP_1_C1,
-    COUNTER_FP_2_C0,
-    COUNTER_FP_2_C1,
-    COUNTER_FP_3_C0,
-    COUNTER_FP_3_C1,
+	/* Fragment processor counters */
+	COUNTER_FP_0_C0,
+	COUNTER_FP_0_C1,
+	COUNTER_FP_1_C0,
+	COUNTER_FP_1_C1,
+	COUNTER_FP_2_C0,
+	COUNTER_FP_2_C1,
+	COUNTER_FP_3_C0,
+	COUNTER_FP_3_C1,
 
-    /* EGL Software Counters */
-    COUNTER_EGL_BLIT_TIME,
+	/* EGL Software Counters */
+	COUNTER_EGL_BLIT_TIME,
 
-    /* GLES Software Counters */
-    COUNTER_GLES_DRAW_ELEMENTS_CALLS,
-    COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
-    COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
-    COUNTER_GLES_DRAW_ARRAYS_CALLS,
-    COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
-    COUNTER_GLES_DRAW_POINTS,
-    COUNTER_GLES_DRAW_LINES,
-    COUNTER_GLES_DRAW_LINE_LOOP,
-    COUNTER_GLES_DRAW_LINE_STRIP,
-    COUNTER_GLES_DRAW_TRIANGLES,
-    COUNTER_GLES_DRAW_TRIANGLE_STRIP,
-    COUNTER_GLES_DRAW_TRIANGLE_FAN,
-    COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
-    COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
-    COUNTER_GLES_UPLOAD_TEXTURE_TIME,
-    COUNTER_GLES_UPLOAD_VBO_TIME,
-    COUNTER_GLES_NUM_FLUSHES,
-    COUNTER_GLES_NUM_VSHADERS_GENERATED,
-    COUNTER_GLES_NUM_FSHADERS_GENERATED,
-    COUNTER_GLES_VSHADER_GEN_TIME,
-    COUNTER_GLES_FSHADER_GEN_TIME,
-    COUNTER_GLES_INPUT_TRIANGLES,
-    COUNTER_GLES_VXCACHE_HIT,
-    COUNTER_GLES_VXCACHE_MISS,
-    COUNTER_GLES_VXCACHE_COLLISION,
-    COUNTER_GLES_CULLED_TRIANGLES,
-    COUNTER_GLES_CULLED_LINES,
-    COUNTER_GLES_BACKFACE_TRIANGLES,
-    COUNTER_GLES_GBCLIP_TRIANGLES,
-    COUNTER_GLES_GBCLIP_LINES,
-    COUNTER_GLES_TRIANGLES_DRAWN,
-    COUNTER_GLES_DRAWCALL_TIME,
-    COUNTER_GLES_TRIANGLES_COUNT,
-    COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
-    COUNTER_GLES_STRIP_TRIANGLES_COUNT,
-    COUNTER_GLES_FAN_TRIANGLES_COUNT,
-    COUNTER_GLES_LINES_COUNT,
-    COUNTER_GLES_INDEPENDENT_LINES_COUNT,
-    COUNTER_GLES_STRIP_LINES_COUNT,
-    COUNTER_GLES_LOOP_LINES_COUNT,
+	/* GLES Software Counters */
+	COUNTER_GLES_DRAW_ELEMENTS_CALLS,
+	COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
+	COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
+	COUNTER_GLES_DRAW_ARRAYS_CALLS,
+	COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
+	COUNTER_GLES_DRAW_POINTS,
+	COUNTER_GLES_DRAW_LINES,
+	COUNTER_GLES_DRAW_LINE_LOOP,
+	COUNTER_GLES_DRAW_LINE_STRIP,
+	COUNTER_GLES_DRAW_TRIANGLES,
+	COUNTER_GLES_DRAW_TRIANGLE_STRIP,
+	COUNTER_GLES_DRAW_TRIANGLE_FAN,
+	COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
+	COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
+	COUNTER_GLES_UPLOAD_TEXTURE_TIME,
+	COUNTER_GLES_UPLOAD_VBO_TIME,
+	COUNTER_GLES_NUM_FLUSHES,
+	COUNTER_GLES_NUM_VSHADERS_GENERATED,
+	COUNTER_GLES_NUM_FSHADERS_GENERATED,
+	COUNTER_GLES_VSHADER_GEN_TIME,
+	COUNTER_GLES_FSHADER_GEN_TIME,
+	COUNTER_GLES_INPUT_TRIANGLES,
+	COUNTER_GLES_VXCACHE_HIT,
+	COUNTER_GLES_VXCACHE_MISS,
+	COUNTER_GLES_VXCACHE_COLLISION,
+	COUNTER_GLES_CULLED_TRIANGLES,
+	COUNTER_GLES_CULLED_LINES,
+	COUNTER_GLES_BACKFACE_TRIANGLES,
+	COUNTER_GLES_GBCLIP_TRIANGLES,
+	COUNTER_GLES_GBCLIP_LINES,
+	COUNTER_GLES_TRIANGLES_DRAWN,
+	COUNTER_GLES_DRAWCALL_TIME,
+	COUNTER_GLES_TRIANGLES_COUNT,
+	COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
+	COUNTER_GLES_STRIP_TRIANGLES_COUNT,
+	COUNTER_GLES_FAN_TRIANGLES_COUNT,
+	COUNTER_GLES_LINES_COUNT,
+	COUNTER_GLES_INDEPENDENT_LINES_COUNT,
+	COUNTER_GLES_STRIP_LINES_COUNT,
+	COUNTER_GLES_LOOP_LINES_COUNT,
 
-    COUNTER_FILMSTRIP,
-    COUNTER_FREQUENCY,
-    COUNTER_VOLTAGE,
+	COUNTER_FILMSTRIP,
+	COUNTER_FREQUENCY,
+	COUNTER_VOLTAGE,
 
-    NUMBER_OF_EVENTS
+	NUMBER_OF_EVENTS
 };
 
 #define FIRST_ACTIVITY_EVENT    ACTIVITY_VP_0
@@ -117,34 +116,31 @@ enum counters
 /**
  * Structure to pass performance counter data of a Mali core
  */
-typedef struct _mali_profiling_core_counters
-{
-    u32 source0;
-    u32 value0;
-    u32 source1;
-    u32 value1;
-} _mali_profiling_core_counters;
+struct _mali_profiling_core_counters {
+	u32 source0;
+	u32 value0;
+	u32 source1;
+	u32 value1;
+};
 
 /*
  * For compatibility with utgard.
  */
-typedef struct _mali_profiling_l2_counter_values
-{
-    struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
-} _mali_profiling_l2_counter_values;
+struct _mali_profiling_l2_counter_values {
+	struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
+};
 
-typedef struct _mali_profiling_mali_version
-{
-    u32 mali_product_id;
-    u32 mali_version_major;
-    u32 mali_version_minor;
-    u32 num_of_l2_cores;
-    u32 num_of_fp_cores;
-    u32 num_of_vp_cores;
-} _mali_profiling_mali_version;
+struct _mali_profiling_mali_version {
+	u32 mali_product_id;
+	u32 mali_version_major;
+	u32 mali_version_minor;
+	u32 num_of_l2_cores;
+	u32 num_of_fp_cores;
+	u32 num_of_vp_cores;
+};
 
 extern void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values);
-extern u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values);
+extern u32 _mali_profiling_get_l2_counters(struct _mali_profiling_l2_counter_values *values);
 
 /*
  * List of possible actions allowing DDK to be controlled by Streamline.
diff --git a/drivers/gator/mali/mali_utgard_profiling_gator_api.h b/drivers/gator/mali/mali_utgard_profiling_gator_api.h
index 43c576042880..d6465312628e 100644
--- a/drivers/gator/mali/mali_utgard_profiling_gator_api.h
+++ b/drivers/gator/mali/mali_utgard_profiling_gator_api.h
@@ -22,105 +22,104 @@ extern "C"
 #define MAX_NUM_VP_CORES 1
 
 /** The list of events supported by the Mali DDK. */
-typedef enum
-{
-    /* Vertex processor activity */
-    ACTIVITY_VP_0 = 0,
+enum {
+	/* Vertex processor activity */
+	ACTIVITY_VP_0 = 0,
 
-    /* Fragment processor activity */
-    ACTIVITY_FP_0, /* 1 */
-    ACTIVITY_FP_1,
-    ACTIVITY_FP_2,
-    ACTIVITY_FP_3,
-    ACTIVITY_FP_4,
-    ACTIVITY_FP_5,
-    ACTIVITY_FP_6,
-    ACTIVITY_FP_7,
+	/* Fragment processor activity */
+	ACTIVITY_FP_0, /* 1 */
+	ACTIVITY_FP_1,
+	ACTIVITY_FP_2,
+	ACTIVITY_FP_3,
+	ACTIVITY_FP_4,
+	ACTIVITY_FP_5,
+	ACTIVITY_FP_6,
+	ACTIVITY_FP_7,
 
-    /* L2 cache counters */
-    COUNTER_L2_0_C0,
-    COUNTER_L2_0_C1,
-    COUNTER_L2_1_C0,
-    COUNTER_L2_1_C1,
-    COUNTER_L2_2_C0,
-    COUNTER_L2_2_C1,
+	/* L2 cache counters */
+	COUNTER_L2_0_C0,
+	COUNTER_L2_0_C1,
+	COUNTER_L2_1_C0,
+	COUNTER_L2_1_C1,
+	COUNTER_L2_2_C0,
+	COUNTER_L2_2_C1,
 
-    /* Vertex processor counters */
-    COUNTER_VP_0_C0, /*15*/
-    COUNTER_VP_0_C1,
+	/* Vertex processor counters */
+	COUNTER_VP_0_C0, /*15*/
+	COUNTER_VP_0_C1,
 
-    /* Fragment processor counters */
-    COUNTER_FP_0_C0,
-    COUNTER_FP_0_C1,
-    COUNTER_FP_1_C0,
-    COUNTER_FP_1_C1,
-    COUNTER_FP_2_C0,
-    COUNTER_FP_2_C1,
-    COUNTER_FP_3_C0,
-    COUNTER_FP_3_C1,
-    COUNTER_FP_4_C0,
-    COUNTER_FP_4_C1,
-    COUNTER_FP_5_C0,
-    COUNTER_FP_5_C1,
-    COUNTER_FP_6_C0,
-    COUNTER_FP_6_C1,
-    COUNTER_FP_7_C0,
-    COUNTER_FP_7_C1, /* 32 */
+	/* Fragment processor counters */
+	COUNTER_FP_0_C0,
+	COUNTER_FP_0_C1,
+	COUNTER_FP_1_C0,
+	COUNTER_FP_1_C1,
+	COUNTER_FP_2_C0,
+	COUNTER_FP_2_C1,
+	COUNTER_FP_3_C0,
+	COUNTER_FP_3_C1,
+	COUNTER_FP_4_C0,
+	COUNTER_FP_4_C1,
+	COUNTER_FP_5_C0,
+	COUNTER_FP_5_C1,
+	COUNTER_FP_6_C0,
+	COUNTER_FP_6_C1,
+	COUNTER_FP_7_C0,
+	COUNTER_FP_7_C1, /* 32 */
 
-    /*
-     * If more hardware counters are added, the _mali_osk_hw_counter_table
-     * below should also be updated.
-     */
+	/*
+	 * If more hardware counters are added, the _mali_osk_hw_counter_table
+	 * below should also be updated.
+	 */
 
-    /* EGL software counters */
-    COUNTER_EGL_BLIT_TIME,
+	/* EGL software counters */
+	COUNTER_EGL_BLIT_TIME,
 
-    /* GLES software counters */
-    COUNTER_GLES_DRAW_ELEMENTS_CALLS,
-    COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
-    COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
-    COUNTER_GLES_DRAW_ARRAYS_CALLS,
-    COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
-    COUNTER_GLES_DRAW_POINTS,
-    COUNTER_GLES_DRAW_LINES,
-    COUNTER_GLES_DRAW_LINE_LOOP,
-    COUNTER_GLES_DRAW_LINE_STRIP,
-    COUNTER_GLES_DRAW_TRIANGLES,
-    COUNTER_GLES_DRAW_TRIANGLE_STRIP,
-    COUNTER_GLES_DRAW_TRIANGLE_FAN,
-    COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
-    COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
-    COUNTER_GLES_UPLOAD_TEXTURE_TIME,
-    COUNTER_GLES_UPLOAD_VBO_TIME,
-    COUNTER_GLES_NUM_FLUSHES,
-    COUNTER_GLES_NUM_VSHADERS_GENERATED,
-    COUNTER_GLES_NUM_FSHADERS_GENERATED,
-    COUNTER_GLES_VSHADER_GEN_TIME,
-    COUNTER_GLES_FSHADER_GEN_TIME,
-    COUNTER_GLES_INPUT_TRIANGLES,
-    COUNTER_GLES_VXCACHE_HIT,
-    COUNTER_GLES_VXCACHE_MISS,
-    COUNTER_GLES_VXCACHE_COLLISION,
-    COUNTER_GLES_CULLED_TRIANGLES,
-    COUNTER_GLES_CULLED_LINES,
-    COUNTER_GLES_BACKFACE_TRIANGLES,
-    COUNTER_GLES_GBCLIP_TRIANGLES,
-    COUNTER_GLES_GBCLIP_LINES,
-    COUNTER_GLES_TRIANGLES_DRAWN,
-    COUNTER_GLES_DRAWCALL_TIME,
-    COUNTER_GLES_TRIANGLES_COUNT,
-    COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
-    COUNTER_GLES_STRIP_TRIANGLES_COUNT,
-    COUNTER_GLES_FAN_TRIANGLES_COUNT,
-    COUNTER_GLES_LINES_COUNT,
-    COUNTER_GLES_INDEPENDENT_LINES_COUNT,
-    COUNTER_GLES_STRIP_LINES_COUNT,
-    COUNTER_GLES_LOOP_LINES_COUNT,
+	/* GLES software counters */
+	COUNTER_GLES_DRAW_ELEMENTS_CALLS,
+	COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
+	COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
+	COUNTER_GLES_DRAW_ARRAYS_CALLS,
+	COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
+	COUNTER_GLES_DRAW_POINTS,
+	COUNTER_GLES_DRAW_LINES,
+	COUNTER_GLES_DRAW_LINE_LOOP,
+	COUNTER_GLES_DRAW_LINE_STRIP,
+	COUNTER_GLES_DRAW_TRIANGLES,
+	COUNTER_GLES_DRAW_TRIANGLE_STRIP,
+	COUNTER_GLES_DRAW_TRIANGLE_FAN,
+	COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
+	COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
+	COUNTER_GLES_UPLOAD_TEXTURE_TIME,
+	COUNTER_GLES_UPLOAD_VBO_TIME,
+	COUNTER_GLES_NUM_FLUSHES,
+	COUNTER_GLES_NUM_VSHADERS_GENERATED,
+	COUNTER_GLES_NUM_FSHADERS_GENERATED,
+	COUNTER_GLES_VSHADER_GEN_TIME,
+	COUNTER_GLES_FSHADER_GEN_TIME,
+	COUNTER_GLES_INPUT_TRIANGLES,
+	COUNTER_GLES_VXCACHE_HIT,
+	COUNTER_GLES_VXCACHE_MISS,
+	COUNTER_GLES_VXCACHE_COLLISION,
+	COUNTER_GLES_CULLED_TRIANGLES,
+	COUNTER_GLES_CULLED_LINES,
+	COUNTER_GLES_BACKFACE_TRIANGLES,
+	COUNTER_GLES_GBCLIP_TRIANGLES,
+	COUNTER_GLES_GBCLIP_LINES,
+	COUNTER_GLES_TRIANGLES_DRAWN,
+	COUNTER_GLES_DRAWCALL_TIME,
+	COUNTER_GLES_TRIANGLES_COUNT,
+	COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
+	COUNTER_GLES_STRIP_TRIANGLES_COUNT,
+	COUNTER_GLES_FAN_TRIANGLES_COUNT,
+	COUNTER_GLES_LINES_COUNT,
+	COUNTER_GLES_INDEPENDENT_LINES_COUNT,
+	COUNTER_GLES_STRIP_LINES_COUNT,
+	COUNTER_GLES_LOOP_LINES_COUNT,
 
-    /* Framebuffer capture pseudo-counter */
-    COUNTER_FILMSTRIP,
+	/* Framebuffer capture pseudo-counter */
+	COUNTER_FILMSTRIP,
 
-    NUMBER_OF_EVENTS
+	NUMBER_OF_EVENTS
 } _mali_osk_counter_id;
 
 #define FIRST_ACTIVITY_EVENT    ACTIVITY_VP_0
@@ -138,21 +137,19 @@ typedef enum
 /**
  * Structure to pass performance counter data of a Mali core
  */
-typedef struct _mali_profiling_core_counters
-{
+struct _mali_profiling_core_counters {
 	u32 source0;
 	u32 value0;
 	u32 source1;
 	u32 value1;
-} _mali_profiling_core_counters;
+};
 
 /**
  * Structure to pass performance counter data of Mali L2 cache cores
  */
-typedef struct _mali_profiling_l2_counter_values
-{
+struct _mali_profiling_l2_counter_values {
 	struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
-} _mali_profiling_l2_counter_values;
+};
 
 /**
  * Structure to pass data defining Mali instance in use:
@@ -164,15 +161,14 @@ typedef struct _mali_profiling_l2_counter_values
  * num_of_fp_cores - number of fragment processor cores
  * num_of_vp_cores - number of vertex processor cores
  */
-typedef struct _mali_profiling_mali_version
-{
+struct _mali_profiling_mali_version {
 	u32 mali_product_id;
 	u32 mali_version_major;
 	u32 mali_version_minor;
 	u32 num_of_l2_cores;
 	u32 num_of_fp_cores;
 	u32 num_of_vp_cores;
-} _mali_profiling_mali_version;
+};
 
 /*
  * List of possible actions to be controlled by Streamline.
@@ -186,7 +182,7 @@ typedef struct _mali_profiling_mali_version
 
 void _mali_profiling_control(u32 action, u32 value);
 
-u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values);
+u32 _mali_profiling_get_l2_counters(struct _mali_profiling_l2_counter_values *values);
 
 int _mali_profiling_set_event(u32 counter_id, s32 event_id);
 
diff --git a/drivers/gator/mali_t6xx.mk b/drivers/gator/mali_midgard.mk
similarity index 91%
rename from drivers/gator/mali_t6xx.mk
rename to drivers/gator/mali_midgard.mk
index fa7571ded17b..1b784d5c3d58 100644
--- a/drivers/gator/mali_t6xx.mk
+++ b/drivers/gator/mali_midgard.mk
@@ -1,4 +1,4 @@
-# Defines for Mali-T6xx driver
+# Defines for Mali-Midgard driver
 EXTRA_CFLAGS += -DMALI_USE_UMP=1 \
                 -DMALI_LICENSE_IS_GPL=1 \
                 -DMALI_BASE_TRACK_MEMLEAK=0 \
@@ -21,7 +21,7 @@ OSK_DIR = $(DDK_DIR)/drivers/gpu/arm/midgard/osk
 EXTRA_CFLAGS += -DMALI_DIR_MIDGARD=1
 endif
 
-ifneq ($(wildcard $(DDK_DIR)/drivers/gpu/arm/midgard/mali_dd_gator_api.h),)
+ifneq ($(wildcard $(DDK_DIR)/drivers/gpu/arm/midgard/mali_kbase_gator_api.h),)
 EXTRA_CFLAGS += -DMALI_SIMPLE_API=1
 endif
 
diff --git a/tools/gator/daemon/Android.mk b/tools/gator/daemon/Android.mk
index 44c069cc7e24..970ac6946150 100644
--- a/tools/gator/daemon/Android.mk
+++ b/tools/gator/daemon/Android.mk
@@ -3,13 +3,16 @@ include $(CLEAR_VARS)
 
 XML_H := $(shell cd $(LOCAL_PATH) && make events_xml.h defaults_xml.h)
 
-LOCAL_CFLAGS += -Wall -O3 -mthumb-interwork -fno-exceptions -pthread -DETCDIR=\"/etc\" -Ilibsensors
-
 LOCAL_SRC_FILES := \
+	AnnotateListener.cpp \
 	Buffer.cpp \
+	CCNDriver.cpp \
+	CPUFreqDriver.cpp \
 	CapturedXML.cpp \
 	Child.cpp \
+	Command.cpp \
 	ConfigurationXML.cpp \
+	DiskIODriver.cpp \
 	Driver.cpp \
 	DriverSource.cpp \
 	DynBuf.cpp \
@@ -17,13 +20,17 @@ LOCAL_SRC_FILES := \
 	ExternalSource.cpp \
 	FSDriver.cpp \
 	Fifo.cpp \
-	Hwmon.cpp \
+	FtraceDriver.cpp \
+	FtraceSource.cpp \
+	HwmonDriver.cpp \
 	KMod.cpp \
 	LocalCapture.cpp \
 	Logging.cpp \
 	main.cpp \
 	MaliVideoDriver.cpp \
+	MemInfoDriver.cpp\
 	Monitor.cpp \
+	NetDriver.cpp \
 	OlySocket.cpp \
 	OlyUtility.cpp \
 	PerfBuffer.cpp \
@@ -34,6 +41,7 @@ LOCAL_SRC_FILES := \
 	Sender.cpp \
 	SessionData.cpp \
 	SessionXML.cpp \
+	Setup.cpp \
 	Source.cpp \
 	StreamlineSetup.cpp \
 	UEvent.cpp \
@@ -57,6 +65,9 @@ LOCAL_SRC_FILES := \
 	mxml/mxml-set.c \
 	mxml/mxml-string.c
 
+LOCAL_CFLAGS += -Wall -O3 -fno-exceptions -pthread -DETCDIR=\"/etc\" -Ilibsensors -fPIE
+LOCAL_LDFLAGS += -fPIE -pie
+
 LOCAL_C_INCLUDES := $(LOCAL_PATH)
 
 LOCAL_MODULE := gatord
diff --git a/tools/gator/daemon/AnnotateListener.cpp b/tools/gator/daemon/AnnotateListener.cpp
new file mode 100644
index 000000000000..50110b4dc84c
--- /dev/null
+++ b/tools/gator/daemon/AnnotateListener.cpp
@@ -0,0 +1,69 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "AnnotateListener.h"
+
+#include <unistd.h>
+
+#include "OlySocket.h"
+
+struct AnnotateClient {
+	AnnotateClient *next;
+	int fd;
+};
+
+AnnotateListener::AnnotateListener() : mClients(NULL), mSock(NULL) {
+}
+
+AnnotateListener::~AnnotateListener() {
+	close();
+	delete mSock;
+}
+
+void AnnotateListener::setup() {
+	mSock = new OlyServerSocket(8082);
+}
+
+int AnnotateListener::getFd() {
+	return mSock->getFd();
+}
+
+void AnnotateListener::handle() {
+	AnnotateClient *const client = new AnnotateClient();
+	client->fd = mSock->acceptConnection();
+	client->next = mClients;
+	mClients = client;
+}
+
+void AnnotateListener::close() {
+	// Stop listening (mSock stays NULL until setup() runs), then drop all clients
+	if (mSock != NULL) { mSock->closeServerSocket(); }
+	for (AnnotateClient *next; mClients != NULL; mClients = next) {
+		next = mClients->next;
+		::close(mClients->fd);
+		delete mClients;
+	}
+}
+
+// Write one zero byte to every client; clients whose write fails are closed and unlinked
+void AnnotateListener::signal() {
+	const char ch = 0;
+	AnnotateClient **link = &mClients;
+	while (*link != NULL) {
+		AnnotateClient *const client = *link;
+		if (write(client->fd, &ch, sizeof(ch)) == 1) {
+			// Delivered, step to the next link
+			link = &client->next;
+			continue;
+		}
+		// Unlink before freeing so *link never points at freed memory
+		*link = client->next;
+		::close(client->fd);
+		delete client;
+	}
+}
diff --git a/tools/gator/daemon/AnnotateListener.h b/tools/gator/daemon/AnnotateListener.h
new file mode 100644
index 000000000000..cdefef12db22
--- /dev/null
+++ b/tools/gator/daemon/AnnotateListener.h
@@ -0,0 +1,31 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+struct AnnotateClient;	// defined as a struct in AnnotateListener.cpp
+class OlyServerSocket;
+// Owns a server socket plus a singly linked list of the clients accepted on it
+class AnnotateListener {
+public:
+	AnnotateListener();
+	~AnnotateListener();
+	// setup() creates the server socket; getFd() and handle() dereference it
+	void setup();
+	int getFd();
+	// handle() accepts one client, signal() writes a byte to each, close() drops all
+	void handle();
+	void close();
+	void signal();
+
+private:
+	AnnotateClient *mClients;
+	OlyServerSocket *mSock;
+
+	// Intentionally unimplemented
+	AnnotateListener(const AnnotateListener &);
+	AnnotateListener &operator=(const AnnotateListener &);
+};
diff --git a/tools/gator/daemon/Application.mk b/tools/gator/daemon/Application.mk
index 631ba54148d1..3ada471cac19 100644
--- a/tools/gator/daemon/Application.mk
+++ b/tools/gator/daemon/Application.mk
@@ -1 +1,3 @@
 APP_PLATFORM := android-8
+# Replace armeabi-v7a with arm64-v8a to build an arm64 gatord or with armeabi to build an ARM11 gatord
+APP_ABI := armeabi-v7a
diff --git a/tools/gator/daemon/Buffer.cpp b/tools/gator/daemon/Buffer.cpp
index dd19f7f8be76..8fa628015069 100644
--- a/tools/gator/daemon/Buffer.cpp
+++ b/tools/gator/daemon/Buffer.cpp
@@ -15,12 +15,15 @@
 #define mask (mSize - 1)
 
 enum {
-	CODE_PEA      = 1,
-	CODE_KEYS     = 2,
-	CODE_FORMAT   = 3,
-	CODE_MAPS     = 4,
-	CODE_COMM     = 5,
-	CODE_KEYS_OLD = 6,
+	CODE_PEA         = 1,
+	CODE_KEYS        = 2,
+	CODE_FORMAT      = 3,
+	CODE_MAPS        = 4,
+	CODE_COMM        = 5,
+	CODE_KEYS_OLD    = 6,
+	CODE_ONLINE_CPU  = 7,
+	CODE_OFFLINE_CPU = 8,
+	CODE_KALLSYMS    = 9,
 };
 
 // Summary Frame Messages
@@ -42,16 +45,18 @@ enum {
 	/* Add another character so the length isn't 0x0a bytes */ \
 	"5"
 
-Buffer::Buffer(const int32_t core, const int32_t buftype, const int size, sem_t *const readerSem) : mCore(core), mBufType(buftype), mSize(size), mReadPos(0), mWritePos(0), mCommitPos(0), mAvailable(true), mIsDone(false), mBuf(new char[mSize]), mCommitTime(gSessionData->mLiveRate), mReaderSem(readerSem) {
+Buffer::Buffer(const int32_t core, const int32_t buftype, const int size, sem_t *const readerSem) : mBuf(new char[size]), mReaderSem(readerSem), mCommitTime(gSessionData->mLiveRate), mSize(size), mReadPos(0), mWritePos(0), mCommitPos(0), mAvailable(true), mIsDone(false), mCore(core), mBufType(buftype) {
 	if ((mSize & mask) != 0) {
 		logg->logError(__FILE__, __LINE__, "Buffer size is not a power of 2");
 		handleException();
 	}
+	sem_init(&mWriterSem, 0, 0);
 	frame();
 }
 
 Buffer::~Buffer() {
 	delete [] mBuf;
+	sem_destroy(&mWriterSem);
 }
 
 void Buffer::write(Sender *const sender) {
@@ -59,14 +64,18 @@ void Buffer::write(Sender *const sender) {
 		return;
 	}
 
+	// commit and read are updated by the writer, only read them once
+	int commitPos = mCommitPos;
+	int readPos = mReadPos;
+
 	// determine the size of two halves
-	int length1 = mCommitPos - mReadPos;
-	char *buffer1 = mBuf + mReadPos;
+	int length1 = commitPos - readPos;
+	char *buffer1 = mBuf + readPos;
 	int length2 = 0;
 	char *buffer2 = mBuf;
 	if (length1 < 0) {
-		length1 = mSize - mReadPos;
-		length2 = mCommitPos;
+		length1 = mSize - readPos;
+		length2 = commitPos;
 	}
 
 	logg->logMessage("Sending data length1: %i length2: %i", length1, length2);
@@ -81,7 +90,10 @@ void Buffer::write(Sender *const sender) {
 		sender->writeData(buffer2, length2, RESPONSE_APC_DATA);
 	}
 
-	mReadPos = mCommitPos;
+	mReadPos = commitPos;
+
+	// send a notification that space is available
+	sem_post(&mWriterSem);
 }
 
 bool Buffer::commitReady() const {
@@ -193,7 +205,7 @@ void Buffer::packInt(int32_t x) {
 	packInt(mBuf, mSize, mWritePos, x);
 }
 
-void Buffer::packInt64(int64_t x) {
+void Buffer::packInt64(char *const buf, const int size, int &writePos, int64_t x) {
 	int packedBytes = 0;
 	int more = true;
 	while (more) {
@@ -207,11 +219,15 @@ void Buffer::packInt64(int64_t x) {
 			b |= 0x80;
 		}
 
-		mBuf[(mWritePos + packedBytes) & mask] = b;
+		buf[(writePos + packedBytes) & /*mask*/(size - 1)] = b;
 		packedBytes++;
 	}
 
-	mWritePos = (mWritePos + packedBytes) & mask;
+	writePos = (writePos + packedBytes) & /*mask*/(size - 1);
+}
+
+void Buffer::packInt64(int64_t x) {
+	packInt64(mBuf, mSize, mWritePos, x);
 }
 
 void Buffer::writeBytes(const void *const data, size_t count) {
@@ -236,10 +252,12 @@ void Buffer::frame() {
 	// Reserve space for the length
 	mWritePos += sizeof(int32_t);
 	packInt(mBufType);
-	packInt(mCore);
+	if ((mBufType == FRAME_BLOCK_COUNTER) || (mBufType == FRAME_PERF_ATTRS) || (mBufType == FRAME_PERF)) {
+		packInt(mCore);
+	}
 }
 
-void Buffer::summary(const int64_t timestamp, const int64_t uptime, const int64_t monotonicDelta, const char *const uname) {
+void Buffer::summary(const uint64_t currTime, const int64_t timestamp, const int64_t uptime, const int64_t monotonicDelta, const char *const uname) {
 	packInt(MESSAGE_SUMMARY);
 	writeString(NEWLINE_CANARY);
 	packInt64(timestamp);
@@ -248,23 +266,24 @@ void Buffer::summary(const int64_t timestamp, const int64_t uptime, const int64_
 	writeString("uname");
 	writeString(uname);
 	writeString("");
-	check(1);
+	check(currTime);
 }
 
-void Buffer::coreName(const int core, const int cpuid, const char *const name) {
+void Buffer::coreName(const uint64_t currTime, const int core, const int cpuid, const char *const name) {
 	if (checkSpace(3 * MAXSIZE_PACK32 + 0x100)) {
 		packInt(MESSAGE_CORE_NAME);
 		packInt(core);
 		packInt(cpuid);
 		writeString(name);
 	}
-	check(1);
+	check(currTime);
 }
 
 bool Buffer::eventHeader(const uint64_t curr_time) {
 	bool retval = false;
 	if (checkSpace(MAXSIZE_PACK32 + MAXSIZE_PACK64)) {
-		packInt(0);	// key of zero indicates a timestamp
+		// key of zero indicates a timestamp
+		packInt(0);
 		packInt64(curr_time);
 		retval = true;
 	}
@@ -275,7 +294,8 @@ bool Buffer::eventHeader(const uint64_t curr_time) {
 bool Buffer::eventTid(const int tid) {
 	bool retval = false;
 	if (checkSpace(2 * MAXSIZE_PACK32)) {
-		packInt(1);	// key of 1 indicates a tid
+		// key of 1 indicates a tid
+		packInt(1);
 		packInt(tid);
 		retval = true;
 	}
@@ -283,102 +303,119 @@ bool Buffer::eventTid(const int tid) {
 	return retval;
 }
 
-void Buffer::event(const int32_t key, const int32_t value) {
+void Buffer::event(const int key, const int32_t value) {
 	if (checkSpace(2 * MAXSIZE_PACK32)) {
 		packInt(key);
 		packInt(value);
 	}
 }
 
-void Buffer::event64(const int64_t key, const int64_t value) {
-	if (checkSpace(2 * MAXSIZE_PACK64)) {
-		packInt64(key);
+void Buffer::event64(const int key, const int64_t value) {
+	if (checkSpace(MAXSIZE_PACK64 + MAXSIZE_PACK32)) {
+		packInt(key);
 		packInt64(value);
 	}
 }
 
-void Buffer::pea(const struct perf_event_attr *const pea, int key) {
-	if (checkSpace(2 * MAXSIZE_PACK32 + pea->size)) {
-		packInt(CODE_PEA);
-		writeBytes(pea, pea->size);
-		packInt(key);
-	} else {
-		logg->logError(__FILE__, __LINE__, "Ran out of buffer space for perf attrs");
-		handleException();
+void Buffer::pea(const uint64_t currTime, const struct perf_event_attr *const pea, int key) {
+	while (!checkSpace(2 * MAXSIZE_PACK32 + pea->size)) {
+		sem_wait(&mWriterSem);
 	}
-	// Don't know the real perf time so use 1 as it will work for now
-	check(1);
+	packInt(CODE_PEA);
+	writeBytes(pea, pea->size);
+	packInt(key);
+	check(currTime);
 }
 
-void Buffer::keys(const int count, const __u64 *const ids, const int *const keys) {
-	if (checkSpace(2 * MAXSIZE_PACK32 + count * (MAXSIZE_PACK32 + MAXSIZE_PACK64))) {
-		packInt(CODE_KEYS);
-		packInt(count);
-		for (int i = 0; i < count; ++i) {
-			packInt64(ids[i]);
-			packInt(keys[i]);
-		}
-	} else {
-		logg->logError(__FILE__, __LINE__, "Ran out of buffer space for perf attrs");
-		handleException();
+void Buffer::keys(const uint64_t currTime, const int count, const __u64 *const ids, const int *const keys) {
+	while (!checkSpace(2 * MAXSIZE_PACK32 + count * (MAXSIZE_PACK32 + MAXSIZE_PACK64))) {
+		sem_wait(&mWriterSem);
 	}
-	check(1);
+	packInt(CODE_KEYS);
+	packInt(count);
+	for (int i = 0; i < count; ++i) {
+		packInt64(ids[i]);
+		packInt(keys[i]);
+	}
+	check(currTime);
 }
 
-void Buffer::keysOld(const int keyCount, const int *const keys, const int bytes, const char *const buf) {
-	if (checkSpace((2 + keyCount) * MAXSIZE_PACK32 + bytes)) {
-		packInt(CODE_KEYS_OLD);
-		packInt(keyCount);
-		for (int i = 0; i < keyCount; ++i) {
-			packInt(keys[i]);
-		}
-		writeBytes(buf, bytes);
-	} else {
-		logg->logError(__FILE__, __LINE__, "Ran out of buffer space for perf attrs");
-		handleException();
+void Buffer::keysOld(const uint64_t currTime, const int keyCount, const int *const keys, const int bytes, const char *const buf) {
+	while (!checkSpace((2 + keyCount) * MAXSIZE_PACK32 + bytes)) {
+		sem_wait(&mWriterSem);
 	}
-	check(1);
+	packInt(CODE_KEYS_OLD);
+	packInt(keyCount);
+	for (int i = 0; i < keyCount; ++i) {
+		packInt(keys[i]);
+	}
+	writeBytes(buf, bytes);
+	check(currTime);
 }
 
-void Buffer::format(const int length, const char *const format) {
-	if (checkSpace(MAXSIZE_PACK32 + length + 1)) {
-		packInt(CODE_FORMAT);
-		writeBytes(format, length + 1);
-	} else {
-		logg->logError(__FILE__, __LINE__, "Ran out of buffer space for perf attrs");
-		handleException();
+void Buffer::format(const uint64_t currTime, const int length, const char *const format) {
+	while (!checkSpace(MAXSIZE_PACK32 + length + 1)) {
+		sem_wait(&mWriterSem);
 	}
-	check(1);
+	packInt(CODE_FORMAT);
+	writeBytes(format, length + 1);
+	check(currTime);
 }
 
-void Buffer::maps(const int pid, const int tid, const char *const maps) {
+void Buffer::maps(const uint64_t currTime, const int pid, const int tid, const char *const maps) {
 	const int mapsLen = strlen(maps) + 1;
-	if (checkSpace(3 * MAXSIZE_PACK32 + mapsLen)) {
-		packInt(CODE_MAPS);
-		packInt(pid);
-		packInt(tid);
-		writeBytes(maps, mapsLen);
-	} else {
-		logg->logError(__FILE__, __LINE__, "Ran out of buffer space for perf attrs");
-		handleException();
+	while (!checkSpace(3 * MAXSIZE_PACK32 + mapsLen)) {
+		sem_wait(&mWriterSem);
 	}
-	check(1);
+	packInt(CODE_MAPS);
+	packInt(pid);
+	packInt(tid);
+	writeBytes(maps, mapsLen);
+	check(currTime);
 }
 
-void Buffer::comm(const int pid, const int tid, const char *const image, const char *const comm) {
+void Buffer::comm(const uint64_t currTime, const int pid, const int tid, const char *const image, const char *const comm) {
 	const int imageLen = strlen(image) + 1;
 	const int commLen = strlen(comm) + 1;
-	if (checkSpace(3 * MAXSIZE_PACK32 + imageLen + commLen)) {
-		packInt(CODE_COMM);
-		packInt(pid);
-		packInt(tid);
-		writeBytes(image, imageLen);
-		writeBytes(comm, commLen);
-	} else {
-		logg->logError(__FILE__, __LINE__, "Ran out of buffer space for perf attrs");
-		handleException();
+	while (!checkSpace(3 * MAXSIZE_PACK32 + imageLen + commLen)) {
+		sem_wait(&mWriterSem);
 	}
-	check(1);
+	packInt(CODE_COMM);
+	packInt(pid);
+	packInt(tid);
+	writeBytes(image, imageLen);
+	writeBytes(comm, commLen);
+	check(currTime);
+}
+
+void Buffer::onlineCPU(const uint64_t currTime, const uint64_t time, const int cpu) {
+	while (!checkSpace(2 * MAXSIZE_PACK32 + MAXSIZE_PACK64)) { // two ints (code, cpu) + one int64 (time)
+		sem_wait(&mWriterSem); // posted by write() after it advances mReadPos
+	}
+	packInt(CODE_ONLINE_CPU); // message code, followed by the timestamp and the cpu number
+	packInt64(time);
+	packInt(cpu);
+	check(currTime);
+}
+
+void Buffer::offlineCPU(const uint64_t currTime, const uint64_t time, const int cpu) {
+	while (!checkSpace(2 * MAXSIZE_PACK32 + MAXSIZE_PACK64)) { // two ints (code, cpu) + one int64 (time)
+		sem_wait(&mWriterSem); // posted by write() after it advances mReadPos
+	}
+	packInt(CODE_OFFLINE_CPU); // message code, followed by the timestamp and the cpu number
+	packInt64(time);
+	packInt(cpu);
+	check(currTime);
+}
+
+void Buffer::kallsyms(const uint64_t currTime, const char *const kallsyms) {
+	const int kallsymsLen = strlen(kallsyms) + 1;
+	while (!checkSpace(3 * MAXSIZE_PACK32 + kallsymsLen)) {
+		sem_wait(&mWriterSem);
+	}
+	packInt(CODE_KALLSYMS);
+	writeBytes(kallsyms, kallsymsLen);
+	check(currTime);
 }
 
 void Buffer::setDone() {
diff --git a/tools/gator/daemon/Buffer.h b/tools/gator/daemon/Buffer.h
index 2de1b97ac091..6cffd8e39a36 100644
--- a/tools/gator/daemon/Buffer.h
+++ b/tools/gator/daemon/Buffer.h
@@ -39,25 +39,26 @@ class Buffer {
 	void commit(const uint64_t time);
 	void check(const uint64_t time);
 
-	void frame();
-
 	// Summary messages
-	void summary(const int64_t timestamp, const int64_t uptime, const int64_t monotonicDelta, const char *const uname);
-	void coreName(const int core, const int cpuid, const char *const name);
+	void summary(const uint64_t currTime, const int64_t timestamp, const int64_t uptime, const int64_t monotonicDelta, const char *const uname);
+	void coreName(const uint64_t currTime, const int core, const int cpuid, const char *const name);
 
 	// Block Counter messages
 	bool eventHeader(uint64_t curr_time);
 	bool eventTid(int tid);
-	void event(int32_t key, int32_t value);
-	void event64(int64_t key, int64_t value);
+	void event(int key, int32_t value);
+	void event64(int key, int64_t value);
 
 	// Perf Attrs messages
-	void pea(const struct perf_event_attr *const pea, int key);
-	void keys(const int count, const __u64 *const ids, const int *const keys);
-	void keysOld(const int keyCount, const int *const keys, const int bytes, const char *const buf);
-	void format(const int length, const char *const format);
-	void maps(const int pid, const int tid, const char *const maps);
-	void comm(const int pid, const int tid, const char *const image, const char *const comm);
+	void pea(const uint64_t currTime, const struct perf_event_attr *const pea, int key);
+	void keys(const uint64_t currTime, const int count, const __u64 *const ids, const int *const keys);
+	void keysOld(const uint64_t currTime, const int keyCount, const int *const keys, const int bytes, const char *const buf);
+	void format(const uint64_t currTime, const int length, const char *const format);
+	void maps(const uint64_t currTime, const int pid, const int tid, const char *const maps);
+	void comm(const uint64_t currTime, const int pid, const int tid, const char *const image, const char *const comm);
+	void onlineCPU(const uint64_t currTime, const uint64_t time, const int cpu);
+	void offlineCPU(const uint64_t currTime, const uint64_t time, const int cpu);
+	void kallsyms(const uint64_t currTime, const char *const kallsyms);
 
 	void setDone();
 	bool isDone() const;
@@ -67,6 +68,7 @@ class Buffer {
 	void advanceWrite(int bytes) { mWritePos = (mWritePos + bytes) & /*mask*/(mSize - 1); }
 	static void packInt(char *const buf, const int size, int &writePos, int32_t x);
 	void packInt(int32_t x);
+	static void packInt64(char *const buf, const int size, int &writePos, int64_t x);
 	void packInt64(int64_t x);
 	void writeBytes(const void *const data, size_t count);
 	void writeString(const char *const str);
@@ -79,20 +81,22 @@ class Buffer {
 	}
 
 private:
+	void frame();
 	bool commitReady() const;
 	bool checkSpace(int bytes);
 
-	const int32_t mCore;
-	const int32_t mBufType;
+	char *const mBuf;
+	sem_t *const mReaderSem;
+	uint64_t mCommitTime;
+	sem_t mWriterSem;
 	const int mSize;
 	int mReadPos;
 	int mWritePos;
 	int mCommitPos;
 	bool mAvailable;
 	bool mIsDone;
-	char *const mBuf;
-	uint64_t mCommitTime;
-	sem_t *const mReaderSem;
+	const int32_t mCore;
+	const int32_t mBufType;
 
 	// Intentionally unimplemented
 	Buffer(const Buffer &);
diff --git a/tools/gator/daemon/CCNDriver.cpp b/tools/gator/daemon/CCNDriver.cpp
new file mode 100644
index 000000000000..dd1a2b133842
--- /dev/null
+++ b/tools/gator/daemon/CCNDriver.cpp
@@ -0,0 +1,295 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "CCNDriver.h"
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "k/perf_event.h"
+
+#include "Config.h"
+#include "DriverSource.h"
+#include "Logging.h"
+
+static const char TAG_CATEGORY[] = "category";
+static const char TAG_COUNTER_SET[] = "counter_set";
+static const char TAG_EVENT[] = "event";
+static const char TAG_OPTION[] = "option";
+static const char TAG_OPTION_SET[] = "option_set";
+
+static const char ATTR_AVERAGE_SELECTION[] = "average_selection";
+static const char ATTR_COUNTER[] = "counter";
+static const char ATTR_COUNTER_SET[] = "counter_set";
+static const char ATTR_COUNT[] = "count";
+static const char ATTR_DESCRIPTION[] = "description";
+static const char ATTR_DISPLAY[] = "display";
+static const char ATTR_EVENT[] = "event";
+static const char ATTR_EVENT_DELTA[] = "event_delta";
+static const char ATTR_NAME[] = "name";
+static const char ATTR_OPTION_SET[] = "option_set";
+static const char ATTR_TITLE[] = "title";
+static const char ATTR_UNITS[] = "units";
+
+static const char XP_REGION[] = "XP_Region";
+static const char HNF_REGION[] = "HN-F_Region";
+static const char RNI_REGION[] = "RN-I_Region";
+static const char SBAS_REGION[] = "SBAS_Region";
+static const char CCN_5XX[] = "CCN-5xx";
+#define ARM_CCN_5XX "ARM_CCN_5XX_"
+
+static const char *const VC_TYPES[] = { "REQ", "RSP", "SNP", "DAT" };
+static const char *const XP_EVENT_NAMES[] = { NULL, "H-bit", "S-bit", "P-Cnt", "TknV" };
+static const char *const XP_EVENT_DESCRIPTIONS[] = { NULL, "Set H-bit, signaled when this XP sets the H-bit.", "Set S-bit, signaled when this XP sets the S-bit.", "Set P-Cnt, signaled when this XP sets the P-Cnt. This is not applicable for the SNP VC.", "No TknV, signaled when this XP transmits a valid packet." };
+static const char *const HNF_EVENT_NAMES[] = { NULL, "Cache Miss", "L3 SF Cache Access", "Cache Fill", "POCQ Retry", "POCQ Reqs Recvd", "SF Hit", "SF Evictions", "Snoops Sent", "Snoops Broadcast", "L3 Eviction", "L3 Fill Invalid Way", "MC Retries", "MC Reqs", "QOS HH Retry" };
+static const char *const HNF_EVENT_DESCRIPTIONS[] = { NULL, "Counts the total cache misses. This is the first time lookup result, and is high priority.", "Counts the number of cache accesses. This is the first time access, and is high priority.", "Counts the total allocations in the HN L3 cache, and all cache line allocations to the L3 cache.", "Counts the number of requests that have been retried.", "Counts the number of requests received by HN.", "Counts the number of snoop filter hits.", "Counts the number of snoop filter evictions. Cache invalidations are initiated.", "Counts the number of snoops sent. Does not differentiate between broadcast or directed snoops.", "Counts the number of snoop broadcasts sent.", "Counts the number of L3 evictions.", "Counts the number of L3 fills to an invalid way.", "Counts the number of transactions retried by the memory controller.", "Counts the number of requests to the memory controller.", "Counts the number of times a highest-priority QoS class was retried at the HN-F." };
+static const char *const RNI_EVENT_NAMES[] = { NULL, "S0 RDataBeats", "S1 RDataBeats", "S2 RDataBeats", "RXDAT Flits received", "TXDAT Flits sent", "Total TXREQ Flits sent", "Retried TXREQ Flits sent", "RRT full", "WRT full", "Replayed TXREQ Flits" };
+static const char *const RNI_EVENT_DESCRIPTIONS[] = { NULL, "S0 RDataBeats.", "S1 RDataBeats.", "S2 RDataBeats.", "RXDAT Flits received.", "TXDAT Flits sent.", "Total TXREQ Flits sent.", "Retried TXREQ Flits sent.", "RRT full.", "WRT full.", "Replayed TXREQ Flits." };
+static const char *const SBAS_EVENT_NAMES[] = { NULL, "S0 RDataBeats", NULL, NULL, "RXDAT Flits received", "TXDAT Flits sent", "Total TXREQ Flits sent", "Retried TXREQ Flits sent", "RRT full", "WRT full", "Replayed TXREQ Flits" };
+static const char *const SBAS_EVENT_DESCRIPTIONS[] = { NULL, "S0 RDataBeats.", NULL, NULL, "RXDAT Flits received.", "TXDAT Flits sent.", "Total TXREQ Flits sent.", "Retried TXREQ Flits sent.", "RRT full.", "WRT full.", "Replayed TXREQ Flits." };
+
+// This class is used only to poll for CCN-5xx configuration and emit events XML for it. All other operations are handled by PerfDriver
+
+static int sys_perf_event_open(struct perf_event_attr *const attr, const pid_t pid, const int cpu, const int group_fd, const unsigned long flags) {
+	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+// Pack a CCN perf config word: node[7:0] type[15:8] event[23:16] port[25:24] vc[28:26]
+static unsigned int getConfig(unsigned int node, unsigned int type, unsigned int event, unsigned int port, unsigned int vc) {
+	const unsigned int nodeBits  = (node  & 0xff) <<  0;
+	const unsigned int typeBits  = (type  & 0xff) <<  8;
+	const unsigned int eventBits = (event & 0xff) << 16;
+	const unsigned int portBits  = (port  & 0x03) << 24;
+	const unsigned int vcBits    = (vc    & 0x07) << 26;
+	return nodeBits | typeBits | eventBits | portBits | vcBits;
+}
+
+// Report whether the kernel accepts this event by opening (then closing) it on CPU 0
+static bool perfPoll(struct perf_event_attr *const pea) {
+	const int fd = sys_perf_event_open(pea, -1, 0, -1, 0);
+	if (fd >= 0) {
+		close(fd);
+	}
+	return fd >= 0;
+}
+
+CCNDriver::CCNDriver() : mNodeTypes(NULL), mXpCount(0) {
+}
+
+CCNDriver::~CCNDriver() {
+	delete [] mNodeTypes; // allocated with new[] in readEvents(); still NULL if that never ran
+}
+
+bool CCNDriver::claimCounter(const Counter &) const {
+	// Handled by PerfDriver
+	return false;
+}
+
+void CCNDriver::resetCounters() {
+	// Handled by PerfDriver
+}
+
+void CCNDriver::setupCounter(Counter &) {
+	// Handled by PerfDriver
+}
+
+// Probe perf for the CCN layout: sets mXpCount and allocates and fills mNodeTypes
+void CCNDriver::readEvents(mxml_node_t *const) {
+	struct stat st;
+	if (stat("/sys/bus/event_source/devices/ccn", &st) != 0) {
+		// No ccn event source in sysfs: mXpCount and mNodeTypes are left untouched
+		return;
+	}
+	int type;
+	if (DriverSource::readIntDriver("/sys/bus/event_source/devices/ccn/type", &type) != 0) {
+		logg->logError(__FILE__, __LINE__, "Unable to read CCN-5xx type");
+		handleException();
+	}
+
+	// Detect number of xps by asking perf whether an XP event (type 0x08) opens for a node id
+	struct perf_event_attr pea;
+	memset(&pea, 0, sizeof(pea));
+	pea.type = type;
+	pea.size = sizeof(pea);
+	// Phase 1: starting at id 1, double mXpCount until the probe for that id is rejected
+	mXpCount = 1;
+	while (true) {
+		pea.config = getConfig(0, 0x08, 1, 0, 1) | mXpCount;
+		if (!perfPoll(&pea)) {
+			break;
+		}
+		mXpCount *= 2; // NOTE(review): ids above 0xff would spill into the type bits of the config
+	}
+	{ // Phase 2: binary search (mXpCount/2, mXpCount] for the lowest rejected id; assumes accepted ids are contiguous
+		int lower = mXpCount/2 + 1;
+		while (lower < mXpCount) {
+			int mid = (lower + mXpCount)/2;
+			pea.config = getConfig(0, 0x08, 1, 0, 1) | mid;
+			if (perfPoll(&pea)) {
+				lower = mid + 1;
+			} else {
+				mXpCount = mid;
+			}
+		}
+	}
+	// Node ids 0 .. 2*mXpCount-1 are classified below, one mNodeTypes slot each
+	mNodeTypes = new NodeType[2*mXpCount];
+
+	// Detect node types: an id gets the first of HN-F (0x04), RN-I (0x16), SBAS (0x10) whose event 1 opens
+	for (int i = 0; i < 2*mXpCount; ++i) {
+		pea.config = getConfig(0, 0x04, 1, 0, 0) | i;
+		if (perfPoll(&pea)) {
+			mNodeTypes[i] = NT_HNF;
+			continue;
+		}
+
+		pea.config = getConfig(0, 0x16, 1, 0, 0) | i;
+		if (perfPoll(&pea)) {
+			mNodeTypes[i] = NT_RNI;
+			continue;
+		}
+
+		pea.config = getConfig(0, 0x10, 1, 0, 0) | i;
+		if (perfPoll(&pea)) {
+			mNodeTypes[i] = NT_SBAS;
+			continue;
+		}
+
+		mNodeTypes[i] = NT_UNKNOWN;
+	}
+}
+
+int CCNDriver::writeCounters(mxml_node_t *const) const {
+	// Handled by PerfDriver
+	return 0;
+}
+
+void CCNDriver::writeEvents(mxml_node_t *const root) const {
+	mxml_node_t *const counter_set = mxmlNewElement(root, TAG_COUNTER_SET);
+	mxmlElementSetAttr(counter_set, ATTR_NAME, ARM_CCN_5XX "cnt");
+	mxmlElementSetAttr(counter_set, ATTR_COUNT, "8");
+
+	mxml_node_t *const category = mxmlNewElement(root, TAG_CATEGORY);
+	mxmlElementSetAttr(category, ATTR_NAME, CCN_5XX);
+	mxmlElementSetAttr(category, TAG_COUNTER_SET, ARM_CCN_5XX "cnt");
+
+	mxml_node_t *const clock_event = mxmlNewElement(category, TAG_EVENT);
+	mxmlElementSetAttr(clock_event, ATTR_COUNTER, ARM_CCN_5XX "ccnt");
+	mxmlElementSetAttr(clock_event, ATTR_EVENT, "0xff00");
+	mxmlElementSetAttr(clock_event, ATTR_TITLE, "CCN-5xx Clock");
+	mxmlElementSetAttr(clock_event, ATTR_NAME, "Cycles");
+	mxmlElementSetAttr(clock_event, ATTR_DISPLAY, "hertz");
+	mxmlElementSetAttr(clock_event, ATTR_UNITS, "Hz");
+	mxmlElementSetAttr(clock_event, ATTR_AVERAGE_SELECTION, "yes");
+	mxmlElementSetAttr(clock_event, ATTR_DESCRIPTION, "The number of core clock cycles");
+
+	mxml_node_t *const xp_option_set = mxmlNewElement(category, TAG_OPTION_SET);
+	mxmlElementSetAttr(xp_option_set, ATTR_NAME, XP_REGION);
+
+	for (int i = 0; i < mXpCount; ++i) {
+		mxml_node_t *const option = mxmlNewElement(xp_option_set, TAG_OPTION);
+		mxmlElementSetAttrf(option, ATTR_EVENT_DELTA, "0x%x", getConfig(i, 0, 0, 0, 0));
+		mxmlElementSetAttrf(option, ATTR_NAME, "XP %i", i);
+		mxmlElementSetAttrf(option, ATTR_DESCRIPTION, "Crosspoint %i", i);
+	}
+
+	for (int vc = 0; vc < ARRAY_LENGTH(VC_TYPES); ++vc) {
+		if (VC_TYPES[vc] == NULL) {
+			continue;
+		}
+		for (int bus = 0; bus < 2; ++bus) {
+			for (int eventId = 0; eventId < ARRAY_LENGTH(XP_EVENT_NAMES); ++eventId) {
+				if (XP_EVENT_NAMES[eventId] == NULL) {
+					continue;
+				}
+				mxml_node_t *const event = mxmlNewElement(category, TAG_EVENT);
+				mxmlElementSetAttrf(event, ATTR_EVENT, "0x%x", getConfig(0, 0x08, eventId, bus, vc));
+				mxmlElementSetAttr(event, ATTR_OPTION_SET, XP_REGION);
+				mxmlElementSetAttr(event, ATTR_TITLE, CCN_5XX);
+				mxmlElementSetAttrf(event, ATTR_NAME, "Bus %i: %s: %s", bus, VC_TYPES[vc], XP_EVENT_NAMES[eventId]);
+				mxmlElementSetAttrf(event, ATTR_DESCRIPTION, "Bus %i: %s: %s", bus, VC_TYPES[vc], XP_EVENT_DESCRIPTIONS[eventId]);
+			}
+		}
+	}
+
+	mxml_node_t *const hnf_option_set = mxmlNewElement(category, TAG_OPTION_SET);
+	mxmlElementSetAttr(hnf_option_set, ATTR_NAME, HNF_REGION);
+
+	for (int eventId = 0; eventId < ARRAY_LENGTH(HNF_EVENT_NAMES); ++eventId) {
+		if (HNF_EVENT_NAMES[eventId] == NULL) {
+			continue;
+		}
+		mxml_node_t *const event = mxmlNewElement(category, TAG_EVENT);
+		mxmlElementSetAttrf(event, ATTR_EVENT, "0x%x", getConfig(0, 0x04, eventId, 0, 0));
+		mxmlElementSetAttr(event, ATTR_OPTION_SET, HNF_REGION);
+		mxmlElementSetAttr(event, ATTR_TITLE, CCN_5XX);
+		mxmlElementSetAttr(event, ATTR_NAME, HNF_EVENT_NAMES[eventId]);
+		mxmlElementSetAttr(event, ATTR_DESCRIPTION, HNF_EVENT_DESCRIPTIONS[eventId]);
+	}
+
+	mxml_node_t *const rni_option_set = mxmlNewElement(category, TAG_OPTION_SET);
+	mxmlElementSetAttr(rni_option_set, ATTR_NAME, RNI_REGION);
+
+	for (int eventId = 0; eventId < ARRAY_LENGTH(RNI_EVENT_NAMES); ++eventId) {
+		if (RNI_EVENT_NAMES[eventId] == NULL) {
+			continue;
+		}
+		mxml_node_t *const event = mxmlNewElement(category, TAG_EVENT);
+		mxmlElementSetAttrf(event, ATTR_EVENT, "0x%x", getConfig(0, 0x16, eventId, 0, 0));
+		mxmlElementSetAttr(event, ATTR_OPTION_SET, RNI_REGION);
+		mxmlElementSetAttr(event, ATTR_TITLE, CCN_5XX);
+		mxmlElementSetAttr(event, ATTR_NAME, RNI_EVENT_NAMES[eventId]);
+		mxmlElementSetAttr(event, ATTR_DESCRIPTION, RNI_EVENT_DESCRIPTIONS[eventId]);
+	}
+
+	mxml_node_t *const sbas_option_set = mxmlNewElement(category, TAG_OPTION_SET);
+	mxmlElementSetAttr(sbas_option_set, ATTR_NAME, SBAS_REGION);
+
+	for (int eventId = 0; eventId < ARRAY_LENGTH(SBAS_EVENT_NAMES); ++eventId) {
+		if (SBAS_EVENT_NAMES[eventId] == NULL) {
+			continue;
+		}
+		mxml_node_t *const event = mxmlNewElement(category, TAG_EVENT);
+		mxmlElementSetAttrf(event, ATTR_EVENT, "0x%x", getConfig(0, 0x10, eventId, 0, 0));
+		mxmlElementSetAttr(event, ATTR_OPTION_SET, SBAS_REGION);
+		mxmlElementSetAttr(event, ATTR_TITLE, CCN_5XX);
+		mxmlElementSetAttr(event, ATTR_NAME, SBAS_EVENT_NAMES[eventId]);
+		mxmlElementSetAttr(event, ATTR_DESCRIPTION, SBAS_EVENT_DESCRIPTIONS[eventId]);
+	}
+
+	for (int i = 0; i < 2*mXpCount; ++i) {
+		switch (mNodeTypes[i]) {
+		case NT_HNF: {
+			mxml_node_t *const option = mxmlNewElement(hnf_option_set, TAG_OPTION);
+			mxmlElementSetAttrf(option, ATTR_EVENT_DELTA, "0x%x", getConfig(i, 0, 0, 0, 0));
+			mxmlElementSetAttrf(option, ATTR_NAME, "HN-F %i", i);
+			mxmlElementSetAttrf(option, ATTR_DESCRIPTION, "Fully-coherent Home Node %i", i);
+			break;
+		}
+		case NT_RNI: {
+			mxml_node_t *const option = mxmlNewElement(rni_option_set, TAG_OPTION);
+			mxmlElementSetAttrf(option, ATTR_EVENT_DELTA, "0x%x", getConfig(i, 0, 0, 0, 0));
+			mxmlElementSetAttrf(option, ATTR_NAME, "RN-I %i", i);
+			mxmlElementSetAttrf(option, ATTR_DESCRIPTION, "I/O-coherent Requesting Node %i", i);
+			break;
+		}
+		case NT_SBAS: {
+			mxml_node_t *const option = mxmlNewElement(sbas_option_set, TAG_OPTION);
+			mxmlElementSetAttrf(option, ATTR_EVENT_DELTA, "0x%x", getConfig(i, 0, 0, 0, 0));
+			mxmlElementSetAttrf(option, ATTR_NAME, "SBAS %i", i);
+			mxmlElementSetAttrf(option, ATTR_DESCRIPTION, "ACE master to CHI protocol bridge %i", i);
+			break;
+		}
+		default:
+			continue;
+		}
+	}
+}
diff --git a/tools/gator/daemon/CCNDriver.h b/tools/gator/daemon/CCNDriver.h
new file mode 100644
index 000000000000..fb4c717e969a
--- /dev/null
+++ b/tools/gator/daemon/CCNDriver.h
@@ -0,0 +1,43 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef CCNDRIVER_H
+#define CCNDRIVER_H
+
+#include "Driver.h"
+
+class CCNDriver : public Driver { // Driver whose writeEvents emits the CCN HN-F / RN-I / SBAS option sets
+public:
+	CCNDriver();
+	~CCNDriver();
+
+	bool claimCounter(const Counter &counter) const;
+	void resetCounters();
+	void setupCounter(Counter &counter);
+
+	void readEvents(mxml_node_t *const);
+	int writeCounters(mxml_node_t *const root) const;
+	void writeEvents(mxml_node_t *const) const;
+
+private:
+	enum NodeType { // Kind of node recorded in each mNodeTypes entry
+		NT_UNKNOWN, // not matched by writeEvents, which skips such entries
+		NT_HNF, // listed by writeEvents as "HN-F"
+		NT_RNI, // listed by writeEvents as "RN-I"
+		NT_SBAS, // listed by writeEvents as "SBAS"
+	};
+
+	NodeType *mNodeTypes; // writeEvents reads entries 0 .. 2*mXpCount-1
+	int mXpCount;
+
+	// Intentionally unimplemented
+	CCNDriver(const CCNDriver &);
+	CCNDriver &operator=(const CCNDriver &);
+};
+
+#endif // CCNDRIVER_H
diff --git a/tools/gator/daemon/CPUFreqDriver.cpp b/tools/gator/daemon/CPUFreqDriver.cpp
new file mode 100644
index 000000000000..41f9d6f2b3f4
--- /dev/null
+++ b/tools/gator/daemon/CPUFreqDriver.cpp
@@ -0,0 +1,58 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "CPUFreqDriver.h"
+
+#include "Buffer.h"
+#include "DriverSource.h"
+#include "Logging.h"
+#include "SessionData.h"
+
+CPUFreqDriver::CPUFreqDriver() : mPrev() { // mPrev() value-initializes every stored frequency to 0
+}
+
+CPUFreqDriver::~CPUFreqDriver() {
+}
+
+void CPUFreqDriver::readEvents(mxml_node_t *const) { // Registers the Linux_power_cpu_freq counter; the XML node is unused
+	// Only for use with perf
+	if (!gSessionData->perf.isSetup()) {
+		return;
+	}
+
+	setCounters(new DriverCounter(getCounters(), strdup("Linux_power_cpu_freq"))); // new counter becomes the list head; its name is a strdup copy
+}
+
+void CPUFreqDriver::read(Buffer *const buffer) { // Emits a frequency event for every core whose value changed since the last call
+	char buf[64];
+	const DriverCounter *const counter = getCounters();
+	if ((counter == NULL) || !counter->isEnabled()) {
+		return;
+	}
+
+	const int key = counter->getKey(); // reuse the counter already fetched above
+	bool resetCores = false;
+	for (int i = 0; (i < gSessionData->mCores) && (i < ARRAY_LENGTH(mPrev)); ++i) { // never index past mPrev[NR_CPUS]
+		snprintf(buf, sizeof(buf), "/sys/devices/system/cpu/cpu%i/cpufreq/cpuinfo_cur_freq", i);
+		int64_t freq;
+		if (DriverSource::readInt64Driver(buf, &freq) != 0) {
+			freq = 0; // an unreadable frequency is reported as 0
+		}
+		if (mPrev[i] != freq) {
+			mPrev[i] = freq;
+			// Change cores
+			buffer->event64(2, i);
+			resetCores = true;
+			buffer->event64(key, 1000*freq); // NOTE(review): presumably scales kHz to Hz - confirm
+		}
+	}
+	if (resetCores) {
+		// Revert cores, UserSpaceSource is all on core 0
+		buffer->event64(2, 0);
+	}
+}
diff --git a/tools/gator/daemon/CPUFreqDriver.h b/tools/gator/daemon/CPUFreqDriver.h
new file mode 100644
index 000000000000..ad8c9aaa9e7d
--- /dev/null
+++ b/tools/gator/daemon/CPUFreqDriver.h
@@ -0,0 +1,34 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef CPUFREQDRIVER_H
+#define CPUFREQDRIVER_H
+
+#include "Config.h"
+#include "Driver.h"
+
+class CPUFreqDriver : public PolledDriver { // Polled driver that reports per-core CPU frequency changes
+private:
+	typedef PolledDriver super;
+
+public:
+	CPUFreqDriver();
+	~CPUFreqDriver();
+
+	void readEvents(mxml_node_t *const root);
+	void read(Buffer *const buffer);
+
+private:
+	int64_t mPrev[NR_CPUS]; // last frequency read per core; read() compares against it to detect changes
+
+	// Intentionally unimplemented
+	CPUFreqDriver(const CPUFreqDriver &);
+	CPUFreqDriver &operator=(const CPUFreqDriver &);
+};
+
+#endif // CPUFREQDRIVER_H
diff --git a/tools/gator/daemon/CapturedXML.cpp b/tools/gator/daemon/CapturedXML.cpp
index 4a11415a00c9..0b5802c893bb 100644
--- a/tools/gator/daemon/CapturedXML.cpp
+++ b/tools/gator/daemon/CapturedXML.cpp
@@ -34,6 +34,7 @@ mxml_node_t* CapturedXML::getTree(bool includeTime) {
 	mxmlElementSetAttr(captured, "version", "1");
 	if (gSessionData->perf.isSetup()) {
 		mxmlElementSetAttr(captured, "type", "Perf");
+		mxmlElementSetAttr(captured, "perf_beta", "yes");
 	}
 	mxmlElementSetAttrf(captured, "protocol", "%d", PROTOCOL_VERSION);
 	if (includeTime) { // Send the following only after the capture is complete
@@ -113,32 +114,32 @@ const char * mxmlWhitespaceCB(mxml_node_t *node, int loc) {
 	if (loc == MXML_WS_BEFORE_OPEN) {
 		// Single indentation
 		if (!strcmp(name, "target") || !strcmp(name, "counters"))
-			return("\n  ");
+			return "\n  ";
 
 		// Double indentation
 		if (!strcmp(name, "counter"))
-			return("\n    ");
+			return "\n    ";
 
 		// Avoid a carriage return on the first line of the xml file
 		if (!strncmp(name, "?xml", 4))
-			return(NULL);
+			return NULL;
 
 		// Default - no indentation
-		return("\n");
+		return "\n";
 	}
 
 	if (loc == MXML_WS_BEFORE_CLOSE) {
 		// No indentation
 		if (!strcmp(name, "captured"))
-			return("\n");
+			return "\n";
 
 		// Single indentation
 		if (!strcmp(name, "counters"))
-			return("\n  ");
+			return "\n  ";
 
 		// Default - no carriage return
-		return(NULL);
+		return NULL;
 	}
 
-	return(NULL);
+	return NULL;
 }
diff --git a/tools/gator/daemon/CapturedXML.h b/tools/gator/daemon/CapturedXML.h
index ed08c44bc3ff..b704f6e53bb5 100644
--- a/tools/gator/daemon/CapturedXML.h
+++ b/tools/gator/daemon/CapturedXML.h
@@ -6,8 +6,8 @@
  * published by the Free Software Foundation.
  */
 
-#ifndef	__CAPTURED_XML_H__
-#define	__CAPTURED_XML_H__
+#ifndef __CAPTURED_XML_H__
+#define __CAPTURED_XML_H__
 
 #include "mxml/mxml.h"
 
diff --git a/tools/gator/daemon/Child.cpp b/tools/gator/daemon/Child.cpp
index 1901ecc6a724..6b5bbb3bf6af 100644
--- a/tools/gator/daemon/Child.cpp
+++ b/tools/gator/daemon/Child.cpp
@@ -14,25 +14,28 @@
 #include <unistd.h>
 #include <sys/prctl.h>
 
-#include "Logging.h"
 #include "CapturedXML.h"
-#include "SessionData.h"
-#include "LocalCapture.h"
-#include "Sender.h"
-#include "OlyUtility.h"
-#include "OlySocket.h"
-#include "StreamlineSetup.h"
+#include "Command.h"
 #include "ConfigurationXML.h"
 #include "Driver.h"
-#include "PerfSource.h"
 #include "DriverSource.h"
 #include "ExternalSource.h"
+#include "FtraceSource.h"
+#include "LocalCapture.h"
+#include "Logging.h"
+#include "OlySocket.h"
+#include "OlyUtility.h"
+#include "PerfSource.h"
+#include "Sender.h"
+#include "SessionData.h"
+#include "StreamlineSetup.h"
 #include "UserSpaceSource.h"
 
 static sem_t haltPipeline, senderThreadStarted, startProfile, senderSem; // Shared by Child and spawned threads
 static Source *primarySource = NULL;
 static Source *externalSource = NULL;
 static Source *userSpaceSource = NULL;
+static Source *ftraceSource = NULL;
 static Sender* sender = NULL;        // Shared by Child.cpp and spawned threads
 Child* child = NULL;                 // shared by Child.cpp and main.cpp
 
@@ -149,7 +152,8 @@ static void *senderThread(void *) {
 
 	while (!primarySource->isDone() ||
 	       !externalSource->isDone() ||
-	       (userSpaceSource != NULL && !userSpaceSource->isDone())) {
+	       (userSpaceSource != NULL && !userSpaceSource->isDone()) ||
+	       (ftraceSource != NULL && !ftraceSource->isDone())) {
 		sem_wait(&senderSem);
 
 		primarySource->write(sender);
@@ -157,6 +161,9 @@ static void *senderThread(void *) {
 		if (userSpaceSource != NULL) {
 			userSpaceSource->write(sender);
 		}
+		if (ftraceSource != NULL) {
+			ftraceSource->write(sender);
+		}
 	}
 
 	// write end-of-capture sequence
@@ -206,6 +213,9 @@ void Child::endSession() {
 	if (userSpaceSource != NULL) {
 		userSpaceSource->interrupt();
 	}
+	if (ftraceSource != NULL) {
+		ftraceSource->interrupt();
+	}
 	sem_post(&haltPipeline);
 }
 
@@ -269,15 +279,32 @@ void Child::run() {
 		free(xmlString);
 	}
 
+	if (gSessionData->kmod.isMaliCapture() && (gSessionData->mSampleRate == 0)) {
+		logg->logError(__FILE__, __LINE__, "Mali counters are not supported with Sample Rate: None.");
+		handleException();
+	}
+
 	// Must be after session XML is parsed
 	if (!primarySource->prepare()) {
-		logg->logError(__FILE__, __LINE__, "Unable to prepare for capture");
+		if (gSessionData->perf.isSetup()) { // perf in use: the failure is on the perf side, otherwise on the gator driver side
+			logg->logError(__FILE__, __LINE__, "Unable to communicate with the perf API, please ensure that CONFIG_TRACING and CONFIG_CONTEXT_SWITCH_TRACER are enabled. Please refer to README_Streamline.txt for more information.");
+		} else {
+			logg->logError(__FILE__, __LINE__, "Unable to prepare gator driver for capture");
+		}
 		handleException();
 	}
 
 	// Sender thread shall be halted until it is signaled for one shot mode
 	sem_init(&haltPipeline, 0, gSessionData->mOneShot ? 0 : 2);
 
+	// Must be initialized before senderThread is started as senderThread checks externalSource
+	externalSource = new ExternalSource(&senderSem);
+	if (!externalSource->prepare()) {
+		logg->logError(__FILE__, __LINE__, "Unable to prepare external source for capture");
+		handleException();
+	}
+	externalSource->start();
+
 	// Create the duration, stop, and sender threads
 	bool thread_creation_success = true;
 	if (gSessionData->mDuration > 0 && pthread_create(&durationThreadID, NULL, durationThread, NULL)) {
@@ -288,22 +315,37 @@ void Child::run() {
 		thread_creation_success = false;
 	}
 
-	externalSource = new ExternalSource(&senderSem);
-	if (!externalSource->prepare()) {
-		logg->logError(__FILE__, __LINE__, "Unable to prepare for capture");
-		handleException();
+	bool startUSSource = false;
+	for (int i = 0; i < ARRAY_LENGTH(gSessionData->usDrivers); ++i) {
+		if (gSessionData->usDrivers[i]->countersEnabled()) {
+			startUSSource = true;
+		}
 	}
-	externalSource->start();
-
-	if (gSessionData->hwmon.countersEnabled() || gSessionData->fsDriver.countersEnabled()) {
+	if (startUSSource) {
 		userSpaceSource = new UserSpaceSource(&senderSem);
 		if (!userSpaceSource->prepare()) {
-			logg->logError(__FILE__, __LINE__, "Unable to prepare for capture");
+			logg->logError(__FILE__, __LINE__, "Unable to prepare userspace source for capture");
 			handleException();
 		}
 		userSpaceSource->start();
 	}
 
+	if (gSessionData->ftraceDriver.countersEnabled()) { // only start the ftrace source when one of its counters is enabled
+		ftraceSource = new FtraceSource(&senderSem);
+		if (!ftraceSource->prepare()) {
+			logg->logError(__FILE__, __LINE__, "Unable to prepare ftrace source for capture");
+			handleException();
+		}
+		ftraceSource->start();
+	}
+
+	if (gSessionData->mAllowCommands && (gSessionData->mCaptureCommand != NULL)) {
+		pthread_t thread;
+		if (pthread_create(&thread, NULL, commandThread, NULL)) {
+			thread_creation_success = false;
+		}
+	}
+
 	if (!thread_creation_success) {
 		logg->logError(__FILE__, __LINE__, "Failed to create gator threads");
 		handleException();
@@ -315,6 +357,9 @@ void Child::run() {
 	// Start profiling
 	primarySource->run();
 
+	if (ftraceSource != NULL) {
+		ftraceSource->join();
+	}
 	if (userSpaceSource != NULL) {
 		userSpaceSource->join();
 	}
@@ -338,6 +383,7 @@ void Child::run() {
 
 	logg->logMessage("Profiling ended.");
 
+	delete ftraceSource;
 	delete userSpaceSource;
 	delete externalSource;
 	delete primarySource;
diff --git a/tools/gator/daemon/Child.h b/tools/gator/daemon/Child.h
index a306a7760819..cc78202ceb5c 100644
--- a/tools/gator/daemon/Child.h
+++ b/tools/gator/daemon/Child.h
@@ -6,8 +6,8 @@
  * published by the Free Software Foundation.
  */
 
-#ifndef	__CHILD_H__
-#define	__CHILD_H__
+#ifndef __CHILD_H__
+#define __CHILD_H__
 
 class OlySocket;
 
diff --git a/tools/gator/daemon/Command.cpp b/tools/gator/daemon/Command.cpp
new file mode 100644
index 000000000000..28d73cf5a905
--- /dev/null
+++ b/tools/gator/daemon/Command.cpp
@@ -0,0 +1,172 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "Command.h"
+
+#include <fcntl.h>
+#include <pwd.h>
+#include <stdio.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "Logging.h"
+#include "SessionData.h"
+
+static int getUid(const char *const name, char *const shPath, const char *const tmpDir) { // Resolves name to a uid by chown-ing a scratch file through shPath and stat-ing it; returns -1 on failure
+	// Lookups may fail when using a different libc or a statically compiled executable
+	char gatorTemp[32];
+	snprintf(gatorTemp, sizeof(gatorTemp), "%s/gator_temp", tmpDir);
+
+	const int fd = open(gatorTemp, O_CREAT | O_CLOEXEC, 0600); // open(path, flags, mode): flags go second, the octal mode third
+	if (fd < 0) {
+		return -1;
+	}
+	close(fd);
+
+	char cmd[128];
+	snprintf(cmd, sizeof(cmd), "chown %s %s || rm %s", name, gatorTemp, gatorTemp); // NOTE(review): name reaches the shell unescaped - confirm it is trusted
+
+	const int pid = fork();
+	if (pid < 0) {
+		logg->logError(__FILE__, __LINE__, "fork failed");
+		handleException();
+	}
+	if (pid == 0) {
+		char cargv1[] = "-c";
+		char *cargv[] = {
+			shPath,
+			cargv1,
+			cmd,
+			NULL,
+		};
+
+		execv(cargv[0], cargv);
+		exit(-1); // only reached when execv fails
+	}
+	while ((waitpid(pid, NULL, 0) < 0) && (errno == EINTR)); // retry the wait when interrupted by a signal
+
+	struct stat st;
+	int result = -1;
+	if (stat(gatorTemp, &st) == 0) { // the shell removes the file when chown fails, so stat fails and -1 is kept
+		result = st.st_uid;
+	}
+	unlink(gatorTemp);
+	return result;
+}
+
+static int getUid(const char *const name) { // Returns the uid for name, or -1 when every lookup method fails
+	// Look up the username
+	struct passwd *const user = getpwnam(name);
+	if (user != NULL) {
+		return user->pw_uid;
+	}
+
+
+	// Are we on Linux
+	char cargv0l[] = "/bin/sh";
+	if ((access(cargv0l, X_OK) == 0) && (access("/tmp", W_OK) == 0)) { // need an executable shell and a writable scratch directory
+		return getUid(name, cargv0l, "/tmp");
+	}
+
+	// Are we on android
+	char cargv0a[] = "/system/bin/sh";
+	if ((access(cargv0a, X_OK) == 0) && (access("/data", W_OK) == 0)) {
+		return getUid(name, cargv0a, "/data");
+	}
+
+	return -1;
+}
+
+void *commandThread(void *) { // Thread body: runs gSessionData->mCaptureCommand through a shell as the capture user
+	prctl(PR_SET_NAME, (unsigned long)&"gatord-command", 0, 0, 0);
+
+	const char *const name = gSessionData->mCaptureUser == NULL ? "nobody" : gSessionData->mCaptureUser;
+	const int uid = getUid(name);
+	if (uid < 0) {
+		logg->logError(__FILE__, __LINE__, "Unable to lookup the user %s, please double check that the user exists", name);
+		handleException();
+	}
+
+	sleep(3);
+
+	char buf[128];
+	int pipefd[2];
+	if (pipe_cloexec(pipefd) != 0) { // the child reports start-up failures to the parent over this pipe
+		logg->logError(__FILE__, __LINE__, "pipe failed");
+		handleException();
+	}
+
+	const int pid = fork();
+	if (pid < 0) {
+		logg->logError(__FILE__, __LINE__, "fork failed");
+		handleException();
+	}
+	if (pid == 0) {
+		char cargv0l[] = "/bin/sh";
+		char cargv0a[] = "/system/bin/sh";
+		char cargv1[] = "-c";
+		char *cargv[] = {
+			cargv0l,
+			cargv1,
+			gSessionData->mCaptureCommand,
+			NULL,
+		};
+
+		buf[0] = '\0';
+		close(pipefd[0]);
+
+		// Gator runs at a high priority, reset the priority to the default
+		if (setpriority(PRIO_PROCESS, syscall(__NR_gettid), 0) == -1) {
+			snprintf(buf, sizeof(buf), "setpriority failed");
+			goto fail_exit;
+		}
+
+		if (setuid(uid) != 0) {
+			snprintf(buf, sizeof(buf), "setuid failed");
+			goto fail_exit;
+		}
+
+		{
+			const char *const path = gSessionData->mCaptureWorkingDir == NULL ? "/" : gSessionData->mCaptureWorkingDir;
+			if (chdir(path) != 0) {
+				snprintf(buf, sizeof(buf), "Unable to cd to %s, please verify the directory exists and is accessable to %s", path, name);
+				goto fail_exit;
+			}
+		}
+
+		execv(cargv[0], cargv);
+		cargv[0] = cargv0a; // /bin/sh could not be executed, fall back to the Android shell path
+		execv(cargv[0], cargv);
+		snprintf(buf, sizeof(buf), "execv failed");
+
+	fail_exit:
+		if (buf[0] != '\0') {
+			const ssize_t bytes = write(pipefd[1], buf, sizeof(buf));
+			// Can't do anything if this fails
+			(void)bytes;
+		}
+
+		exit(-1);
+	}
+
+	close(pipefd[1]);
+	const ssize_t bytes = read(pipefd[0], buf, sizeof(buf)); // returns 0 when the child exec'd and the write end closed without data
+	if (bytes > 0) {
+		logg->logError(__FILE__, __LINE__, "%.*s", (int)bytes, buf); // buf holds a path and user name: print it as data, bounded to the bytes read
+		handleException();
+	}
+	close(pipefd[0]);
+
+	return NULL;
+}
diff --git a/tools/gator/daemon/Command.h b/tools/gator/daemon/Command.h
new file mode 100644
index 000000000000..17244b7aaebc
--- /dev/null
+++ b/tools/gator/daemon/Command.h
@@ -0,0 +1,14 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef COMMAND_H
+#define COMMAND_H
+
+void *commandThread(void *); // Thread entry point defined in Command.cpp; the argument is unused
+
+#endif // COMMAND_H
diff --git a/tools/gator/daemon/Config.h b/tools/gator/daemon/Config.h
index 6f5e2aae50e1..bee383a1c797 100644
--- a/tools/gator/daemon/Config.h
+++ b/tools/gator/daemon/Config.h
@@ -10,8 +10,19 @@
 #define CONFIG_H
 
 #define ARRAY_LENGTH(A) static_cast<int>(sizeof(A)/sizeof((A)[0]))
+#define ACCESS_ONCE(x) (*(volatile typeof(x)*)&(x))
 
 #define MAX_PERFORMANCE_COUNTERS 50
-#define NR_CPUS 16
+#define NR_CPUS 32
+
+template<typename T>
+static inline T min(const T a, const T b) { // Smaller of a and b; returns b when they compare equal
+	return (a < b ? a : b);
+}
+
+template<typename T>
+static inline T max(const T a, const T b) { // Larger of a and b; returns b when they compare equal
+	return (a > b ? a : b);
+}
 
 #endif // CONFIG_H
diff --git a/tools/gator/daemon/DiskIODriver.cpp b/tools/gator/daemon/DiskIODriver.cpp
new file mode 100644
index 000000000000..5deb0f375f3a
--- /dev/null
+++ b/tools/gator/daemon/DiskIODriver.cpp
@@ -0,0 +1,125 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+// Define to get format macros from inttypes.h
+#define __STDC_FORMAT_MACROS
+
+#include "DiskIODriver.h"
+
+#include <inttypes.h>
+
+#include "Logging.h"
+#include "SessionData.h"
+
+class DiskIOCounter : public DriverCounter { // Counter that reports how much a running total changed between reads
+public:
+	DiskIOCounter(DriverCounter *next, char *const name, int64_t *const value);
+	~DiskIOCounter();
+
+	int64_t read();
+
+private:
+	int64_t *const mValue; // points at the total maintained by the owning driver
+	int64_t mPrev; // value of *mValue at the previous read()
+
+	// Intentionally unimplemented
+	DiskIOCounter(const DiskIOCounter &);
+	DiskIOCounter &operator=(const DiskIOCounter &);
+};
+
+DiskIOCounter::DiskIOCounter(DriverCounter *next, char *const name, int64_t *const value) : DriverCounter(next, name), mValue(value), mPrev(0) { // value is stored, not copied, so it must outlive the counter
+}
+
+DiskIOCounter::~DiskIOCounter() {
+}
+
+int64_t DiskIOCounter::read() { // Returns the bytes transferred since the previous call
+	int64_t result = *mValue - mPrev;
+	mPrev = *mValue;
+	// Kernel assumes a sector is 512 bytes
+	return result * 512; // multiply instead of << 9: shifting a negative delta is undefined
+}
+
+DiskIODriver::DiskIODriver() : mBuf(), mReadBytes(0), mWriteBytes(0) { // both totals start at 0 until the first doRead()
+}
+
+DiskIODriver::~DiskIODriver() {
+}
+
+void DiskIODriver::readEvents(mxml_node_t *const) { // Registers the read and write counters; the XML node is unused
+	// Only for use with perf
+	if (!gSessionData->perf.isSetup()) {
+		return;
+	}
+
+	setCounters(new DiskIOCounter(getCounters(), strdup("Linux_block_rq_rd"), &mReadBytes)); // each counter watches one of this driver's totals
+	setCounters(new DiskIOCounter(getCounters(), strdup("Linux_block_rq_wr"), &mWriteBytes));
+}
+
+void DiskIODriver::doRead() { // Re-reads /proc/diskstats and rebuilds the read/write totals over whole disks
+	if (!countersEnabled()) {
+		return;
+	}
+
+	if (!mBuf.read("/proc/diskstats")) {
+		logg->logError(__FILE__, __LINE__, "Unable to read /proc/diskstats");
+		handleException();
+	}
+
+	mReadBytes = 0; // NOTE(review): the totals appear to hold sector counts; DiskIOCounter::read scales them to bytes - confirm
+	mWriteBytes = 0;
+
+	char *lastName = NULL;
+	int lastNameLen = -1;
+	char *start = mBuf.getBuf();
+	while (*start != '\0') {
+		char *end = strchr(start, '\n');
+		if (end != NULL) {
+			*end = '\0'; // terminate the current line in place so sscanf stops at it
+		}
+
+		int nameStart = -1;
+		int nameEnd = -1;
+		int64_t readBytes = -1;
+		int64_t writeBytes = -1;
+		const int count = sscanf(start, "%*d %*d %n%*s%n %*u %*u %" SCNd64 " %*u %*u %*u %" SCNd64, &nameStart, &nameEnd, &readBytes, &writeBytes); // SCNd64 matches the signed int64_t targets
+		if (count != 2) { // %n and suppressed fields are not counted, only the two stored values
+			logg->logError(__FILE__, __LINE__, "Unable to parse /proc/diskstats");
+			handleException();
+		}
+
+		// Skip partitions, identified by the last counted device name being a prefix of this name
+		if ((lastName == NULL) || (strncmp(lastName, start + nameStart, lastNameLen) != 0)) {
+			lastName = start + nameStart;
+			lastNameLen = nameEnd - nameStart;
+			mReadBytes += readBytes;
+			mWriteBytes += writeBytes;
+		}
+
+		if (end == NULL) { // last line had no trailing newline
+			break;
+		}
+		start = end + 1;
+	}
+}
+
+void DiskIODriver::start() { // Takes a first reading so later reads report only activity after this point
+	doRead();
+	// Initialize previous values
+	for (DriverCounter *counter = getCounters(); counter != NULL; counter = counter->getNext()) {
+		if (!counter->isEnabled()) {
+			continue;
+		}
+		counter->read(); // result is discarded; the call only records the current total as the previous value
+	}
+}
+
+void DiskIODriver::read(Buffer *const buffer) { // Refreshes the totals, then emits the enabled counters
+	doRead();
+	super::read(buffer); // PolledDriver::read writes each enabled counter to buffer
+}
diff --git a/tools/gator/daemon/DiskIODriver.h b/tools/gator/daemon/DiskIODriver.h
new file mode 100644
index 000000000000..d0db18c77d04
--- /dev/null
+++ b/tools/gator/daemon/DiskIODriver.h
@@ -0,0 +1,39 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef DISKIODRIVER_H
+#define DISKIODRIVER_H
+
+#include "Driver.h"
+#include "DynBuf.h"
+
+class DiskIODriver : public PolledDriver { // Polled driver reporting disk read/write activity parsed from /proc/diskstats
+private:
+	typedef PolledDriver super;
+
+public:
+	DiskIODriver();
+	~DiskIODriver();
+
+	void readEvents(mxml_node_t *const root);
+	void start();
+	void read(Buffer *const buffer);
+
+private:
+	void doRead(); // re-parses /proc/diskstats into the two totals below
+
+	DynBuf mBuf; // holds the contents of /proc/diskstats between parses
+	int64_t mReadBytes; // total accumulated by doRead(); watched by the Linux_block_rq_rd counter
+	int64_t mWriteBytes; // total accumulated by doRead(); watched by the Linux_block_rq_wr counter
+
+	// Intentionally unimplemented
+	DiskIODriver(const DiskIODriver &);
+	DiskIODriver &operator=(const DiskIODriver &);
+};
+
+#endif // DISKIODRIVER_H
diff --git a/tools/gator/daemon/Driver.cpp b/tools/gator/daemon/Driver.cpp
index 09e040162912..275da31c7a0d 100644
--- a/tools/gator/daemon/Driver.cpp
+++ b/tools/gator/daemon/Driver.cpp
@@ -8,8 +8,89 @@
 
 #include "Driver.h"
 
+#include "Buffer.h"
+#include "SessionData.h"
+
+DriverCounter::DriverCounter(DriverCounter *const next, const char *const name) : mNext(next), mName(name), mKey(getEventKey()), mEnabled(false) { // takes ownership of name; starts disabled with a key from getEventKey()
+}
+
+DriverCounter::~DriverCounter() { // Releases the counter name taken over by the constructor
+	free(const_cast<char *>(mName)); // callers pass strdup()ed names, which must be released with free(), not delete
+}
+
 Driver *Driver::head = NULL;
 
 Driver::Driver() : next(head) {
 	head = this;
 }
+
+SimpleDriver::~SimpleDriver() { // Deletes every counter in the list
+	DriverCounter *counters = mCounters;
+	while (counters != NULL) {
+		DriverCounter *counter = counters;
+		counters = counter->getNext(); // fetch the successor before the node is deleted
+		delete counter;
+	}
+}
+
+DriverCounter *SimpleDriver::findCounter(const Counter &counter) const { // Returns the driver counter whose name equals counter.getType(), or NULL
+	for (DriverCounter *driverCounter = mCounters; driverCounter != NULL; driverCounter = driverCounter->getNext()) {
+		if (strcmp(driverCounter->getName(), counter.getType()) == 0) { // exact, case-sensitive name match
+			return driverCounter;
+		}
+	}
+
+	return NULL;
+}
+
+bool SimpleDriver::claimCounter(const Counter &counter) const { // True when this driver has a counter matching the request
+	return findCounter(counter) != NULL;
+}
+
+bool SimpleDriver::countersEnabled() const { // True when at least one counter in the list is enabled
+	for (DriverCounter *counter = mCounters; counter != NULL; counter = counter->getNext()) {
+		if (counter->isEnabled()) {
+			return true;
+		}
+	}
+	return false;
+}
+
+void SimpleDriver::resetCounters() { // Disables every counter in the list
+	for (DriverCounter *counter = mCounters; counter != NULL; counter = counter->getNext()) {
+		counter->setEnabled(false);
+	}
+}
+
+void SimpleDriver::setupCounter(Counter &counter) { // Enables the matching driver counter and copies its key into counter
+	DriverCounter *const driverCounter = findCounter(counter);
+	if (driverCounter == NULL) {
+		counter.setEnabled(false); // no counter with that name: mark the request disabled
+		return;
+	}
+	driverCounter->setEnabled(true);
+	counter.setKey(driverCounter->getKey());
+}
+
+int SimpleDriver::writeCounters(mxml_node_t *root) const { // Adds one <counter name="..."> child to root per counter; returns how many were added
+	int count = 0;
+	for (DriverCounter *counter = mCounters; counter != NULL; counter = counter->getNext()) { // enabled and disabled counters alike
+		mxml_node_t *node = mxmlNewElement(root, "counter");
+		mxmlElementSetAttr(node, "name", counter->getName());
+		++count;
+	}
+
+	return count;
+}
+
+PolledDriver::~PolledDriver() {
+}
+
+void PolledDriver::read(Buffer *const buffer) { // Writes the current value of every enabled counter to buffer
+	for (DriverCounter *counter = getCounters(); counter != NULL; counter = counter->getNext()) {
+		if (!counter->isEnabled()) {
+			continue;
+		}
+		buffer->event64(counter->getKey(), counter->read()); // one event per counter, tagged with the counter's key
+	}
+}
diff --git a/tools/gator/daemon/Driver.h b/tools/gator/daemon/Driver.h
index e5ed7b6c1295..72870e3dbca1 100644
--- a/tools/gator/daemon/Driver.h
+++ b/tools/gator/daemon/Driver.h
@@ -9,10 +9,36 @@
 #ifndef DRIVER_H
 #define DRIVER_H
 
+#include <stdint.h>
+
 #include "mxml/mxml.h"
 
+class Buffer;
 class Counter;
 
+class DriverCounter { // One node in a driver's singly linked list of counters
+public:
+	DriverCounter(DriverCounter *const next, const char *const name);
+	virtual ~DriverCounter();
+
+	DriverCounter *getNext() const { return mNext; }
+	const char *getName() const { return mName; }
+	int getKey() const { return mKey; }
+	bool isEnabled() const { return mEnabled; }
+	void setEnabled(const bool enabled) { mEnabled = enabled; }
+	virtual int64_t read() { return -1; } // default value; subclasses that can be polled override this
+
+private:
+	DriverCounter *const mNext; // next counter in the list, NULL at the tail
+	const char *const mName; // released by the destructor
+	const int mKey; // assigned once in the constructor from getEventKey()
+	bool mEnabled;
+
+	// Intentionally unimplemented
+	DriverCounter(const DriverCounter &);
+	DriverCounter &operator=(const DriverCounter &);
+};
+
 class Driver {
 public:
 	static Driver *getHead() { return head; }
@@ -26,15 +52,17 @@ class Driver {
 	// Enables and prepares the counter for capture
 	virtual void setupCounter(Counter &counter) = 0;
 
+	// Performs any actions needed for setup or based on eventsXML
+	virtual void readEvents(mxml_node_t *const) {}
 	// Emits available counters
-	virtual int writeCounters(mxml_node_t *root) const = 0;
+	virtual int writeCounters(mxml_node_t *const root) const = 0;
 	// Emits possible dynamically generated events/counters
-	virtual void writeEvents(mxml_node_t *) const {}
+	virtual void writeEvents(mxml_node_t *const) const {}
 
 	Driver *getNext() const { return next; }
 
 protected:
-	Driver ();
+	Driver();
 
 private:
 	static Driver *head;
@@ -45,4 +73,46 @@ class Driver {
 	Driver &operator=(const Driver &);
 };
 
+class SimpleDriver : public Driver { // Driver base class that implements the counter operations over a DriverCounter list
+public:
+	virtual ~SimpleDriver();
+
+	bool claimCounter(const Counter &counter) const;
+	bool countersEnabled() const;
+	void resetCounters();
+	void setupCounter(Counter &counter);
+	int writeCounters(mxml_node_t *root) const;
+
+protected:
+	SimpleDriver() : mCounters(NULL) {}
+
+	DriverCounter *getCounters() const { return mCounters; }
+	void setCounters(DriverCounter *const counter) { mCounters = counter; } // replaces the list head; callers chain the old head through the new counter
+
+	DriverCounter *findCounter(const Counter &counter) const;
+
+private:
+	DriverCounter *mCounters; // head of the list; every node is deleted by the destructor
+
+	// Intentionally unimplemented
+	SimpleDriver(const SimpleDriver &);
+	SimpleDriver &operator=(const SimpleDriver &);
+};
+
+class PolledDriver : public SimpleDriver { // SimpleDriver whose counters are sampled by calling read()
+public:
+	virtual ~PolledDriver();
+
+	virtual void start() {} // optional hook; does nothing unless overridden
+	virtual void read(Buffer *const buffer);
+
+protected:
+	PolledDriver() {}
+
+private:
+	// Intentionally unimplemented
+	PolledDriver(const PolledDriver &);
+	PolledDriver &operator=(const PolledDriver &);
+};
+
 #endif // DRIVER_H
diff --git a/tools/gator/daemon/DriverSource.cpp b/tools/gator/daemon/DriverSource.cpp
index 11d3095ef6d2..7f299b646952 100644
--- a/tools/gator/daemon/DriverSource.cpp
+++ b/tools/gator/daemon/DriverSource.cpp
@@ -6,6 +6,7 @@
  * published by the Free Software Foundation.
  */
 
+// Define to get format macros from inttypes.h
 #define __STDC_FORMAT_MACROS
 
 #include "DriverSource.h"
@@ -93,19 +94,19 @@ bool DriverSource::prepare() {
 }
 
 void DriverSource::bootstrapThread() {
-	prctl(PR_SET_NAME, (unsigned long)&"gatord-bootstrap", 0, 0, 0);
+	prctl(PR_SET_NAME, (unsigned long)&"gatord-proc", 0, 0, 0);
 
 	DynBuf printb;
 	DynBuf b1;
 	DynBuf b2;
-	DynBuf b3;
+	const uint64_t currTime = getTime();
 
-	if (!readProc(mBuffer, false, &printb, &b1, &b2, &b3)) {
-		logg->logMessage("%s(%s:%i): readProc failed", __FUNCTION__, __FILE__, __LINE__);
+	if (!readProcComms(currTime, mBuffer, &printb, &b1, &b2)) {
+		logg->logError(__FILE__, __LINE__, "readProcComms failed");
 		handleException();
 	}
 
-	mBuffer->commit(1);
+	mBuffer->commit(currTime);
 	mBuffer->setDone();
 }
 
@@ -128,7 +129,7 @@ void DriverSource::run() {
 	}
 
 	// open the buffer which calls userspace_buffer_open() in the driver
-	mBufferFD = open("/dev/gator/buffer", O_RDONLY);
+	mBufferFD = open("/dev/gator/buffer", O_RDONLY | O_CLOEXEC);
 	if (mBufferFD < 0) {
 		logg->logError(__FILE__, __LINE__, "The gator driver did not set up properly. Please view the linux console or dmesg log for more information on the failure.");
 		handleException();
@@ -232,7 +233,7 @@ void DriverSource::write(Sender *sender) {
 
 int DriverSource::readIntDriver(const char *fullpath, int *value) {
 	char data[40]; // Sufficiently large to hold any integer
-	const int fd = open(fullpath, O_RDONLY);
+	const int fd = open(fullpath, O_RDONLY | O_CLOEXEC);
 	if (fd < 0) {
 		return -1;
 	}
@@ -257,7 +258,7 @@ int DriverSource::readIntDriver(const char *fullpath, int *value) {
 
 int DriverSource::readInt64Driver(const char *fullpath, int64_t *value) {
 	char data[40]; // Sufficiently large to hold any integer
-	const int fd = open(fullpath, O_RDONLY);
+	const int fd = open(fullpath, O_RDONLY | O_CLOEXEC);
 	if (fd < 0) {
 		return -1;
 	}
@@ -281,7 +282,7 @@ int DriverSource::readInt64Driver(const char *fullpath, int64_t *value) {
 }
 
 int DriverSource::writeDriver(const char *fullpath, const char *data) {
-	int fd = open(fullpath, O_WRONLY);
+	int fd = open(fullpath, O_WRONLY | O_CLOEXEC);
 	if (fd < 0) {
 		return -1;
 	}
diff --git a/tools/gator/daemon/DynBuf.cpp b/tools/gator/daemon/DynBuf.cpp
index 6f92b336ae19..df20713ad63c 100644
--- a/tools/gator/daemon/DynBuf.cpp
+++ b/tools/gator/daemon/DynBuf.cpp
@@ -40,7 +40,7 @@ int DynBuf::resize(const size_t minCapacity) {
 bool DynBuf::read(const char *const path) {
 	int result = false;
 
-	const int fd = open(path, O_RDONLY);
+	const int fd = open(path, O_RDONLY | O_CLOEXEC);
 	if (fd < 0) {
 		logg->logMessage("%s(%s:%i): open failed", __FUNCTION__, __FILE__, __LINE__);
 		return false;
diff --git a/tools/gator/daemon/EventsXML.cpp b/tools/gator/daemon/EventsXML.cpp
index cf0192ef671f..d905bbabe988 100644
--- a/tools/gator/daemon/EventsXML.cpp
+++ b/tools/gator/daemon/EventsXML.cpp
@@ -47,7 +47,7 @@ char *EventsXML::getXML() {
 	// Add dynamic events from the drivers
 	mxml_node_t *events = mxmlFindElement(xml, xml, "events", NULL, NULL, MXML_DESCEND);
 	if (!events) {
-		logg->logMessage("Unable to find <events> node in the events.xml");
+		logg->logError(__FILE__, __LINE__, "Unable to find <events> node in the events.xml");
 		handleException();
 	}
 	for (Driver *driver = Driver::getHead(); driver != NULL; driver = driver->getNext()) {
diff --git a/tools/gator/daemon/ExternalSource.cpp b/tools/gator/daemon/ExternalSource.cpp
index b6ec301d0c08..8f5e6b684c53 100644
--- a/tools/gator/daemon/ExternalSource.cpp
+++ b/tools/gator/daemon/ExternalSource.cpp
@@ -19,6 +19,9 @@
 static const char MALI_VIDEO[] = "\0mali-video";
 static const char MALI_VIDEO_STARTUP[] = "\0mali-video-startup";
 static const char MALI_VIDEO_V1[] = "MALI_VIDEO 1\n";
+static const char MALI_GRAPHICS[] = "\0mali_thirdparty_server";
+static const char MALI_GRAPHICS_STARTUP[] = "\0mali_thirdparty_client";
+static const char MALI_GRAPHICS_V1[] = "MALI_GRAPHICS 1\n";
 
 static bool setNonblock(const int fd) {
 	int flags;
@@ -37,16 +40,15 @@ static bool setNonblock(const int fd) {
 	return true;
 }
 
-ExternalSource::ExternalSource(sem_t *senderSem) : mBuffer(0, FRAME_EXTERNAL, 128*1024, senderSem), mMonitor(), mMveStartupUds(MALI_VIDEO_STARTUP, sizeof(MALI_VIDEO_STARTUP)), mInterruptFd(-1), mMveUds(-1) {
+ExternalSource::ExternalSource(sem_t *senderSem) : mBuffer(0, FRAME_EXTERNAL, 128*1024, senderSem), mMonitor(), mMveStartupUds(MALI_VIDEO_STARTUP, sizeof(MALI_VIDEO_STARTUP)), mMaliStartupUds(MALI_GRAPHICS_STARTUP, sizeof(MALI_GRAPHICS_STARTUP)), mAnnotate(8083), mInterruptFd(-1), mMaliUds(-1), mMveUds(-1) {
 	sem_init(&mBufferSem, 0, 0);
 }
 
 ExternalSource::~ExternalSource() {
 }
 
-void ExternalSource::waitFor(const uint64_t currTime, const int bytes) {
+void ExternalSource::waitFor(const int bytes) {
 	while (mBuffer.bytesAvailable() <= bytes) {
-		mBuffer.check(currTime);
 		sem_wait(&mBufferSem);
 	}
 }
@@ -63,11 +65,21 @@ void ExternalSource::configureConnection(const int fd, const char *const handsha
 	}
 
 	// Write the handshake to the circular buffer
-	waitFor(1, Buffer::MAXSIZE_PACK32 + 4 + size - 1);
+	waitFor(Buffer::MAXSIZE_PACK32 + size - 1);
 	mBuffer.packInt(fd);
-	mBuffer.writeLEInt((unsigned char *)mBuffer.getWritePos(), size - 1);
-	mBuffer.advanceWrite(4);
 	mBuffer.writeBytes(handshake, size - 1);
+	mBuffer.commit(1);
+}
+
+bool ExternalSource::connectMali() { // Returns false when the connection to the MALI_GRAPHICS socket fails
+	mMaliUds = OlySocket::connect(MALI_GRAPHICS, sizeof(MALI_GRAPHICS));
+	if (mMaliUds < 0) {
+		return false;
+	}
+
+	configureConnection(mMaliUds, MALI_GRAPHICS_V1, sizeof(MALI_GRAPHICS_V1)); // Hand the new fd and the version handshake to configureConnection
+
+	return true;
 }
 
 bool ExternalSource::connectMve() {
@@ -90,10 +102,15 @@ bool ExternalSource::connectMve() {
 }
 
 bool ExternalSource::prepare() {
-	if (!mMonitor.init() || !setNonblock(mMveStartupUds.getFd()) || !mMonitor.add(mMveStartupUds.getFd())) {
+	if (!mMonitor.init() ||
+			!setNonblock(mMveStartupUds.getFd()) || !mMonitor.add(mMveStartupUds.getFd()) ||
+			!setNonblock(mMaliStartupUds.getFd()) || !mMonitor.add(mMaliStartupUds.getFd()) ||
+			!setNonblock(mAnnotate.getFd()) || !mMonitor.add(mAnnotate.getFd()) ||
+			false) {
 		return false;
 	}
 
+	connectMali();
 	connectMve();
 
 	return true;
@@ -104,7 +121,7 @@ void ExternalSource::run() {
 
 	prctl(PR_SET_NAME, (unsigned long)&"gatord-external", 0, 0, 0);
 
-	if (pipe(pipefd) != 0) {
+	if (pipe_cloexec(pipefd) != 0) {
 		logg->logError(__FILE__, __LINE__, "pipe failed");
 		handleException();
 	}
@@ -115,6 +132,9 @@ void ExternalSource::run() {
 		handleException();
 	}
 
+	// Notify annotate clients to retry connecting to gatord
+	gSessionData->annotateListener.signal();
+
 	while (gSessionData->mSessionIsActive) {
 		struct epoll_event events[16];
 		// Clear any pending sem posts
@@ -138,36 +158,60 @@ void ExternalSource::run() {
 					logg->logError(__FILE__, __LINE__, "Unable to configure incoming Mali video connection");
 					handleException();
 				}
+			} else if (fd == mMaliStartupUds.getFd()) {
+				// Mali Graphics says it's alive
+				int client = mMaliStartupUds.acceptConnection();
+				// Don't read from this connection, establish a new connection to Mali Graphics
+				close(client);
+				if (!connectMali()) {
+					logg->logError(__FILE__, __LINE__, "Unable to configure incoming Mali graphics connection");
+					handleException();
+				}
+			} else if (fd == mAnnotate.getFd()) {
+				int client = mAnnotate.acceptConnection();
+				if (!setNonblock(client) || !mMonitor.add(client)) {
+					logg->logError(__FILE__, __LINE__, "Unable to set socket options on incoming annotation connection");
+					handleException();
+				}
 			} else if (fd == pipefd[0]) {
 				// Means interrupt has been called and mSessionIsActive should be reread
 			} else {
-				while (true) {
-					waitFor(currTime, Buffer::MAXSIZE_PACK32 + 4);
-
+				/* This can result in some starvation if there are multiple
+				 * threads which are annotating heavily, but it is not
+				 * recommended that threads annotate that much as it can also
+				 * starve out the gator data.
+				 */
+				while (gSessionData->mSessionIsActive) {
+					// Wait until there is enough room for the fd, two headers and two ints
+					waitFor(7*Buffer::MAXSIZE_PACK32 + 2*sizeof(uint32_t));
 					mBuffer.packInt(fd);
-					char *const bytesPos = mBuffer.getWritePos();
-					mBuffer.advanceWrite(4);
 					const int contiguous = mBuffer.contiguousSpaceAvailable();
 					const int bytes = read(fd, mBuffer.getWritePos(), contiguous);
 					if (bytes < 0) {
 						if (errno == EAGAIN) {
-							// Nothing left to read, and Buffer convention dictates that writePos can't go backwards
-							mBuffer.writeLEInt((unsigned char *)bytesPos, 0);
+							// Nothing left to read
+							mBuffer.commit(currTime);
 							break;
 						}
 						// Something else failed, close the socket
-						mBuffer.writeLEInt((unsigned char *)bytesPos, -1);
+						mBuffer.commit(currTime);
+						mBuffer.packInt(-1);
+						mBuffer.packInt(fd);
+						mBuffer.commit(currTime);
 						close(fd);
 						break;
 					} else if (bytes == 0) {
 						// The other side is closed
-						mBuffer.writeLEInt((unsigned char *)bytesPos, -1);
+						mBuffer.commit(currTime);
+						mBuffer.packInt(-1);
+						mBuffer.packInt(fd);
+						mBuffer.commit(currTime);
 						close(fd);
 						break;
 					}
 
-					mBuffer.writeLEInt((unsigned char *)bytesPos, bytes);
 					mBuffer.advanceWrite(bytes);
+					mBuffer.commit(currTime);
 
 					// Short reads also mean nothing is left to read
 					if (bytes < contiguous) {
@@ -176,13 +220,14 @@ void ExternalSource::run() {
 				}
 			}
 		}
-
-		// Only call mBufferCheck once per iteration
-		mBuffer.check(currTime);
 	}
 
 	mBuffer.setDone();
 
+	if (mMveUds >= 0) {
+		gSessionData->maliVideo.stop(mMveUds);
+	}
+
 	mInterruptFd = -1;
 	close(pipefd[0]);
 	close(pipefd[1]);
diff --git a/tools/gator/daemon/ExternalSource.h b/tools/gator/daemon/ExternalSource.h
index 2e7ed27df255..919e75e8a41a 100644
--- a/tools/gator/daemon/ExternalSource.h
+++ b/tools/gator/daemon/ExternalSource.h
@@ -16,7 +16,7 @@
 #include "OlySocket.h"
 #include "Source.h"
 
-// Unix domain socket counters from external sources like graphics drivers
+// Counters from external sources like graphics drivers and annotations
 class ExternalSource : public Source {
 public:
 	ExternalSource(sem_t *senderSem);
@@ -30,15 +30,19 @@ class ExternalSource : public Source {
 	void write(Sender *sender);
 
 private:
-	void waitFor(const uint64_t currTime, const int bytes);
+	void waitFor(const int bytes);
 	void configureConnection(const int fd, const char *const handshake, size_t size);
+	bool connectMali();
 	bool connectMve();
 
 	sem_t mBufferSem;
 	Buffer mBuffer;
 	Monitor mMonitor;
 	OlyServerSocket mMveStartupUds;
+	OlyServerSocket mMaliStartupUds;
+	OlyServerSocket mAnnotate;
 	int mInterruptFd;
+	int mMaliUds;
 	int mMveUds;
 
 	// Intentionally unimplemented
diff --git a/tools/gator/daemon/FSDriver.cpp b/tools/gator/daemon/FSDriver.cpp
index 40c8df1af222..dd8eb804dc99 100644
--- a/tools/gator/daemon/FSDriver.cpp
+++ b/tools/gator/daemon/FSDriver.cpp
@@ -14,43 +14,34 @@
 #include <sys/types.h>
 #include <unistd.h>
 
-#include "Buffer.h"
-#include "Counter.h"
 #include "DriverSource.h"
 #include "Logging.h"
-#include "SessionData.h"
 
-class FSCounter {
+class FSCounter : public DriverCounter {
 public:
-	FSCounter(FSCounter *next, char *name, const char *regex);
+	FSCounter(DriverCounter *next, char *name, char *path, const char *regex);
 	~FSCounter();
 
-	FSCounter *getNext() const { return next; }
-	int getKey() const { return key; }
-	bool isEnabled() const { return enabled; }
-	void setEnabled(const bool enabled) { this->enabled = enabled; }
-	const char *getName() const { return name; }
+	const char *getPath() const { return mPath; }
+
 	int64_t read();
 
 private:
-	FSCounter *const next;
-	regex_t reg;
-	char *name;
-	const int key;
-	int enabled : 1,
-		useRegex : 1;
+	char *const mPath;
+	regex_t mReg;
+	bool mUseRegex;
 
 	// Intentionally unimplemented
 	FSCounter(const FSCounter &);
 	FSCounter &operator=(const FSCounter &);
 };
 
-FSCounter::FSCounter(FSCounter *next, char *name, const char *regex) : next(next), name(name), key(getEventKey()), enabled(false), useRegex(regex != NULL) {
-	if (useRegex) {
-		int result = regcomp(&reg, regex, REG_EXTENDED);
+FSCounter::FSCounter(DriverCounter *next, char *name, char *path, const char *regex) : DriverCounter(next, name), mPath(path), mUseRegex(regex != NULL) {
+	if (mUseRegex) {
+		int result = regcomp(&mReg, regex, REG_EXTENDED);
 		if (result != 0) {
 			char buf[128];
-			regerror(result, &reg, buf, sizeof(buf));
+			regerror(result, &mReg, buf, sizeof(buf));
 			logg->logError(__FILE__, __LINE__, "Invalid regex '%s': %s", regex, buf);
 			handleException();
 		}
@@ -58,18 +49,18 @@ FSCounter::FSCounter(FSCounter *next, char *name, const char *regex) : next(next
 }
 
 FSCounter::~FSCounter() {
-	free(name);
-	if (useRegex) {
-		regfree(&reg);
+	free(mPath);
+	if (mUseRegex) {
+		regfree(&mReg);
 	}
 }
 
 int64_t FSCounter::read() {
 	int64_t value;
-	if (useRegex) {
+	if (mUseRegex) {
 		char buf[4096];
 		size_t pos = 0;
-		const int fd = open(name, O_RDONLY);
+		const int fd = open(mPath, O_RDONLY | O_CLOEXEC);
 		if (fd < 0) {
 			goto fail;
 		}
@@ -86,53 +77,43 @@ int64_t FSCounter::read() {
 		buf[pos] = '\0';
 
 		regmatch_t match[2];
-		int result = regexec(&reg, buf, 2, match, 0);
+		int result = regexec(&mReg, buf, 2, match, 0);
 		if (result != 0) {
-			regerror(result, &reg, buf, sizeof(buf));
-			logg->logError(__FILE__, __LINE__, "Parsing %s failed: %s", name, buf);
+			regerror(result, &mReg, buf, sizeof(buf));
+			logg->logError(__FILE__, __LINE__, "Parsing %s failed: %s", mPath, buf);
 			handleException();
 		}
 
 		if (match[1].rm_so < 0) {
-			logg->logError(__FILE__, __LINE__, "Parsing %s failed", name);
+			logg->logError(__FILE__, __LINE__, "Parsing %s failed", mPath);
 			handleException();
 		}
-		char *endptr;
+
 		errno = 0;
-		value = strtoll(buf + match[1].rm_so, &endptr, 0);
+		value = strtoll(buf + match[1].rm_so, NULL, 0);
 		if (errno != 0) {
-			logg->logError(__FILE__, __LINE__, "Parsing %s failed: %s", name, strerror(errno));
+			logg->logError(__FILE__, __LINE__, "Parsing %s failed: %s", mPath, strerror(errno));
 			handleException();
 		}
 	} else {
-		if (DriverSource::readInt64Driver(name, &value) != 0) {
+		if (DriverSource::readInt64Driver(mPath, &value) != 0) {
 			goto fail;
 		}
 	}
 	return value;
 
  fail:
-	logg->logError(__FILE__, __LINE__, "Unable to read %s", name);
+	logg->logError(__FILE__, __LINE__, "Unable to read %s", mPath);
 	handleException();
 }
 
-FSDriver::FSDriver() : counters(NULL) {
+FSDriver::FSDriver() {
 }
 
 FSDriver::~FSDriver() {
-	while (counters != NULL) {
-		FSCounter * counter = counters;
-		counters = counter->getNext();
-		delete counter;
-	}
 }
 
-void FSDriver::setup(mxml_node_t *const xml) {
-	// fs driver does not currently work with perf
-	if (gSessionData->perf.isSetup()) {
-		return;
-	}
-
+void FSDriver::readEvents(mxml_node_t *const xml) {
 	mxml_node_t *node = xml;
 	while (true) {
 		node = mxmlFindElement(node, xml, "event", NULL, NULL, MXML_DESCEND);
@@ -140,56 +121,33 @@ void FSDriver::setup(mxml_node_t *const xml) {
 			break;
 		}
 		const char *counter = mxmlElementGetAttr(node, "counter");
-		if ((counter != NULL) && (counter[0] == '/')) {
-			const char *regex = mxmlElementGetAttr(node, "regex");
-			counters = new FSCounter(counters, strdup(counter), regex);
+		if (counter == NULL) {
+			continue;
 		}
-	}
-}
 
-FSCounter *FSDriver::findCounter(const Counter &counter) const {
-	for (FSCounter * fsCounter = counters; fsCounter != NULL; fsCounter = fsCounter->getNext()) {
-		if (strcmp(fsCounter->getName(), counter.getType()) == 0) {
-			return fsCounter;
+		if (counter[0] == '/') {
+			logg->logError(__FILE__, __LINE__, "Old style filesystem counter (%s) detected, please create a new unique counter value and move the filename into the path attribute, see events-Filesystem.xml for examples", counter);
+			handleException();
 		}
-	}
 
-	return NULL;
-}
-
-bool FSDriver::claimCounter(const Counter &counter) const {
-	return findCounter(counter) != NULL;
-}
-
-bool FSDriver::countersEnabled() const {
-	for (FSCounter *counter = counters; counter != NULL; counter = counter->getNext()) {
-		if (counter->isEnabled()) {
-			return true;
+		if (strncmp(counter, "filesystem_", 11) != 0) {
+			continue;
 		}
-	}
-	return false;
-}
 
-void FSDriver::resetCounters() {
-	for (FSCounter * counter = counters; counter != NULL; counter = counter->getNext()) {
-		counter->setEnabled(false);
+		const char *path = mxmlElementGetAttr(node, "path");
+		if (path == NULL) {
+			logg->logError(__FILE__, __LINE__, "The filesystem counter %s is missing the required path attribute", counter);
+			handleException();
+		}
+		const char *regex = mxmlElementGetAttr(node, "regex");
+		setCounters(new FSCounter(getCounters(), strdup(counter), strdup(path), regex));
 	}
 }
 
-void FSDriver::setupCounter(Counter &counter) {
-	FSCounter *const fsCounter = findCounter(counter);
-	if (fsCounter == NULL) {
-		counter.setEnabled(false);
-		return;
-	}
-	fsCounter->setEnabled(true);
-	counter.setKey(fsCounter->getKey());
-}
-
 int FSDriver::writeCounters(mxml_node_t *root) const {
 	int count = 0;
-	for (FSCounter * counter = counters; counter != NULL; counter = counter->getNext()) {
-		if (access(counter->getName(), R_OK) == 0) {
+	for (FSCounter *counter = static_cast<FSCounter *>(getCounters()); counter != NULL; counter = static_cast<FSCounter *>(counter->getNext())) {
+		if (access(counter->getPath(), R_OK) == 0) {
 			mxml_node_t *node = mxmlNewElement(root, "counter");
 			mxmlElementSetAttr(node, "name", counter->getName());
 			++count;
@@ -198,15 +156,3 @@ int FSDriver::writeCounters(mxml_node_t *root) const {
 
 	return count;
 }
-
-void FSDriver::start() {
-}
-
-void FSDriver::read(Buffer * const buffer) {
-	for (FSCounter * counter = counters; counter != NULL; counter = counter->getNext()) {
-		if (!counter->isEnabled()) {
-			continue;
-		}
-		buffer->event(counter->getKey(), counter->read());
-	}
-}
diff --git a/tools/gator/daemon/FSDriver.h b/tools/gator/daemon/FSDriver.h
index ef3955362331..a7dc8b4df9dd 100644
--- a/tools/gator/daemon/FSDriver.h
+++ b/tools/gator/daemon/FSDriver.h
@@ -11,31 +11,16 @@
 
 #include "Driver.h"
 
-class Buffer;
-class FSCounter;
-
-class FSDriver : public Driver {
+class FSDriver : public PolledDriver {
 public:
 	FSDriver();
 	~FSDriver();
 
-	void setup(mxml_node_t *const xml);
-
-	bool claimCounter(const Counter &counter) const;
-	bool countersEnabled() const;
-	void resetCounters();
-	void setupCounter(Counter &counter);
+	void readEvents(mxml_node_t *const xml);
 
 	int writeCounters(mxml_node_t *root) const;
 
-	void start();
-	void read(Buffer * buffer);
-
 private:
-	FSCounter *findCounter(const Counter &counter) const;
-
-	FSCounter *counters;
-
 	// Intentionally unimplemented
 	FSDriver(const FSDriver &);
 	FSDriver &operator=(const FSDriver &);
diff --git a/tools/gator/daemon/Fifo.cpp b/tools/gator/daemon/Fifo.cpp
index f672e92a6807..41275fd287b8 100644
--- a/tools/gator/daemon/Fifo.cpp
+++ b/tools/gator/daemon/Fifo.cpp
@@ -9,9 +9,6 @@
 #include "Fifo.h"
 
 #include <stdlib.h>
-#ifdef WIN32
-#define valloc malloc
-#endif
 
 #include "Logging.h"
 
@@ -23,7 +20,7 @@ Fifo::Fifo(int singleBufferSize, int bufferSize, sem_t* readerSem) {
   mWrapThreshold = bufferSize;
   mSingleBufferSize = singleBufferSize;
   mReaderSem = readerSem;
-  mBuffer = (char*)valloc(bufferSize + singleBufferSize);
+  mBuffer = (char*)malloc(bufferSize + singleBufferSize);
   mEnd = false;
 
   if (mBuffer == NULL) {
diff --git a/tools/gator/daemon/Fifo.h b/tools/gator/daemon/Fifo.h
index bdda3f549b50..21c8d8580391 100644
--- a/tools/gator/daemon/Fifo.h
+++ b/tools/gator/daemon/Fifo.h
@@ -6,8 +6,8 @@
  * published by the Free Software Foundation.
  */
 
-#ifndef	__FIFO_H__
-#define	__FIFO_H__
+#ifndef __FIFO_H__
+#define __FIFO_H__
 
 #ifdef WIN32
 #include <windows.h>
@@ -35,10 +35,10 @@ class Fifo {
 
 private:
   int mSingleBufferSize, mWrite, mRead, mReadCommit, mRaggedEnd, mWrapThreshold;
-  sem_t	mWaitForSpaceSem;
+  sem_t mWaitForSpaceSem;
   sem_t* mReaderSem;
-  char*	mBuffer;
-  bool	mEnd;
+  char* mBuffer;
+  bool mEnd;
 
   // Intentionally unimplemented
   Fifo(const Fifo &);
diff --git a/tools/gator/daemon/FtraceDriver.cpp b/tools/gator/daemon/FtraceDriver.cpp
new file mode 100644
index 000000000000..b156f1c0b8b4
--- /dev/null
+++ b/tools/gator/daemon/FtraceDriver.cpp
@@ -0,0 +1,133 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "FtraceDriver.h"
+
+#include <errno.h>
+#include <regex.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "Logging.h"
+
+// A counter whose value is parsed out of a line of ftrace text with a regex
+class FtraceCounter : public DriverCounter {
+public:
+	FtraceCounter(DriverCounter *next, char *name, const char *regex);
+	~FtraceCounter();
+
+	// On a match stores the key in values[0] and the parsed number in values[1]
+	// and returns 1; returns 0 when the line does not match
+	int read(const char *const line, int64_t *values);
+
+private:
+	regex_t reg;
+
+	// Intentionally unimplemented
+	FtraceCounter(const FtraceCounter &);
+	FtraceCounter &operator=(const FtraceCounter &);
+};
+
+FtraceCounter::FtraceCounter(DriverCounter *next, char *name, const char *regex) : DriverCounter(next, name) {
+	int result = regcomp(&reg, regex, REG_EXTENDED);
+	if (result != 0) {
+		char buf[128];
+		regerror(result, &reg, buf, sizeof(buf));
+		logg->logError(__FILE__, __LINE__, "Invalid regex '%s': %s", regex, buf);
+		handleException();
+	}
+}
+
+FtraceCounter::~FtraceCounter() {
+	regfree(&reg);
+}
+
+int FtraceCounter::read(const char *const line, int64_t *values) {
+	regmatch_t match[2];
+	int result = regexec(&reg, line, 2, match, 0);
+	if (result != 0) {
+		// No match
+		return 0;
+	}
+
+	// The value comes from the first capture group, so the regex must have one
+	if (match[1].rm_so < 0) {
+		logg->logError(__FILE__, __LINE__, "Parsing %s failed", getName());
+		handleException();
+	}
+
+	errno = 0;
+	int64_t value = strtoll(line + match[1].rm_so, NULL, 0);
+	if (errno != 0) {
+		logg->logError(__FILE__, __LINE__, "Parsing %s failed: %s", getName(), strerror(errno));
+		handleException();
+	}
+
+	values[0] = getKey();
+	values[1] = value;
+
+	return 1;
+}
+
+FtraceDriver::FtraceDriver() : mValues(NULL) {
+}
+
+FtraceDriver::~FtraceDriver() {
+	delete [] mValues;
+}
+
+// Creates a counter for every <event> whose counter attribute starts with "ftrace_"
+void FtraceDriver::readEvents(mxml_node_t *const xml) {
+	mxml_node_t *node = xml;
+	while (true) {
+		node = mxmlFindElement(node, xml, "event", NULL, NULL, MXML_DESCEND);
+		if (node == NULL) {
+			break;
+		}
+		const char *counter = mxmlElementGetAttr(node, "counter");
+		if (counter == NULL) {
+			continue;
+		}
+
+		if (strncmp(counter, "ftrace_", 7) != 0) {
+			continue;
+		}
+
+		const char *regex = mxmlElementGetAttr(node, "regex");
+		if (regex == NULL) {
+			logg->logError(__FILE__, __LINE__, "The regex counter %s is missing the required regex attribute", counter);
+			handleException();
+		}
+		setCounters(new FtraceCounter(getCounters(), strdup(counter), regex));
+	}
+
+	// Size the scratch array from the whole counter list, one key/value pair per
+	// counter, so that a repeated call neither leaks nor undersizes it
+	int count = 0;
+	for (DriverCounter *counter = getCounters(); counter != NULL; counter = counter->getNext()) {
+		++count;
+	}
+	delete [] mValues;
+	mValues = new int64_t[2*count];
+}
+
+// Runs every enabled counter against line; *buf is set to the key/value pairs
+// and the number of pairs is returned
+int FtraceDriver::read(const char *line, int64_t **buf) {
+	int count = 0;
+
+	for (FtraceCounter *counter = static_cast<FtraceCounter *>(getCounters()); counter != NULL; counter = static_cast<FtraceCounter *>(counter->getNext())) {
+		if (!counter->isEnabled()) {
+			continue;
+		}
+		count += counter->read(line, mValues + 2*count);
+	}
+
+	*buf = mValues;
+	return count;
+}
diff --git a/tools/gator/daemon/FtraceDriver.h b/tools/gator/daemon/FtraceDriver.h
new file mode 100644
index 000000000000..5f958bec672c
--- /dev/null
+++ b/tools/gator/daemon/FtraceDriver.h
@@ -0,0 +1,31 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef FTRACEDRIVER_H
+#define FTRACEDRIVER_H
+
+#include "Driver.h"
+
+class FtraceDriver : public SimpleDriver { // Driver for the events whose counter attribute starts with "ftrace_"
+public:
+	FtraceDriver();
+	~FtraceDriver();
+
+	void readEvents(mxml_node_t *const xml); // Builds the counter list from the <event> nodes and allocates mValues
+
+	int read(const char *line, int64_t **buf); // Points *buf at the key/value pairs matched in line and returns how many pairs there are
+
+private:
+	int64_t *mValues; // Array of key/value pairs handed out by read; freed in the destructor
+
+	// Intentionally unimplemented
+	FtraceDriver(const FtraceDriver &);
+	FtraceDriver &operator=(const FtraceDriver &);
+};
+
+#endif // FTRACEDRIVER_H
diff --git a/tools/gator/daemon/FtraceSource.cpp b/tools/gator/daemon/FtraceSource.cpp
new file mode 100644
index 000000000000..521633357417
--- /dev/null
+++ b/tools/gator/daemon/FtraceSource.cpp
@@ -0,0 +1,171 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "FtraceSource.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "DriverSource.h"
+#include "Logging.h"
+#include "SessionData.h"
+
+// Empty SIGUSR1 handler; the signal only has to interrupt the blocking read in run
+static void handler(int signum)
+{
+	(void)signum;
+}
+
+FtraceSource::FtraceSource(sem_t *senderSem) : mFtraceFh(NULL), mBuffer(0, FRAME_BLOCK_COUNTER, 128*1024, senderSem), mTid(-1), mTracingOn(0) {
+}
+
+FtraceSource::~FtraceSource() {
+}
+
+// Installs the SIGUSR1 handler, truncates the ftrace buffer, selects the perf clock and opens trace_pipe
+bool FtraceSource::prepare() {
+	{
+		struct sigaction act;
+		// Start from a fully defined struct; sa_mask in particular must not be left indeterminate
+		memset(&act, 0, sizeof(act));
+		sigemptyset(&act.sa_mask);
+		act.sa_handler = handler;
+		act.sa_flags = (int)SA_RESETHAND;
+		if (sigaction(SIGUSR1, &act, NULL) != 0) {
+			logg->logError(__FILE__, __LINE__, "sigaction failed: %s\n", strerror(errno));
+			handleException();
+		}
+	}
+
+	// Remember the current setting so that run can write it back when the session ends
+	if (DriverSource::readIntDriver("/sys/kernel/debug/tracing/tracing_on", &mTracingOn)) {
+		logg->logError(__FILE__, __LINE__, "Unable to read if ftrace is enabled");
+		handleException();
+	}
+
+	if (DriverSource::writeDriver("/sys/kernel/debug/tracing/tracing_on", "0") != 0) {
+		logg->logError(__FILE__, __LINE__, "Unable to turn ftrace off before truncating the buffer");
+		handleException();
+	}
+
+	{
+		int fd;
+		fd = open("/sys/kernel/debug/tracing/trace", O_WRONLY | O_TRUNC | O_CLOEXEC, 0666);
+		if (fd < 0) {
+			logg->logError(__FILE__, __LINE__, "Unable truncate ftrace buffer: %s", strerror(errno));
+			handleException();
+		}
+		close(fd);
+	}
+
+	if (DriverSource::writeDriver("/sys/kernel/debug/tracing/trace_clock", "perf") != 0) {
+		logg->logError(__FILE__, __LINE__, "Unable to switch ftrace to the perf clock, please ensure you are running Linux 3.10 or later");
+		handleException();
+	}
+
+	mFtraceFh = fopen_cloexec("/sys/kernel/debug/tracing/trace_pipe", "rb");
+	if (mFtraceFh == NULL) {
+		logg->logError(__FILE__, __LINE__, "Unable to open trace_pipe");
+		handleException();
+	}
+
+	return true;
+}
+
+// Thread body: parses trace_pipe line by line until the session ends or the read is interrupted
+void FtraceSource::run() {
+	prctl(PR_SET_NAME, (unsigned long)&"gatord-ftrace", 0, 0, 0);
+	mTid = syscall(__NR_gettid);
+
+	if (DriverSource::writeDriver("/sys/kernel/debug/tracing/tracing_on", "1") != 0) {
+		logg->logError(__FILE__, __LINE__, "Unable to turn ftrace on");
+		handleException();
+	}
+
+	while (gSessionData->mSessionIsActive) {
+		char buf[1<<12];
+
+		// Clear errno so the checks below only see what this fgets call reported
+		errno = 0;
+		if (fgets(buf, sizeof(buf), mFtraceFh) == NULL) {
+			if (errno == EINTR) {
+				// Interrupted by interrupt - likely user request to terminate
+				break;
+			}
+			logg->logError(__FILE__, __LINE__, "Unable read trace data: %s", strerror(errno));
+			handleException();
+		}
+
+		const uint64_t currTime = getTime();
+
+		char *const colon = strstr(buf, ": ");
+		if (colon == NULL) {
+			logg->logError(__FILE__, __LINE__, "Unable find colon: %s", buf);
+			handleException();
+		}
+		// Temporarily end the string at the colon so strrchr finds the last space in front of it
+		*colon = '\0';
+
+		char *const space = strrchr(buf, ' ');
+		if (space == NULL) {
+			logg->logError(__FILE__, __LINE__, "Unable find space: %s", buf);
+			handleException();
+		}
+		*colon = ':';
+
+		int64_t *data = NULL;
+		int count = gSessionData->ftraceDriver.read(colon + 2, &data);
+		if (count > 0) {
+			errno = 0;
+			const long long time = strtod(space, NULL) * 1000000000;
+			if (errno != 0) {
+				logg->logError(__FILE__, __LINE__, "Unable to parse time: %s", strerror(errno));
+				handleException();
+			}
+			mBuffer.event64(-1, time);
+
+			for (int i = 0; i < count; ++i) {
+				mBuffer.event64(data[2*i + 0], data[2*i + 1]);
+			}
+
+			mBuffer.check(currTime);
+		}
+
+	}
+
+	mBuffer.setDone();
+
+	DriverSource::writeDriver("/sys/kernel/debug/tracing/tracing_on", mTracingOn);
+	fclose(mFtraceFh);
+	DriverSource::writeDriver("/sys/kernel/debug/tracing/trace_clock", "local");
+}
+
+void FtraceSource::interrupt() {
+	// Closing the underlying file handle does not result in the read on the ftrace file handle to return, so send a signal to the thread
+	syscall(__NR_tgkill, getpid(), mTid, SIGUSR1);
+}
+
+bool FtraceSource::isDone() {
+	return mBuffer.isDone();
+}
+
+void FtraceSource::write(Sender *sender) {
+	// Don't send ftrace data until the summary packet is sent so that monotonic delta is available
+	if (!gSessionData->mSentSummary) {
+		return;
+	}
+	if (!mBuffer.isDone()) {
+		mBuffer.write(sender);
+	}
+}
diff --git a/tools/gator/daemon/FtraceSource.h b/tools/gator/daemon/FtraceSource.h
new file mode 100644
index 000000000000..2391b881494e
--- /dev/null
+++ b/tools/gator/daemon/FtraceSource.h
@@ -0,0 +1,43 @@
+/**
+ * Copyright (C) ARM Limited 2010-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef FTRACESOURCE_H
+#define FTRACESOURCE_H
+
+#include <semaphore.h>
+#include <stdio.h>
+
+#include "Buffer.h"
+#include "Source.h"
+
+class FtraceSource : public Source { // Source that parses ftrace trace_pipe output into mBuffer
+public:
+	FtraceSource(sem_t *senderSem);
+	~FtraceSource();
+
+	bool prepare(); // Sets up the signal handler and the ftrace files; opens trace_pipe
+	void run(); // Reads trace_pipe while the session is active
+	void interrupt(); // Sends SIGUSR1 to the thread recorded in mTid
+
+	bool isDone();
+	void write(Sender *sender);
+
+private:
+	void waitFor(const int bytes); // NOTE(review): FtraceSource.cpp does not define this - confirm it is needed
+
+	FILE *mFtraceFh; // trace_pipe stream, opened in prepare and closed at the end of run
+	Buffer mBuffer;
+	int mTid; // Thread id stored by run, -1 until then
+	int mTracingOn; // tracing_on value read in prepare and written back by run
+
+	// Intentionally unimplemented
+	FtraceSource(const FtraceSource &);
+	FtraceSource &operator=(const FtraceSource &);
+};
+
+#endif // FTRACESOURCE_H
diff --git a/tools/gator/daemon/Hwmon.h b/tools/gator/daemon/Hwmon.h
deleted file mode 100644
index a22a3609f99f..000000000000
--- a/tools/gator/daemon/Hwmon.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Copyright (C) ARM Limited 2013-2014. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef	HWMON_H
-#define	HWMON_H
-
-#include "Driver.h"
-
-class Buffer;
-class HwmonCounter;
-
-class Hwmon : public Driver {
-public:
-	Hwmon();
-	~Hwmon();
-
-	void setup();
-
-	bool claimCounter(const Counter &counter) const;
-	bool countersEnabled() const;
-	void resetCounters();
-	void setupCounter(Counter &counter);
-
-	int writeCounters(mxml_node_t *root) const;
-	void writeEvents(mxml_node_t *root) const;
-
-	void start();
-	void read(Buffer * buffer);
-
-private:
-	HwmonCounter *findCounter(const Counter &counter) const;
-
-	HwmonCounter *counters;
-
-	// Intentionally unimplemented
-	Hwmon(const Hwmon &);
-	Hwmon &operator=(const Hwmon &);
-};
-
-#endif // HWMON_H
diff --git a/tools/gator/daemon/Hwmon.cpp b/tools/gator/daemon/HwmonDriver.cpp
similarity index 51%
rename from tools/gator/daemon/Hwmon.cpp
rename to tools/gator/daemon/HwmonDriver.cpp
index e44424743ef0..9d161ae5ac56 100644
--- a/tools/gator/daemon/Hwmon.cpp
+++ b/tools/gator/daemon/HwmonDriver.cpp
@@ -6,24 +6,33 @@
  * published by the Free Software Foundation.
  */
 
-#include "Hwmon.h"
+#include "HwmonDriver.h"
 
 #include "libsensors/sensors.h"
 
-#include "Buffer.h"
-#include "Counter.h"
 #include "Logging.h"
-#include "SessionData.h"
 
-class HwmonCounter {
+// Maps a feature type to its *_INPUT subfeature type; any other type is logged and passed to handleException
+static sensors_subfeature_type getInput(const sensors_feature_type type) {
+	switch (type) {
+	case SENSORS_FEATURE_IN: return SENSORS_SUBFEATURE_IN_INPUT;
+	case SENSORS_FEATURE_FAN: return SENSORS_SUBFEATURE_FAN_INPUT;
+	case SENSORS_FEATURE_TEMP: return SENSORS_SUBFEATURE_TEMP_INPUT;
+	case SENSORS_FEATURE_POWER: return SENSORS_SUBFEATURE_POWER_INPUT;
+	case SENSORS_FEATURE_ENERGY: return SENSORS_SUBFEATURE_ENERGY_INPUT;
+	case SENSORS_FEATURE_CURR: return SENSORS_SUBFEATURE_CURR_INPUT;
+	case SENSORS_FEATURE_HUMIDITY: return SENSORS_SUBFEATURE_HUMIDITY_INPUT;
+	default:
+		logg->logError(__FILE__, __LINE__, "Unsupported hwmon feature %i", type);
+		handleException();
+	}
+}
+
+class HwmonCounter : public DriverCounter {
 public:
-	HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, const sensors_feature *feature);
+	HwmonCounter(DriverCounter *next, char *const name, const sensors_chip_name *chip, const sensors_feature *feature);
 	~HwmonCounter();
 
-	HwmonCounter *getNext() const { return next; }
-	int getKey() const { return key; }
-	bool isEnabled() const { return enabled; }
-	const char *getName() const { return name; }
 	const char *getLabel() const { return label; }
 	const char *getTitle() const { return title; }
 	bool isDuplicate() const { return duplicate; }
@@ -32,63 +41,34 @@ public:
 	const char *getUnit() const { return unit; }
 	int getModifier() const { return modifier; }
 
-	void setEnabled(const bool enabled) {
-		this->enabled = enabled;
-		// canRead will clear enabled if the counter is not readable
-		canRead();
-	}
-
-	double read();
-	bool canRead();
+	int64_t read();
 
 private:
 	void init(const sensors_chip_name *chip, const sensors_feature *feature);
 
-	HwmonCounter *const next;
-	const int key;
-	int polled : 1,
-		readable : 1,
-		enabled : 1,
-		monotonic: 1,
-		duplicate : 1;
-
 	const sensors_chip_name *chip;
 	const sensors_feature *feature;
-
-	char *name;
 	char *label;
 	const char *title;
 	const char *display;
 	const char *counter_class;
 	const char *unit;
-	int modifier;
 	double previous_value;
-
-	sensors_subfeature_type input;
+	int modifier;
+	int monotonic: 1,
+		duplicate : 1;
 
 	// Intentionally unimplemented
 	HwmonCounter(const HwmonCounter &);
 	HwmonCounter &operator=(const HwmonCounter &);
 };
 
-HwmonCounter::HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, const sensors_feature *feature) : next(next), key(getEventKey()), polled(false), readable(false), enabled(false), duplicate(false), chip(chip), feature(feature) {
-
-	int len = sensors_snprintf_chip_name(NULL, 0, chip) + 1;
-	char *chip_name = new char[len];
-	sensors_snprintf_chip_name(chip_name, len, chip);
-
-	len = snprintf(NULL, 0, "hwmon_%s_%d", chip_name, feature->number) + 1;
-	name = new char[len];
-	snprintf(name, len, "hwmon_%s_%d", chip_name, feature->number);
-
-	delete [] chip_name;
-
+HwmonCounter::HwmonCounter(DriverCounter *next, char *const name, const sensors_chip_name *chip, const sensors_feature *feature) : DriverCounter(next, name), chip(chip), feature(feature), duplicate(false) {
 	label = sensors_get_label(chip, feature);
 
 	switch (feature->type) {
 	case SENSORS_FEATURE_IN:
 		title = "Voltage";
-		input = SENSORS_SUBFEATURE_IN_INPUT;
 		display = "maximum";
 		counter_class = "absolute";
 		unit = "V";
@@ -97,7 +77,6 @@ HwmonCounter::HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, co
 		break;
 	case SENSORS_FEATURE_FAN:
 		title = "Fan";
-		input = SENSORS_SUBFEATURE_FAN_INPUT;
 		display = "average";
 		counter_class = "absolute";
 		unit = "RPM";
@@ -106,7 +85,6 @@ HwmonCounter::HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, co
 		break;
 	case SENSORS_FEATURE_TEMP:
 		title = "Temperature";
-		input = SENSORS_SUBFEATURE_TEMP_INPUT;
 		display = "maximum";
 		counter_class = "absolute";
 		unit = "°C";
@@ -115,7 +93,6 @@ HwmonCounter::HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, co
 		break;
 	case SENSORS_FEATURE_POWER:
 		title = "Power";
-		input = SENSORS_SUBFEATURE_POWER_INPUT;
 		display = "maximum";
 		counter_class = "absolute";
 		unit = "W";
@@ -124,7 +101,6 @@ HwmonCounter::HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, co
 		break;
 	case SENSORS_FEATURE_ENERGY:
 		title = "Energy";
-		input = SENSORS_SUBFEATURE_ENERGY_INPUT;
 		display = "accumulate";
 		counter_class = "delta";
 		unit = "J";
@@ -133,7 +109,6 @@ HwmonCounter::HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, co
 		break;
 	case SENSORS_FEATURE_CURR:
 		title = "Current";
-		input = SENSORS_SUBFEATURE_CURR_INPUT;
 		display = "maximum";
 		counter_class = "absolute";
 		unit = "A";
@@ -142,7 +117,6 @@ HwmonCounter::HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, co
 		break;
 	case SENSORS_FEATURE_HUMIDITY:
 		title = "Humidity";
-		input = SENSORS_SUBFEATURE_HUMIDITY_INPUT;
 		display = "average";
 		counter_class = "absolute";
 		unit = "%";
@@ -154,7 +128,7 @@ HwmonCounter::HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, co
 		handleException();
 	}
 
-	for (HwmonCounter * counter = next; counter != NULL; counter = counter->getNext()) {
+	for (HwmonCounter * counter = static_cast<HwmonCounter *>(next); counter != NULL; counter = static_cast<HwmonCounter *>(counter->getNext())) {
 		if (strcmp(label, counter->getLabel()) == 0 && strcmp(title, counter->getTitle()) == 0) {
 			duplicate = true;
 			counter->duplicate = true;
@@ -165,16 +139,15 @@ HwmonCounter::HwmonCounter(HwmonCounter *next, const sensors_chip_name *chip, co
 
 HwmonCounter::~HwmonCounter() {
 	free((void *)label);
-	delete [] name;
 }
 
-double HwmonCounter::read() {
+int64_t HwmonCounter::read() {
 	double value;
 	double result;
 	const sensors_subfeature *subfeature;
 
-	// Keep in sync with canRead
-	subfeature = sensors_get_subfeature(chip, feature, input);
+	// Keep in sync with the read check in HwmonDriver::readEvents
+	subfeature = sensors_get_subfeature(chip, feature, getInput(feature->type));
 	if (!subfeature) {
 		logg->logError(__FILE__, __LINE__, "No input value for hwmon sensor %s", label);
 		handleException();
@@ -191,46 +164,14 @@ double HwmonCounter::read() {
 	return result;
 }
 
-bool HwmonCounter::canRead() {
-	if (!polled) {
-		double value;
-		const sensors_subfeature *subfeature;
-		bool result = true;
-
-		subfeature = sensors_get_subfeature(chip, feature, input);
-		if (!subfeature) {
-			result = false;
-		} else {
-			result = sensors_get_value(chip, subfeature->number, &value) == 0;
-		}
-
-		polled = true;
-		readable = result;
-	}
-
-	enabled &= readable;
-
-	return readable;
+HwmonDriver::HwmonDriver() {
 }
 
-Hwmon::Hwmon() : counters(NULL) {
-}
-
-Hwmon::~Hwmon() {
-	while (counters != NULL) {
-		HwmonCounter * counter = counters;
-		counters = counter->getNext();
-		delete counter;
-	}
+HwmonDriver::~HwmonDriver() {
 	sensors_cleanup();
 }
 
-void Hwmon::setup() {
-	// hwmon does not currently work with perf
-	if (gSessionData->perf.isSetup()) {
-		return;
-	}
-
+void HwmonDriver::readEvents(mxml_node_t *const) {
 	int err = sensors_init(NULL);
 	if (err) {
 		logg->logMessage("Failed to initialize libsensors! (%d)", err);
@@ -244,73 +185,34 @@ void Hwmon::setup() {
 		int feature_nr = 0;
 		const sensors_feature *feature;
 		while ((feature = sensors_get_features(chip, &feature_nr))) {
-			counters = new HwmonCounter(counters, chip, feature);
+			// Keep in sync with HwmonCounter::read
+			// Can this counter be read?
+			double value;
+			const sensors_subfeature *const subfeature = sensors_get_subfeature(chip, feature, getInput(feature->type));
+			if ((subfeature == NULL) || (sensors_get_value(chip, subfeature->number, &value) != 0)) {
+				continue;
+			}
+
+			// Get the name of the counter
+			int len = sensors_snprintf_chip_name(NULL, 0, chip) + 1;
+			char *chip_name = new char[len];
+			sensors_snprintf_chip_name(chip_name, len, chip);
+			len = snprintf(NULL, 0, "hwmon_%s_%d_%d", chip_name, chip_nr, feature->number) + 1;
+			char *const name = new char[len];
+			snprintf(name, len, "hwmon_%s_%d_%d", chip_name, chip_nr, feature->number);
+			delete [] chip_name;
+
+			setCounters(new HwmonCounter(getCounters(), name, chip, feature));
 		}
 	}
 }
 
-HwmonCounter *Hwmon::findCounter(const Counter &counter) const {
-	for (HwmonCounter * hwmonCounter = counters; hwmonCounter != NULL; hwmonCounter = hwmonCounter->getNext()) {
-		if (hwmonCounter->canRead() && strcmp(hwmonCounter->getName(), counter.getType()) == 0) {
-			return hwmonCounter;
-		}
-	}
-
-	return NULL;
-}
-
-bool Hwmon::claimCounter(const Counter &counter) const {
-	return findCounter(counter) != NULL;
-}
-
-bool Hwmon::countersEnabled() const {
-	for (HwmonCounter * counter = counters; counter != NULL; counter = counter->getNext()) {
-		if (counter->isEnabled()) {
-			return true;
-		}
-	}
-	return false;
-}
-
-void Hwmon::resetCounters() {
-	for (HwmonCounter * counter = counters; counter != NULL; counter = counter->getNext()) {
-		counter->setEnabled(false);
-	}
-}
-
-void Hwmon::setupCounter(Counter &counter) {
-	HwmonCounter *const hwmonCounter = findCounter(counter);
-	if (hwmonCounter == NULL) {
-		counter.setEnabled(false);
-		return;
-	}
-	hwmonCounter->setEnabled(true);
-	counter.setKey(hwmonCounter->getKey());
-}
-
-int Hwmon::writeCounters(mxml_node_t *root) const {
-	int count = 0;
-	for (HwmonCounter * counter = counters; counter != NULL; counter = counter->getNext()) {
-		if (!counter->canRead()) {
-			continue;
-		}
-		mxml_node_t *node = mxmlNewElement(root, "counter");
-		mxmlElementSetAttr(node, "name", counter->getName());
-		++count;
-	}
-
-	return count;
-}
-
-void Hwmon::writeEvents(mxml_node_t *root) const {
+void HwmonDriver::writeEvents(mxml_node_t *root) const {
 	root = mxmlNewElement(root, "category");
 	mxmlElementSetAttr(root, "name", "hwmon");
 
 	char buf[1024];
-	for (HwmonCounter * counter = counters; counter != NULL; counter = counter->getNext()) {
-		if (!counter->canRead()) {
-			continue;
-		}
+	for (HwmonCounter *counter = static_cast<HwmonCounter *>(getCounters()); counter != NULL; counter = static_cast<HwmonCounter *>(counter->getNext())) {
 		mxml_node_t *node = mxmlNewElement(root, "event");
 		mxmlElementSetAttr(node, "counter", counter->getName());
 		mxmlElementSetAttr(node, "title", counter->getTitle());
@@ -333,20 +235,11 @@ void Hwmon::writeEvents(mxml_node_t *root) const {
 	}
 }
 
-void Hwmon::start() {
-	for (HwmonCounter * counter = counters; counter != NULL; counter = counter->getNext()) {
+void HwmonDriver::start() {
+	for (DriverCounter *counter = getCounters(); counter != NULL; counter = counter->getNext()) {
 		if (!counter->isEnabled()) {
 			continue;
 		}
 		counter->read();
 	}
 }
-
-void Hwmon::read(Buffer * const buffer) {
-	for (HwmonCounter * counter = counters; counter != NULL; counter = counter->getNext()) {
-		if (!counter->isEnabled()) {
-			continue;
-		}
-		buffer->event(counter->getKey(), counter->read());
-	}
-}
diff --git a/tools/gator/daemon/HwmonDriver.h b/tools/gator/daemon/HwmonDriver.h
new file mode 100644
index 000000000000..f28d825e3b7b
--- /dev/null
+++ b/tools/gator/daemon/HwmonDriver.h
@@ -0,0 +1,31 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef HWMONDRIVER_H
+#define HWMONDRIVER_H
+
+#include "Driver.h"
+
+class HwmonDriver : public PolledDriver {
+public:
+	HwmonDriver();
+	~HwmonDriver();
+
+	void readEvents(mxml_node_t *const root);
+
+	void writeEvents(mxml_node_t *root) const;
+
+	void start();
+
+private:
+	// Intentionally unimplemented
+	HwmonDriver(const HwmonDriver &);
+	HwmonDriver &operator=(const HwmonDriver &);
+};
+
+#endif // HWMONDRIVER_H
diff --git a/tools/gator/daemon/KMod.cpp b/tools/gator/daemon/KMod.cpp
index 73e123d2f14e..fe9dc6a7e4f7 100644
--- a/tools/gator/daemon/KMod.cpp
+++ b/tools/gator/daemon/KMod.cpp
@@ -16,6 +16,7 @@
 #include "Counter.h"
 #include "DriverSource.h"
 #include "Logging.h"
+#include "SessionData.h"
 
 // Claim all the counters in /dev/gator/events
 bool KMod::claimCounter(const Counter &counter) const {
@@ -46,11 +47,19 @@ void KMod::resetCounters() {
 	}
 }
 
+static const char ARM_MALI_MIDGARD[] = "ARM_Mali-Midgard_";
+static const char ARM_MALI_T[] = "ARM_Mali-T";
+
 void KMod::setupCounter(Counter &counter) {
 	char base[128];
 	char text[128];
 	snprintf(base, sizeof(base), "/dev/gator/events/%s", counter.getType());
 
+	if ((strncmp(counter.getType(), ARM_MALI_MIDGARD, sizeof(ARM_MALI_MIDGARD) - 1) == 0 ||
+	     strncmp(counter.getType(), ARM_MALI_T, sizeof(ARM_MALI_T) - 1) == 0)) {
+		mIsMaliCapture = true;
+	}
+
 	snprintf(text, sizeof(text), "%s/enabled", base);
 	int enabled = true;
 	if (DriverSource::writeReadDriver(text, &enabled) || !enabled) {
diff --git a/tools/gator/daemon/KMod.h b/tools/gator/daemon/KMod.h
index fb7fc8a8f9c6..900a60e87d24 100644
--- a/tools/gator/daemon/KMod.h
+++ b/tools/gator/daemon/KMod.h
@@ -14,7 +14,7 @@
 // Driver for the gator kernel module
 class KMod : public Driver {
 public:
-	KMod() {}
+	KMod() : mIsMaliCapture(false) {}
 	~KMod() {}
 
 	bool claimCounter(const Counter &counter) const;
@@ -22,6 +22,11 @@ class KMod : public Driver {
 	void setupCounter(Counter &counter);
 
 	int writeCounters(mxml_node_t *root) const;
+
+	bool isMaliCapture() const { return mIsMaliCapture; }
+
+private:
+	bool mIsMaliCapture;
 };
 
 #endif // KMOD_H
diff --git a/tools/gator/daemon/LocalCapture.h b/tools/gator/daemon/LocalCapture.h
index b1e7219795cf..25d281f8328b 100644
--- a/tools/gator/daemon/LocalCapture.h
+++ b/tools/gator/daemon/LocalCapture.h
@@ -6,8 +6,8 @@
  * published by the Free Software Foundation.
  */
 
-#ifndef	__LOCAL_CAPTURE_H__
-#define	__LOCAL_CAPTURE_H__
+#ifndef __LOCAL_CAPTURE_H__
+#define __LOCAL_CAPTURE_H__
 
 struct ImageLinkList;
 
diff --git a/tools/gator/daemon/Logging.cpp b/tools/gator/daemon/Logging.cpp
index b8d3178950d6..41ffa1a45151 100644
--- a/tools/gator/daemon/Logging.cpp
+++ b/tools/gator/daemon/Logging.cpp
@@ -14,15 +14,15 @@
 #include <string.h>
 
 #ifdef WIN32
-#define MUTEX_INIT()	mLoggingMutex = CreateMutex(NULL, false, NULL);
-#define MUTEX_LOCK()	WaitForSingleObject(mLoggingMutex, 0xFFFFFFFF);
-#define MUTEX_UNLOCK()	ReleaseMutex(mLoggingMutex);
-#define snprintf		_snprintf
+#define MUTEX_INIT()    mLoggingMutex = CreateMutex(NULL, false, NULL);
+#define MUTEX_LOCK()    WaitForSingleObject(mLoggingMutex, 0xFFFFFFFF);
+#define MUTEX_UNLOCK()  ReleaseMutex(mLoggingMutex);
+#define snprintf _snprintf
 #else
 #include <pthread.h>
-#define MUTEX_INIT()	pthread_mutex_init(&mLoggingMutex, NULL)
-#define MUTEX_LOCK()	pthread_mutex_lock(&mLoggingMutex)
-#define MUTEX_UNLOCK()	pthread_mutex_unlock(&mLoggingMutex)
+#define MUTEX_INIT()    pthread_mutex_init(&mLoggingMutex, NULL)
+#define MUTEX_LOCK()    pthread_mutex_lock(&mLoggingMutex)
+#define MUTEX_UNLOCK()  pthread_mutex_unlock(&mLoggingMutex)
 #endif
 
 // Global thread-safe logging
@@ -40,7 +40,7 @@ Logging::~Logging() {
 }
 
 void Logging::logError(const char* file, int line, const char* fmt, ...) {
-	va_list	args;
+	va_list args;
 
 	MUTEX_LOCK();
 	if (mDebug) {
@@ -61,7 +61,7 @@ void Logging::logError(const char* file, int line, const char* fmt, ...) {
 
 void Logging::logMessage(const char* fmt, ...) {
 	if (mDebug) {
-		va_list	args;
+		va_list args;
 
 		MUTEX_LOCK();
 		strcpy(mLogBuf, "INFO: ");
diff --git a/tools/gator/daemon/Logging.h b/tools/gator/daemon/Logging.h
index 4934bb079754..09e93ff13f7a 100644
--- a/tools/gator/daemon/Logging.h
+++ b/tools/gator/daemon/Logging.h
@@ -6,8 +6,8 @@
  * published by the Free Software Foundation.
  */
 
-#ifndef	__LOGGING_H__
-#define	__LOGGING_H__
+#ifndef __LOGGING_H__
+#define __LOGGING_H__
 
 #include <pthread.h>
 
@@ -23,10 +23,10 @@ class Logging {
 	char* getLastMessage() {return mLogBuf;}
 
 private:
-	char	mErrBuf[4096]; // Arbitrarily large buffer to hold a string
-	char	mLogBuf[4096]; // Arbitrarily large buffer to hold a string
-	bool	mDebug;
-	pthread_mutex_t	mLoggingMutex;
+	char mErrBuf[4096]; // Arbitrarily large buffer to hold a string
+	char mLogBuf[4096]; // Arbitrarily large buffer to hold a string
+	bool mDebug;
+	pthread_mutex_t mLoggingMutex;
 };
 
 extern Logging* logg;
diff --git a/tools/gator/daemon/Makefile b/tools/gator/daemon/Makefile
index 2ed49fdb688b..27531b438b63 100644
--- a/tools/gator/daemon/Makefile
+++ b/tools/gator/daemon/Makefile
@@ -11,11 +11,8 @@
 CC = $(CROSS_COMPILE)gcc
 CXX = $(CROSS_COMPILE)g++
 
-# -mthumb-interwork is required for interworking to ARM or Thumb stdlibc
-CPPFLAGS += -mthumb-interwork
-
 ifeq ($(SOFTFLOAT),1)
-	CPPFLAGS += -marm -march=armv4t -mfloat-abi=soft
+	CPPFLAGS += -marm -mthumb-interwork -march=armv4t -mfloat-abi=soft
 	LDFLAGS += -marm -march=armv4t -mfloat-abi=soft
 endif
 ifneq ($(SYSROOT),)
diff --git a/tools/gator/daemon/MaliVideoDriver.cpp b/tools/gator/daemon/MaliVideoDriver.cpp
index 18b413b01a37..5eef2643ab15 100644
--- a/tools/gator/daemon/MaliVideoDriver.cpp
+++ b/tools/gator/daemon/MaliVideoDriver.cpp
@@ -34,51 +34,30 @@ static const char COUNTER[] = "ARM_Mali-V500_cnt";
 static const char EVENT[] = "ARM_Mali-V500_evn";
 static const char ACTIVITY[] = "ARM_Mali-V500_act";
 
-class MaliVideoCounter {
+class MaliVideoCounter : public DriverCounter {
 public:
-	MaliVideoCounter(MaliVideoCounter *next, const char *name, const MaliVideoCounterType type, const int id) : mNext(next), mName(name), mType(type), mId(id), mKey(getEventKey()), mEnabled(false) {
+	MaliVideoCounter(DriverCounter *next, const char *name, const MaliVideoCounterType type, const int id) : DriverCounter(next, name), mType(type), mId(id) {
 	}
 
 	~MaliVideoCounter() {
-		delete mName;
 	}
 
-	MaliVideoCounter *getNext() const { return mNext; }
-	const char *getName() const { return mName; }
 	MaliVideoCounterType getType() const { return mType; }
 	int getId() const { return mId; }
-	int getKey() const { return mKey; }
-	bool isEnabled() const { return mEnabled; }
-	void setEnabled(const bool enabled) { mEnabled = enabled; }
 
 private:
-	MaliVideoCounter *const mNext;
-	const char *const mName;
 	const MaliVideoCounterType mType;
 	// Mali Video id
 	const int mId;
-	// Streamline key
-	const int mKey;
-	bool mEnabled;
 };
 
-MaliVideoDriver::MaliVideoDriver() : mCounters(NULL), mActivityCount(0) {
+MaliVideoDriver::MaliVideoDriver() {
 }
 
 MaliVideoDriver::~MaliVideoDriver() {
-	while (mCounters != NULL) {
-		MaliVideoCounter *counter = mCounters;
-		mCounters = counter->getNext();
-		delete counter;
-	}
 }
 
-void MaliVideoDriver::setup(mxml_node_t *const xml) {
-	// hwmon does not currently work with perf
-	if (gSessionData->perf.isSetup()) {
-		return;
-	}
-
+void MaliVideoDriver::readEvents(mxml_node_t *const xml) {
 	mxml_node_t *node = xml;
 	while (true) {
 		node = mxmlFindElement(node, xml, "event", NULL, NULL, MXML_DESCEND);
@@ -90,95 +69,55 @@ void MaliVideoDriver::setup(mxml_node_t *const xml) {
 			// Ignore
 		} else if (strncmp(counter, COUNTER, sizeof(COUNTER) - 1) == 0) {
 			const int i = strtol(counter + sizeof(COUNTER) - 1, NULL, 10);
-			mCounters = new MaliVideoCounter(mCounters, strdup(counter), MVCT_COUNTER, i);
+			setCounters(new MaliVideoCounter(getCounters(), strdup(counter), MVCT_COUNTER, i));
 		} else if (strncmp(counter, EVENT, sizeof(EVENT) - 1) == 0) {
 			const int i = strtol(counter + sizeof(EVENT) - 1, NULL, 10);
-			mCounters = new MaliVideoCounter(mCounters, strdup(counter), MVCT_EVENT, i);
-		} else if (strcmp(counter, ACTIVITY) == 0) {
-			mCounters = new MaliVideoCounter(mCounters, strdup(ACTIVITY), MVCT_ACTIVITY, 0);
-			mActivityCount = 0;
-			while (true) {
-				char buf[32];
-				snprintf(buf, sizeof(buf), "activity%i", mActivityCount + 1);
-				if (mxmlElementGetAttr(node, buf) == NULL) {
-					break;
-				}
-				++mActivityCount;
-			}
+			setCounters(new MaliVideoCounter(getCounters(), strdup(counter), MVCT_EVENT, i));
+		} else if (strncmp(counter, ACTIVITY, sizeof(ACTIVITY) - 1) == 0) {
+			const int i = strtol(counter + sizeof(ACTIVITY) - 1, NULL, 10);
+			setCounters(new MaliVideoCounter(getCounters(), strdup(counter), MVCT_ACTIVITY, i));
 		}
 	}
 }
 
-MaliVideoCounter *MaliVideoDriver::findCounter(const Counter &counter) const {
-	for (MaliVideoCounter *maliVideoCounter = mCounters; maliVideoCounter != NULL; maliVideoCounter = maliVideoCounter->getNext()) {
-		if (strcmp(maliVideoCounter->getName(), counter.getType()) == 0) {
-			return maliVideoCounter;
-		}
-	}
-
-	return NULL;
-}
-
-bool MaliVideoDriver::claimCounter(const Counter &counter) const {
-	return findCounter(counter) != NULL;
-}
-
-bool MaliVideoDriver::countersEnabled() const {
-	for (MaliVideoCounter * counter = mCounters; counter != NULL; counter = counter->getNext()) {
-		if (counter->isEnabled()) {
-			return true;
-		}
-	}
-	return false;
-}
-
-void MaliVideoDriver::resetCounters() {
-	for (MaliVideoCounter * counter = mCounters; counter != NULL; counter = counter->getNext()) {
-		counter->setEnabled(false);
-	}
-}
-
-void MaliVideoDriver::setupCounter(Counter &counter) {
-	MaliVideoCounter *const maliVideoCounter = findCounter(counter);
-	if (maliVideoCounter == NULL) {
-		counter.setEnabled(false);
-		return;
-	}
-	maliVideoCounter->setEnabled(true);
-	counter.setKey(maliVideoCounter->getKey());
-}
-
 int MaliVideoDriver::writeCounters(mxml_node_t *root) const {
 	if (access("/dev/mv500", F_OK) != 0) {
 		return 0;
 	}
 
-	int count = 0;
-	for (MaliVideoCounter * counter = mCounters; counter != NULL; counter = counter->getNext()) {
-		mxml_node_t *node = mxmlNewElement(root, "counter");
-		mxmlElementSetAttr(node, "name", counter->getName());
-		++count;
-	}
-
-	return count;
+	return super::writeCounters(root);
 }
 
 void MaliVideoDriver::marshalEnable(const MaliVideoCounterType type, char *const buf, const size_t bufsize, int &pos) {
 	// size
 	int numEnabled = 0;
-	for (MaliVideoCounter * counter = mCounters; counter != NULL; counter = counter->getNext()) {
+	for (MaliVideoCounter *counter = static_cast<MaliVideoCounter *>(getCounters()); counter != NULL; counter = static_cast<MaliVideoCounter *>(counter->getNext())) {
 		if (counter->isEnabled() && (counter->getType() == type)) {
 			++numEnabled;
 		}
 	}
 	Buffer::packInt(buf, bufsize, pos, numEnabled*sizeof(uint32_t));
-	for (MaliVideoCounter * counter = mCounters; counter != NULL; counter = counter->getNext()) {
+	for (MaliVideoCounter *counter = static_cast<MaliVideoCounter *>(getCounters()); counter != NULL; counter = static_cast<MaliVideoCounter *>(counter->getNext())) {
 		if (counter->isEnabled() && (counter->getType() == type)) {
 			Buffer::packInt(buf, bufsize, pos, counter->getId());
 		}
 	}
 }
 
+static bool writeAll(const int mveUds, const char *const buf, const int pos) { // Write the first pos bytes of buf to mveUds, retrying after short writes; returns false if a write fails
+	int written = 0;
+	while (written < pos) {
+		const ssize_t bytes = ::write(mveUds, buf + written, pos - written); // signed: write() returns -1 on error, which size_t would hide from the check below
+		if (bytes <= 0) {
+			logg->logMessage("%s(%s:%i): write failed", __FUNCTION__, __FILE__, __LINE__);
+			return false;
+		}
+		written += bytes;
+	}
+
+	return true;
+}
+
 bool MaliVideoDriver::start(const int mveUds) {
 	char buf[256];
 	int pos = 0;
@@ -225,29 +164,28 @@ bool MaliVideoDriver::start(const int mveUds) {
 	buf[pos++] = 'e';
 	marshalEnable(MVCT_EVENT, buf, sizeof(buf), pos);
 
-	/*
 	// code - MVE_INSTR_ENABLE_ACTIVITIES
 	buf[pos++] = 'C';
 	buf[pos++] = 'F';
 	buf[pos++] = 'G';
 	buf[pos++] = 'a';
-	// size
-	Buffer::packInt(buf, sizeof(buf), pos, mActivityCount*sizeof(uint32_t));
-	for (int i = 0; i < mActivityCount; ++i) {
-		// activity_id
-		Buffer::packInt(buf, sizeof(buf), pos, i);
-	}
-	*/
+	marshalEnable(MVCT_ACTIVITY, buf, sizeof(buf), pos);
 
-	int written = 0;
-	while (written < pos) {
-		size_t bytes = ::write(mveUds, buf + written, pos - written);
-		if (bytes <= 0) {
-			logg->logMessage("%s(%s:%i): write failed", __FUNCTION__, __FILE__, __LINE__);
-			return false;
-		}
-		written += bytes;
-	}
-
-	return true;
+	return writeAll(mveUds, buf, pos);
+}
+
+void MaliVideoDriver::stop(const int mveUds) { // Send the STOP instruction over mveUds, then close the descriptor
+	char buf[8]; // NOTE(review): only 4 bytes remain after the opcode, yet marshalEnable packs a size plus one id per enabled MVCT_COUNTER counter - confirm Buffer::packInt bounds-checks against bufsize
+	int pos = 0;
+
+	// code - MVE_INSTR_STOP
+	buf[pos++] = 'S';
+	buf[pos++] = 'T';
+	buf[pos++] = 'O';
+	buf[pos++] = 'P';
+	marshalEnable(MVCT_COUNTER, buf, sizeof(buf), pos); // appends the byte size and the ids of the enabled counters of type MVCT_COUNTER
+
+	writeAll(mveUds, buf, pos); // result ignored: the descriptor is closed below either way
+
+	close(mveUds);
+}
diff --git a/tools/gator/daemon/MaliVideoDriver.h b/tools/gator/daemon/MaliVideoDriver.h
index 00cb80889a74..204a57a447ac 100644
--- a/tools/gator/daemon/MaliVideoDriver.h
+++ b/tools/gator/daemon/MaliVideoDriver.h
@@ -19,29 +19,24 @@ enum MaliVideoCounterType {
 	MVCT_ACTIVITY,
 };
 
-class MaliVideoDriver : public Driver {
+class MaliVideoDriver : public SimpleDriver {
+private:
+	typedef SimpleDriver super;
+
 public:
 	MaliVideoDriver();
 	~MaliVideoDriver();
 
-	void setup(mxml_node_t *const xml);
-
-	bool claimCounter(const Counter &counter) const;
-	bool countersEnabled() const;
-	void resetCounters();
-	void setupCounter(Counter &counter);
+	void readEvents(mxml_node_t *const root);
 
 	int writeCounters(mxml_node_t *root) const;
 
 	bool start(const int mveUds);
+	void stop(const int mveUds);
 
 private:
-	MaliVideoCounter *findCounter(const Counter &counter) const;
 	void marshalEnable(const MaliVideoCounterType type, char *const buf, const size_t bufsize, int &pos);
 
-	MaliVideoCounter *mCounters;
-	int mActivityCount;
-
 	// Intentionally unimplemented
 	MaliVideoDriver(const MaliVideoDriver &);
 	MaliVideoDriver &operator=(const MaliVideoDriver &);
diff --git a/tools/gator/daemon/MemInfoDriver.cpp b/tools/gator/daemon/MemInfoDriver.cpp
new file mode 100644
index 000000000000..cce15c16fcdc
--- /dev/null
+++ b/tools/gator/daemon/MemInfoDriver.cpp
@@ -0,0 +1,93 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "MemInfoDriver.h"
+
+#include "Logging.h"
+#include "SessionData.h"
+
+class MemInfoCounter : public DriverCounter {
+public:
+	MemInfoCounter(DriverCounter *next, char *const name, int64_t *const value);
+	~MemInfoCounter();
+
+	int64_t read();
+
+private:
+	int64_t *const mValue;
+
+	// Intentionally unimplemented
+	MemInfoCounter(const MemInfoCounter &);
+	MemInfoCounter &operator=(const MemInfoCounter &);
+};
+
+MemInfoCounter::MemInfoCounter(DriverCounter *next, char *const name, int64_t *const value) : DriverCounter(next, name), mValue(value) {
+}
+
+MemInfoCounter::~MemInfoCounter() {
+}
+
+int64_t MemInfoCounter::read() {
+	return *mValue;
+}
+
+MemInfoDriver::MemInfoDriver() : mBuf(), mMemUsed(0), mMemFree(0), mBuffers(0) {
+}
+
+MemInfoDriver::~MemInfoDriver() {
+}
+
+void MemInfoDriver::readEvents(mxml_node_t *const) {
+	// Only for use with perf
+	if (!gSessionData->perf.isSetup()) {
+		return;
+	}
+
+	setCounters(new MemInfoCounter(getCounters(), strdup("Linux_meminfo_memused2"), &mMemUsed));
+	setCounters(new MemInfoCounter(getCounters(), strdup("Linux_meminfo_memfree"), &mMemFree));
+	setCounters(new MemInfoCounter(getCounters(), strdup("Linux_meminfo_bufferram"), &mBuffers));
+}
+
+void MemInfoDriver::read(Buffer *const buffer) { // Refresh the memory values from /proc/meminfo, then let the base class emit the counters into buffer
+	if (!countersEnabled()) {
+		return;
+	}
+
+	if (!mBuf.read("/proc/meminfo")) {
+		logg->logError(__FILE__, __LINE__, "Failed to read /proc/meminfo");
+		handleException();
+	}
+
+	char *key = mBuf.getBuf(); // start of the current "Key: value" line
+	char *colon;
+	int64_t memTotal = 0;
+	while ((colon = strchr(key, ':')) != NULL) {
+		char *end = strchr(colon + 1, '\n');
+		if (end != NULL) {
+			*end = '\0'; // terminate the value in place
+		}
+		*colon = '\0'; // terminate the key in place so it can be compared with strcmp
+
+		if (strcmp(key, "MemTotal") == 0) {
+			memTotal = strtoll(colon + 1, NULL, 10) << 10; // multiply by 1024 (presumably kB to bytes - confirm the unit)
+		} else if (strcmp(key, "MemFree") == 0) {
+			mMemFree = strtoll(colon + 1, NULL, 10) << 10;
+		} else if (strcmp(key, "Buffers") == 0) {
+			mBuffers = strtoll(colon + 1, NULL, 10) << 10;
+		}
+
+		if (end == NULL) {
+			break; // last line had no trailing newline
+		}
+		key = end + 1;
+	}
+
+	mMemUsed = memTotal - mMemFree; // derived value; mMemUsed is not read from the file directly
+
+	super::read(buffer);
+}
diff --git a/tools/gator/daemon/MemInfoDriver.h b/tools/gator/daemon/MemInfoDriver.h
new file mode 100644
index 000000000000..eb1b0417f309
--- /dev/null
+++ b/tools/gator/daemon/MemInfoDriver.h
@@ -0,0 +1,37 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MEMINFODRIVER_H
+#define MEMINFODRIVER_H
+
+#include "Driver.h"
+#include "DynBuf.h"
+
+class MemInfoDriver : public PolledDriver {
+private:
+	typedef PolledDriver super;
+
+public:
+	MemInfoDriver();
+	~MemInfoDriver();
+
+	void readEvents(mxml_node_t *const root);
+	void read(Buffer *const buffer);
+
+private:
+	DynBuf mBuf;
+	int64_t mMemUsed;
+	int64_t mMemFree;
+	int64_t mBuffers;
+
+	// Intentionally unimplemented
+	MemInfoDriver(const MemInfoDriver &);
+	MemInfoDriver &operator=(const MemInfoDriver &);
+};
+
+#endif // MEMINFODRIVER_H
diff --git a/tools/gator/daemon/Monitor.cpp b/tools/gator/daemon/Monitor.cpp
index b34a15f0eb0c..74f22ee29fec 100644
--- a/tools/gator/daemon/Monitor.cpp
+++ b/tools/gator/daemon/Monitor.cpp
@@ -9,6 +9,7 @@
 #include "Monitor.h"
 
 #include <errno.h>
+#include <fcntl.h>
 #include <string.h>
 #include <unistd.h>
 
@@ -31,12 +32,25 @@ void Monitor::close() {
 }
 
 bool Monitor::init() {
+#ifdef EPOLL_CLOEXEC
+	mFd = epoll_create1(EPOLL_CLOEXEC); // close-on-exec is requested when the descriptor is created
+#else
 	mFd = epoll_create(16);
+#endif
 	if (mFd < 0) {
 		logg->logMessage("%s(%s:%i): epoll_create1 failed", __FUNCTION__, __FILE__, __LINE__);
 		return false;
 	}
 
+#ifndef EPOLL_CLOEXEC
+	int fdf = fcntl(mFd, F_GETFD); // no EPOLL_CLOEXEC available: set FD_CLOEXEC by hand
+	if ((fdf == -1) || (fcntl(mFd, F_SETFD, fdf | FD_CLOEXEC) != 0)) {
+		logg->logMessage("%s(%s:%i): fcntl failed", __FUNCTION__, __FILE__, __LINE__);
+		::close(mFd); // NOTE(review): mFd still holds the closed descriptor - confirm Monitor::close() tolerates that
+		return false; // was "return -1", which converts to true in a bool function and reported success
+	}
+#endif
+
 	return true;
 }
 
diff --git a/tools/gator/daemon/NetDriver.cpp b/tools/gator/daemon/NetDriver.cpp
new file mode 100644
index 000000000000..e75c0695779a
--- /dev/null
+++ b/tools/gator/daemon/NetDriver.cpp
@@ -0,0 +1,129 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+// Define to get format macros from inttypes.h
+#define __STDC_FORMAT_MACROS
+
+#include "NetDriver.h"
+
+#include <inttypes.h>
+
+#include "Logging.h"
+#include "SessionData.h"
+
+class NetCounter : public DriverCounter {
+public:
+	NetCounter(DriverCounter *next, char *const name, int64_t *const value);
+	~NetCounter();
+
+	int64_t read();
+
+private:
+	int64_t *const mValue;
+	int64_t mPrev;
+
+	// Intentionally unimplemented
+	NetCounter(const NetCounter &);
+	NetCounter &operator=(const NetCounter &);
+};
+
+NetCounter::NetCounter(DriverCounter *next, char *const name, int64_t *const value) : DriverCounter(next, name), mValue(value), mPrev(0) {
+}
+
+NetCounter::~NetCounter() {
+}
+
+int64_t NetCounter::read() {
+	int64_t result = *mValue - mPrev;
+	mPrev = *mValue;
+	return result;
+}
+
+NetDriver::NetDriver() : mBuf(), mReceiveBytes(0), mTransmitBytes(0) {
+}
+
+NetDriver::~NetDriver() {
+}
+
+void NetDriver::readEvents(mxml_node_t *const) {
+	// Only for use with perf
+	if (!gSessionData->perf.isSetup()) {
+		return;
+	}
+
+	setCounters(new NetCounter(getCounters(), strdup("Linux_net_rx"), &mReceiveBytes));
+	setCounters(new NetCounter(getCounters(), strdup("Linux_net_tx"), &mTransmitBytes));
+}
+
+bool NetDriver::doRead() { // Refresh mReceiveBytes/mTransmitBytes from /proc/net/dev; returns false if the file cannot be read or parsed
+	if (!countersEnabled()) {
+		return true;
+	}
+
+	if (!mBuf.read("/proc/net/dev")) {
+		return false;
+	}
+
+	// Skip the header (everything up to and including the second newline)
+	char *key;
+	if (((key = strchr(mBuf.getBuf(), '\n')) == NULL) ||
+			((key = strchr(key + 1, '\n')) == NULL)) {
+		return false;
+	}
+	key = key + 1;
+
+	mReceiveBytes = 0;
+	mTransmitBytes = 0;
+
+	char *colon;
+	while ((colon = strchr(key, ':')) != NULL) {
+		char *end = strchr(colon + 1, '\n');
+		if (end != NULL) {
+			*end = '\0';
+		}
+		*colon = '\0';
+
+		uint64_t receiveBytes; // unsigned so the pointer types match the SCNu64 conversions below
+		uint64_t transmitBytes;
+		const int count = sscanf(colon + 1, " %" SCNu64 " %*u %*u %*u %*u %*u %*u %*u %" SCNu64, &receiveBytes, &transmitBytes); // keeps the 1st and 9th numeric fields
+		if (count != 2) {
+			return false;
+		}
+		mReceiveBytes += static_cast<int64_t>(receiveBytes); // totals are summed over every line after the header
+		mTransmitBytes += static_cast<int64_t>(transmitBytes);
+
+		if (end == NULL) {
+			break;
+		}
+		key = end + 1;
+	}
+
+	return true;
+}
+
+void NetDriver::start() {
+	if (!doRead()) {
+		logg->logError(__FILE__, __LINE__, "Unable to read network stats");
+		handleException();
+	}
+	// Initialize previous values
+	for (DriverCounter *counter = getCounters(); counter != NULL; counter = counter->getNext()) {
+		if (!counter->isEnabled()) {
+			continue;
+		}
+		counter->read();
+	}
+}
+
+void NetDriver::read(Buffer *const buffer) {
+	if (!doRead()) {
+		logg->logError(__FILE__, __LINE__, "Unable to read network stats");
+		handleException();
+	}
+	super::read(buffer);
+}
diff --git a/tools/gator/daemon/NetDriver.h b/tools/gator/daemon/NetDriver.h
new file mode 100644
index 000000000000..50ff850bfc6d
--- /dev/null
+++ b/tools/gator/daemon/NetDriver.h
@@ -0,0 +1,39 @@
+/**
+ * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef NETDRIVER_H
+#define NETDRIVER_H
+
+#include "Driver.h"
+#include "DynBuf.h"
+
+class NetDriver : public PolledDriver {
+private:
+	typedef PolledDriver super;
+
+public:
+	NetDriver();
+	~NetDriver();
+
+	void readEvents(mxml_node_t *const root);
+	void start();
+	void read(Buffer *const buffer);
+
+private:
+	bool doRead();
+
+	DynBuf mBuf;
+	int64_t mReceiveBytes;
+	int64_t mTransmitBytes;
+
+	// Intentionally unimplemented
+	NetDriver(const NetDriver &);
+	NetDriver &operator=(const NetDriver &);
+};
+
+#endif // NETDRIVER_H
diff --git a/tools/gator/daemon/OlySocket.cpp b/tools/gator/daemon/OlySocket.cpp
index 28774e36e510..aa0ce4929916 100644
--- a/tools/gator/daemon/OlySocket.cpp
+++ b/tools/gator/daemon/OlySocket.cpp
@@ -15,10 +15,10 @@
 #include <ws2tcpip.h>
 #else
 #include <netinet/in.h>
-#include <sys/socket.h>
 #include <sys/un.h>
 #include <unistd.h>
 #include <netdb.h>
+#include <fcntl.h>
 #endif
 
 #include "Logging.h"
@@ -32,6 +32,48 @@
 #define SHUTDOWN_RX_TX SHUT_RDWR
 #endif
 
+int socket_cloexec(int domain, int type, int protocol) {
+#ifdef SOCK_CLOEXEC
+  return socket(domain, type | SOCK_CLOEXEC, protocol);
+#else
+  int sock = socket(domain, type, protocol);
+#ifdef FD_CLOEXEC
+  if (sock < 0) {
+    return -1;
+  }
+  int fdf = fcntl(sock, F_GETFD);
+  if ((fdf == -1) || (fcntl(sock, F_SETFD, fdf | FD_CLOEXEC) != 0)) {
+    close(sock);
+    return -1;
+  }
+#endif
+  return sock;
+#endif
+}
+
+int accept_cloexec(int sockfd, struct sockaddr *addr, socklen_t *addrlen) {
+  int sock;
+#ifdef SOCK_CLOEXEC
+  sock = accept4(sockfd, addr, addrlen, SOCK_CLOEXEC);
+  if (sock >= 0) {
+    return sock;
+  }
+  // accept4 with SOCK_CLOEXEC may not work on all kernels, so fallback
+#endif
+  sock = accept(sockfd, addr, addrlen);
+#ifdef FD_CLOEXEC
+  if (sock < 0) {
+    return -1;
+  }
+  int fdf = fcntl(sock, F_GETFD);
+  if ((fdf == -1) || (fcntl(sock, F_SETFD, fdf | FD_CLOEXEC) != 0)) {
+    close(sock);
+    return -1;
+  }
+#endif
+  return sock;
+}
+
 OlyServerSocket::OlyServerSocket(int port) {
 #ifdef WIN32
   WSADATA wsaData;
@@ -57,7 +99,7 @@ OlySocket::OlySocket(int socketID) : mSocketID(socketID) {
 
 OlyServerSocket::OlyServerSocket(const char* path, const size_t pathSize) {
   // Create socket
-  mFDServer = socket(PF_UNIX, SOCK_STREAM, 0);
+  mFDServer = socket_cloexec(PF_UNIX, SOCK_STREAM, 0);
   if (mFDServer < 0) {
     logg->logError(__FILE__, __LINE__, "Error creating server socket");
     handleException();
@@ -84,7 +126,7 @@ OlyServerSocket::OlyServerSocket(const char* path, const size_t pathSize) {
 }
 
 int OlySocket::connect(const char* path, const size_t pathSize) {
-  int fd = socket(PF_UNIX, SOCK_STREAM, 0);
+  int fd = socket_cloexec(PF_UNIX, SOCK_STREAM, 0);
   if (fd < 0) {
     return -1;
   }
@@ -143,10 +185,10 @@ void OlyServerSocket::createServerSocket(int port) {
   int family = AF_INET6;
 
   // Create socket
-  mFDServer = socket(PF_INET6, SOCK_STREAM, IPPROTO_TCP);
+  mFDServer = socket_cloexec(PF_INET6, SOCK_STREAM, IPPROTO_TCP);
   if (mFDServer < 0) {
     family = AF_INET;
-    mFDServer = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
+    mFDServer = socket_cloexec(PF_INET, SOCK_STREAM, IPPROTO_TCP);
     if (mFDServer < 0) {
       logg->logError(__FILE__, __LINE__, "Error creating server socket");
       handleException();
@@ -190,7 +232,7 @@ int OlyServerSocket::acceptConnection() {
   }
 
   // Accept a connection, note that this call blocks until a client connects
-  socketID = accept(mFDServer, NULL, NULL);
+  socketID = accept_cloexec(mFDServer, NULL, NULL);
   if (socketID < 0) {
     logg->logError(__FILE__, __LINE__, "Socket acceptance failed");
     handleException();
diff --git a/tools/gator/daemon/OlySocket.h b/tools/gator/daemon/OlySocket.h
index 20c67cc695e1..6b53b01fc3ee 100644
--- a/tools/gator/daemon/OlySocket.h
+++ b/tools/gator/daemon/OlySocket.h
@@ -11,6 +11,12 @@
 
 #include <stddef.h>
 
+#ifdef WIN32
+typedef int socklen_t; // no <sys/socket.h> on this path: provide the socklen_t name used by accept_cloexec's prototype below
+#else
+#include <sys/socket.h>
+#endif
+
 class OlySocket {
 public:
 #ifndef WIN32
@@ -52,4 +58,7 @@ class OlyServerSocket {
   void createServerSocket(int port);
 };
 
+int socket_cloexec(int domain, int type, int protocol);
+int accept_cloexec(int sockfd, struct sockaddr *addr, socklen_t *addrlen);
+
 #endif //__OLY_SOCKET_H__
diff --git a/tools/gator/daemon/PerfBuffer.cpp b/tools/gator/daemon/PerfBuffer.cpp
index 5fad583f7bd0..f127c996d43b 100644
--- a/tools/gator/daemon/PerfBuffer.cpp
+++ b/tools/gator/daemon/PerfBuffer.cpp
@@ -20,6 +20,7 @@ PerfBuffer::PerfBuffer() {
 	for (int cpu = 0; cpu < ARRAY_LENGTH(mBuf); ++cpu) {
 		mBuf[cpu] = MAP_FAILED;
 		mDiscard[cpu] = false;
+		mFds[cpu] = -1;
 	}
 }
 
@@ -31,8 +32,8 @@ PerfBuffer::~PerfBuffer() {
 	}
 }
 
-bool PerfBuffer::useFd(const int cpu, const int fd, const int groupFd) {
-	if (fd == groupFd) {
+bool PerfBuffer::useFd(const int cpu, const int fd) {
+	if (mFds[cpu] < 0) {
 		if (mBuf[cpu] != MAP_FAILED) {
 			logg->logMessage("%s(%s:%i): cpu %i already online or not correctly cleaned up", __FUNCTION__, __FILE__, __LINE__, cpu);
 			return false;
@@ -44,6 +45,7 @@ bool PerfBuffer::useFd(const int cpu, const int fd, const int groupFd) {
 			logg->logMessage("%s(%s:%i): mmap failed", __FUNCTION__, __FILE__, __LINE__);
 			return false;
 		}
+		mFds[cpu] = fd;
 
 		// Check the version
 		struct perf_event_mmap_page *pemp = static_cast<struct perf_event_mmap_page *>(mBuf[cpu]);
@@ -57,7 +59,7 @@ bool PerfBuffer::useFd(const int cpu, const int fd, const int groupFd) {
 			return false;
 		}
 
-		if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, groupFd) < 0) {
+		if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, mFds[cpu]) < 0) {
 			logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__);
 			return false;
 		}
@@ -89,6 +91,41 @@ bool PerfBuffer::isEmpty() {
 	return true;
 }
 
+static void compressAndSend(const int cpu, const __u64 head, __u64 tail, const uint8_t *const b, Sender *const sender) {
+	// Repacks the perf records between tail and head of ring buffer b into FRAME_PERF frames for this cpu and hands each frame to sender. Pick a big size but something smaller than the chunkSize in Sender::writeData which is 100k
+	char buf[1<<16];
+	int writePos = 0;
+	const int typeLength = gSessionData->mLocalCapture ? 0 : 1; // the leading response-type byte is only present when not doing a local capture
+
+	while (head > tail) { // one iteration per frame sent
+		writePos = 0;
+		if (!gSessionData->mLocalCapture) {
+			buf[writePos++] = RESPONSE_APC_DATA;
+		}
+		// Reserve space for the frame size, which is only known once the frame is complete and is filled in below
+		writePos += sizeof(uint32_t);
+		Buffer::packInt(buf, sizeof(buf), writePos, FRAME_PERF);
+		Buffer::packInt(buf, sizeof(buf), writePos, cpu);
+
+		while (head > tail) { // one iteration per perf record
+			const int count = reinterpret_cast<const struct perf_event_header *>(b + (tail & BUF_MASK))->size/sizeof(uint64_t); // record length in 64-bit words, taken from its header
+			// Can this whole message be written as Streamline assumes events are not split between frames (worst case: MAXSIZE_PACK64 bytes per word)
+			if (sizeof(buf) <= writePos + count*Buffer::MAXSIZE_PACK64) {
+				break; // NOTE(review): if the first record of a frame never fits, or size is 0, tail does not advance and the loops would not terminate - confirm record sizes rule this out
+			}
+			for (int i = 0; i < count; ++i) {
+				// Pack one 64-bit word at a time; masking tail on every word keeps reads inside the ring when a record wraps
+				Buffer::packInt64(buf, sizeof(buf), writePos, *reinterpret_cast<const uint64_t *>(b + (tail & BUF_MASK)));
+				tail += sizeof(uint64_t);
+			}
+		}
+
+		// Write size into the reserved slot: bytes that follow the optional type byte and the size field itself
+		Buffer::writeLEInt(reinterpret_cast<unsigned char *>(buf + typeLength), writePos - typeLength - sizeof(uint32_t));
+		sender->writeData(buf, writePos, RESPONSE_APC_DATA);
+	}
+}
+
 bool PerfBuffer::send(Sender *const sender) {
 	for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) {
 		if (mBuf[cpu] == MAP_FAILED) {
@@ -102,26 +139,7 @@ bool PerfBuffer::send(Sender *const sender) {
 
 		if (head > tail) {
 			const uint8_t *const b = static_cast<uint8_t *>(mBuf[cpu]) + gSessionData->mPageSize;
-			const int offset = gSessionData->mLocalCapture ? 1 : 0;
-			unsigned char header[7];
-			header[0] = RESPONSE_APC_DATA;
-			Buffer::writeLEInt(header + 1, head - tail + sizeof(header) - 5);
-			// Should use real packing functions
-			header[5] = FRAME_PERF;
-			header[6] = cpu;
-
-			// Write header
-			sender->writeData(reinterpret_cast<const char *>(&header) + offset, sizeof(header) - offset, RESPONSE_APC_DATA);
-
-			// Write data
-			if ((head & ~BUF_MASK) == (tail & ~BUF_MASK)) {
-				// Not wrapped
-				sender->writeData(reinterpret_cast<const char *>(b + (tail & BUF_MASK)), head - tail, RESPONSE_APC_DATA);
-			} else {
-				// Wrapped
-				sender->writeData(reinterpret_cast<const char *>(b + (tail & BUF_MASK)), BUF_SIZE - (tail & BUF_MASK), RESPONSE_APC_DATA);
-				sender->writeData(reinterpret_cast<const char *>(b), head & BUF_MASK, RESPONSE_APC_DATA);
-			}
+			compressAndSend(cpu, head, tail, b, sender);
 
 			// Update tail with the data read
 			pemp->data_tail = head;
@@ -131,6 +149,7 @@ bool PerfBuffer::send(Sender *const sender) {
 			munmap(mBuf[cpu], gSessionData->mPageSize + BUF_SIZE);
 			mBuf[cpu] = MAP_FAILED;
 			mDiscard[cpu] = false;
+			mFds[cpu] = -1;
 			logg->logMessage("%s(%s:%i): Unmaped cpu %i", __FUNCTION__, __FILE__, __LINE__, cpu);
 		}
 	}
diff --git a/tools/gator/daemon/PerfBuffer.h b/tools/gator/daemon/PerfBuffer.h
index 278a3b9d6db7..25a10625a9e8 100644
--- a/tools/gator/daemon/PerfBuffer.h
+++ b/tools/gator/daemon/PerfBuffer.h
@@ -21,7 +21,7 @@ class PerfBuffer {
 	PerfBuffer();
 	~PerfBuffer();
 
-	bool useFd(const int cpu, const int fd, const int groupFd);
+	bool useFd(const int cpu, const int fd);
 	void discard(const int cpu);
 	bool isEmpty();
 	bool send(Sender *const sender);
@@ -30,6 +30,8 @@ class PerfBuffer {
 	void *mBuf[NR_CPUS];
 	// After the buffer is flushed it should be unmaped
 	bool mDiscard[NR_CPUS];
+	// fd that corresponds to the mBuf
+	int mFds[NR_CPUS];
 
 	// Intentionally undefined
 	PerfBuffer(const PerfBuffer &);
diff --git a/tools/gator/daemon/PerfDriver.cpp b/tools/gator/daemon/PerfDriver.cpp
index ac97a077d266..ee90284cee41 100644
--- a/tools/gator/daemon/PerfDriver.cpp
+++ b/tools/gator/daemon/PerfDriver.cpp
@@ -22,6 +22,7 @@
 #include "Logging.h"
 #include "PerfGroup.h"
 #include "SessionData.h"
+#include "Setup.h"
 
 #define PERF_DEVICES "/sys/bus/event_source/devices"
 
@@ -47,7 +48,6 @@ static const struct gator_cpu gator_cpus[] = {
 	{ 0xc07, "Cortex-A7",    "ARMv7_Cortex_A7",  4 },
 	{ 0xc08, "Cortex-A8",    "ARMv7_Cortex_A8",  4 },
 	{ 0xc09, "Cortex-A9",    "ARMv7_Cortex_A9",  6 },
-	{ 0xc0d, "Cortex-A12",   "ARMv7_Cortex_A12", 6 },
 	{ 0xc0f, "Cortex-A15",   "ARMv7_Cortex_A15", 6 },
 	{ 0xc0e, "Cortex-A17",   "ARMv7_Cortex_A17", 6 },
 	{ 0x00f, "Scorpion",     "Scorpion",         4 },
@@ -64,68 +64,57 @@ static const char OLD_PMU_PREFIX[] = "ARMv7 Cortex-";
 static const char NEW_PMU_PREFIX[] = "ARMv7_Cortex_";
 
 struct uncore_counter {
-	// gatorfs event and Perf PMU name
-	const char *const name;
+	// Perf PMU name
+	const char *const perfName;
+	// gatorfs event name
+	const char *const gatorName;
 	const int count;
 };
 
 static const struct uncore_counter uncore_counters[] = {
-	{ "CCI_400", 4 },
-	{ "CCI_400-r1", 4 },
+	{ "CCI_400", "CCI_400", 4 },
+	{ "CCI_400-r1", "CCI_400-r1", 4 },
+	{ "ccn", "ARM_CCN_5XX", 8 },
 };
 
-class PerfCounter {
+class PerfCounter : public DriverCounter {
 public:
-	PerfCounter(PerfCounter *next, const char *name, uint32_t type, uint64_t config, bool perCpu) : mNext(next), mName(name), mType(type), mCount(0), mKey(getEventKey()), mConfig(config), mEnabled(false), mPerCpu(perCpu) {}
+	PerfCounter(DriverCounter *next, const char *name, uint32_t type, uint64_t config, bool perCpu) : DriverCounter(next, name), mType(type), mCount(0), mConfig(config), mPerCpu(perCpu) {}
+
 	~PerfCounter() {
-		delete [] mName;
 	}
 
-	PerfCounter *getNext() const { return mNext; }
-	const char *getName() const { return mName; }
 	uint32_t getType() const { return mType; }
 	int getCount() const { return mCount; }
 	void setCount(const int count) { mCount = count; }
-	int getKey() const { return mKey; }
 	uint64_t getConfig() const { return mConfig; }
 	void setConfig(const uint64_t config) { mConfig = config; }
-	bool isEnabled() const { return mEnabled; }
-	void setEnabled(const bool enabled) { mEnabled = enabled; }
 	bool isPerCpu() const { return mPerCpu; }
 
 private:
-	PerfCounter *const mNext;
-	const char *const mName;
 	const uint32_t mType;
 	int mCount;
-	const int mKey;
 	uint64_t mConfig;
-	int mEnabled : 1,
-		mPerCpu : 1;
+	bool mPerCpu;
 };
 
-PerfDriver::PerfDriver() : mCounters(NULL), mIsSetup(false), mLegacySupport(false) {
+PerfDriver::PerfDriver() : mIsSetup(false), mLegacySupport(false) {
 }
 
 PerfDriver::~PerfDriver() {
-	while (mCounters != NULL) {
-		PerfCounter *counter = mCounters;
-		mCounters = counter->getNext();
-		delete counter;
-	}
 }
 
 void PerfDriver::addCpuCounters(const char *const counterName, const int type, const int numCounters) {
 	int len = snprintf(NULL, 0, "%s_ccnt", counterName) + 1;
 	char *name = new char[len];
 	snprintf(name, len, "%s_ccnt", counterName);
-	mCounters = new PerfCounter(mCounters, name, type, -1, true);
+	setCounters(new PerfCounter(getCounters(), name, type, -1, true));
 
 	for (int j = 0; j < numCounters; ++j) {
 		len = snprintf(NULL, 0, "%s_cnt%d", counterName, j) + 1;
 		name = new char[len];
 		snprintf(name, len, "%s_cnt%d", counterName, j);
-		mCounters = new PerfCounter(mCounters, name, type, -1, true);
+		setCounters(new PerfCounter(getCounters(), name, type, -1, true));
 	}
 }
 
@@ -133,40 +122,24 @@ void PerfDriver::addUncoreCounters(const char *const counterName, const int type
 	int len = snprintf(NULL, 0, "%s_ccnt", counterName) + 1;
 	char *name = new char[len];
 	snprintf(name, len, "%s_ccnt", counterName);
-	mCounters = new PerfCounter(mCounters, name, type, -1, false);
+	setCounters(new PerfCounter(getCounters(), name, type, -1, false));
 
 	for (int j = 0; j < numCounters; ++j) {
 		len = snprintf(NULL, 0, "%s_cnt%d", counterName, j) + 1;
 		name = new char[len];
 		snprintf(name, len, "%s_cnt%d", counterName, j);
-		mCounters = new PerfCounter(mCounters, name, type, -1, false);
+		setCounters(new PerfCounter(getCounters(), name, type, -1, false));
 	}
 }
 
-// From include/generated/uapi/linux/version.h
-#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
-
 bool PerfDriver::setup() {
 	// Check the kernel version
-	struct utsname utsname;
-	if (uname(&utsname) != 0) {
-		logg->logMessage("%s(%s:%i): uname failed", __FUNCTION__, __FILE__, __LINE__);
+	int release[3];
+	if (!getLinuxVersion(release)) {
+		logg->logMessage("%s(%s:%i): getLinuxVersion failed", __FUNCTION__, __FILE__, __LINE__);
 		return false;
 	}
 
-	int release[3] = { 0, 0, 0 };
-	int part = 0;
-	char *ch = utsname.release;
-	while (*ch >= '0' && *ch <= '9' && part < ARRAY_LENGTH(release)) {
-		release[part] = 10*release[part] + *ch - '0';
-
-		++ch;
-		if (*ch == '.') {
-			++part;
-			++ch;
-		}
-	}
-
 	if (KERNEL_VERSION(release[0], release[1], release[2]) < KERNEL_VERSION(3, 4, 0)) {
 		logg->logMessage("%s(%s:%i): Unsupported kernel version", __FUNCTION__, __FILE__, __LINE__);
 		return false;
@@ -174,7 +147,7 @@ bool PerfDriver::setup() {
 	mLegacySupport = KERNEL_VERSION(release[0], release[1], release[2]) < KERNEL_VERSION(3, 12, 0);
 
 	if (access(EVENTS_PATH, R_OK) != 0) {
-		logg->logMessage("%s(%s:%i): " EVENTS_PATH " does not exist, is CONFIG_TRACING enabled?", __FUNCTION__, __FILE__, __LINE__);
+		logg->logMessage("%s(%s:%i): " EVENTS_PATH " does not exist, is CONFIG_TRACING and CONFIG_CONTEXT_SWITCH_TRACER enabled?", __FUNCTION__, __FILE__, __LINE__);
 		return false;
 	}
 
@@ -189,12 +162,14 @@ bool PerfDriver::setup() {
 	struct dirent *dirent;
 	while ((dirent = readdir(dir)) != NULL) {
 		for (int i = 0; i < ARRAY_LENGTH(gator_cpus); ++i) {
+			const struct gator_cpu *const gator_cpu = &gator_cpus[i];
+
 			// Do the names match exactly?
-			if (strcmp(dirent->d_name, gator_cpus[i].pmnc_name) != 0 &&
-					// Do these names match but have the old vs new prefix?
-			    (strncmp(dirent->d_name, OLD_PMU_PREFIX, sizeof(OLD_PMU_PREFIX) - 1) != 0 ||
-			     strncmp(gator_cpus[i].pmnc_name, NEW_PMU_PREFIX, sizeof(NEW_PMU_PREFIX) - 1) != 0 ||
-			     strcmp(dirent->d_name + sizeof(OLD_PMU_PREFIX) - 1, gator_cpus[i].pmnc_name + sizeof(NEW_PMU_PREFIX) - 1) != 0)) {
+			if (strcasecmp(gator_cpu->pmnc_name, dirent->d_name) != 0 &&
+			    // Do these names match but have the old vs new prefix?
+			    ((strncasecmp(dirent->d_name, OLD_PMU_PREFIX, sizeof(OLD_PMU_PREFIX) - 1) != 0 ||
+			      strncasecmp(gator_cpu->pmnc_name, NEW_PMU_PREFIX, sizeof(NEW_PMU_PREFIX) - 1) != 0 ||
+			      strcasecmp(dirent->d_name + sizeof(OLD_PMU_PREFIX) - 1, gator_cpu->pmnc_name + sizeof(NEW_PMU_PREFIX) - 1) != 0))) {
 				continue;
 			}
 
@@ -206,11 +181,12 @@ bool PerfDriver::setup() {
 			}
 
 			foundCpu = true;
-			addCpuCounters(gator_cpus[i].pmnc_name, type, gator_cpus[i].pmnc_counters);
+			logg->logMessage("Adding cpu counters for %s", gator_cpu->pmnc_name);
+			addCpuCounters(gator_cpu->pmnc_name, type, gator_cpu->pmnc_counters);
 		}
 
 		for (int i = 0; i < ARRAY_LENGTH(uncore_counters); ++i) {
-			if (strcmp(dirent->d_name, uncore_counters[i].name) != 0) {
+			if (strcmp(dirent->d_name, uncore_counters[i].perfName) != 0) {
 				continue;
 			}
 
@@ -221,7 +197,8 @@ bool PerfDriver::setup() {
 				continue;
 			}
 
-			addUncoreCounters(uncore_counters[i].name, type, uncore_counters[i].count);
+			logg->logMessage("Adding uncore counters for %s", uncore_counters[i].gatorName);
+			addUncoreCounters(uncore_counters[i].gatorName, type, uncore_counters[i].count);
 		}
 	}
 	closedir(dir);
@@ -234,6 +211,7 @@ bool PerfDriver::setup() {
 			}
 
 			foundCpu = true;
+			logg->logMessage("Adding cpu counters (based on cpuid) for %s", gator_cpus[i].pmnc_name);
 			addCpuCounters(gator_cpus[i].pmnc_name, PERF_TYPE_RAW, gator_cpus[i].pmnc_counters);
 		}
 	}
@@ -252,31 +230,20 @@ bool PerfDriver::setup() {
 
 	id = getTracepointId("irq/softirq_exit", &printb);
 	if (id >= 0) {
-		mCounters = new PerfCounter(mCounters, "Linux_irq_softirq", PERF_TYPE_TRACEPOINT, id, true);
+		setCounters(new PerfCounter(getCounters(), "Linux_irq_softirq", PERF_TYPE_TRACEPOINT, id, true));
 	}
 
 	id = getTracepointId("irq/irq_handler_exit", &printb);
 	if (id >= 0) {
-		mCounters = new PerfCounter(mCounters, "Linux_irq_irq", PERF_TYPE_TRACEPOINT, id, true);
+		setCounters(new PerfCounter(getCounters(), "Linux_irq_irq", PERF_TYPE_TRACEPOINT, id, true));
 	}
 
-	//Linux_block_rq_wr
-	//Linux_block_rq_rd
-	//Linux_net_rx
-	//Linux_net_tx
-
 	id = getTracepointId(SCHED_SWITCH, &printb);
 	if (id >= 0) {
-		mCounters = new PerfCounter(mCounters, "Linux_sched_switch", PERF_TYPE_TRACEPOINT, id, true);
+		setCounters(new PerfCounter(getCounters(), "Linux_sched_switch", PERF_TYPE_TRACEPOINT, id, true));
 	}
 
-	//Linux_meminfo_memused
-	//Linux_meminfo_memfree
-	//Linux_meminfo_bufferram
-	//Linux_power_cpu_freq
-	//Linux_power_cpu_idle
-
-	mCounters = new PerfCounter(mCounters, "Linux_cpu_wait_contention", TYPE_DERIVED, -1, false);
+	setCounters(new PerfCounter(getCounters(), "Linux_cpu_wait_contention", TYPE_DERIVED, -1, false));
 
 	//Linux_cpu_wait_io
 
@@ -299,62 +266,48 @@ bool PerfDriver::summary(Buffer *const buffer) {
 		logg->logMessage("%s(%s:%i): clock_gettime failed", __FUNCTION__, __FILE__, __LINE__);
 		return false;
 	}
-	const int64_t timestamp = (int64_t)ts.tv_sec * 1000000000L + ts.tv_nsec;
+	const int64_t timestamp = (int64_t)ts.tv_sec * NS_PER_S + ts.tv_nsec;
 
-	const int64_t uptime = getTime();
+	const uint64_t monotonicStarted = getTime();
+	gSessionData->mMonotonicStarted = monotonicStarted;
 
-	buffer->summary(timestamp, uptime, 0, buf);
+	buffer->summary(monotonicStarted, timestamp, monotonicStarted, monotonicStarted, buf);
 
 	for (int i = 0; i < gSessionData->mCores; ++i) {
-		// Don't send information on a cpu we know nothing about
-		if (gSessionData->mCpuIds[i] == -1) {
-			continue;
-		}
-
-		int j;
-		for (j = 0; j < ARRAY_LENGTH(gator_cpus); ++j) {
-			if (gator_cpus[j].cpuid == gSessionData->mCpuIds[i]) {
-				break;
-			}
-		}
-		if (gator_cpus[j].cpuid == gSessionData->mCpuIds[i]) {
-			buffer->coreName(i, gSessionData->mCpuIds[i], gator_cpus[j].core_name);
-		} else {
-			if (gSessionData->mCpuIds[i] == -1) {
-				snprintf(buf, sizeof(buf), "Unknown");
-			} else {
-				snprintf(buf, sizeof(buf), "Unknown (0x%.3x)", gSessionData->mCpuIds[i]);
-			}
-			buffer->coreName(i, gSessionData->mCpuIds[i], buf);
-		}
+		coreName(monotonicStarted, buffer, i);
 	}
-	buffer->commit(1);
+	buffer->commit(monotonicStarted);
 
 	return true;
 }
 
-PerfCounter *PerfDriver::findCounter(const Counter &counter) const {
-	for (PerfCounter * perfCounter = mCounters; perfCounter != NULL; perfCounter = perfCounter->getNext()) {
-		if (strcmp(perfCounter->getName(), counter.getType()) == 0) {
-			return perfCounter;
-		}
+void PerfDriver::coreName(const uint32_t startTime, Buffer *const buffer, const int cpu) {
+	// Don't send information on a cpu we know nothing about
+	if (gSessionData->mCpuIds[cpu] == -1) {
+		return;
 	}
 
-	return NULL;
-}
-
-bool PerfDriver::claimCounter(const Counter &counter) const {
-	return findCounter(counter) != NULL;
-}
-
-void PerfDriver::resetCounters() {
-	for (PerfCounter * counter = mCounters; counter != NULL; counter = counter->getNext()) {
-		counter->setEnabled(false);
+	int j;
+	for (j = 0; j < ARRAY_LENGTH(gator_cpus); ++j) {
+		if (gator_cpus[j].cpuid == gSessionData->mCpuIds[cpu]) {
+			break;
+		}
+	}
+	if (j < ARRAY_LENGTH(gator_cpus)) { // a match broke out of the loop early; test the index so gator_cpus[j] is never read one past the end
+		buffer->coreName(startTime, cpu, gSessionData->mCpuIds[cpu], gator_cpus[j].core_name);
+	} else {
+		char buf[32];
+		if (gSessionData->mCpuIds[cpu] == -1) {
+			snprintf(buf, sizeof(buf), "Unknown");
+		} else {
+			snprintf(buf, sizeof(buf), "Unknown (0x%.3x)", gSessionData->mCpuIds[cpu]);
+		}
+		buffer->coreName(startTime, cpu, gSessionData->mCpuIds[cpu], buf);
 	}
 }
 
 void PerfDriver::setupCounter(Counter &counter) {
-	PerfCounter *const perfCounter = findCounter(counter);
+	PerfCounter *const perfCounter = static_cast<PerfCounter *>(findCounter(counter));
 	if (perfCounter == NULL) {
 		counter.setEnabled(false);
 		return;
@@ -369,21 +322,10 @@ void PerfDriver::setupCounter(Counter &counter) {
 	counter.setKey(perfCounter->getKey());
 }
 
-int PerfDriver::writeCounters(mxml_node_t *root) const {
-	int count = 0;
-	for (PerfCounter * counter = mCounters; counter != NULL; counter = counter->getNext()) {
-		mxml_node_t *node = mxmlNewElement(root, "counter");
-		mxmlElementSetAttr(node, "name", counter->getName());
-		++count;
-	}
-
-	return count;
-}
-
-bool PerfDriver::enable(PerfGroup *const group, Buffer *const buffer) const {
-	for (PerfCounter * counter = mCounters; counter != NULL; counter = counter->getNext()) {
+bool PerfDriver::enable(const uint64_t currTime, PerfGroup *const group, Buffer *const buffer) const {
+	for (PerfCounter *counter = static_cast<PerfCounter *>(getCounters()); counter != NULL; counter = static_cast<PerfCounter *>(counter->getNext())) {
 		if (counter->isEnabled() && (counter->getType() != TYPE_DERIVED)) {
-			if (!group->add(buffer, counter->getKey(), counter->getType(), counter->getConfig(), counter->getCount(), counter->getCount() > 0 ? PERF_SAMPLE_TID | PERF_SAMPLE_IP : 0, counter->isPerCpu() ? PERF_GROUP_PER_CPU : 0)) {
+			if (!group->add(currTime, buffer, counter->getKey(), counter->getType(), counter->getConfig(), counter->getCount(), counter->getCount() > 0 ? PERF_SAMPLE_TID | PERF_SAMPLE_IP : 0, counter->isPerCpu() ? PERF_GROUP_PER_CPU : 0)) {
 				logg->logMessage("%s(%s:%i): PerfGroup::add failed", __FUNCTION__, __FILE__, __LINE__);
 				return false;
 			}
diff --git a/tools/gator/daemon/PerfDriver.h b/tools/gator/daemon/PerfDriver.h
index 2cae575a7059..846203a9e18b 100644
--- a/tools/gator/daemon/PerfDriver.h
+++ b/tools/gator/daemon/PerfDriver.h
@@ -9,6 +9,8 @@
 #ifndef PERFDRIVER_H
 #define PERFDRIVER_H
 
+#include <stdint.h>
+
 #include "Driver.h"
 
 // If debugfs is not mounted at /sys/kernel/debug, update DEBUGFS_PATH
@@ -16,13 +18,13 @@
 #define EVENTS_PATH DEBUGFS_PATH "/tracing/events"
 
 #define SCHED_SWITCH "sched/sched_switch"
+#define CPU_IDLE "power/cpu_idle"
 
 class Buffer;
 class DynBuf;
-class PerfCounter;
 class PerfGroup;
 
-class PerfDriver : public Driver {
+class PerfDriver : public SimpleDriver {
 public:
 	PerfDriver();
 	~PerfDriver();
@@ -31,24 +33,19 @@ class PerfDriver : public Driver {
 
 	bool setup();
 	bool summary(Buffer *const buffer);
+	void coreName(const uint32_t startTime, Buffer *const buffer, const int cpu);
 	bool isSetup() const { return mIsSetup; }
 
-	bool claimCounter(const Counter &counter) const;
-	void resetCounters();
 	void setupCounter(Counter &counter);
 
-	int writeCounters(mxml_node_t *root) const;
-
-	bool enable(PerfGroup *const group, Buffer *const buffer) const;
+	bool enable(const uint64_t currTime, PerfGroup *const group, Buffer *const buffer) const;
 
 	static long long getTracepointId(const char *const name, DynBuf *const printb);
 
 private:
-	PerfCounter *findCounter(const Counter &counter) const;
 	void addCpuCounters(const char *const counterName, const int type, const int numCounters);
 	void addUncoreCounters(const char *const counterName, const int type, const int numCounters);
 
-	PerfCounter *mCounters;
 	bool mIsSetup;
 	bool mLegacySupport;
 
diff --git a/tools/gator/daemon/PerfGroup.cpp b/tools/gator/daemon/PerfGroup.cpp
index 2a0239f7c348..4fd960a9058c 100644
--- a/tools/gator/daemon/PerfGroup.cpp
+++ b/tools/gator/daemon/PerfGroup.cpp
@@ -9,6 +9,7 @@
 #include "PerfGroup.h"
 
 #include <errno.h>
+#include <fcntl.h>
 #include <string.h>
 #include <sys/ioctl.h>
 #include <sys/syscall.h>
@@ -33,10 +34,19 @@
 	/* have a sampling interrupt happen when we cross the wakeup_watermark boundary */ \
 	pea.watermark = 1; \
 	/* Be conservative in flush size as only one buffer set is monitored */ \
-	pea.wakeup_watermark = 3 * BUF_SIZE / 4
+	pea.wakeup_watermark = BUF_SIZE / 2
 
 static int sys_perf_event_open(struct perf_event_attr *const attr, const pid_t pid, const int cpu, const int group_fd, const unsigned long flags) {
-	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+	int fd = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); // direct syscall; a negative result is reported to the caller as -1
+	if (fd < 0) {
+		return -1;
+	}
+	int fdf = fcntl(fd, F_GETFD); // read the current descriptor flags so FD_CLOEXEC is OR-ed in rather than replacing them
+	if ((fdf == -1) || (fcntl(fd, F_SETFD, fdf | FD_CLOEXEC) != 0)) {
+		close(fd); // NOTE(review): PerfGroup::prepareCPU inspects errno after a failure; close() may overwrite the fcntl errno - confirm acceptable
+		return -1;
+	}
+	return fd;
 }
 
 PerfGroup::PerfGroup(PerfBuffer *const pb) : mPb(pb) {
@@ -54,7 +64,7 @@ PerfGroup::~PerfGroup() {
 	}
 }
 
-bool PerfGroup::add(Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags) {
+bool PerfGroup::add(const uint64_t currTime, Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags) {
 	int i;
 	for (i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
 		if (mKeys[i] < 0) {
@@ -82,12 +92,12 @@ bool PerfGroup::add(Buffer *const buffer, const int key, const __u32 type, const
 
 	mKeys[i] = key;
 
-	buffer->pea(&mAttrs[i], key);
+	buffer->pea(currTime, &mAttrs[i], key);
 
 	return true;
 }
 
-bool PerfGroup::prepareCPU(const int cpu) {
+int PerfGroup::prepareCPU(const int cpu, Monitor *const monitor) {
 	logg->logMessage("%s(%s:%i): Onlining cpu %i", __FUNCTION__, __FILE__, __LINE__, cpu);
 
 	for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
@@ -102,26 +112,35 @@ bool PerfGroup::prepareCPU(const int cpu) {
 		const int offset = i * gSessionData->mCores;
 		if (mFds[cpu + offset] >= 0) {
 			logg->logMessage("%s(%s:%i): cpu already online or not correctly cleaned up", __FUNCTION__, __FILE__, __LINE__);
-			return false;
+			return PG_FAILURE;
 		}
 
 		logg->logMessage("%s(%s:%i): perf_event_open cpu: %i type: %lli config: %lli sample: %lli sample_type: 0x%llx pinned: %i mmap: %i comm: %i freq: %i task: %i sample_id_all: %i", __FUNCTION__, __FILE__, __LINE__, cpu, (long long)mAttrs[i].type, (long long)mAttrs[i].config, (long long)mAttrs[i].sample_period, (long long)mAttrs[i].sample_type, mAttrs[i].pinned, mAttrs[i].mmap, mAttrs[i].comm, mAttrs[i].freq, mAttrs[i].task, mAttrs[i].sample_id_all);
 		mFds[cpu + offset] = sys_perf_event_open(&mAttrs[i], -1, cpu, i == 0 ? -1 : mFds[cpu], i == 0 ? 0 : PERF_FLAG_FD_OUTPUT);
 		if (mFds[cpu + offset] < 0) {
 			logg->logMessage("%s(%s:%i): failed %s", __FUNCTION__, __FILE__, __LINE__, strerror(errno));
+			if (errno == ENODEV) {
+				return PG_CPU_OFFLINE;
+			}
 			continue;
 		}
 
-		if (!mPb->useFd(cpu, mFds[cpu + offset], mFds[cpu])) {
+		if (!mPb->useFd(cpu, mFds[cpu + offset])) {
 			logg->logMessage("%s(%s:%i): PerfBuffer::useFd failed", __FUNCTION__, __FILE__, __LINE__);
-			return false;
+			return PG_FAILURE;
+		}
+
+
+		if (!monitor->add(mFds[cpu + offset])) {
+		  logg->logMessage("%s(%s:%i): Monitor::add failed", __FUNCTION__, __FILE__, __LINE__);
+		  return PG_FAILURE;
 		}
 	}
 
-	return true;
+	return PG_SUCCESS;
 }
 
-int PerfGroup::onlineCPU(const int cpu, const bool start, Buffer *const buffer, Monitor *const monitor) {
+int PerfGroup::onlineCPU(const uint64_t currTime, const int cpu, const bool start, Buffer *const buffer) {
 	__u64 ids[ARRAY_LENGTH(mKeys)];
 	int coreKeys[ARRAY_LENGTH(mKeys)];
 	int idCount = 0;
@@ -137,38 +156,37 @@ int PerfGroup::onlineCPU(const int cpu, const bool start, Buffer *const buffer,
 				// Workaround for running 32-bit gatord on 64-bit systems, kernel patch in the works
 				ioctl(fd, (PERF_EVENT_IOC_ID & ~IOCSIZE_MASK) | (8 << _IOC_SIZESHIFT), &ids[idCount]) != 0) {
 			logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__);
-			return false;
+			return 0;
 		}
 		++idCount;
 	}
 
-	if (!monitor->add(mFds[cpu])) {
-		logg->logMessage("%s(%s:%i): Monitor::add failed", __FUNCTION__, __FILE__, __LINE__);
-		return false;
-	}
-
 	if (!gSessionData->perf.getLegacySupport()) {
-		buffer->keys(idCount, ids, coreKeys);
+		buffer->keys(currTime, idCount, ids, coreKeys);
 	} else {
 		char buf[1024];
 		ssize_t bytes = read(mFds[cpu], buf, sizeof(buf));
 		if (bytes < 0) {
 			logg->logMessage("read failed");
-			return false;
+			return 0;
 		}
-		buffer->keysOld(idCount, coreKeys, bytes, buf);
+		buffer->keysOld(currTime, idCount, coreKeys, bytes, buf);
 	}
 
 	if (start) {
 		for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
 			int offset = i * gSessionData->mCores + cpu;
-			if (mFds[offset] >= 0 && ioctl(mFds[offset], PERF_EVENT_IOC_ENABLE) < 0) {
+			if (mFds[offset] >= 0 && ioctl(mFds[offset], PERF_EVENT_IOC_ENABLE, 0) < 0) {
 				logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__);
-				return false;
+				return 0;
 			}
 		}
 	}
 
+	if (idCount == 0) {
+		logg->logMessage("%s(%s:%i): no events came online", __FUNCTION__, __FILE__, __LINE__);
+	}
+
 	return idCount;
 }
 
@@ -177,7 +195,7 @@ bool PerfGroup::offlineCPU(const int cpu) {
 
 	for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
 		int offset = i * gSessionData->mCores + cpu;
-		if (mFds[offset] >= 0 && ioctl(mFds[offset], PERF_EVENT_IOC_DISABLE) < 0) {
+		if (mFds[offset] >= 0 && ioctl(mFds[offset], PERF_EVENT_IOC_DISABLE, 0) < 0) {
 			logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__);
 			return false;
 		}
@@ -203,7 +221,7 @@ bool PerfGroup::offlineCPU(const int cpu) {
 
 bool PerfGroup::start() {
 	for (int pos = 0; pos < ARRAY_LENGTH(mFds); ++pos) {
-		if (mFds[pos] >= 0 && ioctl(mFds[pos], PERF_EVENT_IOC_ENABLE) < 0) {
+		if (mFds[pos] >= 0 && ioctl(mFds[pos], PERF_EVENT_IOC_ENABLE, 0) < 0) {
 			logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__);
 			goto fail;
 		}
@@ -220,7 +238,7 @@ bool PerfGroup::start() {
 void PerfGroup::stop() {
 	for (int pos = ARRAY_LENGTH(mFds) - 1; pos >= 0; --pos) {
 		if (mFds[pos] >= 0) {
-			ioctl(mFds[pos], PERF_EVENT_IOC_DISABLE);
+			ioctl(mFds[pos], PERF_EVENT_IOC_DISABLE, 0);
 		}
 	}
 }
diff --git a/tools/gator/daemon/PerfGroup.h b/tools/gator/daemon/PerfGroup.h
index 3f1e2bb4d1c8..f7b3d725bac7 100644
--- a/tools/gator/daemon/PerfGroup.h
+++ b/tools/gator/daemon/PerfGroup.h
@@ -9,6 +9,8 @@
 #ifndef PERF_GROUP
 #define PERF_GROUP
 
+#include <stdint.h>
+
 // Use a snapshot of perf_event.h as it may be more recent than what is on the target and if not newer features won't be supported anyways
 #include "k/perf_event.h"
 
@@ -27,16 +29,22 @@ enum PerfGroupFlags {
 	PERF_GROUP_PER_CPU       = 1 << 5,
 };
 
+enum { // result codes returned by PerfGroup::prepareCPU
+	PG_SUCCESS = 0, // returned when prepareCPU runs to the end without hitting one of the cases below
+	PG_FAILURE, // fd slot already in use, PerfBuffer::useFd failed, or Monitor::add failed
+	PG_CPU_OFFLINE, // perf_event_open failed with errno == ENODEV
+};
+
 class PerfGroup {
 public:
 	PerfGroup(PerfBuffer *const pb);
 	~PerfGroup();
 
-	bool add(Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags);
+	bool add(const uint64_t currTime, Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags);
 	// Safe to call concurrently
-	bool prepareCPU(const int cpu);
+	int prepareCPU(const int cpu, Monitor *const monitor);
 	// Not safe to call concurrently. Returns the number of events enabled
-	int onlineCPU(const int cpu, const bool start, Buffer *const buffer, Monitor *const monitor);
+	int onlineCPU(const uint64_t currTime, const int cpu, const bool start, Buffer *const buffer);
 	bool offlineCPU(int cpu);
 	bool start();
 	void stop();
diff --git a/tools/gator/daemon/PerfSource.cpp b/tools/gator/daemon/PerfSource.cpp
index ecfaa66832bd..193b7789a290 100644
--- a/tools/gator/daemon/PerfSource.cpp
+++ b/tools/gator/daemon/PerfSource.cpp
@@ -8,8 +8,14 @@
 
 #include "PerfSource.h"
 
+#include <dirent.h>
 #include <errno.h>
+#include <signal.h>
 #include <string.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
 #include <unistd.h>
 
 #include "Child.h"
@@ -19,11 +25,13 @@
 #include "Proc.h"
 #include "SessionData.h"
 
-#define MS_PER_US 1000000
+#ifndef SCHED_RESET_ON_FORK
+#define SCHED_RESET_ON_FORK 0x40000000
+#endif
 
 extern Child *child;
 
-static bool sendTracepointFormat(Buffer *const buffer, const char *const name, DynBuf *const printb, DynBuf *const b) {
+static bool sendTracepointFormat(const uint64_t currTime, Buffer *const buffer, const char *const name, DynBuf *const printb, DynBuf *const b) {
 	if (!printb->printf(EVENTS_PATH "/%s/format", name)) {
 		logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
 		return false;
@@ -32,47 +40,112 @@ static bool sendTracepointFormat(Buffer *const buffer, const char *const name, D
 		logg->logMessage("%s(%s:%i): DynBuf::read failed", __FUNCTION__, __FILE__, __LINE__);
 		return false;
 	}
-	buffer->format(b->getLength(), b->getBuf());
+	buffer->format(currTime, b->getLength(), b->getBuf());
 
 	return true;
 }
 
-PerfSource::PerfSource(sem_t *senderSem, sem_t *startProfile) : mSummary(0, FRAME_SUMMARY, 1024, senderSem), mBuffer(0, FRAME_PERF_ATTRS, 4*1024*1024, senderSem), mCountersBuf(), mCountersGroup(&mCountersBuf), mMonitor(), mUEvent(), mSenderSem(senderSem), mStartProfile(startProfile), mInterruptFd(-1), mIsDone(false) {
+// Timer thread used to sync perf and monotonic raw times; wakes once a second on a schedule anchored at mMonotonicStarted
+static void *syncFunc(void *arg) {
+	struct timespec ts;
+	int64_t nextTime = gSessionData->mMonotonicStarted;
+	int err;
+	(void)arg;
+
+	prctl(PR_SET_NAME, (unsigned long)&"gatord-sync", 0, 0, 0);
+
+	// Mask all signals so that this thread will not be woken up
+	{
+		sigset_t set;
+		if (sigfillset(&set) != 0) {
+			logg->logError(__FILE__, __LINE__, "sigfillset failed");
+			handleException();
+		}
+		if ((err = pthread_sigmask(SIG_SETMASK, &set, NULL)) != 0) {
+			logg->logError(__FILE__, __LINE__, "pthread_sigmask failed");
+			handleException();
+		}
+	}
+
+	for (;;) {
+		if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) != 0) {
+			logg->logError(__FILE__, __LINE__, "clock_gettime failed");
+			handleException();
+		}
+		const int64_t currTime = ts.tv_sec * NS_PER_S + ts.tv_nsec;
+
+		// Wake up once a second
+		nextTime += NS_PER_S;
+
+		// Always sleep more than 1 ms, hopefully things will line up better next time
+		const int64_t sleepTime = max(nextTime - currTime, (int64_t)(NS_PER_MS + 1));
+		ts.tv_sec = sleepTime/NS_PER_S;
+		ts.tv_nsec = sleepTime % NS_PER_S;
+
+		err = nanosleep(&ts, NULL);
+		if (err != 0) {
+			// nanosleep returns -1 and leaves the reason in errno, so report errno rather than err
+			fprintf(stderr, "nanosleep failed: %s\n", strerror(errno));
+			return NULL;
+		}
+	}
+	return NULL;
+}
+
+// Returns one more than the highest N among the cpu<N> entries of /sys/devices/system/cpu; errors go through handleException
+static long getMaxCoreNum() {
+	DIR *dir = opendir("/sys/devices/system/cpu");
+	if (dir == NULL) {
+		logg->logError(__FILE__, __LINE__, "Unable to determine the number of cores on the target, opendir failed");
+		handleException();
+	}
+
+	long maxCoreNum = -1;
+	struct dirent *dirent;
+	while ((dirent = readdir(dir)) != NULL) {
+		if (strncmp(dirent->d_name, "cpu", 3) == 0) {
+			char *endptr;
+			errno = 0;
+			long coreNum = strtol(dirent->d_name + 3, &endptr, 10);
+			if ((errno == 0) && (*endptr == '\0') && (coreNum >= maxCoreNum)) {
+				maxCoreNum = coreNum + 1;
+			}
+		}
+	}
+	closedir(dir);
+
+	if (maxCoreNum < 1) {
+		logg->logError(__FILE__, __LINE__, "Unable to determine the number of cores on the target, no cpu# directories found");
+		handleException();
+	}
+	// NOTE(review): maxCoreNum is a count here, so exactly NR_CPUS cores is rejected too - confirm that is intended
+	if (maxCoreNum >= NR_CPUS) {
+		logg->logError(__FILE__, __LINE__, "Too many cores on the target, please increase NR_CPUS in Config.h");
+		handleException();
+	}
+	return maxCoreNum;
+}
+
+PerfSource::PerfSource(sem_t *senderSem, sem_t *startProfile) : mSummary(0, FRAME_SUMMARY, 1024, senderSem), mBuffer(0, FRAME_PERF_ATTRS, 1024*1024, senderSem), mCountersBuf(), mCountersGroup(&mCountersBuf), mIdleGroup(&mCountersBuf), mMonitor(), mUEvent(), mSenderSem(senderSem), mStartProfile(startProfile), mInterruptFd(-1), mIsDone(false) {
 	long l = sysconf(_SC_PAGE_SIZE);
 	if (l < 0) {
 		logg->logError(__FILE__, __LINE__, "Unable to obtain the page size");
 		handleException();
 	}
 	gSessionData->mPageSize = static_cast<int>(l);
-
-	l = sysconf(_SC_NPROCESSORS_CONF);
-	if (l < 0) {
-		logg->logError(__FILE__, __LINE__, "Unable to obtain the number of cores");
-		handleException();
-	}
-	gSessionData->mCores = static_cast<int>(l);
+	gSessionData->mCores = static_cast<int>(getMaxCoreNum());
 }
 
 PerfSource::~PerfSource() {
 }
 
-struct PrepareParallelArgs {
-	PerfGroup *pg;
-	int cpu;
-};
-
-void *prepareParallel(void *arg) {
-	const PrepareParallelArgs *const args = (PrepareParallelArgs *)arg;
-	args->pg->prepareCPU(args->cpu);
-	return NULL;
-}
-
 bool PerfSource::prepare() {
 	DynBuf printb;
 	DynBuf b1;
-	DynBuf b2;
-	DynBuf b3;
 	long long schedSwitchId;
+	long long cpuIdleId;
+
+	const uint64_t currTime = getTime();
 
 	// Reread cpuinfo since cores may have changed since startup
 	gSessionData->readCpuInfo();
@@ -83,76 +156,146 @@ bool PerfSource::prepare() {
 			|| !mMonitor.add(mUEvent.getFd())
 
 			|| (schedSwitchId = PerfDriver::getTracepointId(SCHED_SWITCH, &printb)) < 0
-			|| !sendTracepointFormat(&mBuffer, SCHED_SWITCH, &printb, &b1)
+			|| !sendTracepointFormat(currTime, &mBuffer, SCHED_SWITCH, &printb, &b1)
+
+			|| (cpuIdleId = PerfDriver::getTracepointId(CPU_IDLE, &printb)) < 0
+			|| !sendTracepointFormat(currTime, &mBuffer, CPU_IDLE, &printb, &b1)
 
 			// Only want RAW but not IP on sched_switch and don't want TID on SAMPLE_ID
-			|| !mCountersGroup.add(&mBuffer, 100/**/, PERF_TYPE_TRACEPOINT, schedSwitchId, 1, PERF_SAMPLE_RAW, PERF_GROUP_MMAP | PERF_GROUP_COMM | PERF_GROUP_TASK | PERF_GROUP_SAMPLE_ID_ALL | PERF_GROUP_PER_CPU)
+			|| !mCountersGroup.add(currTime, &mBuffer, 100/**/, PERF_TYPE_TRACEPOINT, schedSwitchId, 1, PERF_SAMPLE_RAW, PERF_GROUP_MMAP | PERF_GROUP_COMM | PERF_GROUP_TASK | PERF_GROUP_SAMPLE_ID_ALL | PERF_GROUP_PER_CPU)
+			|| !mIdleGroup.add(currTime, &mBuffer, 101/**/, PERF_TYPE_TRACEPOINT, cpuIdleId, 1, PERF_SAMPLE_RAW, PERF_GROUP_PER_CPU)
 
 			// Only want TID and IP but not RAW on timer
-			|| (gSessionData->mSampleRate > 0 && !gSessionData->mIsEBS && !mCountersGroup.add(&mBuffer, 99/**/, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, 1000000000UL / gSessionData->mSampleRate, PERF_SAMPLE_TID | PERF_SAMPLE_IP, PERF_GROUP_PER_CPU))
+			|| (gSessionData->mSampleRate > 0 && !gSessionData->mIsEBS && !mCountersGroup.add(currTime, &mBuffer, 102/**/, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, 1000000000UL / gSessionData->mSampleRate, PERF_SAMPLE_TID | PERF_SAMPLE_IP, PERF_GROUP_PER_CPU))
 
-			|| !gSessionData->perf.enable(&mCountersGroup, &mBuffer)
+			|| !gSessionData->perf.enable(currTime, &mCountersGroup, &mBuffer)
 			|| 0) {
 		logg->logMessage("%s(%s:%i): perf setup failed, are you running Linux 3.4 or later?", __FUNCTION__, __FILE__, __LINE__);
 		return false;
 	}
 
-	if (!gSessionData->perf.summary(&mSummary)) {
-		logg->logMessage("%s(%s:%i): PerfDriver::summary failed", __FUNCTION__, __FILE__, __LINE__);
-		return false;
-	}
-
-	{
-		// Run prepareCPU in parallel as perf_event_open can take more than 1 sec in some cases
-		pthread_t threads[NR_CPUS];
-		PrepareParallelArgs args[NR_CPUS];
-		for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) {
-			args[cpu].pg = &mCountersGroup;
-			args[cpu].cpu = cpu;
-			if (pthread_create(&threads[cpu], NULL, prepareParallel, &args[cpu]) != 0) {
-				logg->logMessage("%s(%s:%i): pthread_create failed", __FUNCTION__, __FILE__, __LINE__);
-				return false;
-			}
+	for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) {
+		const int result = mCountersGroup.prepareCPU(cpu, &mMonitor);
+		if ((result != PG_SUCCESS) && (result != PG_CPU_OFFLINE)) {
+			logg->logError(__FILE__, __LINE__, "PerfGroup::prepareCPU on mCountersGroup failed");
+			handleException();
 		}
-		for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) {
-			if (pthread_join(threads[cpu], NULL) != 0) {
-				logg->logMessage("%s(%s:%i): pthread_join failed", __FUNCTION__, __FILE__, __LINE__);
-				return false;
-			}
+	}
+	for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) {
+		const int result = mIdleGroup.prepareCPU(cpu, &mMonitor);
+		if ((result != PG_SUCCESS) && (result != PG_CPU_OFFLINE)) {
+			logg->logError(__FILE__, __LINE__, "PerfGroup::prepareCPU on mIdleGroup failed");
+			handleException();
 		}
 	}
 
 	int numEvents = 0;
 	for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) {
-		numEvents += mCountersGroup.onlineCPU(cpu, false, &mBuffer, &mMonitor);
+		numEvents += mCountersGroup.onlineCPU(currTime, cpu, false, &mBuffer);
+	}
+	for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) {
+		numEvents += mIdleGroup.onlineCPU(currTime, cpu, false, &mBuffer);
 	}
 	if (numEvents <= 0) {
 		logg->logMessage("%s(%s:%i): PerfGroup::onlineCPU failed on all cores", __FUNCTION__, __FILE__, __LINE__);
 		return false;
 	}
 
-	// Start events before reading proc to avoid race conditions
-	if (!mCountersGroup.start()) {
-		logg->logMessage("%s(%s:%i): PerfGroup::start failed", __FUNCTION__, __FILE__, __LINE__);
-		return false;
+	// Send the summary right before the start so that the monotonic delta is close to the start time
+	if (!gSessionData->perf.summary(&mSummary)) {
+	  logg->logError(__FILE__, __LINE__, "PerfDriver::summary failed", __FUNCTION__, __FILE__, __LINE__);
+	  handleException();
 	}
 
-	if (!readProc(&mBuffer, true, &printb, &b1, &b2, &b3)) {
-		logg->logMessage("%s(%s:%i): readProc failed", __FUNCTION__, __FILE__, __LINE__);
-		return false;
+	// Start the timer thread used to sync perf and monotonic raw times
+	pthread_t syncThread;
+	if (pthread_create(&syncThread, NULL, syncFunc, NULL)) {
+	  logg->logError(__FILE__, __LINE__, "pthread_create failed", __FUNCTION__, __FILE__, __LINE__);
+	  handleException();
+	}
+	struct sched_param param;
+	param.sched_priority = sched_get_priority_max(SCHED_FIFO);
+	if (pthread_setschedparam(syncThread, SCHED_FIFO | SCHED_RESET_ON_FORK, &param) != 0) {
+	  logg->logError(__FILE__, __LINE__, "pthread_setschedparam failed");
+	  handleException();
 	}
 
-	mBuffer.commit(1);
+	mBuffer.commit(currTime);
 
 	return true;
 }
 
+// Arguments for procFunc: the Buffer to fill, the timestamp passed to it and a flag that stops the kallsyms read early
+struct ProcThreadArgs {
+	Buffer *mBuffer;
+	uint64_t mCurrTime;
+	bool mIsDone;
+};
+
+// Thread entry point: resets its own priority, then sends the /proc/<pid>/maps data followed by /proc/kallsyms
+void *procFunc(void *arg) {
+	DynBuf printb;
+	DynBuf b;
+	const ProcThreadArgs *const args = (ProcThreadArgs *)arg;
+	prctl(PR_SET_NAME, (unsigned long)&"gatord-proc", 0, 0, 0);
+
+	// Gator runs at a high priority, reset the priority to the default
+	if (setpriority(PRIO_PROCESS, syscall(__NR_gettid), 0) == -1) {
+		logg->logError(__FILE__, __LINE__, "setpriority failed");
+		handleException();
+	}
+
+	if (!readProcMaps(args->mCurrTime, args->mBuffer, &printb, &b)) {
+		logg->logError(__FILE__, __LINE__, "readProcMaps failed");
+		handleException();
+	}
+	args->mBuffer->commit(args->mCurrTime);
+
+	if (!readKallsyms(args->mCurrTime, args->mBuffer, &args->mIsDone)) {
+		logg->logError(__FILE__, __LINE__, "readKallsyms failed");
+		handleException();
+	}
+	args->mBuffer->commit(args->mCurrTime);
+	return NULL;
+}
+
 static const char CPU_DEVPATH[] = "/devices/system/cpu/cpu";
 
 void PerfSource::run() {
 	int pipefd[2];
+	pthread_t procThread;
+	ProcThreadArgs procThreadArgs;
 
-	if (pipe(pipefd) != 0) {
+	{
+		DynBuf printb;
+		DynBuf b1;
+		DynBuf b2;
+
+		const uint64_t currTime = getTime();
+
+		// Start events before reading proc to avoid race conditions
+		if (!mCountersGroup.start() || !mIdleGroup.start()) {
+			logg->logError(__FILE__, __LINE__, "PerfGroup::start failed", __FUNCTION__, __FILE__, __LINE__);
+			handleException();
+		}
+
+		if (!readProcComms(currTime, &mBuffer, &printb, &b1, &b2)) {
+			logg->logError(__FILE__, __LINE__, "readProcComms failed");
+			handleException();
+		}
+		mBuffer.commit(currTime);
+
+		// Postpone reading kallsyms as on android adb gets too backed up and data is lost
+		procThreadArgs.mBuffer = &mBuffer;
+		procThreadArgs.mCurrTime = currTime;
+		procThreadArgs.mIsDone = false;
+		if (pthread_create(&procThread, NULL, procFunc, &procThreadArgs)) {
+			logg->logError(__FILE__, __LINE__, "pthread_create failed", __FUNCTION__, __FILE__, __LINE__);
+			handleException();
+		}
+	}
+
+	if (pipe_cloexec(pipefd) != 0) {
 		logg->logError(__FILE__, __LINE__, "pipe failed");
 		handleException();
 	}
@@ -165,7 +308,7 @@ void PerfSource::run() {
 
 	int timeout = -1;
 	if (gSessionData->mLiveRate > 0) {
-		timeout = gSessionData->mLiveRate/MS_PER_US;
+		timeout = gSessionData->mLiveRate/NS_PER_MS;
 	}
 
 	sem_post(mStartProfile);
@@ -178,10 +321,11 @@ void PerfSource::run() {
 			logg->logError(__FILE__, __LINE__, "Monitor::wait failed");
 			handleException();
 		}
+		const uint64_t currTime = getTime();
 
 		for (int i = 0; i < ready; ++i) {
 			if (events[i].data.fd == mUEvent.getFd()) {
-				if (!handleUEvent()) {
+				if (!handleUEvent(currTime)) {
 					logg->logError(__FILE__, __LINE__, "PerfSource::handleUEvent failed");
 					handleException();
 				}
@@ -200,6 +344,9 @@ void PerfSource::run() {
 		}
 	}
 
+	procThreadArgs.mIsDone = true;
+	pthread_join(procThread, NULL);
+	mIdleGroup.stop();
 	mCountersGroup.stop();
 	mBuffer.setDone();
 	mIsDone = true;
@@ -212,7 +359,7 @@ void PerfSource::run() {
 	close(pipefd[1]);
 }
 
-bool PerfSource::handleUEvent() {
+bool PerfSource::handleUEvent(const uint64_t currTime) {
 	UEventResult result;
 	if (!mUEvent.read(&result)) {
 		logg->logMessage("%s(%s:%i): UEvent::Read failed", __FUNCTION__, __FILE__, __LINE__);
@@ -231,14 +378,41 @@ bool PerfSource::handleUEvent() {
 			logg->logMessage("%s(%s:%i): strtol failed", __FUNCTION__, __FILE__, __LINE__);
 			return false;
 		}
+
+		if (cpu >= gSessionData->mCores) {
+			logg->logError(__FILE__, __LINE__, "Only %i cores are expected but core %i reports %s", gSessionData->mCores, cpu, result.mAction);
+			handleException();
+		}
+
 		if (strcmp(result.mAction, "online") == 0) {
+			mBuffer.onlineCPU(currTime, currTime - gSessionData->mMonotonicStarted, cpu);
 			// Only call onlineCPU if prepareCPU succeeded
-			const bool result = mCountersGroup.prepareCPU(cpu) &&
-				mCountersGroup.onlineCPU(cpu, true, &mBuffer, &mMonitor);
-			mBuffer.commit(1);
+			bool result = false;
+			int err = mCountersGroup.prepareCPU(cpu, &mMonitor);
+			if (err == PG_CPU_OFFLINE) {
+				result = true;
+			} else if (err == PG_SUCCESS) {
+				if (mCountersGroup.onlineCPU(currTime, cpu, true, &mBuffer)) {
+					err = mIdleGroup.prepareCPU(cpu, &mMonitor);
+					if (err == PG_CPU_OFFLINE) {
+						result = true;
+					} else if (err == PG_SUCCESS) {
+						if (mIdleGroup.onlineCPU(currTime, cpu, true, &mBuffer)) {
+							result = true;
+						}
+					}
+				}
+			}
+			mBuffer.commit(currTime);
+
+			gSessionData->readCpuInfo();
+			gSessionData->perf.coreName(currTime, &mSummary, cpu);
+			mSummary.commit(currTime);
 			return result;
 		} else if (strcmp(result.mAction, "offline") == 0) {
-			return mCountersGroup.offlineCPU(cpu);
+			const bool result = mCountersGroup.offlineCPU(cpu) && mIdleGroup.offlineCPU(cpu);
+			mBuffer.offlineCPU(currTime, currTime - gSessionData->mMonotonicStarted, cpu);
+			return result;
 		}
 	}
 
diff --git a/tools/gator/daemon/PerfSource.h b/tools/gator/daemon/PerfSource.h
index 3f471c8de414..ce1eafe8e953 100644
--- a/tools/gator/daemon/PerfSource.h
+++ b/tools/gator/daemon/PerfSource.h
@@ -33,12 +33,13 @@ class PerfSource : public Source {
 	void write(Sender *sender);
 
 private:
-	bool handleUEvent();
+	bool handleUEvent(const uint64_t currTime);
 
 	Buffer mSummary;
 	Buffer mBuffer;
 	PerfBuffer mCountersBuf;
 	PerfGroup mCountersGroup;
+	PerfGroup mIdleGroup;
 	Monitor mMonitor;
 	UEvent mUEvent;
 	sem_t *const mSenderSem;
diff --git a/tools/gator/daemon/Proc.cpp b/tools/gator/daemon/Proc.cpp
index 9f01770d6609..e6b26b1199fa 100644
--- a/tools/gator/daemon/Proc.cpp
+++ b/tools/gator/daemon/Proc.cpp
@@ -10,13 +10,16 @@
 
 #include <dirent.h>
 #include <errno.h>
+#include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h>
 
 #include "Buffer.h"
 #include "DynBuf.h"
 #include "Logging.h"
+#include "SessionData.h"
 
 struct ProcStat {
 	// From linux-dev/include/linux/sched.h
@@ -57,6 +60,8 @@ static bool readProcStat(ProcStat *const ps, const char *const pathname, DynBuf
 	return true;
 }
 
+static const char APP_PROCESS[] = "app_process";
+
 static const char *readProcExe(DynBuf *const printb, const int pid, const int tid, DynBuf *const b) {
 	if (tid == -1 ? !printb->printf("/proc/%i/exe", pid)
 			: !printb->printf("/proc/%i/task/%i/exe", pid, tid)) {
@@ -82,7 +87,8 @@ static const char *readProcExe(DynBuf *const printb, const int pid, const int ti
 	}
 
 	// Android apps are run by app_process but the cmdline is changed to reference the actual app name
-	if (strcmp(image, "app_process") != 0) {
+	// On 64-bit android app_process can be app_process32 or app_process64
+	if (strncmp(image, APP_PROCESS, sizeof(APP_PROCESS) - 1) != 0) {
 		return image;
 	}
 
@@ -100,7 +106,7 @@ static const char *readProcExe(DynBuf *const printb, const int pid, const int ti
 	return b->getBuf();
 }
 
-static bool readProcTask(Buffer *const buffer, const int pid, DynBuf *const printb, DynBuf *const b1, DynBuf *const b2) {
+static bool readProcTask(const uint64_t currTime, Buffer *const buffer, const int pid, DynBuf *const printb, DynBuf *const b1, DynBuf *const b2) {
 	bool result = false;
 
 	if (!b1->printf("/proc/%i/task", pid)) {
@@ -110,7 +116,8 @@ static bool readProcTask(Buffer *const buffer, const int pid, DynBuf *const prin
 	DIR *task = opendir(b1->getBuf());
 	if (task == NULL) {
 		logg->logMessage("%s(%s:%i): opendir failed", __FUNCTION__, __FILE__, __LINE__);
-		return result;
+		// This is not a fatal error - the thread just doesn't exist any more
+		return true;
 	}
 
 	struct dirent *dirent;
@@ -138,7 +145,7 @@ static bool readProcTask(Buffer *const buffer, const int pid, DynBuf *const prin
 			goto fail;
 		}
 
-		buffer->comm(pid, tid, image, ps.comm);
+		buffer->comm(currTime, pid, tid, image, ps.comm);
 	}
 
 	result = true;
@@ -149,7 +156,7 @@ static bool readProcTask(Buffer *const buffer, const int pid, DynBuf *const prin
 	return result;
 }
 
-bool readProc(Buffer *const buffer, bool sendMaps, DynBuf *const printb, DynBuf *const b1, DynBuf *const b2, DynBuf *const b3) {
+bool readProcComms(const uint64_t currTime, Buffer *const buffer, DynBuf *const printb, DynBuf *const b1, DynBuf *const b2) {
 	bool result = false;
 
 	DIR *proc = opendir("/proc");
@@ -177,19 +184,6 @@ bool readProc(Buffer *const buffer, bool sendMaps, DynBuf *const printb, DynBuf
 			goto fail;
 		}
 
-		if (sendMaps) {
-			if (!printb->printf("/proc/%i/maps", pid)) {
-				logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
-				goto fail;
-			}
-			if (!b2->read(printb->getBuf())) {
-				logg->logMessage("%s(%s:%i): DynBuf::read failed, likely because the process exited", __FUNCTION__, __FILE__, __LINE__);
-				// This is not a fatal error - the process just doesn't exist any more
-				continue;
-			}
-
-			buffer->maps(pid, pid, b2->getBuf());
-		}
 		if (ps.numThreads <= 1) {
 			const char *const image = readProcExe(printb, pid, -1, b1);
 			if (image == NULL) {
@@ -197,9 +191,9 @@ bool readProc(Buffer *const buffer, bool sendMaps, DynBuf *const printb, DynBuf
 				goto fail;
 			}
 
-			buffer->comm(pid, pid, image, ps.comm);
+			buffer->comm(currTime, pid, pid, image, ps.comm);
 		} else {
-			if (!readProcTask(buffer, pid, printb, b1, b3)) {
+			if (!readProcTask(currTime, buffer, pid, printb, b1, b2)) {
 				logg->logMessage("%s(%s:%i): readProcTask failed", __FUNCTION__, __FILE__, __LINE__);
 				goto fail;
 			}
@@ -213,3 +207,106 @@ bool readProc(Buffer *const buffer, bool sendMaps, DynBuf *const printb, DynBuf
 
 	return result;
 }
+
+// Passes the contents of /proc/<pid>/maps to buffer->maps() for each numeric /proc entry; false only if opendir or printf fails
+bool readProcMaps(const uint64_t currTime, Buffer *const buffer, DynBuf *const printb, DynBuf *const b) {
+	bool result = false;
+
+	DIR *proc = opendir("/proc");
+	if (proc == NULL) {
+		logg->logMessage("%s(%s:%i): opendir failed", __FUNCTION__, __FILE__, __LINE__);
+		return result;
+	}
+
+	struct dirent *dirent;
+	while ((dirent = readdir(proc)) != NULL) {
+		char *endptr;
+		const int pid = strtol(dirent->d_name, &endptr, 10);
+		if (*endptr != '\0') {
+			// Ignore proc items that are not integers like ., cpuinfo, etc...
+			continue;
+		}
+
+		if (!printb->printf("/proc/%i/maps", pid)) {
+			logg->logMessage("%s(%s:%i): DynBuf::printf failed", __FUNCTION__, __FILE__, __LINE__);
+			goto fail;
+		}
+		if (!b->read(printb->getBuf())) {
+			logg->logMessage("%s(%s:%i): DynBuf::read failed, likely because the process exited", __FUNCTION__, __FILE__, __LINE__);
+			// This is not a fatal error - the process just doesn't exist any more
+			continue;
+		}
+
+		buffer->maps(currTime, pid, pid, b->getBuf());
+	}
+
+	result = true;
+
+ fail:
+	closedir(proc);
+	return result;
+}
+
+// Feeds /proc/kallsyms to buffer->kallsyms() in pieces until EOF, the session ends or *isDone is set; every return is true
+bool readKallsyms(const uint64_t currTime, Buffer *const buffer, const bool *const isDone) {
+	int fd = ::open("/proc/kallsyms", O_RDONLY | O_CLOEXEC);
+
+	if (fd < 0) {
+		logg->logMessage("%s(%s:%i): open failed", __FUNCTION__, __FILE__, __LINE__);
+		return true;
+	};
+
+	char buf[1<<12];
+	ssize_t pos = 0;
+	while (gSessionData->mSessionIsActive && !ACCESS_ONCE(*isDone)) {
+		// Assert there is still space in the buffer
+		if (sizeof(buf) - pos - 1 == 0) {
+			logg->logError(__FILE__, __LINE__, "no space left in buffer");
+			handleException();
+		}
+
+		{
+			// -1 to reserve space for \0
+			const ssize_t bytes = ::read(fd, buf + pos, sizeof(buf) - pos - 1);
+			if (bytes < 0) {
+				logg->logError(__FILE__, __LINE__, "read failed", __FUNCTION__, __FILE__, __LINE__);
+				handleException();
+			}
+			if (bytes == 0) {
+				// Assert the buffer is empty
+				if (pos != 0) {
+					logg->logError(__FILE__, __LINE__, "buffer not empty on eof");
+					handleException();
+				}
+				break;
+			}
+			pos += bytes;
+		}
+		// Send everything up to the last complete line and move the remainder to the front for the next read
+		ssize_t newline;
+		// Find the last '\n'
+		for (newline = pos - 1; newline >= 0; --newline) {
+			if (buf[newline] == '\n') {
+				const char was = buf[newline + 1];
+				buf[newline + 1] = '\0';
+				buffer->kallsyms(currTime, buf);
+				// Sleep 3 ms to avoid sending out too much data too quickly
+				usleep(3000);
+				buf[0] = was;
+				// Assert the memory regions do not overlap
+				if (pos - newline >= newline + 1) {
+					logg->logError(__FILE__, __LINE__, "memcpy src and dst overlap");
+					handleException();
+				}
+				if (pos - newline - 2 > 0) {
+					memcpy(buf + 1, buf + newline + 2, pos - newline - 2);
+				}
+				pos -= newline + 1;
+				break;
+			}
+		}
+	}
+
+	close(fd);
+	return true;
+}
diff --git a/tools/gator/daemon/Proc.h b/tools/gator/daemon/Proc.h
index 31c2eecb7aeb..2a1a7cbc1e99 100644
--- a/tools/gator/daemon/Proc.h
+++ b/tools/gator/daemon/Proc.h
@@ -9,9 +9,13 @@
 #ifndef PROC_H
 #define PROC_H
 
+#include <stdint.h>
+
 class Buffer;
 class DynBuf;
 
-bool readProc(Buffer *const buffer, bool sendMaps, DynBuf *const printb, DynBuf *const b1, DynBuf *const b2, DynBuf *const b3);
+bool readProcComms(const uint64_t currTime, Buffer *const buffer, DynBuf *const printb, DynBuf *const b1, DynBuf *const b2);
+bool readProcMaps(const uint64_t currTime, Buffer *const buffer, DynBuf *const printb, DynBuf *const b);
+bool readKallsyms(const uint64_t currTime, Buffer *const buffer, const bool *const isDone);
 
 #endif // PROC_H
diff --git a/tools/gator/daemon/Sender.cpp b/tools/gator/daemon/Sender.cpp
index 3a981a6427be..8a54a6678974 100644
--- a/tools/gator/daemon/Sender.cpp
+++ b/tools/gator/daemon/Sender.cpp
@@ -65,18 +65,13 @@ void Sender::createDataFile(char* apcDir) {
 
 	mDataFileName = (char*)malloc(strlen(apcDir) + 12);
 	sprintf(mDataFileName, "%s/0000000000", apcDir);
-	mDataFile = fopen(mDataFileName, "wb");
+	mDataFile = fopen_cloexec(mDataFileName, "wb");
 	if (!mDataFile) {
 		logg->logError(__FILE__, __LINE__, "Failed to open binary file: %s", mDataFileName);
 		handleException();
 	}
 }
 
-template<typename T>
-inline T min(const T a, const T b) {
-	return (a < b ? a : b);
-}
-
 void Sender::writeData(const char* data, int length, int type) {
 	if (length < 0 || (data == NULL && length > 0)) {
 		return;
diff --git a/tools/gator/daemon/Sender.h b/tools/gator/daemon/Sender.h
index 33b6cc3c5d8d..5aa911713820 100644
--- a/tools/gator/daemon/Sender.h
+++ b/tools/gator/daemon/Sender.h
@@ -6,8 +6,8 @@
  * published by the Free Software Foundation.
  */
 
-#ifndef	__SENDER_H__
-#define	__SENDER_H__
+#ifndef __SENDER_H__
+#define __SENDER_H__
 
 #include <stdio.h>
 #include <pthread.h>
diff --git a/tools/gator/daemon/SessionData.cpp b/tools/gator/daemon/SessionData.cpp
index 14d995fc39fa..0e65d7842647 100644
--- a/tools/gator/daemon/SessionData.cpp
+++ b/tools/gator/daemon/SessionData.cpp
@@ -8,15 +8,31 @@
 
 #include "SessionData.h"
 
+#include <fcntl.h>
 #include <string.h>
 #include <sys/mman.h>
+#include <unistd.h>
 
-#include "SessionXML.h"
+#include "CPUFreqDriver.h"
+#include "DiskIODriver.h"
+#include "FSDriver.h"
+#include "HwmonDriver.h"
 #include "Logging.h"
+#include "MemInfoDriver.h"
+#include "NetDriver.h"
+#include "SessionXML.h"
+
+#define CORE_NAME_UNKNOWN "unknown"
 
 SessionData* gSessionData = NULL;
 
 SessionData::SessionData() {
+	usDrivers[0] = new HwmonDriver();
+	usDrivers[1] = new FSDriver();
+	usDrivers[2] = new MemInfoDriver();
+	usDrivers[3] = new NetDriver();
+	usDrivers[4] = new CPUFreqDriver();
+	usDrivers[5] = new DiskIODriver();
 	initialize();
 }
 
@@ -29,6 +45,7 @@ void SessionData::initialize() {
 	mLocalCapture = false;
 	mOneShot = false;
 	mSentSummary = false;
+	mAllowCommands = false;
 	const size_t cpuIdSize = sizeof(int)*NR_CPUS;
 	// Share mCpuIds across all instances of gatord
 	mCpuIds = (int *)mmap(NULL, cpuIdSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
@@ -37,15 +54,22 @@ void SessionData::initialize() {
 		handleException();
 	}
 	memset(mCpuIds, -1, cpuIdSize);
+	strcpy(mCoreName, CORE_NAME_UNKNOWN);
+	readModel();
 	readCpuInfo();
+	mImages = NULL;
 	mConfigurationXMLPath = NULL;
 	mSessionXMLPath = NULL;
 	mEventsXMLPath = NULL;
 	mTargetPath = NULL;
 	mAPCDir = NULL;
+	mCaptureWorkingDir = NULL;
+	mCaptureCommand = NULL;
+	mCaptureUser = NULL;
 	mSampleRate = 0;
 	mLiveRate = 0;
 	mDuration = 0;
+	mMonotonicStarted = -1;
 	mBacktraceDepth = 0;
 	mTotalBufferSize = 0;
 	// sysconf(_SC_NPROCESSORS_CONF) is unreliable on 2.6 Android, get the value from the kernel module
@@ -71,7 +95,6 @@ void SessionData::parseSessionXML(char* xmlString) {
 		handleException();
 	}
 	mBacktraceDepth = session.parameters.call_stack_unwinding == true ? 128 : 0;
-	mDuration = session.parameters.duration;
 
 	// Determine buffer size (in MB) based on buffer mode
 	mOneShot = true;
@@ -89,21 +112,38 @@ void SessionData::parseSessionXML(char* xmlString) {
 		handleException();
 	}
 
-	mImages = session.parameters.images;
 	// Convert milli- to nanoseconds
 	mLiveRate = session.parameters.live_rate * (int64_t)1000000;
 	if (mLiveRate > 0 && mLocalCapture) {
 		logg->logMessage("Local capture is not compatable with live, disabling live");
 		mLiveRate = 0;
 	}
+
+	if (!mAllowCommands && (mCaptureCommand != NULL)) {
+		logg->logError(__FILE__, __LINE__, "Running a command during a capture is not currently allowed. Please restart gatord with the -a flag.");
+		handleException();
+	}
+}
+
+// Takes the core name from /proc/device-tree/model when that file is readable; the copy is bounded to mCoreName
+void SessionData::readModel() {
+	FILE *fh = fopen("/proc/device-tree/model", "rb");
+	if (fh == NULL) {
+		return;
+	}
+	char buf[256];
+	if (fgets(buf, sizeof(buf), fh) != NULL) {
+		strncpy(mCoreName, buf, sizeof(mCoreName));
+		mCoreName[sizeof(mCoreName) - 1] = 0; // strncpy does not guarantee a null-terminated string
+	}
+	fclose(fh);
+}
 
 void SessionData::readCpuInfo() {
 	char temp[256]; // arbitrarily large amount
-	strcpy(mCoreName, "unknown");
 	mMaxCpuId = -1;
 
-	FILE* f = fopen("/proc/cpuinfo", "r");
+	FILE *f = fopen("/proc/cpuinfo", "r");
 	if (f == NULL) {
 		logg->logMessage("Error opening /proc/cpuinfo\n"
 			"The core name in the captured xml file will be 'unknown'.");
@@ -122,7 +162,8 @@ void SessionData::readCpuInfo() {
 		}
 
 		if (len > 0) {
-			temp[len - 1] = '\0';	// Replace the line feed with a null
+			// Replace the line feed with a null
+			temp[len - 1] = '\0';
 		}
 
 		const bool foundHardware = strstr(temp, "Hardware") != 0;
@@ -137,7 +178,7 @@ void SessionData::readCpuInfo() {
 			}
 			position += 2;
 
-			if (foundHardware) {
+			if (foundHardware && (strcmp(mCoreName, CORE_NAME_UNKNOWN) == 0)) {
 				strncpy(mCoreName, position, sizeof(mCoreName));
 				mCoreName[sizeof(mCoreName) - 1] = 0; // strncpy does not guarantee a null-terminated string
 				foundCoreName = true;
@@ -171,10 +212,6 @@ void SessionData::readCpuInfo() {
 
 uint64_t getTime() {
 	struct timespec ts;
-#ifndef CLOCK_MONOTONIC_RAW
-	// Android doesn't have this defined but it was added in Linux 2.6.28
-#define CLOCK_MONOTONIC_RAW 4
-#endif
 	if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) != 0) {
 		logg->logError(__FILE__, __LINE__, "Failed to get uptime");
 		handleException();
@@ -185,10 +222,40 @@ uint64_t getTime() {
 int getEventKey() {
 	// key 0 is reserved as a timestamp
 	// key 1 is reserved as the marker for thread specific counters
+	// key 2 is reserved as the marker for core
 	// Odd keys are assigned by the driver, even keys by the daemon
-	static int key = 2;
+	static int key = 4;
 
 	const int ret = key;
 	key += 2;
 	return ret;
 }
+
+// pipe() that also sets FD_CLOEXEC on both ends; if a flag cannot be set both ends are closed. Returns 0 or -1
+int pipe_cloexec(int pipefd[2]) {
+	int status = (pipe(pipefd) != 0) ? -1 : 0;
+
+	for (int end = 0; (status == 0) && (end < 2); ++end) {
+		const int flags = fcntl(pipefd[end], F_GETFD);
+		if ((flags == -1) || (fcntl(pipefd[end], F_SETFD, flags | FD_CLOEXEC) != 0)) {
+			close(pipefd[0]);
+			close(pipefd[1]);
+			status = -1;
+		}
+	}
+	return status;
+}
+
+// fopen() that also sets FD_CLOEXEC on the underlying descriptor; returns NULL if either step fails
+FILE *fopen_cloexec(const char *path, const char *mode) {
+	FILE *const stream = fopen(path, mode);
+	if (stream != NULL) {
+		const int fd = fileno(stream);
+		const int flags = fcntl(fd, F_GETFD);
+		if ((flags != -1) && (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) == 0)) {
+			return stream;
+		}
+		fclose(stream);
+	}
+	return NULL;
+}
diff --git a/tools/gator/daemon/SessionData.h b/tools/gator/daemon/SessionData.h
index 835082d86c4b..ed282af4a869 100644
--- a/tools/gator/daemon/SessionData.h
+++ b/tools/gator/daemon/SessionData.h
@@ -11,17 +11,21 @@
 
 #include <stdint.h>
 
+#include "AnnotateListener.h"
 #include "Config.h"
 #include "Counter.h"
-#include "FSDriver.h"
-#include "Hwmon.h"
+#include "FtraceDriver.h"
+#include "KMod.h"
 #include "MaliVideoDriver.h"
 #include "PerfDriver.h"
 
-#define PROTOCOL_VERSION	19
-#define PROTOCOL_DEV		1000	// Differentiates development versions (timestamp) from release versions
+#define PROTOCOL_VERSION 20
+// Differentiates development versions (timestamp) from release versions
+#define PROTOCOL_DEV 1000
 
-#define NS_PER_S ((uint64_t)1000000000)
+#define NS_PER_S 1000000000LL
+#define NS_PER_MS 1000000LL
+#define NS_PER_US 1000LL
 
 struct ImageLinkList {
 	char* path;
@@ -36,30 +40,40 @@ class SessionData {
 	~SessionData();
 	void initialize();
 	void parseSessionXML(char* xmlString);
+	void readModel();
 	void readCpuInfo();
 
-	Hwmon hwmon;
-	FSDriver fsDriver;
+	PolledDriver *usDrivers[6];
+	KMod kmod;
 	PerfDriver perf;
 	MaliVideoDriver maliVideo;
+	FtraceDriver ftraceDriver;
+	AnnotateListener annotateListener;
 
 	char mCoreName[MAX_STRING_LEN];
 	struct ImageLinkList *mImages;
-	char* mConfigurationXMLPath;
-	char* mSessionXMLPath;
-	char* mEventsXMLPath;
-	char* mTargetPath;
-	char* mAPCDir;
+	char *mConfigurationXMLPath;
+	char *mSessionXMLPath;
+	char *mEventsXMLPath;
+	char *mTargetPath;
+	char *mAPCDir;
+	char *mCaptureWorkingDir;
+	char *mCaptureCommand;
+	char *mCaptureUser;
 
 	bool mWaitingOnCommand;
 	bool mSessionIsActive;
 	bool mLocalCapture;
-	bool mOneShot;		// halt processing of the driver data until profiling is complete or the buffer is filled
+	// halt processing of the driver data until profiling is complete or the buffer is filled
+	bool mOneShot;
 	bool mIsEBS;
 	bool mSentSummary;
+	bool mAllowCommands;
 
+	int64_t mMonotonicStarted;
 	int mBacktraceDepth;
-	int mTotalBufferSize;	// number of MB to use for the entire collection buffer
+	// number of MB to use for the entire collection buffer
+	int mTotalBufferSize;
 	int mSampleRate;
 	int64_t mLiveRate;
 	int mDuration;
@@ -82,5 +96,7 @@ extern SessionData* gSessionData;
 
 uint64_t getTime();
 int getEventKey();
+int pipe_cloexec(int pipefd[2]);
+FILE *fopen_cloexec(const char *path, const char *mode);
 
 #endif // SESSION_DATA_H
diff --git a/tools/gator/daemon/SessionXML.cpp b/tools/gator/daemon/SessionXML.cpp
index 8cdc9409ca21..dea4c8f299ec 100644
--- a/tools/gator/daemon/SessionXML.cpp
+++ b/tools/gator/daemon/SessionXML.cpp
@@ -16,33 +16,30 @@
 #include "OlyUtility.h"
 #include "SessionData.h"
 
-static const char*	TAG_SESSION = "session";
-static const char*	TAG_IMAGE   = "image";
+static const char *TAG_SESSION = "session";
+static const char *TAG_IMAGE   = "image";
 
-static const char*	ATTR_VERSION            = "version";
-static const char*	ATTR_CALL_STACK_UNWINDING = "call_stack_unwinding";
-static const char*	ATTR_BUFFER_MODE        = "buffer_mode";
-static const char*	ATTR_SAMPLE_RATE        = "sample_rate";
-static const char*	ATTR_DURATION           = "duration";
-static const char*	ATTR_PATH               = "path";
-static const char*	ATTR_LIVE_RATE          = "live_rate";
+static const char *ATTR_VERSION              = "version";
+static const char *ATTR_CALL_STACK_UNWINDING = "call_stack_unwinding";
+static const char *ATTR_BUFFER_MODE          = "buffer_mode";
+static const char *ATTR_SAMPLE_RATE          = "sample_rate";
+static const char *ATTR_DURATION             = "duration";
+static const char *ATTR_PATH                 = "path";
+static const char *ATTR_LIVE_RATE            = "live_rate";
+static const char *ATTR_CAPTURE_WORKING_DIR  = "capture_working_dir";
+static const char *ATTR_CAPTURE_COMMAND      = "capture_command";
+static const char *ATTR_CAPTURE_USER         = "capture_user";
 
 SessionXML::SessionXML(const char *str) {
 	parameters.buffer_mode[0] = 0;
 	parameters.sample_rate[0] = 0;
-	parameters.duration = 0;
 	parameters.call_stack_unwinding = false;
 	parameters.live_rate = 0;
-	parameters.images = NULL;
-	mPath = 0;
-	mSessionXML = (const char *)str;
+	mSessionXML = str;
 	logg->logMessage(mSessionXML);
 }
 
 SessionXML::~SessionXML() {
-	if (mPath != 0) {
-		free((char *)mSessionXML);
-	}
 }
 
 void SessionXML::parse() {
@@ -79,10 +76,13 @@ void SessionXML::sessionTag(mxml_node_t *tree, mxml_node_t *node) {
 		strncpy(parameters.sample_rate, mxmlElementGetAttr(node, ATTR_SAMPLE_RATE), sizeof(parameters.sample_rate));
 		parameters.sample_rate[sizeof(parameters.sample_rate) - 1] = 0; // strncpy does not guarantee a null-terminated string
 	}
+	if (mxmlElementGetAttr(node, ATTR_CAPTURE_WORKING_DIR)) gSessionData->mCaptureWorkingDir = strdup(mxmlElementGetAttr(node, ATTR_CAPTURE_WORKING_DIR));
+	if (mxmlElementGetAttr(node, ATTR_CAPTURE_COMMAND)) gSessionData->mCaptureCommand = strdup(mxmlElementGetAttr(node, ATTR_CAPTURE_COMMAND));
+	if (mxmlElementGetAttr(node, ATTR_CAPTURE_USER)) gSessionData->mCaptureUser = strdup(mxmlElementGetAttr(node, ATTR_CAPTURE_USER));
 
 	// integers/bools
 	parameters.call_stack_unwinding = util->stringToBool(mxmlElementGetAttr(node, ATTR_CALL_STACK_UNWINDING), false);
-	if (mxmlElementGetAttr(node, ATTR_DURATION)) parameters.duration = strtol(mxmlElementGetAttr(node, ATTR_DURATION), NULL, 10);
+	if (mxmlElementGetAttr(node, ATTR_DURATION)) gSessionData->mDuration = strtol(mxmlElementGetAttr(node, ATTR_DURATION), NULL, 10);
 	if (mxmlElementGetAttr(node, ATTR_LIVE_RATE)) parameters.live_rate = strtol(mxmlElementGetAttr(node, ATTR_LIVE_RATE), NULL, 10);
 
 	// parse subtags
@@ -106,6 +106,6 @@ void SessionXML::sessionImage(mxml_node_t *node) {
 	image = (struct ImageLinkList *)malloc(sizeof(struct ImageLinkList));
 	image->path = (char*)malloc(length + 1);
 	image->path = strdup(mxmlElementGetAttr(node, ATTR_PATH));
-	image->next = parameters.images;
-	parameters.images = image;
+	image->next = gSessionData->mImages;
+	gSessionData->mImages = image;
 }
diff --git a/tools/gator/daemon/SessionXML.h b/tools/gator/daemon/SessionXML.h
index e146094a4d17..53965749c74b 100644
--- a/tools/gator/daemon/SessionXML.h
+++ b/tools/gator/daemon/SessionXML.h
@@ -14,12 +14,13 @@
 struct ImageLinkList;
 
 struct ConfigParameters {
-	char buffer_mode[64];	// buffer mode, "streaming", "low", "normal", "high" defines oneshot and buffer size
-	char sample_rate[64];	// capture mode, "high", "normal", or "low"
-	int duration;		// length of profile in seconds
-	bool call_stack_unwinding;	// whether stack unwinding is performed
+	// buffer mode, "streaming", "low", "normal", "high" defines oneshot and buffer size
+	char buffer_mode[64];
+	// capture mode, "high", "normal", or "low"
+	char sample_rate[64];
+	// whether stack unwinding is performed
+	bool call_stack_unwinding;
 	int live_rate;
-	struct ImageLinkList *images;	// linked list of image strings
 };
 
 class SessionXML {
@@ -30,7 +31,6 @@ class SessionXML {
 	ConfigParameters parameters;
 private:
 	const char *mSessionXML;
-	const char *mPath;
 	void sessionTag(mxml_node_t *tree, mxml_node_t *node);
 	void sessionImage(mxml_node_t *node);
 
diff --git a/tools/gator/daemon/Setup.cpp b/tools/gator/daemon/Setup.cpp
new file mode 100644
index 000000000000..d4ce0328c633
--- /dev/null
+++ b/tools/gator/daemon/Setup.cpp
@@ -0,0 +1,232 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "Setup.h"
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "Config.h"
+#include "DynBuf.h"
+#include "Logging.h"
+
+bool getLinuxVersion(int version[3]) {
+	// Parses the leading dot-separated numbers of uname()'s release string into version[0..2]; false only if uname fails
+	struct utsname kernelInfo;
+	if (uname(&kernelInfo) != 0) {
+		logg->logMessage("%s(%s:%i): uname failed", __FUNCTION__, __FILE__, __LINE__);
+		return false;
+	}
+
+	for (int i = 0; i < 3; ++i) {
+		version[i] = 0;
+	}
+	// Accumulate up to three decimal fields, stopping at the first character that is not a digit
+	int field = 0;
+	const char *pos = kernelInfo.release;
+	while (field < 3 && *pos >= '0' && *pos <= '9') {
+		version[field] = 10*version[field] + *pos - '0';
+		++pos;
+		// A dot directly after a digit moves on to the next field
+		if (*pos == '.') {
+			++field;
+			++pos;
+		}
+	}
+
+	return true;
+}
+
+static int pgrep_gator(DynBuf *const printb) {
+	// Scans /proc for a process other than this one whose comm starts with "gator" and returns its pid,
+	// or -1 if none is found. On a match printb is left holding that process's /proc/<pid>/stat path.
+	DIR *proc = opendir("/proc");
+	if (proc == NULL) {
+		logg->logError(__FILE__, __LINE__, "gator: error: opendir failed");
+		handleException();
+	}
+	DynBuf b;
+	int self = getpid();
+	int found = -1;
+	struct dirent *dirent;
+	while ((dirent = readdir(proc)) != NULL) {
+		char *endptr;
+		const int pid = strtol(dirent->d_name, &endptr, 10);
+		if (*endptr != '\0' || (pid == self)) {
+			// Ignore proc items that are not integers like ., cpuinfo, etc...
+			continue;
+		}
+
+		if (!printb->printf("/proc/%i/stat", pid)) {
+			logg->logError(__FILE__, __LINE__, "gator: error: DynBuf::printf failed");
+			handleException();
+		}
+
+		if (!b.read(printb->getBuf())) {
+			// This is not a fatal error - the thread just doesn't exist any more
+			continue;
+		}
+
+		char *comm = strchr(b.getBuf(), '(');
+		if (comm == NULL) {
+			logg->logError(__FILE__, __LINE__, "gator: error: parsing stat begin failed");
+			handleException();
+		}
+		++comm;
+		char *const str = strrchr(comm, ')');
+		if (str == NULL) {
+			logg->logError(__FILE__, __LINE__, "gator: error: parsing stat end failed");
+			handleException();
+		}
+		*str = '\0';
+
+		if (strncmp(comm, "gator", 5) == 0) {
+			// Assume there is only one gator process
+			found = pid;
+			break;
+		}
+	}
+	// Reached on both the match and no-match paths so the directory stream is always released
+	closedir(proc);
+	return found;
+}
+
+int update(const char *const gatorPath) {
+	// Replaces any running gatord with the binary at gatorPath: checks the kernel, becomes root, kills the old daemon, then renames and launches the new one
+	printf("gator: starting\n");
+	int version[3];
+	if (!getLinuxVersion(version)) {
+		logg->logError(__FILE__, __LINE__, "gator: error: getLinuxVersion failed");
+		handleException();
+	}
+
+	if (KERNEL_VERSION(version[0], version[1], version[2]) < KERNEL_VERSION(2, 6, 32)) {
+		logg->logError(__FILE__, __LINE__, "gator: error: Streamline can't automatically setup gator as this kernel version is not supported. Please upgrade the kernel on your device.");
+		handleException();
+	}
+
+	if (KERNEL_VERSION(version[0], version[1], version[2]) < KERNEL_VERSION(3, 4, 0)) {
+		logg->logError(__FILE__, __LINE__, "gator: error: Streamline can't automatically setup gator as gator.ko is required for this version of Linux. Please build gator.ko and gatord and install them on your device.");
+		handleException();
+	}
+
+	if (access("/sys/module/gator", F_OK) == 0) {
+		logg->logError(__FILE__, __LINE__, "gator: error: Streamline has detected that the gator kernel module is loaded on your device. Please build an updated version of gator.ko and gatord and install them on your device.");
+		handleException();
+	}
+	// Not root: re-run this binary with -u via sudo, then su. exec only returns on failure, so each attempt falls through to the next
+	if (geteuid() != 0) {
+		printf("gator: trying sudo\n");
+		execlp("sudo", "sudo", gatorPath, "-u", NULL);
+		// Streamline will provide the password if needed
+
+		printf("gator: trying su\n");
+		char buf[1<<10];
+		snprintf(buf, sizeof(buf), "%s -u", gatorPath);
+		execlp("su", "su", "-", "-c", buf, NULL);
+		// Streamline will provide the password if needed
+
+		logg->logError(__FILE__, __LINE__, "gator: error: Streamline was unable to sudo to root on your device. Please double check passwords, ensure sudo or su work with this user or try a different username.");
+		handleException();
+	}
+	printf("gator: now root\n");
+
+	// setenforce 0 not needed for userspace gator
+
+	// Kill existing gator: SIGTERM, SIGKILL after 5 one-second polls, error after 10; pgrep_gator leaves its /proc/<pid>/stat path in gatorStatPath and the loop ends once that path is gone
+	DynBuf gatorStatPath;
+	int gator_main = pgrep_gator(&gatorStatPath);
+	if (gator_main > 0) {
+		if (kill(gator_main, SIGTERM) != 0) {
+			logg->logError(__FILE__, __LINE__, "gator: error: kill SIGTERM failed");
+			handleException();
+		}
+		for (int i = 0; ; ++i) {
+			if (access(gatorStatPath.getBuf(), F_OK) != 0) {
+				break;
+			}
+			if (i == 5) {
+				if (kill(gator_main, SIGKILL) != 0) {
+					logg->logError(__FILE__, __LINE__, "gator: error: kill SIGKILL failed");
+					handleException();
+				}
+			} else if (i >= 10) {
+				logg->logError(__FILE__, __LINE__, "gator: error: unable to kill running gator");
+				handleException();
+			}
+			sleep(1);
+		}
+	}
+	printf("gator: no gatord running\n");
+	// Keep the previous binaries under a .old name; the results of these two renames are not checked
+	rename("gatord", "gatord.old");
+	rename("gator.ko", "gator.ko.old");
+	// NOTE(review): the strdup result is not checked and the last '.' anywhere in the path is cut - confirm gatorPath always ends in a .timestamp suffix
+	// Rename gatord.YYYYMMDDHHMMSSMMMM to gatord
+	char *newGatorPath = strdup(gatorPath);
+	char *dot = strrchr(newGatorPath, '.');
+	if (dot != NULL) {
+		*dot = '\0';
+		if (rename(gatorPath, newGatorPath) != 0) {
+			logg->logError(__FILE__, __LINE__, "gator: error: rename failed");
+			handleException();
+		}
+	}
+	// The three files are opened O_CLOEXEC, so only the dup2'ed copies on stdin/stdout/stderr stay open across the exec
+	// Fork and start gatord with -a (stdin from /dev/null, stdout and stderr to gatord.out and gatord.err); the parent just returns
+	int child = fork();
+	if (child < 0) {
+		logg->logError(__FILE__, __LINE__, "gator: error: fork failed");
+		handleException();
+	} else if (child == 0) {
+		int inFd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+		if (inFd < 0) {
+			logg->logError(__FILE__, __LINE__, "gator: error: open of /dev/null failed");
+			handleException();
+		}
+		int outFd = open("gatord.out", O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0600);
+		if (outFd < 0) {
+			logg->logError(__FILE__, __LINE__, "gator: error: open of gatord.out failed");
+			handleException();
+		}
+		int errFd = open("gatord.err", O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0600);
+		if (errFd < 0) {
+			logg->logError(__FILE__, __LINE__, "gator: error: open of gatord.err failed");
+			handleException();
+		}
+		if (dup2(inFd, STDIN_FILENO) < 0) {
+			logg->logError(__FILE__, __LINE__, "gator: error: dup2 for stdin failed");
+			handleException();
+		}
+		if (dup2(outFd, STDOUT_FILENO) < 0) {
+			logg->logError(__FILE__, __LINE__, "gator: error: dup2 for stdout failed");
+			handleException();
+		}
+		if (dup2(errFd, STDERR_FILENO) < 0) {
+			logg->logError(__FILE__, __LINE__, "gator: error: dup2 for stderr failed");
+			handleException();
+		}
+		execlp(newGatorPath, newGatorPath, "-a", NULL);
+		logg->logError(__FILE__, __LINE__, "gator: error: execlp failed");
+		handleException();
+	}
+
+	printf("gator: done\n");
+
+	return 0;
+}
diff --git a/tools/gator/daemon/Setup.h b/tools/gator/daemon/Setup.h
new file mode 100644
index 000000000000..280d61139784
--- /dev/null
+++ b/tools/gator/daemon/Setup.h
@@ -0,0 +1,18 @@
+/**
+ * Copyright (C) ARM Limited 2014. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef SETUP_H
+#define SETUP_H
+
+// From include/generated/uapi/linux/version.h: packs a.b.c into one int so versions can be compared with <
+#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
+// Both defined in Setup.cpp: getLinuxVersion fills version[0..2] from uname(); update kills any running gator and launches the gatord at gatorPath
+bool getLinuxVersion(int version[3]);
+int update(const char *const gatorPath);
+
+#endif // SETUP_H
diff --git a/tools/gator/daemon/StreamlineSetup.h b/tools/gator/daemon/StreamlineSetup.h
index b380f46b98f0..623e14f2b64a 100644
--- a/tools/gator/daemon/StreamlineSetup.h
+++ b/tools/gator/daemon/StreamlineSetup.h
@@ -6,8 +6,8 @@
  * published by the Free Software Foundation.
  */
 
-#ifndef	__STREAMLINE_SETUP_H__
-#define	__STREAMLINE_SETUP_H__
+#ifndef __STREAMLINE_SETUP_H__
+#define __STREAMLINE_SETUP_H__
 
 #include <stdint.h>
 #include <string.h>
diff --git a/tools/gator/daemon/UEvent.cpp b/tools/gator/daemon/UEvent.cpp
index 54d45751e3c9..f94a995393e8 100644
--- a/tools/gator/daemon/UEvent.cpp
+++ b/tools/gator/daemon/UEvent.cpp
@@ -15,6 +15,7 @@
 #include <linux/netlink.h>
 
 #include "Logging.h"
+#include "OlySocket.h"
 
 static const char EMPTY[] = "";
 static const char ACTION[] = "ACTION=";
@@ -31,7 +32,7 @@ UEvent::~UEvent() {
 }
 
 bool UEvent::init() {
-	mFd = socket(PF_NETLINK, SOCK_RAW, NETLINK_KOBJECT_UEVENT);
+	mFd = socket_cloexec(PF_NETLINK, SOCK_RAW, NETLINK_KOBJECT_UEVENT);
 	if (mFd < 0) {
 		logg->logMessage("%s(%s:%i): socket failed", __FUNCTION__, __FILE__, __LINE__);
 		return false;
diff --git a/tools/gator/daemon/UserSpaceSource.cpp b/tools/gator/daemon/UserSpaceSource.cpp
index 8c328e0e0fb5..4a9b22f4b555 100644
--- a/tools/gator/daemon/UserSpaceSource.cpp
+++ b/tools/gator/daemon/UserSpaceSource.cpp
@@ -16,8 +16,6 @@
 #include "Logging.h"
 #include "SessionData.h"
 
-#define NS_PER_US 1000
-
 extern Child *child;
 
 UserSpaceSource::UserSpaceSource(sem_t *senderSem) : mBuffer(0, FRAME_BLOCK_COUNTER, gSessionData->mTotalBufferSize*1024*1024, senderSem) {
@@ -33,16 +31,22 @@ bool UserSpaceSource::prepare() {
 void UserSpaceSource::run() {
 	prctl(PR_SET_NAME, (unsigned long)&"gatord-counters", 0, 0, 0);
 
-	gSessionData->hwmon.start();
-	gSessionData->fsDriver.start();
+	for (int i = 0; i < ARRAY_LENGTH(gSessionData->usDrivers); ++i) {
+		gSessionData->usDrivers[i]->start();
+	}
 
 	int64_t monotonic_started = 0;
 	while (monotonic_started <= 0) {
 		usleep(10);
 
-		if (DriverSource::readInt64Driver("/dev/gator/started", &monotonic_started) == -1) {
-			logg->logError(__FILE__, __LINE__, "Error reading gator driver start time");
-			handleException();
+		if (gSessionData->perf.isSetup()) {
+			monotonic_started = gSessionData->mMonotonicStarted;
+		} else {
+			if (DriverSource::readInt64Driver("/dev/gator/started", &monotonic_started) == -1) {
+				logg->logError(__FILE__, __LINE__, "Error reading gator driver start time");
+				handleException();
+			}
+			gSessionData->mMonotonicStarted = monotonic_started;
 		}
 	}
 
@@ -57,8 +61,9 @@ void UserSpaceSource::run() {
 		}
 
 		if (mBuffer.eventHeader(curr_time)) {
-			gSessionData->hwmon.read(&mBuffer);
-			gSessionData->fsDriver.read(&mBuffer);
+			for (int i = 0; i < ARRAY_LENGTH(gSessionData->usDrivers); ++i) {
+				gSessionData->usDrivers[i]->read(&mBuffer);
+			}
 			// Only check after writing all counters so that time and corresponding counters appear in the same frame
 			mBuffer.check(curr_time);
 		}
diff --git a/tools/gator/daemon/defaults.xml b/tools/gator/daemon/defaults.xml
index 39a0f656f7e6..086eca1e804e 100644
--- a/tools/gator/daemon/defaults.xml
+++ b/tools/gator/daemon/defaults.xml
@@ -24,16 +24,16 @@
   <configuration counter="ARMv7_Cortex_A9_cnt2" event="0x07"/>
   <configuration counter="ARMv7_Cortex_A9_cnt3" event="0x03"/>
   <configuration counter="ARMv7_Cortex_A9_cnt4" event="0x04"/>
-  <configuration counter="ARMv7_Cortex_A12_ccnt" event="0xff"/>
-  <configuration counter="ARMv7_Cortex_A12_cnt0" event="0x08"/>
-  <configuration counter="ARMv7_Cortex_A12_cnt1" event="0x16"/>
-  <configuration counter="ARMv7_Cortex_A12_cnt2" event="0x10"/>
-  <configuration counter="ARMv7_Cortex_A12_cnt3" event="0x19"/>
   <configuration counter="ARMv7_Cortex_A15_ccnt" event="0xff"/>
   <configuration counter="ARMv7_Cortex_A15_cnt0" event="0x8"/>
   <configuration counter="ARMv7_Cortex_A15_cnt1" event="0x16"/>
   <configuration counter="ARMv7_Cortex_A15_cnt2" event="0x10"/>
   <configuration counter="ARMv7_Cortex_A15_cnt3" event="0x19"/>
+  <configuration counter="ARMv7_Cortex_A17_ccnt" event="0xff"/>
+  <configuration counter="ARMv7_Cortex_A17_cnt0" event="0x08"/>
+  <configuration counter="ARMv7_Cortex_A17_cnt1" event="0x16"/>
+  <configuration counter="ARMv7_Cortex_A17_cnt2" event="0x10"/>
+  <configuration counter="ARMv7_Cortex_A17_cnt3" event="0x19"/>
   <configuration counter="ARM_Cortex-A53_ccnt" event="0x11"/>
   <configuration counter="ARM_Cortex-A53_cnt0" event="0x8"/>
   <configuration counter="ARM_Cortex-A53_cnt1" event="0x16"/>
@@ -56,12 +56,29 @@
   <configuration counter="Linux_block_rq_wr"/>
   <configuration counter="Linux_block_rq_rd"/>
   <configuration counter="Linux_meminfo_memused"/>
+  <configuration counter="Linux_meminfo_memused2"/>
   <configuration counter="Linux_meminfo_memfree"/>
   <configuration counter="Linux_power_cpu_freq"/>
   <configuration counter="ARM_Mali-4xx_fragment"/>
   <configuration counter="ARM_Mali-4xx_vertex"/>
-  <configuration counter="ARM_Mali-T6xx_fragment" cores="1"/>
-  <configuration counter="ARM_Mali-T6xx_vertex" cores="1"/>
-  <configuration counter="ARM_Mali-T6xx_opencl" cores="1"/>
+  <configuration counter="ARM_Mali-Midgard_fragment" cores="1"/>
+  <configuration counter="ARM_Mali-Midgard_vertex" cores="1"/>
+  <configuration counter="ARM_Mali-Midgard_opencl" cores="1"/>
+  <configuration counter="ARM_Mali-T60x_GPU_ACTIVE"/>
+  <configuration counter="ARM_Mali-T60x_JS0_ACTIVE"/>
+  <configuration counter="ARM_Mali-T60x_JS1_ACTIVE"/>
+  <configuration counter="ARM_Mali-T60x_JS2_ACTIVE"/>
+  <configuration counter="ARM_Mali-T62x_GPU_ACTIVE"/>
+  <configuration counter="ARM_Mali-T62x_JS0_ACTIVE"/>
+  <configuration counter="ARM_Mali-T62x_JS1_ACTIVE"/>
+  <configuration counter="ARM_Mali-T62x_JS2_ACTIVE"/>
+  <configuration counter="ARM_Mali-T72x_GPU_ACTIVE"/>
+  <configuration counter="ARM_Mali-T72x_JS0_ACTIVE"/>
+  <configuration counter="ARM_Mali-T72x_JS1_ACTIVE"/>
+  <configuration counter="ARM_Mali-T72x_JS2_ACTIVE"/>
+  <configuration counter="ARM_Mali-T76x_GPU_ACTIVE"/>
+  <configuration counter="ARM_Mali-T76x_JS0_ACTIVE"/>
+  <configuration counter="ARM_Mali-T76x_JS1_ACTIVE"/>
+  <configuration counter="ARM_Mali-T76x_JS2_ACTIVE"/>
   <configuration counter="L2C-310_cnt0" event="0x1"/>
 </configurations>
diff --git a/tools/gator/daemon/events-Cortex-A12.xml b/tools/gator/daemon/events-Cortex-A17.xml
similarity index 97%
rename from tools/gator/daemon/events-Cortex-A12.xml
rename to tools/gator/daemon/events-Cortex-A17.xml
index 9c04354ad137..4dd08c1f203d 100644
--- a/tools/gator/daemon/events-Cortex-A12.xml
+++ b/tools/gator/daemon/events-Cortex-A17.xml
@@ -1,6 +1,6 @@
-  <counter_set name="ARMv7_Cortex_A12_cnt" count="6"/>
-  <category name="Cortex-A12" counter_set="ARMv7_Cortex_A12_cnt" per_cpu="yes" supports_event_based_sampling="yes">
-    <event counter="ARMv7_Cortex_A12_ccnt" event="0xff" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+  <counter_set name="ARMv7_Cortex_A17_cnt" count="6"/>
+  <category name="Cortex-A17" counter_set="ARMv7_Cortex_A17_cnt" per_cpu="yes" supports_event_based_sampling="yes">
+    <event counter="ARMv7_Cortex_A17_ccnt" event="0xff" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
     <event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
     <event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
     <event event="0x03" title="Cache" name="Data refill" description="Memory Read or Write operation that causes a refill of at least the level of data or unified cache closest to the processor"/>
@@ -62,7 +62,7 @@
     <event event="0xc2" title="Cache" name="4 Ways Read" description="Number of set of 4 ways read in the instruction cache - Tag RAM"/>
     <event event="0xc3" title="Cache" name="Ways Read" description="Number of ways read in the instruction cache - Data RAM"/>
     <event event="0xc4" title="Cache" name="BATC Read" description="Number of ways read in the instruction BTAC RAM"/>
-    <event event="0xca" title="Memory" name="Snoop" description="Data snooped from other processor. This event counts memory-read operations that read data from another processor within the local Cortex-A12 cluster, rather than accessing the L2 cache or issuing an external read. It increments on each transaction, rather than on each beat of data"/>
+    <event event="0xca" title="Memory" name="Snoop" description="Data snooped from other processor. This event counts memory-read operations that read data from another processor within the local Cortex-A17 cluster, rather than accessing the L2 cache or issuing an external read. It increments on each transaction, rather than on each beat of data"/>
     <event event="0xd3" title="Slots" name="Load-Store Unit" description="Duration during which all slots in the Load-Store Unit are busy"/>
     <event event="0xd8" title="Slots" name="Load-Store Issue Queue" description="Duration during which all slots in the Load-Store Issue queue are busy"/>
     <event event="0xd9" title="Slots" name="Data Processing Issue Queue" description="Duration during which all slots in the Data Processing issue queue are busy"/>
diff --git a/tools/gator/daemon/events-Filesystem.xml b/tools/gator/daemon/events-Filesystem.xml
index 5feeb9014a63..9ef61ddac811 100644
--- a/tools/gator/daemon/events-Filesystem.xml
+++ b/tools/gator/daemon/events-Filesystem.xml
@@ -1,11 +1,11 @@
   <category name="Filesystem">
-    <!-- counter attributes must be unique -->
+    <!-- counter attribute must start with filesystem_ and be unique -->
     <!-- regex item in () is the value shown -->
-    <!-- these counters are not compatible with userspace gator, i.e. gator.ko must be loaded -->
     <!--
-    <event counter="/sys/devices/system/cpu/cpu1/online" title="online" name="cpu 1" class="absolute" description="If cpu 1 is online"/>
-    <event counter="/proc/self/loginuid" title="loginuid" name="loginuid" class="absolute" description="loginuid"/>
-    <event counter="/proc/self/stat" title="stat" name="rss" class="absolute" regex="-?[0-9]+ \(.*\) . -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ (-?[0-9]+)" units="pages" description="resident set size"/>
-    <event counter="/proc/stat" title="proc-stat" name="processes" class="absolute" regex="processes ([0-9]+)" description="Number of processes and threads created"/>
+    <event counter="filesystem_cpu1_online" path="/sys/devices/system/cpu/cpu1/online" title="online" name="cpu 1" class="absolute" description="If cpu 1 is online"/>
+    <event counter="filesystem_loginuid" path="/proc/self/loginuid" title="loginuid" name="loginuid" class="absolute" description="loginuid"/>
+    <event counter="filesystem_gatord_rss" path="/proc/self/stat" title="stat" name="rss" class="absolute" regex="-?[0-9]+ \(.*\) . -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ -?[0-9]+ (-?[0-9]+)" units="pages" description="resident set size"/>
+    <event counter="filesystem_processes" path="/proc/stat" title="proc-stat" name="processes" class="absolute" regex="processes ([0-9]+)" description="Number of processes and threads created"/>
+    <event counter="filesystem_context_switches" path="/proc/stat" title="proc-stat" name="context switches" class="absolute" regex="ctxt ([0-9]+)" description="Number of context switches"/>
     -->
   </category>
diff --git a/tools/gator/daemon/events-Linux.xml b/tools/gator/daemon/events-Linux.xml
index c306dd62208e..62a7018d038f 100644
--- a/tools/gator/daemon/events-Linux.xml
+++ b/tools/gator/daemon/events-Linux.xml
@@ -1,16 +1,17 @@
   <category name="Linux">
     <event counter="Linux_irq_softirq" title="Interrupts" name="SoftIRQ" per_cpu="yes" description="Linux SoftIRQ taken"/>
     <event counter="Linux_irq_irq" title="Interrupts" name="IRQ" per_cpu="yes" description="Linux IRQ taken"/>
-    <event counter="Linux_block_rq_wr" title="Disk IO" name="Write" units="B" description="Disk IO Bytes Written"/>
-    <event counter="Linux_block_rq_rd" title="Disk IO" name="Read" units="B" description="Disk IO Bytes Read"/>
+    <event counter="Linux_block_rq_wr" title="Disk I/O" name="Write" units="B" description="Disk I/O Bytes Written"/>
+    <event counter="Linux_block_rq_rd" title="Disk I/O" name="Read" units="B" description="Disk I/O Bytes Read"/>
     <event counter="Linux_net_rx" title="Network" name="Receive" units="B" description="Receive network traffic, including effect from Streamline"/>
     <event counter="Linux_net_tx" title="Network" name="Transmit" units="B" description="Transmit network traffic, including effect from Streamline"/>
     <event counter="Linux_sched_switch" title="Scheduler" name="Switch" per_cpu="yes" description="Context switch events"/>
     <event counter="Linux_meminfo_memused" title="Memory" name="Used" class="absolute" units="B" proc="yes" description="Total used memory size. Note: a process' used memory includes shared memory that may be counted more than once (equivalent to RES from top). Kernel threads are not filterable."/>
+    <event counter="Linux_meminfo_memused2" title="Memory" name="Used" class="absolute" units="B" description="Total used memory size"/>
     <event counter="Linux_meminfo_memfree" title="Memory" name="Free" class="absolute" display="minimum" units="B" description="Available memory size"/>
     <event counter="Linux_meminfo_bufferram" title="Memory" name="Buffer" class="absolute" units="B" description="Memory used by OS disk buffers"/>
     <event counter="Linux_power_cpu_freq" title="Clock" name="Frequency" per_cpu="yes" class="absolute" units="Hz" series_composition="overlay" average_cores="yes" description="Frequency setting of the CPU"/>
-    <event counter="Linux_power_cpu_idle" title="Idle" name="State" per_cpu="yes" class="absolute" description="CPU Idle State + 1, set the Sample Rate to None to prevent the hrtimer from interrupting the system"/>
-    <event counter="Linux_cpu_wait_contention" title="CPU Contention" name="Wait" per_cpu="no" class="activity" derived="yes" rendering_type="bar" average_selection="yes" percentage="yes" modifier="10000" color="0x003c96fb" description="Thread waiting on contended resource"/>
-    <event counter="Linux_cpu_wait_io" title="CPU I/O" name="Wait" per_cpu="no" class="activity" derived="yes" rendering_type="bar" average_selection="yes" percentage="yes" modifier="10000" color="0x00b30000" description="Thread waiting on I/O resource"/>
+    <event counter="Linux_cpu_wait_contention" title="CPU Contention" name="Wait" per_cpu="no" class="activity" derived="yes" rendering_type="bar" average_selection="yes" percentage="yes" modifier="10000" color="0x003c96fb" description="One or more threads are runnable but waiting due to CPU contention"/>
+    <event counter="Linux_cpu_wait_io" title="CPU I/O" name="Wait" per_cpu="no" class="activity" derived="yes" rendering_type="bar" average_selection="yes" percentage="yes" modifier="10000" color="0x00b30000" description="One or more threads are blocked on an I/O resource"/>
+    <event counter="Linux_power_cpu" title="CPU Status" name="Activity" class="activity" activity1="Off" activity_color1="0x0000ff00" activity2="WFI" activity_color2="0x000000ff" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" description="CPU Status"/>
   </category>
diff --git a/tools/gator/daemon/events-Mali-Midgard.xml b/tools/gator/daemon/events-Mali-Midgard.xml
new file mode 100644
index 000000000000..b6ab4b88cd2e
--- /dev/null
+++ b/tools/gator/daemon/events-Mali-Midgard.xml
@@ -0,0 +1,46 @@
+  <category name="Mali-Midgard Software Counters" per_cpu="no">
+    <event counter="ARM_Mali-Midgard_TOTAL_ALLOC_PAGES" title="Mali Total Alloc Pages" name="Total number of allocated pages" description="Mali total number of allocated pages."/>
+  </category>
+  <category name="Mali-Midgard PM Shader" per_cpu="no">
+    <event counter="ARM_Mali-Midgard_PM_SHADER_0" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 0" description="Mali PM Shader: PM Shader Core 0."/>
+    <event counter="ARM_Mali-Midgard_PM_SHADER_1" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 1" description="Mali PM Shader: PM Shader Core 1."/>
+    <event counter="ARM_Mali-Midgard_PM_SHADER_2" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 2" description="Mali PM Shader: PM Shader Core 2."/>
+    <event counter="ARM_Mali-Midgard_PM_SHADER_3" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 3" description="Mali PM Shader: PM Shader Core 3."/>
+    <event counter="ARM_Mali-Midgard_PM_SHADER_4" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 4" description="Mali PM Shader: PM Shader Core 4."/>
+    <event counter="ARM_Mali-Midgard_PM_SHADER_5" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 5" description="Mali PM Shader: PM Shader Core 5."/>
+    <event counter="ARM_Mali-Midgard_PM_SHADER_6" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 6" description="Mali PM Shader: PM Shader Core 6."/>
+    <event counter="ARM_Mali-Midgard_PM_SHADER_7" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 7" description="Mali PM Shader: PM Shader Core 7."/>
+  </category>
+  <category name="Mali-Midgard PM Tiler" per_cpu="no">
+    <event counter="ARM_Mali-Midgard_PM_TILER_0" display="average" average_selection="yes" percentage="yes" title="Mali PM Tiler" name="PM Tiler Core 0" description="Mali PM Tiler: PM Tiler Core 0."/>
+  </category>
+  <category name="Mali-Midgard PM L2" per_cpu="no">
+    <event counter="ARM_Mali-Midgard_PM_L2_0" display="average" average_selection="yes" percentage="yes" title="Mali PM L2" name="PM L2 Core 0" description="Mali PM L2: PM L2 Core 0."/>
+    <event counter="ARM_Mali-Midgard_PM_L2_1" display="average" average_selection="yes" percentage="yes" title="Mali PM L2" name="PM L2 Core 1" description="Mali PM L2: PM L2 Core 1."/>
+  </category>
+  <category name="Mali-Midgard MMU Address Space" per_cpu="no">
+    <event counter="ARM_Mali-Midgard_MMU_AS_0" display="average" average_selection="yes" percentage="yes" title="Mali MMU Address Space" name="MMU Address Space 0" description="Mali MMU Address Space 0 usage."/>
+    <event counter="ARM_Mali-Midgard_MMU_AS_1" display="average" average_selection="yes" percentage="yes" title="Mali MMU Address Space" name="MMU Address Space 1" description="Mali MMU Address Space 1 usage."/>
+    <event counter="ARM_Mali-Midgard_MMU_AS_2" display="average" average_selection="yes" percentage="yes" title="Mali MMU Address Space" name="MMU Address Space 2" description="Mali MMU Address Space 2 usage."/>
+    <event counter="ARM_Mali-Midgard_MMU_AS_3" display="average" average_selection="yes" percentage="yes" title="Mali MMU Address Space" name="MMU Address Space 3" description="Mali MMU Address Space 3 usage."/>
+  </category>
+  <category name="Mali-Midgard MMU Page Fault" per_cpu="no">
+    <event counter="ARM_Mali-Midgard_MMU_PAGE_FAULT_0" title="Mali MMU Page Fault Add. Space" name="Mali MMU Page Fault Add. Space 0" description="Reports the number of newly allocated pages after a MMU page fault in address space 0."/>
+    <event counter="ARM_Mali-Midgard_MMU_PAGE_FAULT_1" title="Mali MMU Page Fault Add. Space" name="Mali MMU Page Fault Add. Space 1" description="Reports the number of newly allocated pages after a MMU page fault in address space 1."/>
+    <event counter="ARM_Mali-Midgard_MMU_PAGE_FAULT_2" title="Mali MMU Page Fault Add. Space" name="Mali MMU Page Fault Add. Space 2" description="Reports the number of newly allocated pages after a MMU page fault in address space 2."/>
+    <event counter="ARM_Mali-Midgard_MMU_PAGE_FAULT_3" title="Mali MMU Page Fault Add. Space" name="Mali MMU Page Fault Add. Space 3" description="Reports the number of newly allocated pages after a MMU page fault in address space 3."/>
+  </category>
+  <counter_set name="ARM_Mali-Midgard_Filmstrip_cnt" count="1"/>
+  <category name="Mali-Midgard Filmstrip" counter_set="ARM_Mali-Midgard_Filmstrip_cnt" per_cpu="no">
+    <option_set name="fs">
+      <option event_delta="0x3c" name="1:60" description="captures every 60th frame"/>
+      <option event_delta="0x1e" name="1:30" description="captures every 30th frame"/>
+      <option event_delta="0xa" name="1:10" description="captures every 10th frame"/>
+    </option_set>
+    <event event="0x0400" option_set="fs" title="ARM Mali-Midgard" name="Filmstrip" description="Scaled framebuffer"/>
+  </category>
+  <category name="Mali-Midgard Activity" per_cpu="no">
+    <event counter="ARM_Mali-Midgard_fragment" title="GPU Fragment" name="Activity" class="activity" activity1="Activity" activity_color1="0x00006fcc" rendering_type="bar" average_selection="yes" percentage="yes" cores="1" description="GPU Job Slot 0 Activity"/>
+    <event counter="ARM_Mali-Midgard_vertex" title="GPU Vertex-Tiling-Compute" name="Activity" class="activity" activity1="Activity" activity_color1="0x00eda000" rendering_type="bar" average_selection="yes" percentage="yes" cores="1" description="GPU Job Slot 1 Activity"/>
+    <event counter="ARM_Mali-Midgard_opencl" title="GPU Vertex-Compute" name="Activity" class="activity" activity1="Activity" activity_color1="0x00ef022f" rendering_type="bar" average_selection="yes" percentage="yes" cores="1" description="GPU Job Slot 2 Activity"/>
+  </category>
diff --git a/tools/gator/daemon/events-Mali-Midgard_hw.xml b/tools/gator/daemon/events-Mali-Midgard_hw.xml
new file mode 100644
index 000000000000..4f3323f197d7
--- /dev/null
+++ b/tools/gator/daemon/events-Mali-Midgard_hw.xml
@@ -0,0 +1,91 @@
+  <category name="Mali-Midgard Job Manager" per_cpu="no">
+    <event counter="ARM_Mali-Midgard_GPU_ACTIVE" title="Mali Job Manager Cycles" name="GPU cycles" description="Number of cycles the GPU was active"/>
+    <event counter="ARM_Mali-Midgard_IRQ_ACTIVE" title="Mali Job Manager Cycles" name="IRQ cycles" description="Number of cycles the GPU had a pending interrupt"/>
+    <event counter="ARM_Mali-Midgard_JS0_ACTIVE" title="Mali Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) was active"/>
+    <event counter="ARM_Mali-Midgard_JS1_ACTIVE" title="Mali Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) was active"/>
+    <event counter="ARM_Mali-Midgard_JS2_ACTIVE" title="Mali Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) was active"/>
+    <event counter="ARM_Mali-Midgard_JS0_JOBS" title="Mali Job Manager Work" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0"/>
+    <event counter="ARM_Mali-Midgard_JS0_TASKS" title="Mali Job Manager Work" name="JS0 tasks" description="Number of Tasks completed in JS0"/>
+    <event counter="ARM_Mali-Midgard_JS1_JOBS" title="Mali Job Manager Work" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1"/>
+    <event counter="ARM_Mali-Midgard_JS1_TASKS" title="Mali Job Manager Work" name="JS1 tasks" description="Number of Tasks completed in JS1"/>
+    <event counter="ARM_Mali-Midgard_JS2_TASKS" title="Mali Job Manager Work" name="JS2 tasks" description="Number of Tasks completed in JS2"/>
+    <event counter="ARM_Mali-Midgard_JS2_JOBS" title="Mali Job Manager Work" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2"/>
+  </category>
+  <category name="Mali-Midgard Tiler" per_cpu="no">
+    <event counter="ARM_Mali-Midgard_POLYGONS" title="Mali Tiler Primitives" name="Polygons" description="Number of polygons processed"/>
+    <event counter="ARM_Mali-Midgard_QUADS" title="Mali Tiler Primitives" name="Quads" description="Number of quads processed"/>
+    <event counter="ARM_Mali-Midgard_TRIANGLES" title="Mali Tiler Primitives" name="Triangles" description="Number of triangles processed"/>
+    <event counter="ARM_Mali-Midgard_LINES" title="Mali Tiler Primitives" name="Lines" description="Number of lines processed"/>
+    <event counter="ARM_Mali-Midgard_POINTS" title="Mali Tiler Primitives" name="Points" description="Number of points processed"/>
+    <event counter="ARM_Mali-Midgard_FRONT_FACING" title="Mali Tiler Culling" name="Front facing prims" description="Number of front facing primitives"/>
+    <event counter="ARM_Mali-Midgard_BACK_FACING" title="Mali Tiler Culling" name="Back facing prims" description="Number of back facing primitives"/>
+    <event counter="ARM_Mali-Midgard_PRIM_VISIBLE" title="Mali Tiler Culling" name="Visible prims" description="Number of visible primitives"/>
+    <event counter="ARM_Mali-Midgard_PRIM_CULLED" title="Mali Tiler Culling" name="Culled prims" description="Number of culled primitives"/>
+    <event counter="ARM_Mali-Midgard_PRIM_CLIPPED" title="Mali Tiler Culling" name="Clipped prims" description="Number of clipped primitives"/>
+    <event counter="ARM_Mali-Midgard_LEVEL0" title="Mali Tiler Hierarchy" name="L0 prims" description="Number of primitives in hierarchy level 0"/>
+    <event counter="ARM_Mali-Midgard_LEVEL1" title="Mali Tiler Hierarchy" name="L1 prims" description="Number of primitives in hierarchy level 1"/>
+    <event counter="ARM_Mali-Midgard_LEVEL2" title="Mali Tiler Hierarchy" name="L2 prims" description="Number of primitives in hierarchy level 2"/>
+    <event counter="ARM_Mali-Midgard_LEVEL3" title="Mali Tiler Hierarchy" name="L3 prims" description="Number of primitives in hierarchy level 3"/>
+    <event counter="ARM_Mali-Midgard_LEVEL4" title="Mali Tiler Hierarchy" name="L4 prims" description="Number of primitives in hierarchy level 4"/>
+    <event counter="ARM_Mali-Midgard_LEVEL5" title="Mali Tiler Hierarchy" name="L5 prims" description="Number of primitives in hierarchy level 5"/>
+    <event counter="ARM_Mali-Midgard_LEVEL6" title="Mali Tiler Hierarchy" name="L6 prims" description="Number of primitives in hierarchy level 6"/>
+    <event counter="ARM_Mali-Midgard_LEVEL7" title="Mali Tiler Hierarchy" name="L7 prims" description="Number of primitives in hierarchy level 7"/>
+    <event counter="ARM_Mali-Midgard_COMMAND_1" title="Mali Tiler Commands" name="Prims in 1 command" description="Number of primitives producing 1 command"/>
+    <event counter="ARM_Mali-Midgard_COMMAND_2" title="Mali Tiler Commands" name="Prims in 2 command" description="Number of primitives producing 2 commands"/>
+    <event counter="ARM_Mali-Midgard_COMMAND_3" title="Mali Tiler Commands" name="Prims in 3 command" description="Number of primitives producing 3 commands"/>
+    <event counter="ARM_Mali-Midgard_COMMAND_4" title="Mali Tiler Commands" name="Prims in 4 command" description="Number of primitives producing 4 commands"/>
+    <event counter="ARM_Mali-Midgard_COMMAND_4_7" title="Mali Tiler Commands" name="Prims in 4-7 commands" description="Number of primitives producing 4-7 commands"/>
+    <event counter="ARM_Mali-Midgard_COMMAND_5_7" title="Mali Tiler Commands" name="Prims in 5-7 commands" description="Number of primitives producing 5-7 commands"/>
+    <event counter="ARM_Mali-Midgard_COMMAND_8_15" title="Mali Tiler Commands" name="Prims in 8-15 commands" description="Number of primitives producing 8-15 commands"/>
+    <event counter="ARM_Mali-Midgard_COMMAND_16_63" title="Mali Tiler Commands" name="Prims in 16-63 commands" description="Number of primitives producing 16-63 commands"/>
+    <event counter="ARM_Mali-Midgard_COMMAND_64" title="Mali Tiler Commands" name="Prims in &gt;= 64 commands" description="Number of primitives producing &gt;= 64 commands"/>
+  </category>
+  <category name="Mali-Midgard Shader Core" per_cpu="no">
+    <event counter="ARM_Mali-Midgard_TRIPIPE_ACTIVE" title="Mali Core Cycles" name="Tripipe cycles" description="Number of cycles the Tripipe was active"/>
+    <event counter="ARM_Mali-Midgard_FRAG_ACTIVE" title="Mali Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active"/>
+    <event counter="ARM_Mali-Midgard_COMPUTE_ACTIVE" title="Mali Core Cycles" name="Compute cycles" description="Number of cycles vertex/compute processing was active"/>
+    <event counter="ARM_Mali-Midgard_FRAG_CYCLE_NO_TILE" title="Mali Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer"/>
+    <event counter="ARM_Mali-Midgard_FRAG_THREADS" title="Mali Core Threads" name="Fragment threads" description="Number of fragment threads started"/>
+    <event counter="ARM_Mali-Midgard_FRAG_DUMMY_THREADS" title="Mali Core Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started"/>
+    <event counter="ARM_Mali-Midgard_FRAG_QUADS_LZS_TEST" title="Mali Core Threads" name="Frag threads doing late ZS" description="Number of threads doing late ZS test"/>
+    <event counter="ARM_Mali-Midgard_FRAG_QUADS_LZS_KILLED" title="Mali Core Threads" name="Frag threads killed late ZS" description="Number of threads killed by late ZS test"/>
+    <event counter="ARM_Mali-Midgard_FRAG_THREADS_LZS_TEST" title="Mali Core Threads" name="Frag threads doing late ZS" description="Number of threads doing late ZS test"/>
+    <event counter="ARM_Mali-Midgard_FRAG_THREADS_LZS_KILLED" title="Mali Core Threads" name="Frag threads killed late ZS" description="Number of threads killed by late ZS test"/>
+    <event counter="ARM_Mali-Midgard_COMPUTE_TASKS" title="Mali Compute Threads" name="Compute tasks" description="Number of compute tasks"/>
+    <event counter="ARM_Mali-Midgard_COMPUTE_THREADS" title="Mali Compute Threads" name="Compute threads started" description="Number of compute threads started"/>
+    <event counter="ARM_Mali-Midgard_COMPUTE_CYCLES_DESC" title="Mali Compute Threads" name="Compute cycles awaiting descriptors" description="Number of compute cycles spent waiting for descriptors"/>
+    <event counter="ARM_Mali-Midgard_FRAG_PRIMATIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
+    <event counter="ARM_Mali-Midgard_FRAG_PRIMATIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
+    <event counter="ARM_Mali-Midgard_FRAG_PRIMITIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
+    <event counter="ARM_Mali-Midgard_FRAG_PRIMITIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
+    <event counter="ARM_Mali-Midgard_FRAG_QUADS_RAST" title="Mali Fragment Quads" name="Quads rasterized" description="Number of quads rasterized"/>
+    <event counter="ARM_Mali-Midgard_FRAG_QUADS_EZS_TEST" title="Mali Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test"/>
+    <event counter="ARM_Mali-Midgard_FRAG_QUADS_EZS_KILLED" title="Mali Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test"/>
+    <event counter="ARM_Mali-Midgard_FRAG_NUM_TILES" title="Mali Fragment Tasks" name="Tiles rendered" description="Number of tiles rendered"/>
+    <event counter="ARM_Mali-Midgard_FRAG_TRANS_ELIM" title="Mali Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination"/>
+    <event counter="ARM_Mali-Midgard_ARITH_WORDS" title="Mali Arithmetic Pipe" name="A instructions" description="Number of instructions completed by the A-pipe (normalized per pipeline)"/>
+    <event counter="ARM_Mali-Midgard_LS_WORDS" title="Mali Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe"/>
+    <event counter="ARM_Mali-Midgard_LS_ISSUES" title="Mali Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts"/>
+    <event counter="ARM_Mali-Midgard_TEX_WORDS" title="Mali Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe"/>
+    <event counter="ARM_Mali-Midgard_TEX_THREADS" title="Mali Texture Pipe" name="T instruction issues" description="Number of instructions issued to the T-pipe, including restarts"/>
+    <event counter="ARM_Mali-Midgard_TEX_RECIRC_FMISS" title="Mali Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe, recirculated due to cache miss"/>
+    <event counter="ARM_Mali-Midgard_LSC_READ_HITS" title="Mali Load/Store Cache" name="Read hits" description="Number of read hits in the Load/Store cache"/>
+    <event counter="ARM_Mali-Midgard_LSC_READ_MISSES" title="Mali Load/Store Cache" name="Read misses" description="Number of read misses in the Load/Store cache"/>
+    <event counter="ARM_Mali-Midgard_LSC_WRITE_HITS" title="Mali Load/Store Cache" name="Write hits" description="Number of write hits in the Load/Store cache"/>
+    <event counter="ARM_Mali-Midgard_LSC_WRITE_MISSES" title="Mali Load/Store Cache" name="Write misses" description="Number of write misses in the Load/Store cache"/>
+    <event counter="ARM_Mali-Midgard_LSC_ATOMIC_HITS" title="Mali Load/Store Cache" name="Atomic hits" description="Number of atomic hits in the Load/Store cache"/>
+    <event counter="ARM_Mali-Midgard_LSC_ATOMIC_MISSES" title="Mali Load/Store Cache" name="Atomic misses" description="Number of atomic misses in the Load/Store cache"/>
+    <event counter="ARM_Mali-Midgard_LSC_LINE_FETCHES" title="Mali Load/Store Cache" name="Line fetches" description="Number of line fetches in the Load/Store cache"/>
+    <event counter="ARM_Mali-Midgard_LSC_DIRTY_LINE" title="Mali Load/Store Cache" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache"/>
+    <event counter="ARM_Mali-Midgard_LSC_SNOOPS" title="Mali Load/Store Cache" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache"/>
+  </category>
+  <category name="Mali-Midgard L2 and MMU" per_cpu="no">
+    <event counter="ARM_Mali-Midgard_L2_WRITE_BEATS" title="Mali L2 Cache" name="External write beats" description="Number of external bus write beats"/>
+    <event counter="ARM_Mali-Midgard_L2_READ_BEATS" title="Mali L2 Cache" name="External read beats" description="Number of external bus read beats"/>
+    <event counter="ARM_Mali-Midgard_L2_READ_SNOOP" title="Mali L2 Cache" name="Read snoops" description="Number of read transaction snoops"/>
+    <event counter="ARM_Mali-Midgard_L2_READ_HIT" title="Mali L2 Cache" name="L2 read hits" description="Number of reads hitting in the L2 cache"/>
+    <event counter="ARM_Mali-Midgard_L2_WRITE_SNOOP" title="Mali L2 Cache" name="Write snoops" description="Number of write transaction snoops"/>
+    <event counter="ARM_Mali-Midgard_L2_WRITE_HIT" title="Mali L2 Cache" name="L2 write hits" description="Number of writes hitting in the L2 cache"/>
+    <event counter="ARM_Mali-Midgard_L2_EXT_AR_STALL" title="Mali L2 Cache" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect"/>
+    <event counter="ARM_Mali-Midgard_L2_EXT_W_STALL" title="Mali L2 Cache" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect"/>
+  </category>
diff --git a/tools/gator/daemon/events-Mali-T60x_hw.xml b/tools/gator/daemon/events-Mali-T60x_hw.xml
new file mode 100644
index 000000000000..50797e6492ad
--- /dev/null
+++ b/tools/gator/daemon/events-Mali-T60x_hw.xml
@@ -0,0 +1,108 @@
+
+  <category name="Mali Job Manager" per_cpu="no">
+
+    <event counter="ARM_Mali-T60x_GPU_ACTIVE" title="Mali Job Manager Cycles" name="GPU cycles" description="Number of cycles GPU active"/>
+    <event counter="ARM_Mali-T60x_IRQ_ACTIVE" title="Mali Job Manager Cycles" name="IRQ cycles" description="Number of cycles GPU interrupt pending"/>
+    <event counter="ARM_Mali-T60x_JS0_ACTIVE" title="Mali Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) active"/>
+    <event counter="ARM_Mali-T60x_JS1_ACTIVE" title="Mali Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) active"/>
+    <event counter="ARM_Mali-T60x_JS2_ACTIVE" title="Mali Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) active"/>
+
+    <event counter="ARM_Mali-T60x_JS0_JOBS" title="Mali Job Manager Work" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0"/>
+    <event counter="ARM_Mali-T60x_JS0_TASKS" title="Mali Job Manager Work" name="JS0 tasks" description="Number of Tasks completed in JS0"/>
+    <event counter="ARM_Mali-T60x_JS1_JOBS" title="Mali Job Manager Work" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1"/>
+    <event counter="ARM_Mali-T60x_JS1_TASKS" title="Mali Job Manager Work" name="JS1 tasks" description="Number of Tasks completed in JS1"/>
+    <event counter="ARM_Mali-T60x_JS2_TASKS" title="Mali Job Manager Work" name="JS2 tasks" description="Number of Tasks completed in JS2"/>
+    <event counter="ARM_Mali-T60x_JS2_JOBS" title="Mali Job Manager Work" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2"/>
+
+  </category>
+
+  <category name="Mali Tiler" per_cpu="no">
+
+    <event counter="ARM_Mali-T60x_TI_ACTIVE" title="Mali Tiler Cycles" name="Tiler cycles" description="Number of cycles Tiler active"/>
+
+    <event counter="ARM_Mali-T60x_TI_POLYGONS" title="Mali Tiler Primitives" name="Polygons" description="Number of polygons processed"/>
+    <event counter="ARM_Mali-T60x_TI_QUADS" title="Mali Tiler Primitives" name="Quads" description="Number of quads processed"/>
+    <event counter="ARM_Mali-T60x_TI_TRIANGLES" title="Mali Tiler Primitives" name="Triangles" description="Number of triangles processed"/>
+    <event counter="ARM_Mali-T60x_TI_LINES" title="Mali Tiler Primitives" name="Lines" description="Number of lines processed"/>
+    <event counter="ARM_Mali-T60x_TI_POINTS" title="Mali Tiler Primitives" name="Points" description="Number of points processed"/>
+
+    <event counter="ARM_Mali-T60x_TI_FRONT_FACING" title="Mali Tiler Culling" name="Front facing prims" description="Number of front facing primitives"/>
+    <event counter="ARM_Mali-T60x_TI_BACK_FACING" title="Mali Tiler Culling" name="Back facing prims" description="Number of back facing primitives"/>
+    <event counter="ARM_Mali-T60x_TI_PRIM_VISIBLE" title="Mali Tiler Culling" name="Visible prims" description="Number of visible primitives"/>
+    <event counter="ARM_Mali-T60x_TI_PRIM_CULLED" title="Mali Tiler Culling" name="Culled prims" description="Number of culled primitives"/>
+    <event counter="ARM_Mali-T60x_TI_PRIM_CLIPPED" title="Mali Tiler Culling" name="Clipped prims" description="Number of clipped primitives"/>
+
+    <event counter="ARM_Mali-T60x_TI_LEVEL0" title="Mali Tiler Hierarchy" name="L0 prims" description="Number of primitives in hierarchy level 0"/>
+    <event counter="ARM_Mali-T60x_TI_LEVEL1" title="Mali Tiler Hierarchy" name="L1 prims" description="Number of primitives in hierarchy level 1"/>
+    <event counter="ARM_Mali-T60x_TI_LEVEL2" title="Mali Tiler Hierarchy" name="L2 prims" description="Number of primitives in hierarchy level 2"/>
+    <event counter="ARM_Mali-T60x_TI_LEVEL3" title="Mali Tiler Hierarchy" name="L3 prims" description="Number of primitives in hierarchy level 3"/>
+    <event counter="ARM_Mali-T60x_TI_LEVEL4" title="Mali Tiler Hierarchy" name="L4 prims" description="Number of primitives in hierarchy level 4"/>
+    <event counter="ARM_Mali-T60x_TI_LEVEL5" title="Mali Tiler Hierarchy" name="L5 prims" description="Number of primitives in hierarchy level 5"/>
+    <event counter="ARM_Mali-T60x_TI_LEVEL6" title="Mali Tiler Hierarchy" name="L6 prims" description="Number of primitives in hierarchy level 6"/>
+    <event counter="ARM_Mali-T60x_TI_LEVEL7" title="Mali Tiler Hierarchy" name="L7 prims" description="Number of primitives in hierarchy level 7"/>
+
+  </category>
+
+  <category name="Mali Shader Core" per_cpu="no">
+
+    <event counter="ARM_Mali-T60x_TRIPIPE_ACTIVE" title="Mali Core Cycles" name="Tripipe cycles" description="Number of cycles tripipe was active"/>
+    <event counter="ARM_Mali-T60x_FRAG_ACTIVE" title="Mali Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active"/>
+    <event counter="ARM_Mali-T60x_COMPUTE_ACTIVE" title="Mali Core Cycles" name="Compute cycles" description="Number of cycles vertex/compute processing was active"/>
+    <event counter="ARM_Mali-T60x_FRAG_CYCLES_NO_TILE" title="Mali Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer"/>
+
+    <event counter="ARM_Mali-T60x_FRAG_THREADS" title="Mali Fragment Threads" name="Fragment threads" description="Number of fragment threads started"/>
+    <event counter="ARM_Mali-T60x_FRAG_DUMMY_THREADS" title="Mali Fragment Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started"/>
+    <event counter="ARM_Mali-T60x_FRAG_THREADS_LZS_TEST" title="Mali Fragment Threads" name="Fragment threads doing late ZS" description="Number of threads doing late ZS test"/>
+    <event counter="ARM_Mali-T60x_FRAG_THREADS_LZS_KILLED" title="Mali Fragment Threads" name="Fragment threads killed late ZS" description="Number of threads killed by late ZS test"/>
+
+    <event counter="ARM_Mali-T60x_COMPUTE_TASKS" title="Mali Compute Tasks" name="Compute tasks" description="Number of compute tasks"/>
+    <event counter="ARM_Mali-T60x_COMPUTE_THREADS" title="Mali Compute Threads" name="Compute threads" description="Number of compute threads started"/>
+
+    <event counter="ARM_Mali-T60x_FRAG_PRIMITIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
+    <event counter="ARM_Mali-T60x_FRAG_PRIMITIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
+
+    <event counter="ARM_Mali-T60x_FRAG_QUADS_RAST" title="Mali Fragment Quads" name="Quads rasterized" description="Number of quads rasterized"/>
+    <event counter="ARM_Mali-T60x_FRAG_QUADS_EZS_TEST" title="Mali Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test"/>
+    <event counter="ARM_Mali-T60x_FRAG_QUADS_EZS_KILLED" title="Mali Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test"/>
+
+    <event counter="ARM_Mali-T60x_FRAG_NUM_TILES" title="Mali Fragment Tasks" name="Tiles rendered" description="Number of tiles rendered"/>
+    <event counter="ARM_Mali-T60x_FRAG_TRANS_ELIM" title="Mali Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination"/>
+
+    <event counter="ARM_Mali-T60x_ARITH_WORDS" title="Mali Arithmetic Pipe" name="A instructions" description="Number of instructions completed by the A-pipe (normalized per pipeline)"/>
+
+    <event counter="ARM_Mali-T60x_LS_WORDS" title="Mali Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe"/>
+    <event counter="ARM_Mali-T60x_LS_ISSUES" title="Mali Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts"/>
+
+    <event counter="ARM_Mali-T60x_TEX_WORDS" title="Mali Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe"/>
+    <event counter="ARM_Mali-T60x_TEX_ISSUES" title="Mali Texture Pipe" name="T instruction issues" description="Number of threads through loop 2 address calculation"/>
+    <event counter="ARM_Mali-T60x_TEX_RECIRC_FMISS" title="Mali Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe, recirculated due to cache miss"/>
+
+    <event counter="ARM_Mali-T60x_LSC_READ_HITS" title="Mali Load/Store Cache" name="Read hits" description="Number of read hits in the Load/Store cache"/>
+    <event counter="ARM_Mali-T60x_LSC_READ_MISSES" title="Mali Load/Store Cache" name="Read misses" description="Number of read misses in the Load/Store cache"/>
+    <event counter="ARM_Mali-T60x_LSC_WRITE_HITS" title="Mali Load/Store Cache" name="Write hits" description="Number of write hits in the Load/Store cache"/>
+    <event counter="ARM_Mali-T60x_LSC_WRITE_MISSES" title="Mali Load/Store Cache" name="Write misses" description="Number of write misses in the Load/Store cache"/>
+    <event counter="ARM_Mali-T60x_LSC_ATOMIC_HITS" title="Mali Load/Store Cache" name="Atomic hits" description="Number of atomic hits in the Load/Store cache"/>
+    <event counter="ARM_Mali-T60x_LSC_ATOMIC_MISSES" title="Mali Load/Store Cache" name="Atomic misses" description="Number of atomic misses in the Load/Store cache"/>
+    <event counter="ARM_Mali-T60x_LSC_LINE_FETCHES" title="Mali Load/Store Cache" name="Line fetches" description="Number of line fetches in the Load/Store cache"/>
+    <event counter="ARM_Mali-T60x_LSC_DIRTY_LINE" title="Mali Load/Store Cache" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache"/>
+    <event counter="ARM_Mali-T60x_LSC_SNOOPS" title="Mali Load/Store Cache" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache"/>
+
+  </category>
+
+  <category name="Mali L2 Cache" per_cpu="no">
+
+    <event counter="ARM_Mali-T60x_L2_EXT_WRITE_BEATS" title="Mali L2 Cache" name="External write beats" description="Number of external bus write beats"/>
+    <event counter="ARM_Mali-T60x_L2_EXT_READ_BEATS" title="Mali L2 Cache" name="External read beats" description="Number of external bus read beats"/>
+    <event counter="ARM_Mali-T60x_L2_READ_SNOOP" title="Mali L2 Cache" name="Read snoops" description="Number of read transaction snoops"/>
+    <event counter="ARM_Mali-T60x_L2_READ_HIT" title="Mali L2 Cache" name="L2 read hits" description="Number of reads hitting in the L2 cache"/>
+    <event counter="ARM_Mali-T60x_L2_WRITE_SNOOP" title="Mali L2 Cache" name="Write snoops" description="Number of write transaction snoops"/>
+    <event counter="ARM_Mali-T60x_L2_WRITE_HIT" title="Mali L2 Cache" name="L2 write hits" description="Number of writes hitting in the L2 cache"/>
+    <event counter="ARM_Mali-T60x_L2_EXT_AR_STALL" title="Mali L2 Cache" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect"/>
+    <event counter="ARM_Mali-T60x_L2_EXT_W_STALL" title="Mali L2 Cache" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect"/>
+    <event counter="ARM_Mali-T60x_L2_EXT_R_BUF_FULL" title="Mali L2 Cache" name="External bus response buffer full" description="Number of cycles a valid request is blocked by a full response buffer"/>
+    <event counter="ARM_Mali-T60x_L2_EXT_RD_BUF_FULL" title="Mali L2 Cache" name="External bus read data buffer full" description="Number of cycles a valid request is blocked by a full read data buffer"/>
+    <event counter="ARM_Mali-T60x_L2_EXT_W_BUF_FULL" title="Mali L2 Cache" name="External bus write buffer full" description="Number of cycles a valid request is blocked by a full write buffer"/>
+    <event counter="ARM_Mali-T60x_L2_READ_LOOKUP" title="Mali L2 Cache" name="L2 read lookups" description="Number of reads into the L2 cache"/>
+    <event counter="ARM_Mali-T60x_L2_WRITE_LOOKUP" title="Mali L2 Cache" name="L2 write lookups" description="Number of writes into the L2 cache"/>
+
+  </category>
diff --git a/tools/gator/daemon/events-Mali-T62x_hw.xml b/tools/gator/daemon/events-Mali-T62x_hw.xml
new file mode 100644
index 000000000000..6ecc53c2ada1
--- /dev/null
+++ b/tools/gator/daemon/events-Mali-T62x_hw.xml
@@ -0,0 +1,109 @@
+
+  <category name="Mali Job Manager" per_cpu="no">
+
+    <event counter="ARM_Mali-T62x_GPU_ACTIVE" title="Mali Job Manager Cycles" name="GPU cycles" description="Number of cycles GPU active"/>
+    <event counter="ARM_Mali-T62x_IRQ_ACTIVE" title="Mali Job Manager Cycles" name="IRQ cycles" description="Number of cycles GPU interrupt pending"/>
+    <event counter="ARM_Mali-T62x_JS0_ACTIVE" title="Mali Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) active"/>
+    <event counter="ARM_Mali-T62x_JS1_ACTIVE" title="Mali Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) active"/>
+    <event counter="ARM_Mali-T62x_JS2_ACTIVE" title="Mali Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) active"/>
+
+    <event counter="ARM_Mali-T62x_JS0_JOBS" title="Mali Job Manager Work" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0"/>
+    <event counter="ARM_Mali-T62x_JS0_TASKS" title="Mali Job Manager Work" name="JS0 tasks" description="Number of Tasks completed in JS0"/>
+    <event counter="ARM_Mali-T62x_JS1_JOBS" title="Mali Job Manager Work" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1"/>
+    <event counter="ARM_Mali-T62x_JS1_TASKS" title="Mali Job Manager Work" name="JS1 tasks" description="Number of Tasks completed in JS1"/>
+    <event counter="ARM_Mali-T62x_JS2_TASKS" title="Mali Job Manager Work" name="JS2 tasks" description="Number of Tasks completed in JS2"/>
+    <event counter="ARM_Mali-T62x_JS2_JOBS" title="Mali Job Manager Work" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2"/>
+
+  </category>
+
+  <category name="Mali Tiler" per_cpu="no">
+
+    <event counter="ARM_Mali-T62x_TI_ACTIVE" title="Mali Tiler Cycles" name="Tiler cycles" description="Number of cycles Tiler active"/>
+
+    <event counter="ARM_Mali-T62x_TI_POLYGONS" title="Mali Tiler Primitives" name="Polygons" description="Number of polygons processed"/>
+    <event counter="ARM_Mali-T62x_TI_QUADS" title="Mali Tiler Primitives" name="Quads" description="Number of quads processed"/>
+    <event counter="ARM_Mali-T62x_TI_TRIANGLES" title="Mali Tiler Primitives" name="Triangles" description="Number of triangles processed"/>
+    <event counter="ARM_Mali-T62x_TI_LINES" title="Mali Tiler Primitives" name="Lines" description="Number of lines processed"/>
+    <event counter="ARM_Mali-T62x_TI_POINTS" title="Mali Tiler Primitives" name="Points" description="Number of points processed"/>
+
+    <event counter="ARM_Mali-T62x_TI_FRONT_FACING" title="Mali Tiler Culling" name="Front facing prims" description="Number of front facing primitives"/>
+    <event counter="ARM_Mali-T62x_TI_BACK_FACING" title="Mali Tiler Culling" name="Back facing prims" description="Number of back facing primitives"/>
+    <event counter="ARM_Mali-T62x_TI_PRIM_VISIBLE" title="Mali Tiler Culling" name="Visible prims" description="Number of visible primitives"/>
+    <event counter="ARM_Mali-T62x_TI_PRIM_CULLED" title="Mali Tiler Culling" name="Culled prims" description="Number of culled primitives"/>
+    <event counter="ARM_Mali-T62x_TI_PRIM_CLIPPED" title="Mali Tiler Culling" name="Clipped prims" description="Number of clipped primitives"/>
+
+    <event counter="ARM_Mali-T62x_TI_LEVEL0" title="Mali Tiler Hierarchy" name="L0 prims" description="Number of primitives in hierarchy level 0"/>
+    <event counter="ARM_Mali-T62x_TI_LEVEL1" title="Mali Tiler Hierarchy" name="L1 prims" description="Number of primitives in hierarchy level 1"/>
+    <event counter="ARM_Mali-T62x_TI_LEVEL2" title="Mali Tiler Hierarchy" name="L2 prims" description="Number of primitives in hierarchy level 2"/>
+    <event counter="ARM_Mali-T62x_TI_LEVEL3" title="Mali Tiler Hierarchy" name="L3 prims" description="Number of primitives in hierarchy level 3"/>
+    <event counter="ARM_Mali-T62x_TI_LEVEL4" title="Mali Tiler Hierarchy" name="L4 prims" description="Number of primitives in hierarchy level 4"/>
+    <event counter="ARM_Mali-T62x_TI_LEVEL5" title="Mali Tiler Hierarchy" name="L5 prims" description="Number of primitives in hierarchy level 5"/>
+    <event counter="ARM_Mali-T62x_TI_LEVEL6" title="Mali Tiler Hierarchy" name="L6 prims" description="Number of primitives in hierarchy level 6"/>
+    <event counter="ARM_Mali-T62x_TI_LEVEL7" title="Mali Tiler Hierarchy" name="L7 prims" description="Number of primitives in hierarchy level 7"/>
+
+  </category>
+
+  <category name="Mali Shader Core" per_cpu="no">
+
+    <event counter="ARM_Mali-T62x_TRIPIPE_ACTIVE" title="Mali Core Cycles" name="Tripipe cycles" description="Number of cycles tripipe was active"/>
+    <event counter="ARM_Mali-T62x_FRAG_ACTIVE" title="Mali Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active"/>
+    <event counter="ARM_Mali-T62x_COMPUTE_ACTIVE" title="Mali Core Cycles" name="Compute cycles" description="Number of cycles vertex\compute processing was active"/>
+    <event counter="ARM_Mali-T62x_FRAG_CYCLES_NO_TILE" title="Mali Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer"/>
+    <event counter="ARM_Mali-T62x_FRAG_CYCLES_FPKQ_ACTIVE" title="Mali Core Cycles" name="Fragment cycles pre-pipe buffer not empty" description="Number of cycles the pre-pipe queue contains quads"/>
+
+    <event counter="ARM_Mali-T62x_FRAG_THREADS" title="Mali Fragment Threads" name="Fragment threads" description="Number of fragment threads started"/>
+    <event counter="ARM_Mali-T62x_FRAG_DUMMY_THREADS" title="Mali Fragment Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started"/>
+    <event counter="ARM_Mali-T62x_FRAG_THREADS_LZS_TEST" title="Mali Fragment Threads" name="Fragment threads doing late ZS" description="Number of threads doing late ZS test"/>
+    <event counter="ARM_Mali-T62x_FRAG_THREADS_LZS_KILLED" title="Mali Fragment Threads" name="Fragment threads killed late ZS" description="Number of threads killed by late ZS test"/>
+
+    <event counter="ARM_Mali-T62x_COMPUTE_TASKS" title="Mali Compute Tasks" name="Compute tasks" description="Number of compute tasks"/>
+    <event counter="ARM_Mali-T62x_COMPUTE_THREADS" title="Mali Compute Threads" name="Compute threads" description="Number of compute threads started"/>
+
+    <event counter="ARM_Mali-T62x_FRAG_PRIMITIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
+    <event counter="ARM_Mali-T62x_FRAG_PRIMITIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
+
+    <event counter="ARM_Mali-T62x_FRAG_QUADS_RAST" title="Mali Fragment Quads" name="Quads rasterized" description="Number of quads rasterized"/>
+    <event counter="ARM_Mali-T62x_FRAG_QUADS_EZS_TEST" title="Mali Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test"/>
+    <event counter="ARM_Mali-T62x_FRAG_QUADS_EZS_KILLED" title="Mali Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test"/>
+
+    <event counter="ARM_Mali-T62x_FRAG_NUM_TILES" title="Mali Fragment Tasks" name="Tiles rendered" description="Number of tiles rendered"/>
+    <event counter="ARM_Mali-T62x_FRAG_TRANS_ELIM" title="Mali Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination"/>
+
+    <event counter="ARM_Mali-T62x_ARITH_WORDS" title="Mali Arithmetic Pipe" name="A instructions" description="Number of instructions completed by the the A-pipe (normalized per pipeline)"/>
+
+    <event counter="ARM_Mali-T62x_LS_WORDS" title="Mali Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe"/>
+    <event counter="ARM_Mali-T62x_LS_ISSUES" title="Mali Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts"/>
+
+    <event counter="ARM_Mali-T62x_TEX_WORDS" title="Mali Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe"/>
+    <event counter="ARM_Mali-T62x_TEX_ISSUES" title="Mali Texture Pipe" name="T instruction issues" description="Number of threads through loop 2 address calculation"/>
+    <event counter="ARM_Mali-T62x_TEX_RECIRC_FMISS" title="Mali Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe, recirculated due to cache miss"/>
+
+    <event counter="ARM_Mali-T62x_LSC_READ_HITS" title="Mali Load/Store Cache" name="Read hits" description="Number of read hits in the Load/Store cache"/>
+    <event counter="ARM_Mali-T62x_LSC_READ_MISSES" title="Mali Load/Store Cache" name="Read misses" description="Number of read misses in the Load/Store cache"/>
+    <event counter="ARM_Mali-T62x_LSC_WRITE_HITS" title="Mali Load/Store Cache" name="Write hits" description="Number of write hits in the Load/Store cache"/>
+    <event counter="ARM_Mali-T62x_LSC_WRITE_MISSES" title="Mali Load/Store Cache" name="Write misses" description="Number of write misses in the Load/Store cache"/>
+    <event counter="ARM_Mali-T62x_LSC_ATOMIC_HITS" title="Mali Load/Store Cache" name="Atomic hits" description="Number of atomic hits in the Load/Store cache"/>
+    <event counter="ARM_Mali-T62x_LSC_ATOMIC_MISSES" title="Mali Load/Store Cache" name="Atomic misses" description="Number of atomic misses in the Load/Store cache"/>
+    <event counter="ARM_Mali-T62x_LSC_LINE_FETCHES" title="Mali Load/Store Cache" name="Line fetches" description="Number of line fetches in the Load/Store cache"/>
+    <event counter="ARM_Mali-T62x_LSC_DIRTY_LINE" title="Mali Load/Store Cache" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache"/>
+    <event counter="ARM_Mali-T62x_LSC_SNOOPS" title="Mali Load/Store Cache" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache"/>
+
+  </category>
+
+  <category name="Mali L2 Cache" per_cpu="no">
+
+    <event counter="ARM_Mali-T62x_L2_EXT_WRITE_BEATS" title="Mali L2 Cache" name="External write beats" description="Number of external bus write beats"/>
+    <event counter="ARM_Mali-T62x_L2_EXT_READ_BEATS" title="Mali L2 Cache" name="External read beats" description="Number of external bus read beats"/>
+    <event counter="ARM_Mali-T62x_L2_READ_SNOOP" title="Mali L2 Cache" name="Read snoops" description="Number of read transaction snoops"/>
+    <event counter="ARM_Mali-T62x_L2_READ_HIT" title="Mali L2 Cache" name="L2 read hits" description="Number of reads hitting in the L2 cache"/>
+    <event counter="ARM_Mali-T62x_L2_WRITE_SNOOP" title="Mali L2 Cache" name="Write snoops" description="Number of write transaction snoops"/>
+    <event counter="ARM_Mali-T62x_L2_WRITE_HIT" title="Mali L2 Cache" name="L2 write hits" description="Number of writes hitting in the L2 cache"/>
+    <event counter="ARM_Mali-T62x_L2_EXT_AR_STALL" title="Mali L2 Cache" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect"/>
+    <event counter="ARM_Mali-T62x_L2_EXT_W_STALL" title="Mali L2 Cache" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect"/>
+    <event counter="ARM_Mali-T62x_L2_EXT_R_BUF_FULL" title="Mali L2 Cache" name="External bus response buffer full" description="Number of cycles a valid request is blocked by a full response buffer"/>
+    <event counter="ARM_Mali-T62x_L2_EXT_RD_BUF_FULL" title="Mali L2 Cache" name="External bus read data buffer full" description="Number of cycles a valid request is blocked by a full read data buffer"/>
+    <event counter="ARM_Mali-T62x_L2_EXT_W_BUF_FULL" title="Mali L2 Cache" name="External bus write buffer full" description="Number of cycles a valid request is blocked by a full write buffer"/>
+    <event counter="ARM_Mali-T62x_L2_READ_LOOKUP" title="Mali L2 Cache" name="L2 read lookups" description="Number of reads into the L2 cache"/>
+    <event counter="ARM_Mali-T62x_L2_WRITE_LOOKUP" title="Mali L2 Cache" name="L2 write lookups" description="Number of writes into the L2 cache"/>
+
+  </category>
diff --git a/tools/gator/daemon/events-Mali-T6xx.xml b/tools/gator/daemon/events-Mali-T6xx.xml
deleted file mode 100644
index 5e8979704870..000000000000
--- a/tools/gator/daemon/events-Mali-T6xx.xml
+++ /dev/null
@@ -1,46 +0,0 @@
-  <category name="Mali-T6xx Software Counters" per_cpu="no">
-    <event counter="ARM_Mali-T6xx_TOTAL_ALLOC_PAGES" title="Mali Total Alloc Pages" name="Total number of allocated pages" description="Mali total number of allocated pages."/>
-  </category>
-  <category name="Mali-T6xx PM Shader" per_cpu="no">
-    <event counter="ARM_Mali-T6xx_PM_SHADER_0" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 0" description="Mali PM Shader: PM Shader Core 0."/>
-    <event counter="ARM_Mali-T6xx_PM_SHADER_1" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 1" description="Mali PM Shader: PM Shader Core 1."/>
-    <event counter="ARM_Mali-T6xx_PM_SHADER_2" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 2" description="Mali PM Shader: PM Shader Core 2."/>
-    <event counter="ARM_Mali-T6xx_PM_SHADER_3" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 3" description="Mali PM Shader: PM Shader Core 3."/>
-    <event counter="ARM_Mali-T6xx_PM_SHADER_4" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 4" description="Mali PM Shader: PM Shader Core 4."/>
-    <event counter="ARM_Mali-T6xx_PM_SHADER_5" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 5" description="Mali PM Shader: PM Shader Core 5."/>
-    <event counter="ARM_Mali-T6xx_PM_SHADER_6" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 6" description="Mali PM Shader: PM Shader Core 6."/>
-    <event counter="ARM_Mali-T6xx_PM_SHADER_7" class="absolute" display="average" average_selection="yes" percentage="yes" title="Mali PM Shader" name="PM Shader Core 7" description="Mali PM Shader: PM Shader Core 7."/>
-  </category>
-  <category name="Mali-T6xx PM Tiler" per_cpu="no">
-    <event counter="ARM_Mali-T6xx_PM_TILER_0" display="average" average_selection="yes" percentage="yes" title="Mali PM Tiler" name="PM Tiler Core 0" description="Mali PM Tiler: PM Tiler Core 0."/>
-  </category>
-  <category name="Mali-T6xx PM L2" per_cpu="no">
-    <event counter="ARM_Mali-T6xx_PM_L2_0" display="average" average_selection="yes" percentage="yes" title="Mali PM L2" name="PM L2 Core 0" description="Mali PM L2: PM L2 Core 0."/>
-    <event counter="ARM_Mali-T6xx_PM_L2_1" display="average" average_selection="yes" percentage="yes" title="Mali PM L2" name="PM L2 Core 1" description="Mali PM L2: PM L2 Core 1."/>
-  </category>
-  <category name="Mali-T6xx MMU Address Space" per_cpu="no">
-    <event counter="ARM_Mali-T6xx_MMU_AS_0" display="average" average_selection="yes" percentage="yes" title="Mali MMU Address Space" name="MMU Address Space 0" description="Mali MMU Address Space 0 usage."/>
-    <event counter="ARM_Mali-T6xx_MMU_AS_1" display="average" average_selection="yes" percentage="yes" title="Mali MMU Address Space" name="MMU Address Space 1" description="Mali MMU Address Space 1 usage."/>
-    <event counter="ARM_Mali-T6xx_MMU_AS_2" display="average" average_selection="yes" percentage="yes" title="Mali MMU Address Space" name="MMU Address Space 2" description="Mali MMU Address Space 2 usage."/>
-    <event counter="ARM_Mali-T6xx_MMU_AS_3" display="average" average_selection="yes" percentage="yes" title="Mali MMU Address Space" name="MMU Address Space 3" description="Mali MMU Address Space 3 usage."/>
-  </category>
-  <category name="Mali-T6xx MMU Page Fault" per_cpu="no">
-    <event counter="ARM_Mali-T6xx_MMU_PAGE_FAULT_0" title="Mali MMU Page Fault Add. Space" name="Mali MMU Page Fault Add. Space 0" description="Reports the number of newly allocated pages after a MMU page fault in address space 0."/>
-    <event counter="ARM_Mali-T6xx_MMU_PAGE_FAULT_1" title="Mali MMU Page Fault Add. Space" name="Mali MMU Page Fault Add. Space 1" description="Reports the number of newly allocated pages after a MMU page fault in address space 1."/>
-    <event counter="ARM_Mali-T6xx_MMU_PAGE_FAULT_2" title="Mali MMU Page Fault Add. Space" name="Mali MMU Page Fault Add. Space 2" description="Reports the number of newly allocated pages after a MMU page fault in address space 2."/>
-    <event counter="ARM_Mali-T6xx_MMU_PAGE_FAULT_3" title="Mali MMU Page Fault Add. Space" name="Mali MMU Page Fault Add. Space 3" description="Reports the number of newly allocated pages after a MMU page fault in address space 3."/>
-  </category>
-  <counter_set name="ARM_Mali-T6xx_Filmstrip_cnt" count="1"/>
-  <category name="Mali-T6xx Filmstrip" counter_set="ARM_Mali-T6xx_Filmstrip_cnt" per_cpu="no">
-    <option_set name="fs">
-      <option event_delta="0x3c" name="1:60" description="captures every 60th frame"/>
-      <option event_delta="0x1e" name="1:30" description="captures every 30th frame"/>
-      <option event_delta="0xa" name="1:10" description="captures every 10th frame"/>
-    </option_set>
-    <event event="0x0400" option_set="fs" title="ARM Mali-T6xx" name="Filmstrip" description="Scaled framebuffer"/>
-  </category>
-  <category name="Mali-T6xx Activity" per_cpu="no">
-    <event counter="ARM_Mali-T6xx_fragment" title="GPU Fragment" name="Activity" class="activity" activity1="Activity" activity_color1="0x00006fcc" rendering_type="bar" average_selection="yes" percentage="yes" cores="1" description="GPU Job Slot 0 Activity"/>
-    <event counter="ARM_Mali-T6xx_vertex" title="GPU Vertex-Tiling-Compute" name="Activity" class="activity" activity1="Activity" activity_color1="0x00eda000" rendering_type="bar" average_selection="yes" percentage="yes" cores="1" description="GPU Job Slot 1 Activity"/>
-    <event counter="ARM_Mali-T6xx_opencl" title="GPU Vertex-Compute" name="Activity" class="activity" activity1="Activity" activity_color1="0x00ef022f" rendering_type="bar" average_selection="yes" percentage="yes" cores="1" description="GPU Job Slot 2 Activity"/>
-  </category>
diff --git a/tools/gator/daemon/events-Mali-T6xx_hw.xml b/tools/gator/daemon/events-Mali-T6xx_hw.xml
deleted file mode 100644
index df2796262473..000000000000
--- a/tools/gator/daemon/events-Mali-T6xx_hw.xml
+++ /dev/null
@@ -1,91 +0,0 @@
-  <category name="Mali-T6xx Job Manager" per_cpu="no">
-    <event counter="ARM_Mali-T6xx_GPU_ACTIVE" title="Mali Job Manager Cycles" name="GPU cycles" description="Number of cycles the GPU was active"/>
-    <event counter="ARM_Mali-T6xx_IRQ_ACTIVE" title="Mali Job Manager Cycles" name="IRQ cycles" description="Number of cycles the GPU had a pending interrupt"/>
-    <event counter="ARM_Mali-T6xx_JS0_ACTIVE" title="Mali Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) was active"/>
-    <event counter="ARM_Mali-T6xx_JS1_ACTIVE" title="Mali Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) was active"/>
-    <event counter="ARM_Mali-T6xx_JS2_ACTIVE" title="Mali Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) was active"/>
-    <event counter="ARM_Mali-T6xx_JS0_JOBS" title="Mali Job Manager Work" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0"/>
-    <event counter="ARM_Mali-T6xx_JS0_TASKS" title="Mali Job Manager Work" name="JS0 tasks" description="Number of Tasks completed in JS0"/>
-    <event counter="ARM_Mali-T6xx_JS1_JOBS" title="Mali Job Manager Work" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1"/>
-    <event counter="ARM_Mali-T6xx_JS1_TASKS" title="Mali Job Manager Work" name="JS1 tasks" description="Number of Tasks completed in JS1"/>
-    <event counter="ARM_Mali-T6xx_JS2_TASKS" title="Mali Job Manager Work" name="JS2 tasks" description="Number of Tasks completed in JS2"/>
-    <event counter="ARM_Mali-T6xx_JS2_JOBS" title="Mali Job Manager Work" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2"/>
-  </category>
-  <category name="Mali-T6xx Tiler" per_cpu="no">
-    <event counter="ARM_Mali-T6xx_POLYGONS" title="Mali Tiler Primitives" name="Polygons" description="Number of polygons processed"/>
-    <event counter="ARM_Mali-T6xx_QUADS" title="Mali Tiler Primitives" name="Quads" description="Number of quads processed"/>
-    <event counter="ARM_Mali-T6xx_TRIANGLES" title="Mali Tiler Primitives" name="Triangles" description="Number of triangles processed"/>
-    <event counter="ARM_Mali-T6xx_LINES" title="Mali Tiler Primitives" name="Lines" description="Number of lines processed"/>
-    <event counter="ARM_Mali-T6xx_POINTS" title="Mali Tiler Primitives" name="Points" description="Number of points processed"/>
-    <event counter="ARM_Mali-T6xx_FRONT_FACING" title="Mali Tiler Culling" name="Front facing prims" description="Number of front facing primitives"/>
-    <event counter="ARM_Mali-T6xx_BACK_FACING" title="Mali Tiler Culling" name="Back facing prims" description="Number of back facing primitives"/>
-    <event counter="ARM_Mali-T6xx_PRIM_VISIBLE" title="Mali Tiler Culling" name="Visible prims" description="Number of visible primitives"/>
-    <event counter="ARM_Mali-T6xx_PRIM_CULLED" title="Mali Tiler Culling" name="Culled prims" description="Number of culled primitives"/>
-    <event counter="ARM_Mali-T6xx_PRIM_CLIPPED" title="Mali Tiler Culling" name="Clipped prims" description="Number of clipped primitives"/>
-    <event counter="ARM_Mali-T6xx_LEVEL0" title="Mali Tiler Hierarchy" name="L0 prims" description="Number of primitives in hierarchy level 0"/>
-    <event counter="ARM_Mali-T6xx_LEVEL1" title="Mali Tiler Hierarchy" name="L1 prims" description="Number of primitives in hierarchy level 1"/>
-    <event counter="ARM_Mali-T6xx_LEVEL2" title="Mali Tiler Hierarchy" name="L2 prims" description="Number of primitives in hierarchy level 2"/>
-    <event counter="ARM_Mali-T6xx_LEVEL3" title="Mali Tiler Hierarchy" name="L3 prims" description="Number of primitives in hierarchy level 3"/>
-    <event counter="ARM_Mali-T6xx_LEVEL4" title="Mali Tiler Hierarchy" name="L4 prims" description="Number of primitives in hierarchy level 4"/>
-    <event counter="ARM_Mali-T6xx_LEVEL5" title="Mali Tiler Hierarchy" name="L5 prims" description="Number of primitives in hierarchy level 5"/>
-    <event counter="ARM_Mali-T6xx_LEVEL6" title="Mali Tiler Hierarchy" name="L6 prims" description="Number of primitives in hierarchy level 6"/>
-    <event counter="ARM_Mali-T6xx_LEVEL7" title="Mali Tiler Hierarchy" name="L7 prims" description="Number of primitives in hierarchy level 7"/>
-    <event counter="ARM_Mali-T6xx_COMMAND_1" title="Mali Tiler Commands" name="Prims in 1 command" description="Number of primitives producing 1 command"/>
-    <event counter="ARM_Mali-T6xx_COMMAND_2" title="Mali Tiler Commands" name="Prims in 2 command" description="Number of primitives producing 2 commands"/>
-    <event counter="ARM_Mali-T6xx_COMMAND_3" title="Mali Tiler Commands" name="Prims in 3 command" description="Number of primitives producing 3 commands"/>
-    <event counter="ARM_Mali-T6xx_COMMAND_4" title="Mali Tiler Commands" name="Prims in 4 command" description="Number of primitives producing 4 commands"/>
-    <event counter="ARM_Mali-T6xx_COMMAND_4_7" title="Mali Tiler Commands" name="Prims in 4-7 commands" description="Number of primitives producing 4-7 commands"/>
-    <event counter="ARM_Mali-T6xx_COMMAND_5_7" title="Mali Tiler Commands" name="Prims in 5-7 commands" description="Number of primitives producing 5-7 commands"/>
-    <event counter="ARM_Mali-T6xx_COMMAND_8_15" title="Mali Tiler Commands" name="Prims in 8-15 commands" description="Number of primitives producing 8-15 commands"/>
-    <event counter="ARM_Mali-T6xx_COMMAND_16_63" title="Mali Tiler Commands" name="Prims in 16-63 commands" description="Number of primitives producing 16-63 commands"/>
-    <event counter="ARM_Mali-T6xx_COMMAND_64" title="Mali Tiler Commands" name="Prims in &gt;= 64 commands" description="Number of primitives producing &gt;= 64 commands"/>
-  </category>
-  <category name="Mali-T6xx Shader Core" per_cpu="no">
-    <event counter="ARM_Mali-T6xx_TRIPIPE_ACTIVE" title="Mali Core Cycles" name="Tripipe cycles" description="Number of cycles the Tripipe was active"/>
-    <event counter="ARM_Mali-T6xx_FRAG_ACTIVE" title="Mali Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active"/>
-    <event counter="ARM_Mali-T6xx_COMPUTE_ACTIVE" title="Mali Core Cycles" name="Compute cycles" description="Number of cycles vertex\compute processing was active"/>
-    <event counter="ARM_Mali-T6xx_FRAG_CYCLE_NO_TILE" title="Mali Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer"/>
-    <event counter="ARM_Mali-T6xx_FRAG_THREADS" title="Mali Core Threads" name="Fragment threads" description="Number of fragment threads started"/>
-    <event counter="ARM_Mali-T6xx_FRAG_DUMMY_THREADS" title="Mali Core Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started"/>
-    <event counter="ARM_Mali-T6xx_FRAG_QUADS_LZS_TEST" title="Mali Core Threads" name="Frag threads doing late ZS" description="Number of threads doing late ZS test"/>
-    <event counter="ARM_Mali-T6xx_FRAG_QUADS_LZS_KILLED" title="Mali Core Threads" name="Frag threads killed late ZS" description="Number of threads killed by late ZS test"/>
-    <event counter="ARM_Mali-T6xx_FRAG_THREADS_LZS_TEST" title="Mali Core Threads" name="Frag threads doing late ZS" description="Number of threads doing late ZS test"/>
-    <event counter="ARM_Mali-T6xx_FRAG_THREADS_LZS_KILLED" title="Mali Core Threads" name="Frag threads killed late ZS" description="Number of threads killed by late ZS test"/>
-    <event counter="ARM_Mali-T6xx_COMPUTE_TASKS" title="Mali Compute Threads" name="Compute tasks" description="Number of compute tasks"/>
-    <event counter="ARM_Mali-T6xx_COMPUTE_THREADS" title="Mali Compute Threads" name="Compute threads started" description="Number of compute threads started"/>
-    <event counter="ARM_Mali-T6xx_COMPUTE_CYCLES_DESC" title="Mali Compute Threads" name="Compute cycles awaiting descriptors" description="Number of compute cycles spent waiting for descriptors"/>
-    <event counter="ARM_Mali-T6xx_FRAG_PRIMATIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
-    <event counter="ARM_Mali-T6xx_FRAG_PRIMATIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
-    <event counter="ARM_Mali-T6xx_FRAG_PRIMITIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
-    <event counter="ARM_Mali-T6xx_FRAG_PRIMITIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
-    <event counter="ARM_Mali-T6xx_FRAG_QUADS_RAST" title="Mali Fragment Quads" name="Quads rasterized" description="Number of quads rasterized"/>
-    <event counter="ARM_Mali-T6xx_FRAG_QUADS_EZS_TEST" title="Mali Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test"/>
-    <event counter="ARM_Mali-T6xx_FRAG_QUADS_EZS_KILLED" title="Mali Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test"/>
-    <event counter="ARM_Mali-T6xx_FRAG_NUM_TILES" title="Mali Fragment Tasks" name="Tiles rendered" description="Number of tiles rendered"/>
-    <event counter="ARM_Mali-T6xx_FRAG_TRANS_ELIM" title="Mali Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination"/>
-    <event counter="ARM_Mali-T6xx_ARITH_WORDS" title="Mali Arithmetic Pipe" name="A instructions" description="Number of instructions completed by the the A-pipe (normalized per pipeline)"/>
-    <event counter="ARM_Mali-T6xx_LS_WORDS" title="Mali Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe"/>
-    <event counter="ARM_Mali-T6xx_LS_ISSUES" title="Mali Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts"/>
-    <event counter="ARM_Mali-T6xx_TEX_WORDS" title="Mali Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe"/>
-    <event counter="ARM_Mali-T6xx_TEX_THREADS" title="Mali Texture Pipe" name="T instruction issues" description="Number of instructions issused to the T-pipe, including restarts"/>
-    <event counter="ARM_Mali-T6xx_TEX_RECIRC_FMISS" title="Mali Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe, recirculated due to cache miss"/>
-    <event counter="ARM_Mali-T6xx_LSC_READ_HITS" title="Mali Load/Store Cache" name="Read hits" description="Number of read hits in the Load/Store cache"/>
-    <event counter="ARM_Mali-T6xx_LSC_READ_MISSES" title="Mali Load/Store Cache" name="Read misses" description="Number of read misses in the Load/Store cache"/>
-    <event counter="ARM_Mali-T6xx_LSC_WRITE_HITS" title="Mali Load/Store Cache" name="Write hits" description="Number of write hits in the Load/Store cache"/>
-    <event counter="ARM_Mali-T6xx_LSC_WRITE_MISSES" title="Mali Load/Store Cache" name="Write misses" description="Number of write misses in the Load/Store cache"/>
-    <event counter="ARM_Mali-T6xx_LSC_ATOMIC_HITS" title="Mali Load/Store Cache" name="Atomic hits" description="Number of atomic hits in the Load/Store cache"/>
-    <event counter="ARM_Mali-T6xx_LSC_ATOMIC_MISSES" title="Mali Load/Store Cache" name="Atomic misses" description="Number of atomic misses in the Load/Store cache"/>
-    <event counter="ARM_Mali-T6xx_LSC_LINE_FETCHES" title="Mali Load/Store Cache" name="Line fetches" description="Number of line fetches in the Load/Store cache"/>
-    <event counter="ARM_Mali-T6xx_LSC_DIRTY_LINE" title="Mali Load/Store Cache" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache"/>
-    <event counter="ARM_Mali-T6xx_LSC_SNOOPS" title="Mali Load/Store Cache" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache"/>
-  </category>
-  <category name="Mali-T6xx L2 and MMU" per_cpu="no">
-    <event counter="ARM_Mali-T6xx_L2_WRITE_BEATS" title="Mali L2 Cache" name="External write beats" description="Number of external bus write beats"/>
-    <event counter="ARM_Mali-T6xx_L2_READ_BEATS" title="Mali L2 Cache" name="External read beats" description="Number of external bus read beats"/>
-    <event counter="ARM_Mali-T6xx_L2_READ_SNOOP" title="Mali L2 Cache" name="Read snoops" description="Number of read transaction snoops"/>
-    <event counter="ARM_Mali-T6xx_L2_READ_HIT" title="Mali L2 Cache" name="L2 read hits" description="Number of reads hitting in the L2 cache"/>
-    <event counter="ARM_Mali-T6xx_L2_WRITE_SNOOP" title="Mali L2 Cache" name="Write snoops" description="Number of write transaction snoops"/>
-    <event counter="ARM_Mali-T6xx_L2_WRITE_HIT" title="Mali L2 Cache" name="L2 write hits" description="Number of writes hitting in the L2 cache"/>
-    <event counter="ARM_Mali-T6xx_L2_EXT_AR_STALL" title="Mali L2 Cache" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect"/>
-    <event counter="ARM_Mali-T6xx_L2_EXT_W_STALL" title="Mali L2 Cache" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect"/>
-  </category>
diff --git a/tools/gator/daemon/events-Mali-T72x_hw.xml b/tools/gator/daemon/events-Mali-T72x_hw.xml
new file mode 100644
index 000000000000..5587534770c8
--- /dev/null
+++ b/tools/gator/daemon/events-Mali-T72x_hw.xml
@@ -0,0 +1,95 @@
+
+  <category name="Mali Job Manager" per_cpu="no">
+
+    <event counter="ARM_Mali-T72x_GPU_ACTIVE" title="Mali Job Manager Cycles" name="GPU cycles" description="Number of cycles GPU active"/>
+    <event counter="ARM_Mali-T72x_IRQ_ACTIVE" title="Mali Job Manager Cycles" name="IRQ cycles" description="Number of cycles GPU interrupt pending"/>
+    <event counter="ARM_Mali-T72x_JS0_ACTIVE" title="Mali Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) active"/>
+    <event counter="ARM_Mali-T72x_JS1_ACTIVE" title="Mali Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) active"/>
+    <event counter="ARM_Mali-T72x_JS2_ACTIVE" title="Mali Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) active"/>
+
+    <event counter="ARM_Mali-T72x_JS0_JOBS" title="Mali Job Manager Work" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0"/>
+    <event counter="ARM_Mali-T72x_JS0_TASKS" title="Mali Job Manager Work" name="JS0 tasks" description="Number of Tasks completed in JS0"/>
+    <event counter="ARM_Mali-T72x_JS1_JOBS" title="Mali Job Manager Work" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1"/>
+    <event counter="ARM_Mali-T72x_JS1_TASKS" title="Mali Job Manager Work" name="JS1 tasks" description="Number of Tasks completed in JS1"/>
+    <event counter="ARM_Mali-T72x_JS2_TASKS" title="Mali Job Manager Work" name="JS2 tasks" description="Number of Tasks completed in JS2"/>
+    <event counter="ARM_Mali-T72x_JS2_JOBS" title="Mali Job Manager Work" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2"/>
+
+  </category>
+
+  <category name="Mali Tiler" per_cpu="no">
+
+    <event counter="ARM_Mali-T72x_TI_ACTIVE" title="Mali Tiler Cycles" name="Tiler cycles" description="Number of cycles Tiler active"/>
+
+    <event counter="ARM_Mali-T72x_TI_POLYGONS" title="Mali Tiler Primitives" name="Polygons" description="Number of polygons processed"/>
+    <event counter="ARM_Mali-T72x_TI_QUADS" title="Mali Tiler Primitives" name="Quads" description="Number of quads processed"/>
+    <event counter="ARM_Mali-T72x_TI_TRIANGLES" title="Mali Tiler Primitives" name="Triangles" description="Number of triangles processed"/>
+    <event counter="ARM_Mali-T72x_TI_LINES" title="Mali Tiler Primitives" name="Lines" description="Number of lines processed"/>
+    <event counter="ARM_Mali-T72x_TI_POINTS" title="Mali Tiler Primitives" name="Points" description="Number of points processed"/>
+
+    <event counter="ARM_Mali-T72x_TI_FRONT_FACING" title="Mali Tiler Culling" name="Front facing prims" description="Number of front facing primitives"/>
+    <event counter="ARM_Mali-T72x_TI_BACK_FACING" title="Mali Tiler Culling" name="Back facing prims" description="Number of back facing primitives"/>
+    <event counter="ARM_Mali-T72x_TI_PRIM_VISIBLE" title="Mali Tiler Culling" name="Visible prims" description="Number of visible primitives"/>
+    <event counter="ARM_Mali-T72x_TI_PRIM_CULLED" title="Mali Tiler Culling" name="Culled prims" description="Number of culled primitives"/>
+    <event counter="ARM_Mali-T72x_TI_PRIM_CLIPPED" title="Mali Tiler Culling" name="Clipped prims" description="Number of clipped primitives"/>
+
+  </category>
+
+  <category name="Mali Shader Core" per_cpu="no">
+
+    <event counter="ARM_Mali-T72x_TRIPIPE_ACTIVE" title="Mali Core Cycles" name="Tripipe cycles" description="Number of cycles tripipe was active"/>
+    <event counter="ARM_Mali-T72x_FRAG_ACTIVE" title="Mali Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active"/>
+    <event counter="ARM_Mali-T72x_COMPUTE_ACTIVE" title="Mali Core Cycles" name="Compute cycles" description="Number of cycles vertex/compute processing was active"/>
+    <event counter="ARM_Mali-T72x_FRAG_CYCLES_NO_TILE" title="Mali Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer"/>
+
+    <event counter="ARM_Mali-T72x_FRAG_THREADS" title="Mali Fragment Threads" name="Fragment threads" description="Number of fragment threads started"/>
+    <event counter="ARM_Mali-T72x_FRAG_DUMMY_THREADS" title="Mali Fragment Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started"/>
+    <event counter="ARM_Mali-T72x_FRAG_THREADS_LZS_TEST" title="Mali Fragment Threads" name="Fragment threads doing late ZS" description="Number of threads doing late ZS test"/>
+    <event counter="ARM_Mali-T72x_FRAG_THREADS_LZS_KILLED" title="Mali Fragment Threads" name="Fragment threads killed late ZS" description="Number of threads killed by late ZS test"/>
+
+    <event counter="ARM_Mali-T72x_COMPUTE_TASKS" title="Mali Compute Tasks" name="Compute tasks" description="Number of compute tasks"/>
+    <event counter="ARM_Mali-T72x_COMPUTE_THREADS" title="Mali Compute Threads" name="Compute threads" description="Number of compute threads started"/>
+
+    <event counter="ARM_Mali-T72x_FRAG_PRIMITIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
+    <event counter="ARM_Mali-T72x_FRAG_PRIMITIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
+
+    <event counter="ARM_Mali-T72x_FRAG_QUADS_RAST" title="Mali Fragment Quads" name="Quads rasterized" description="Number of quads rasterized"/>
+    <event counter="ARM_Mali-T72x_FRAG_QUADS_EZS_TEST" title="Mali Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test"/>
+    <event counter="ARM_Mali-T72x_FRAG_QUADS_EZS_KILLED" title="Mali Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test"/>
+
+    <event counter="ARM_Mali-T72x_FRAG_NUM_TILES" title="Mali Fragment Tasks" name="Tiles rendered" description="Number of tiles rendered"/>
+    <event counter="ARM_Mali-T72x_FRAG_TRANS_ELIM" title="Mali Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination"/>
+
+    <event counter="ARM_Mali-T72x_ARITH_WORDS" title="Mali Arithmetic Pipe" name="A instructions" description="Number of batched instructions executed by the A-pipe"/>
+
+    <event counter="ARM_Mali-T72x_LS_WORDS" title="Mali Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe"/>
+    <event counter="ARM_Mali-T72x_LS_ISSUES" title="Mali Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts"/>
+
+    <event counter="ARM_Mali-T72x_TEX_WORDS" title="Mali Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe"/>
+    <event counter="ARM_Mali-T72x_TEX_ISSUES" title="Mali Texture Pipe" name="T instruction issues" description="Number of threads through loop 2 address calculation"/>
+
+    <event counter="ARM_Mali-T72x_LSC_READ_HITS" title="Mali Load/Store Cache" name="Read hits" description="Number of read hits in the Load/Store cache"/>
+    <event counter="ARM_Mali-T72x_LSC_READ_MISSES" title="Mali Load/Store Cache" name="Read misses" description="Number of read misses in the Load/Store cache"/>
+    <event counter="ARM_Mali-T72x_LSC_WRITE_HITS" title="Mali Load/Store Cache" name="Write hits" description="Number of write hits in the Load/Store cache"/>
+    <event counter="ARM_Mali-T72x_LSC_WRITE_MISSES" title="Mali Load/Store Cache" name="Write misses" description="Number of write misses in the Load/Store cache"/>
+    <event counter="ARM_Mali-T72x_LSC_ATOMIC_HITS" title="Mali Load/Store Cache" name="Atomic hits" description="Number of atomic hits in the Load/Store cache"/>
+    <event counter="ARM_Mali-T72x_LSC_ATOMIC_MISSES" title="Mali Load/Store Cache" name="Atomic misses" description="Number of atomic misses in the Load/Store cache"/>
+    <event counter="ARM_Mali-T72x_LSC_LINE_FETCHES" title="Mali Load/Store Cache" name="Line fetches" description="Number of line fetches in the Load/Store cache"/>
+    <event counter="ARM_Mali-T72x_LSC_DIRTY_LINE" title="Mali Load/Store Cache" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache"/>
+    <event counter="ARM_Mali-T72x_LSC_SNOOPS" title="Mali Load/Store Cache" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache"/>
+
+  </category>
+
+  <category name="Mali L2 Cache" per_cpu="no">
+
+    <event counter="ARM_Mali-T72x_L2_EXT_WRITE_BEATS" title="Mali L2 Cache" name="External write beats" description="Number of external bus write beats"/>
+    <event counter="ARM_Mali-T72x_L2_EXT_READ_BEATS" title="Mali L2 Cache" name="External read beats" description="Number of external bus read beats"/>
+    <event counter="ARM_Mali-T72x_L2_READ_SNOOP" title="Mali L2 Cache" name="Read snoops" description="Number of read transaction snoops"/>
+    <event counter="ARM_Mali-T72x_L2_READ_HIT" title="Mali L2 Cache" name="L2 read hits" description="Number of reads hitting in the L2 cache"/>
+    <event counter="ARM_Mali-T72x_L2_WRITE_SNOOP" title="Mali L2 Cache" name="Write snoops" description="Number of write transaction snoops"/>
+    <event counter="ARM_Mali-T72x_L2_WRITE_HIT" title="Mali L2 Cache" name="L2 write hits" description="Number of writes hitting in the L2 cache"/>
+    <event counter="ARM_Mali-T72x_L2_EXT_AR_STALL" title="Mali L2 Cache" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect"/>
+    <event counter="ARM_Mali-T72x_L2_EXT_W_STALL" title="Mali L2 Cache" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect"/>
+    <event counter="ARM_Mali-T72x_L2_READ_LOOKUP" title="Mali L2 Cache" name="L2 read lookups" description="Number of reads into the L2 cache"/>
+    <event counter="ARM_Mali-T72x_L2_WRITE_LOOKUP" title="Mali L2 Cache" name="L2 write lookups" description="Number of writes into the L2 cache"/>
+
+  </category>
diff --git a/tools/gator/daemon/events-Mali-T76x_hw.xml b/tools/gator/daemon/events-Mali-T76x_hw.xml
new file mode 100644
index 000000000000..be74c5a42624
--- /dev/null
+++ b/tools/gator/daemon/events-Mali-T76x_hw.xml
@@ -0,0 +1,108 @@
+
+  <category name="Mali Job Manager" per_cpu="no">
+
+    <event counter="ARM_Mali-T76x_GPU_ACTIVE" title="Mali Job Manager Cycles" name="GPU cycles" description="Number of cycles GPU active"/>
+    <event counter="ARM_Mali-T76x_IRQ_ACTIVE" title="Mali Job Manager Cycles" name="IRQ cycles" description="Number of cycles GPU interrupt pending"/>
+    <event counter="ARM_Mali-T76x_JS0_ACTIVE" title="Mali Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) active"/>
+    <event counter="ARM_Mali-T76x_JS1_ACTIVE" title="Mali Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) active"/>
+    <event counter="ARM_Mali-T76x_JS2_ACTIVE" title="Mali Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) active"/>
+
+    <event counter="ARM_Mali-T76x_JS0_JOBS" title="Mali Job Manager Work" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0"/>
+    <event counter="ARM_Mali-T76x_JS0_TASKS" title="Mali Job Manager Work" name="JS0 tasks" description="Number of Tasks completed in JS0"/>
+    <event counter="ARM_Mali-T76x_JS1_JOBS" title="Mali Job Manager Work" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1"/>
+    <event counter="ARM_Mali-T76x_JS1_TASKS" title="Mali Job Manager Work" name="JS1 tasks" description="Number of Tasks completed in JS1"/>
+    <event counter="ARM_Mali-T76x_JS2_TASKS" title="Mali Job Manager Work" name="JS2 tasks" description="Number of Tasks completed in JS2"/>
+    <event counter="ARM_Mali-T76x_JS2_JOBS" title="Mali Job Manager Work" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2"/>
+
+  </category>
+
+  <category name="Mali Tiler" per_cpu="no">
+
+    <event counter="ARM_Mali-T76x_TI_ACTIVE" title="Mali Tiler Cycles" name="Tiler cycles" description="Number of cycles Tiler active"/>
+
+    <event counter="ARM_Mali-T76x_TI_POLYGONS" title="Mali Tiler Primitives" name="Polygons" description="Number of polygons processed"/>
+    <event counter="ARM_Mali-T76x_TI_QUADS" title="Mali Tiler Primitives" name="Quads" description="Number of quads processed"/>
+    <event counter="ARM_Mali-T76x_TI_TRIANGLES" title="Mali Tiler Primitives" name="Triangles" description="Number of triangles processed"/>
+    <event counter="ARM_Mali-T76x_TI_LINES" title="Mali Tiler Primitives" name="Lines" description="Number of lines processed"/>
+    <event counter="ARM_Mali-T76x_TI_POINTS" title="Mali Tiler Primitives" name="Points" description="Number of points processed"/>
+
+    <event counter="ARM_Mali-T76x_TI_FRONT_FACING" title="Mali Tiler Culling" name="Front facing prims" description="Number of front facing primitives"/>
+    <event counter="ARM_Mali-T76x_TI_BACK_FACING" title="Mali Tiler Culling" name="Back facing prims" description="Number of back facing primitives"/>
+    <event counter="ARM_Mali-T76x_TI_PRIM_VISIBLE" title="Mali Tiler Culling" name="Visible prims" description="Number of visible primitives"/>
+    <event counter="ARM_Mali-T76x_TI_PRIM_CULLED" title="Mali Tiler Culling" name="Culled prims" description="Number of culled primitives"/>
+    <event counter="ARM_Mali-T76x_TI_PRIM_CLIPPED" title="Mali Tiler Culling" name="Clipped prims" description="Number of clipped primitives"/>
+
+    <event counter="ARM_Mali-T76x_TI_LEVEL0" title="Mali Tiler Hierarchy" name="L0 prims" description="Number of primitives in hierarchy level 0"/>
+    <event counter="ARM_Mali-T76x_TI_LEVEL1" title="Mali Tiler Hierarchy" name="L1 prims" description="Number of primitives in hierarchy level 1"/>
+    <event counter="ARM_Mali-T76x_TI_LEVEL2" title="Mali Tiler Hierarchy" name="L2 prims" description="Number of primitives in hierarchy level 2"/>
+    <event counter="ARM_Mali-T76x_TI_LEVEL3" title="Mali Tiler Hierarchy" name="L3 prims" description="Number of primitives in hierarchy level 3"/>
+    <event counter="ARM_Mali-T76x_TI_LEVEL4" title="Mali Tiler Hierarchy" name="L4 prims" description="Number of primitives in hierarchy level 4"/>
+    <event counter="ARM_Mali-T76x_TI_LEVEL5" title="Mali Tiler Hierarchy" name="L5 prims" description="Number of primitives in hierarchy level 5"/>
+    <event counter="ARM_Mali-T76x_TI_LEVEL6" title="Mali Tiler Hierarchy" name="L6 prims" description="Number of primitives in hierarchy level 6"/>
+    <event counter="ARM_Mali-T76x_TI_LEVEL7" title="Mali Tiler Hierarchy" name="L7 prims" description="Number of primitives in hierarchy level 7"/>
+
+  </category>
+
+  <category name="Mali Shader Core" per_cpu="no">
+
+    <event counter="ARM_Mali-T76x_TRIPIPE_ACTIVE" title="Mali Core Cycles" name="Tripipe cycles" description="Number of cycles tripipe was active"/>
+    <event counter="ARM_Mali-T76x_FRAG_ACTIVE" title="Mali Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active"/>
+    <event counter="ARM_Mali-T76x_COMPUTE_ACTIVE" title="Mali Core Cycles" name="Compute cycles" description="Number of cycles vertex/compute processing was active"/>
+    <event counter="ARM_Mali-T76x_FRAG_CYCLES_NO_TILE" title="Mali Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer"/>
+    <event counter="ARM_Mali-T76x_FRAG_CYCLES_FPKQ_ACTIVE" title="Mali Core Cycles" name="Fragment cycles pre-pipe buffer not empty" description="Number of cycles the pre-pipe queue contains quads"/>
+
+    <event counter="ARM_Mali-T76x_FRAG_THREADS" title="Mali Fragment Threads" name="Fragment threads" description="Number of fragment threads started"/>
+    <event counter="ARM_Mali-T76x_FRAG_DUMMY_THREADS" title="Mali Fragment Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started"/>
+    <event counter="ARM_Mali-T76x_FRAG_THREADS_LZS_TEST" title="Mali Fragment Threads" name="Fragment threads doing late ZS" description="Number of threads doing late ZS test"/>
+    <event counter="ARM_Mali-T76x_FRAG_THREADS_LZS_KILLED" title="Mali Fragment Threads" name="Fragment threads killed late ZS" description="Number of threads killed by late ZS test"/>
+
+    <event counter="ARM_Mali-T76x_COMPUTE_TASKS" title="Mali Compute Tasks" name="Compute tasks" description="Number of compute tasks"/>
+    <event counter="ARM_Mali-T76x_COMPUTE_THREADS" title="Mali Compute Threads" name="Compute threads" description="Number of compute threads started"/>
+
+    <event counter="ARM_Mali-T76x_FRAG_PRIMITIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
+    <event counter="ARM_Mali-T76x_FRAG_PRIMITIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
+
+    <event counter="ARM_Mali-T76x_FRAG_QUADS_RAST" title="Mali Fragment Quads" name="Quads rasterized" description="Number of quads rasterized"/>
+    <event counter="ARM_Mali-T76x_FRAG_QUADS_EZS_TEST" title="Mali Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test"/>
+    <event counter="ARM_Mali-T76x_FRAG_QUADS_EZS_KILLED" title="Mali Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test"/>
+
+    <event counter="ARM_Mali-T76x_FRAG_NUM_TILES" title="Mali Fragment Tasks" name="Tiles rendered" description="Number of tiles rendered"/>
+    <event counter="ARM_Mali-T76x_FRAG_TRANS_ELIM" title="Mali Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination"/>
+
+    <event counter="ARM_Mali-T76x_ARITH_WORDS" title="Mali Arithmetic Pipe" name="A instructions" description="Number of instructions completed by the A-pipe (normalized per pipeline)"/>
+
+    <event counter="ARM_Mali-T76x_LS_WORDS" title="Mali Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe"/>
+    <event counter="ARM_Mali-T76x_LS_ISSUES" title="Mali Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts"/>
+
+    <event counter="ARM_Mali-T76x_TEX_WORDS" title="Mali Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe"/>
+    <event counter="ARM_Mali-T76x_TEX_ISSUES" title="Mali Texture Pipe" name="T instruction issues" description="Number of threads through loop 2 address calculation"/>
+    <event counter="ARM_Mali-T76x_TEX_RECIRC_FMISS" title="Mali Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe, recirculated due to cache miss"/>
+
+    <event counter="ARM_Mali-T76x_LSC_READ_HITS" title="Mali Load/Store Cache" name="Read hits" description="Number of read hits in the Load/Store cache"/>
+    <event counter="ARM_Mali-T76x_LSC_READ_MISSES" title="Mali Load/Store Cache" name="Read misses" description="Number of read misses in the Load/Store cache"/>
+    <event counter="ARM_Mali-T76x_LSC_WRITE_HITS" title="Mali Load/Store Cache" name="Write hits" description="Number of write hits in the Load/Store cache"/>
+    <event counter="ARM_Mali-T76x_LSC_WRITE_MISSES" title="Mali Load/Store Cache" name="Write misses" description="Number of write misses in the Load/Store cache"/>
+    <event counter="ARM_Mali-T76x_LSC_ATOMIC_HITS" title="Mali Load/Store Cache" name="Atomic hits" description="Number of atomic hits in the Load/Store cache"/>
+    <event counter="ARM_Mali-T76x_LSC_ATOMIC_MISSES" title="Mali Load/Store Cache" name="Atomic misses" description="Number of atomic misses in the Load/Store cache"/>
+    <event counter="ARM_Mali-T76x_LSC_LINE_FETCHES" title="Mali Load/Store Cache" name="Line fetches" description="Number of line fetches in the Load/Store cache"/>
+    <event counter="ARM_Mali-T76x_LSC_DIRTY_LINE" title="Mali Load/Store Cache" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache"/>
+    <event counter="ARM_Mali-T76x_LSC_SNOOPS" title="Mali Load/Store Cache" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache"/>
+
+  </category>
+
+  <category name="Mali L2 Cache" per_cpu="no">
+
+    <event counter="ARM_Mali-T76x_L2_EXT_WRITE_BEATS" title="Mali L2 Cache" name="External write beats" description="Number of external bus write beats"/>
+    <event counter="ARM_Mali-T76x_L2_EXT_READ_BEATS" title="Mali L2 Cache" name="External read beats" description="Number of external bus read beats"/>
+    <event counter="ARM_Mali-T76x_L2_READ_SNOOP" title="Mali L2 Cache" name="Read snoops" description="Number of read transaction snoops"/>
+    <event counter="ARM_Mali-T76x_L2_READ_HIT" title="Mali L2 Cache" name="L2 read hits" description="Number of reads hitting in the L2 cache"/>
+    <event counter="ARM_Mali-T76x_L2_WRITE_SNOOP" title="Mali L2 Cache" name="Write snoops" description="Number of write transaction snoops"/>
+    <event counter="ARM_Mali-T76x_L2_WRITE_HIT" title="Mali L2 Cache" name="L2 write hits" description="Number of writes hitting in the L2 cache"/>
+    <event counter="ARM_Mali-T76x_L2_EXT_AR_STALL" title="Mali L2 Cache" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect"/>
+    <event counter="ARM_Mali-T76x_L2_EXT_W_STALL" title="Mali L2 Cache" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect"/>
+    <event counter="ARM_Mali-T76x_L2_EXT_R_BUF_FULL" title="Mali L2 Cache" name="External bus response buffer full" description="Number of cycles a valid request is blocked by a full response buffer"/>
+    <event counter="ARM_Mali-T76x_L2_EXT_RD_BUF_FULL" title="Mali L2 Cache" name="External bus read data buffer full" description="Number of cycles a valid request is blocked by a full read data buffer"/>
+    <event counter="ARM_Mali-T76x_L2_EXT_W_BUF_FULL" title="Mali L2 Cache" name="External bus write buffer full" description="Number of cycles a valid request is blocked by a full write buffer"/>
+    <event counter="ARM_Mali-T76x_L2_READ_LOOKUP" title="Mali L2 Cache" name="L2 read lookups" description="Number of reads into the L2 cache"/>
+    <event counter="ARM_Mali-T76x_L2_WRITE_LOOKUP" title="Mali L2 Cache" name="L2 write lookups" description="Number of writes into the L2 cache"/>
+  </category>
diff --git a/tools/gator/daemon/events-Mali-V500.xml b/tools/gator/daemon/events-Mali-V500.xml
index d2751e7239b5..89bc7f4734df 100644
--- a/tools/gator/daemon/events-Mali-V500.xml
+++ b/tools/gator/daemon/events-Mali-V500.xml
@@ -1,29 +1,30 @@
   <category name="Mali-V500">
-    <event counter="ARM_Mali-V500_cnt0" title="Mali Video Engine" name="Samples" class="absolute" description="The number of times we have taken a sample"/>
-    <event counter="ARM_Mali-V500_cnt1" title="Mali Video Engine" name="Queued input-buffers" class="absolute" description="The number of input-buffers that has been queued for consumption by the MVE"/>
-    <event counter="ARM_Mali-V500_cnt2" title="Mali Video Engine" name="Consumed input-buffers" class="absolute" description="The number of input-buffers that has been consumed by the MVE and returned to the application"/>
-    <event counter="ARM_Mali-V500_cnt3" title="Mali Video Engine" name="Queued output-buffers" class="absolute" description="The number of output-buffers that has been queued for usage by the MVE"/>
-    <event counter="ARM_Mali-V500_cnt4" title="Mali Video Engine" name="Consumed output-buffers" class="absolute" description="The number of output-buffers that has been consumed by the MVE and returned to the application"/>
-    <event counter="ARM_Mali-V500_cnt5" title="Mali Video Engine" name="Created Sessions" class="absolute" description="The number of created sessions throughout the lifetime of the process"/>
-    <event counter="ARM_Mali-V500_cnt6" title="Mali Video Engine" name="Active Sessions" description="The number of currently existing sessions"/>
-    <event counter="ARM_Mali-V500_cnt7" title="Mali Video Engine" name="Processed Frames" class="absolute" description="The number of processed frames. A processed frame is one where the encode or decode is complete for that particular frame. Frames can be processed out of order so this is not the same as the number of output-buffers returned"/>
-    <event counter="ARM_Mali-V500_cnt8" title="Mali Video Engine" name="Input Flushes Requested" class="absolute" description="The number of requested flushes of the input queue"/>
-    <event counter="ARM_Mali-V500_cnt9" title="Mali Video Engine" name="Input Flushes Complete" class="absolute" description="The number of completed flushes of the input queue"/>
-    <event counter="ARM_Mali-V500_cnt10" title="Mali Video Engine" name="Output Flushes Requested" class="absolute" description="The number of requested flushes of the output queue"/>
-    <event counter="ARM_Mali-V500_cnt11" title="Mali Video Engine" name="Output Flushes Complete" class="absolute" description="The number of completed flushes of the output queue"/>
-    <event counter="ARM_Mali-V500_cnt12" title="Mali Video Engine" name="Queued Output Buffers (current)" description="The number of output-buffers that are currently queued for usage by the MVE"/>
-    <event counter="ARM_Mali-V500_cnt13" title="Mali Video Engine" name="Queued Input Buffers (current)" description="The number of input-buffers that are currently queued for consumption by the MVE"/>
-    <event counter="ARM_Mali-V500_cnt14" title="Mali Video Engine" name="Output Queue Flushes" description="The number of pending flushes for the MVE output-queue"/>
-    <event counter="ARM_Mali-V500_cnt15" title="Mali Video Engine" name="Input Queue Flushes" description="The number of pending flushes for the MVE input-queue"/>
-    <event counter="ARM_Mali-V500_cnt16" title="Mali Video Engine" name="Errors encountered" class="absolute" description="The number of errors encountered"/>
-    <event counter="ARM_Mali-V500_cnt17" title="Mali Video Engine" name="Bits consumed" class="absolute" description="The number of bits consumed during decode"/>
-    <event counter="ARM_Mali-V500_cnt18" title="Mali Video Engine" name="AFBC bandwidth" class="absolute" description="The amount of AFBC-encoded bytes read or written"/>
-    <event counter="ARM_Mali-V500_cnt19" title="Mali Video Engine" name="Bandwidth (read)" class="absolute" description="The amount of bytes read over the AXI bus"/>
-    <event counter="ARM_Mali-V500_cnt20" title="Mali Video Engine" name="Bandwidth (write)" class="absolute" description="The amount of bytes written over the AXI bus"/>
-    <event counter="ARM_Mali-V500_evn0" title="Mali Video Engine" name="Session created" description="Generated when a session has been created"/>
-    <event counter="ARM_Mali-V500_evn1" title="Mali Video Engine" name="Session destroyed" description="Generated when a session has been destroyed"/>
-    <event counter="ARM_Mali-V500_evn2" title="Mali Video Engine" name="Frame Processed" description="Generated when the MVE has finished processing a frame"/>
-    <event counter="ARM_Mali-V500_evn3" title="Mali Video Engine" name="Output buffer received" description="Generated when an an output buffer is returned to us from the MVE"/>
-    <event counter="ARM_Mali-V500_evn4" title="Mali Video Engine" name="Input buffer received" description="Generated when we an input buffer is returned to us from the MVE"/>
-    <!--event counter="ARM_Mali-V500_act" title="VPU" name="Activity" class="activity" activity1="Parsed" activity_color1="0x000000ff" activity2="Piped" activity_color2="0x0000ff00" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" cores="8" description="Mali-V500 Activity"/-->
+    <event counter="ARM_Mali-V500_cnt0" title="MVE-V500 Stats" name="Samples" class="absolute" description="The number of times we have taken a sample"/>
+    <event counter="ARM_Mali-V500_cnt1" title="MVE-V500 Input Totals" name="Queued input-buffers" class="absolute" description="The number of input-buffers that have been queued for consumption by the MVE"/>
+    <event counter="ARM_Mali-V500_cnt2" title="MVE-V500 Input Totals" name="Consumed input-buffers" class="absolute" description="The number of input-buffers that have been consumed by the MVE and returned to the application"/>
+    <event counter="ARM_Mali-V500_cnt3" title="MVE-V500 Output Totals" name="Queued output-buffers" class="absolute" description="The number of output-buffers that have been queued for usage by the MVE"/>
+    <event counter="ARM_Mali-V500_cnt4" title="MVE-V500 Output Totals" name="Consumed output-buffers" class="absolute" description="The number of output-buffers that have been consumed by the MVE and returned to the application"/>
+    <event counter="ARM_Mali-V500_cnt5" title="MVE-V500 Stats" name="Created Sessions" class="absolute" description="The number of created sessions throughout the lifetime of the process"/>
+    <event counter="ARM_Mali-V500_cnt6" title="MVE-V500 Sessions" name="Active Sessions" description="The number of currently existing sessions"/>
+    <event counter="ARM_Mali-V500_cnt7" title="MVE-V500 Stats" name="Processed Frames" class="absolute" description="The number of processed frames. A processed frame is one where the encode or decode is complete for that particular frame. Frames can be processed out of order so this is not the same as the number of output-buffers returned"/>
+    <event counter="ARM_Mali-V500_cnt8" title="MVE-V500 Input Totals" name="Input Flushes Requested" class="absolute" description="The number of requested flushes of the input queue"/>
+    <event counter="ARM_Mali-V500_cnt9" title="MVE-V500 Input Totals" name="Input Flushes Complete" class="absolute" description="The number of completed flushes of the input queue"/>
+    <event counter="ARM_Mali-V500_cnt10" title="MVE-V500 Output Totals" name="Output Flushes Requested" class="absolute" description="The number of requested flushes of the output queue"/>
+    <event counter="ARM_Mali-V500_cnt11" title="MVE-V500 Output Totals" name="Output Flushes Complete" class="absolute" description="The number of completed flushes of the output queue"/>
+    <event counter="ARM_Mali-V500_cnt12" title="MVE-V500 Output" name="Queued Output Buffers (current)" description="The number of output-buffers that are currently queued for usage by the MVE"/>
+    <event counter="ARM_Mali-V500_cnt13" title="MVE-V500 Input" name="Queued Input Buffers (current)" description="The number of input-buffers that are currently queued for consumption by the MVE"/>
+    <event counter="ARM_Mali-V500_cnt14" title="MVE-V500 Output" name="Output Queue Flushes" description="The number of pending flushes for the MVE output-queue"/>
+    <event counter="ARM_Mali-V500_cnt15" title="MVE-V500 Input" name="Input Queue Flushes" description="The number of pending flushes for the MVE input-queue"/>
+    <event counter="ARM_Mali-V500_cnt16" title="MVE-V500 Stats" name="Errors encountered" class="absolute" description="The number of errors encountered"/>
+    <event counter="ARM_Mali-V500_cnt17" title="MVE-V500 Bandwidth" name="Bits consumed" class="absolute" description="The number of bits consumed during decode"/>
+    <event counter="ARM_Mali-V500_cnt18" title="MVE-V500 Bandwidth" name="AFBC bandwidth" class="absolute" description="The amount of AFBC-encoded bytes read or written"/>
+    <event counter="ARM_Mali-V500_cnt19" title="MVE-V500 Bandwidth" name="Bandwidth (read)" class="absolute" description="The amount of bytes read over the AXI bus"/>
+    <event counter="ARM_Mali-V500_cnt20" title="MVE-V500 Bandwidth" name="Bandwidth (write)" class="absolute" description="The amount of bytes written over the AXI bus"/>
+    <event counter="ARM_Mali-V500_evn0" title="MVE-V500 Sessions" name="Session created" description="Generated when a session has been created"/>
+    <event counter="ARM_Mali-V500_evn1" title="MVE-V500 Sessions" name="Session destroyed" description="Generated when a session has been destroyed"/>
+    <event counter="ARM_Mali-V500_evn2" title="MVE-V500 Frames" name="Frame Processed" description="Generated when the MVE has finished processing a frame"/>
+    <event counter="ARM_Mali-V500_evn3" title="MVE-V500 Output" name="Output buffer received" description="Generated when an output buffer is returned to us from the MVE"/>
+    <event counter="ARM_Mali-V500_evn4" title="MVE-V500 Input" name="Input buffer received" description="Generated when an input buffer is returned to us from the MVE"/>
+    <event counter="ARM_Mali-V500_act0" title="MVE-V500 Parsed" name="Activity" class="activity" activity1="activity" activity_color1="0x000000ff" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" cores="8" description="Mali-V500 Activity"/>
+    <event counter="ARM_Mali-V500_act1" title="MVE-V500 Piped" name="Activity" class="activity" activity1="activity" activity_color1="0x0000ff00" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" cores="8" description="Mali-V500 Activity"/>
   </category>
diff --git a/tools/gator/daemon/events-ftrace.xml b/tools/gator/daemon/events-ftrace.xml
new file mode 100644
index 000000000000..33ab7aab2196
--- /dev/null
+++ b/tools/gator/daemon/events-ftrace.xml
@@ -0,0 +1,7 @@
+  <category name="Ftrace">
+    <!-- counter attribute must start with ftrace_ and be unique -->
+    <!-- regex item in () is the value shown -->
+    <!--
+    <event counter="ftrace_trace_marker_numbers" title="ftrace" name="trace_marker" class="absolute" regex="([0-9]+)" description="Numbers written to /sys/kernel/debug/tracing/trace_marker, ex: echo 42 > /sys/kernel/debug/tracing/trace_marker"/>
+    -->
+  </category>
diff --git a/tools/gator/daemon/main.cpp b/tools/gator/daemon/main.cpp
index 2998c7012221..fbce1e15d0d0 100644
--- a/tools/gator/daemon/main.cpp
+++ b/tools/gator/daemon/main.cpp
@@ -19,16 +19,15 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
+#include "CCNDriver.h"
 #include "Child.h"
 #include "EventsXML.h"
-#include "KMod.h"
 #include "Logging.h"
 #include "Monitor.h"
 #include "OlySocket.h"
 #include "OlyUtility.h"
 #include "SessionData.h"
-
-#define DEBUG false
+#include "Setup.h"
 
 extern Child* child;
 static int shutdownFilesystem();
@@ -40,8 +39,9 @@ static bool driverRunningAtStart = false;
 static bool driverMountedAtStart = false;
 
 struct cmdline_t {
+	char *module;
 	int port;
-	char* module;
+	bool update;
 };
 
 #define DEFAULT_PORT 8080
@@ -105,7 +105,6 @@ static void child_exit(int) {
 	}
 }
 
-static const int UDP_ANS_PORT = 30000;
 static const int UDP_REQ_PORT = 30001;
 
 typedef struct {
@@ -125,11 +124,10 @@ static const char DST_REQ[] = { 'D', 'S', 'T', '_', 'R', 'E', 'Q', ' ', 0, 0, 0,
 
 class UdpListener {
 public:
-	UdpListener() : mDstAns(), mReq(-1), mAns(-1) {}
+	UdpListener() : mDstAns(), mReq(-1) {}
 
 	void setup(int port) {
 		mReq = udpPort(UDP_REQ_PORT);
-		mAns = udpPort(UDP_ANS_PORT);
 
 		// Format the answer buffer
 		memset(&mDstAns, 0, sizeof(mDstAns));
@@ -161,16 +159,13 @@ public:
 			logg->logError(__FILE__, __LINE__, "recvfrom failed");
 			handleException();
 		} else if ((read == 12) && (memcmp(buf, DST_REQ, sizeof(DST_REQ)) == 0)) {
-			if (sendto(mAns, &mDstAns, sizeof(mDstAns), 0, (struct sockaddr *)&sockaddr, addrlen) != sizeof(mDstAns)) {
-				logg->logError(__FILE__, __LINE__, "sendto failed");
-				handleException();
-			}
+			// Don't care if sendto fails - gatord shouldn't exit because of it and Streamline will retry
+			sendto(mReq, &mDstAns, sizeof(mDstAns), 0, (struct sockaddr *)&sockaddr, addrlen);
 		}
 	}
 
 	void close() {
 		::close(mReq);
-		::close(mAns);
 	}
 
 private:
@@ -180,10 +175,10 @@ private:
 		int on;
 		int family = AF_INET6;
 
-		s = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP);
+		s = socket_cloexec(AF_INET6, SOCK_DGRAM, IPPROTO_UDP);
 		if (s == -1) {
 			family = AF_INET;
-			s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+			s = socket_cloexec(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
 			if (s == -1) {
 				logg->logError(__FILE__, __LINE__, "socket failed");
 				handleException();
@@ -210,7 +205,6 @@ private:
 
 	RVIConfigureInfo mDstAns;
 	int mReq;
-	int mAns;
 };
 
 static UdpListener udpListener;
@@ -233,7 +227,7 @@ static int mountGatorFS() {
 
 static bool init_module (const char * const location) {
 	bool ret(false);
-	const int fd = open(location, O_RDONLY);
+	const int fd = open(location, O_RDONLY | O_CLOEXEC);
 	if (fd >= 0) {
 		struct stat st;
 		if (fstat(fd, &st) == 0) {
@@ -332,10 +326,26 @@ static int shutdownFilesystem() {
 	return 0; // success
 }
 
+static const char OPTSTRING[] = "hvudap:s:c:e:m:o:";
+
+static bool hasDebugFlag(int argc, char** argv) {
+	int c;
+
+	optind = 1;
+	while ((c = getopt(argc, argv, OPTSTRING)) != -1) {
+		if (c == 'd') {
+			return true;
+		}
+	}
+
+	return false;
+}
+
 static struct cmdline_t parseCommandLine(int argc, char** argv) {
 	struct cmdline_t cmdline;
 	cmdline.port = DEFAULT_PORT;
 	cmdline.module = NULL;
+	cmdline.update = false;
 	char version_string[256]; // arbitrary length to hold the version information
 	int c;
 
@@ -346,11 +356,15 @@ static struct cmdline_t parseCommandLine(int argc, char** argv) {
 		snprintf(version_string, sizeof(version_string), "Streamline gatord development version %d", PROTOCOL_VERSION);
 	}
 
-	while ((c = getopt(argc, argv, "hvp:s:c:e:m:o:")) != -1) {
-		switch(c) {
+	optind = 1;
+	while ((c = getopt(argc, argv, OPTSTRING)) != -1) {
+		switch (c) {
 			case 'c':
 				gSessionData->mConfigurationXMLPath = optarg;
 				break;
+			case 'd':
+				// Already handled
+				break;
 			case 'e':
 				gSessionData->mEventsXMLPath = optarg;
 				break;
@@ -366,6 +380,12 @@ static struct cmdline_t parseCommandLine(int argc, char** argv) {
 			case 'o':
 				gSessionData->mTargetPath = optarg;
 				break;
+			case 'u':
+				cmdline.update = true;
+				break;
+			case 'a':
+				gSessionData->mAllowCommands = true;
+				break;
 			case 'h':
 			case '?':
 				logg->logError(__FILE__, __LINE__,
@@ -375,9 +395,11 @@ static struct cmdline_t parseCommandLine(int argc, char** argv) {
 					"-h              this help page\n"
 					"-m module       path and filename of gator.ko\n"
 					"-p port_number  port upon which the server listens; default is 8080\n"
-					"-s session_xml  path and filename of a session xml used for local capture\n"
+					"-s session_xml  path and filename of a session.xml used for local capture\n"
 					"-o apc_dir      path and name of the output for a local capture\n"
 					"-v              version information\n"
+					"-d              enable debug messages\n"
+					"-a              allow the user user to provide a command to run at the start of a capture"
 					, version_string);
 				handleException();
 				break;
@@ -407,7 +429,7 @@ static struct cmdline_t parseCommandLine(int argc, char** argv) {
 	return cmdline;
 }
 
-void handleClient() {
+static void handleClient() {
 	OlySocket client(sock->acceptConnection());
 
 	int pid = fork();
@@ -452,12 +474,15 @@ int main(int argc, char** argv) {
 	//   e.g. it may not be the group leader when launched as 'sudo gatord'
 	setsid();
 
-	logg = new Logging(DEBUG);  // Set up global thread-safe logging
-	gSessionData = new SessionData(); // Global data class
-	util = new OlyUtility();	// Set up global utility class
+	// Set up global thread-safe logging
+	logg = new Logging(hasDebugFlag(argc, argv));
+	// Global data class
+	gSessionData = new SessionData();
+	// Set up global utility class
+	util = new OlyUtility();
 
 	// Initialize drivers
-	new KMod();
+	new CCNDriver();
 
 	prctl(PR_SET_NAME, (unsigned long)&"gatord-main", 0, 0, 0);
 	pthread_mutex_init(&numSessions_mutex, NULL);
@@ -474,6 +499,10 @@ int main(int argc, char** argv) {
 	// Parse the command line parameters
 	struct cmdline_t cmdline = parseCommandLine(argc, argv);
 
+	if (cmdline.update) {
+		return update(argv[0]);
+	}
+
 	// Verify root permissions
 	uid_t euid = geteuid();
 	if (euid) {
@@ -490,17 +519,18 @@ int main(int argc, char** argv) {
 				       "  >>> gator.ko should be co-located with gatord in the same directory\n"
 				       "  >>> OR insmod gator.ko prior to launching gatord\n"
 				       "  >>> OR specify the location of gator.ko on the command line\n"
-				       "  >>> OR run Linux 3.4 or later with perf (CONFIG_PERF_EVENTS and CONFIG_HW_PERF_EVENTS) and tracing (CONFIG_TRACING) support to collect data via userspace only");
+				       "  >>> OR run Linux 3.4 or later with perf (CONFIG_PERF_EVENTS and CONFIG_HW_PERF_EVENTS) and tracing (CONFIG_TRACING and CONFIG_CONTEXT_SWITCH_TRACER) support to collect data via userspace only");
 			handleException();
 		}
 	}
 
-	gSessionData->hwmon.setup();
 	{
 		EventsXML eventsXML;
 		mxml_node_t *xml = eventsXML.getTree();
-		gSessionData->fsDriver.setup(xml);
-		gSessionData->maliVideo.setup(xml);
+		// Initialize all drivers
+		for (Driver *driver = Driver::getHead(); driver != NULL; driver = driver->getNext()) {
+			driver->readEvents(xml);
+		}
 		mxmlDelete(xml);
 	}
 
@@ -517,9 +547,14 @@ int main(int argc, char** argv) {
 		child->run();
 		delete child;
 	} else {
+		gSessionData->annotateListener.setup();
 		sock = new OlyServerSocket(cmdline.port);
 		udpListener.setup(cmdline.port);
-		if (!monitor.init() || !monitor.add(sock->getFd()) || !monitor.add(udpListener.getReq())) {
+		if (!monitor.init() ||
+				!monitor.add(sock->getFd()) ||
+				!monitor.add(udpListener.getReq()) ||
+				!monitor.add(gSessionData->annotateListener.getFd()) ||
+				false) {
 			logg->logError(__FILE__, __LINE__, "Monitor setup failed");
 			handleException();
 		}
@@ -537,6 +572,8 @@ int main(int argc, char** argv) {
 					handleClient();
 				} else if (events[i].data.fd == udpListener.getReq()) {
 					udpListener.handle();
+				} else if (events[i].data.fd == gSessionData->annotateListener.getFd()) {
+					gSessionData->annotateListener.handle();
 				}
 			}
 		}
diff --git a/tools/gator/daemon/mxml/config.h b/tools/gator/daemon/mxml/config.h
index 1f59ba34a474..ad6df1d7debe 100644
--- a/tools/gator/daemon/mxml/config.h
+++ b/tools/gator/daemon/mxml/config.h
@@ -1,10 +1,10 @@
 /* config.h.  Generated from config.h.in by configure.  */
 /*
- * "$Id: config.h.in 408 2010-09-19 05:26:46Z mike $"
+ * "$Id: config.h.in 451 2014-01-04 21:50:06Z msweet $"
  *
  * Configuration file for Mini-XML, a small XML-like file parsing library.
  *
- * Copyright 2003-2010 by Michael R Sweet.
+ * Copyright 2003-2014 by Michael R Sweet.
  *
  * These coded instructions, statements, and computer programs are the
  * property of Michael R Sweet and are protected by Federal copyright
@@ -12,7 +12,7 @@
  * which should have been included with this file.  If this file is
  * missing or damaged, see the license at:
  *
- *     http://www.minixml.org/
+ *     http://www.msweet.org/projects.php/Mini-XML
  */
 
 /*
@@ -30,7 +30,7 @@
  * Version number...
  */
 
-#define MXML_VERSION "Mini-XML v2.7"
+#define MXML_VERSION "Mini-XML v2.8"
 
 
 /*
@@ -92,5 +92,5 @@ extern int	_mxml_vsnprintf(char *, size_t, const char *, va_list);
 #  endif /* !HAVE_VSNPRINTF */
 
 /*
- * End of "$Id: config.h.in 408 2010-09-19 05:26:46Z mike $".
+ * End of "$Id: config.h.in 451 2014-01-04 21:50:06Z msweet $".
  */
diff --git a/tools/gator/daemon/mxml/mxml-attr.c b/tools/gator/daemon/mxml/mxml-attr.c
index c9950f5fb732..8e89cc1474f8 100644
--- a/tools/gator/daemon/mxml/mxml-attr.c
+++ b/tools/gator/daemon/mxml/mxml-attr.c
@@ -1,9 +1,9 @@
 /*
- * "$Id: mxml-attr.c 408 2010-09-19 05:26:46Z mike $"
+ * "$Id: mxml-attr.c 451 2014-01-04 21:50:06Z msweet $"
  *
  * Attribute support code for Mini-XML, a small XML-like file parsing library.
  *
- * Copyright 2003-2010 by Michael R Sweet.
+ * Copyright 2003-2014 by Michael R Sweet.
  *
  * These coded instructions, statements, and computer programs are the
  * property of Michael R Sweet and are protected by Federal copyright
@@ -11,15 +11,7 @@
  * which should have been included with this file.  If this file is
  * missing or damaged, see the license at:
  *
- *     http://www.minixml.org/
- *
- * Contents:
- *
- *   mxmlElementDeleteAttr() - Delete an attribute.
- *   mxmlElementGetAttr()    - Get an attribute.
- *   mxmlElementSetAttr()    - Set an attribute.
- *   mxmlElementSetAttrf()   - Set an attribute with a formatted value.
- *   mxml_set_attr()         - Set or add an attribute name/value pair.
+ *     http://www.msweet.org/projects.php/Mini-XML
  */
 
 /*
@@ -90,6 +82,9 @@ mxmlElementDeleteAttr(mxml_node_t *node,/* I - Element */
         memmove(attr, attr + 1, i * sizeof(mxml_attr_t));
 
       node->value.element.num_attrs --;
+
+      if (node->value.element.num_attrs == 0)
+        free(node->value.element.attrs);
       return;
     }
   }
@@ -315,5 +310,5 @@ mxml_set_attr(mxml_node_t *node,	/* I - Element node */
 
 
 /*
- * End of "$Id: mxml-attr.c 408 2010-09-19 05:26:46Z mike $".
+ * End of "$Id: mxml-attr.c 451 2014-01-04 21:50:06Z msweet $".
  */
diff --git a/tools/gator/daemon/mxml/mxml-entity.c b/tools/gator/daemon/mxml/mxml-entity.c
index c5c9f61f73c2..0d11df6a70bc 100644
--- a/tools/gator/daemon/mxml/mxml-entity.c
+++ b/tools/gator/daemon/mxml/mxml-entity.c
@@ -1,10 +1,10 @@
 /*
- * "$Id: mxml-entity.c 408 2010-09-19 05:26:46Z mike $"
+ * "$Id: mxml-entity.c 451 2014-01-04 21:50:06Z msweet $"
  *
  * Character entity support code for Mini-XML, a small XML-like
  * file parsing library.
  *
- * Copyright 2003-2010 by Michael R Sweet.
+ * Copyright 2003-2014 by Michael R Sweet.
  *
  * These coded instructions, statements, and computer programs are the
  * property of Michael R Sweet and are protected by Federal copyright
@@ -12,18 +12,7 @@
  * which should have been included with this file.  If this file is
  * missing or damaged, see the license at:
  *
- *     http://www.minixml.org/
- *
- * Contents:
- *
- *   mxmlEntityAddCallback()    - Add a callback to convert entities to
- *                                Unicode.
- *   mxmlEntityGetName()        - Get the name that corresponds to the
- *                                character value.
- *   mxmlEntityGetValue()       - Get the character corresponding to a named
- *                                entity.
- *   mxmlEntityRemoveCallback() - Remove a callback.
- *   _mxml_entity_cb()          - Lookup standard (X)HTML entities.
+ *     http://www.msweet.org/projects.php/Mini-XML
  */
 
 /*
@@ -456,5 +445,5 @@ _mxml_entity_cb(const char *name)	/* I - Entity name */
 
 
 /*
- * End of "$Id: mxml-entity.c 408 2010-09-19 05:26:46Z mike $".
+ * End of "$Id: mxml-entity.c 451 2014-01-04 21:50:06Z msweet $".
  */
diff --git a/tools/gator/daemon/mxml/mxml-file.c b/tools/gator/daemon/mxml/mxml-file.c
index 7860ee5f8370..3812c253fc3e 100644
--- a/tools/gator/daemon/mxml/mxml-file.c
+++ b/tools/gator/daemon/mxml/mxml-file.c
@@ -1,9 +1,9 @@
 /*
- * "$Id: mxml-file.c 438 2011-03-24 05:47:51Z mike $"
+ * "$Id: mxml-file.c 455 2014-01-05 03:28:03Z msweet $"
  *
  * File loading code for Mini-XML, a small XML-like file parsing library.
  *
- * Copyright 2003-2011 by Michael R Sweet.
+ * Copyright 2003-2014 by Michael R Sweet.
  *
  * These coded instructions, statements, and computer programs are the
  * property of Michael R Sweet and are protected by Federal copyright
@@ -11,44 +11,11 @@
  * which should have been included with this file.  If this file is
  * missing or damaged, see the license at:
  *
- *     http://www.minixml.org/
- *
- * Contents:
- *
- *   mxmlLoadFd()            - Load a file descriptor into an XML node tree.
- *   mxmlLoadFile()          - Load a file into an XML node tree.
- *   mxmlLoadString()        - Load a string into an XML node tree.
- *   mxmlSaveAllocString()   - Save an XML tree to an allocated string.
- *   mxmlSaveFd()            - Save an XML tree to a file descriptor.
- *   mxmlSaveFile()          - Save an XML tree to a file.
- *   mxmlSaveString()        - Save an XML node tree to a string.
- *   mxmlSAXLoadFd()         - Load a file descriptor into an XML node tree
- *                             using a SAX callback.
- *   mxmlSAXLoadFile()       - Load a file into an XML node tree
- *                             using a SAX callback.
- *   mxmlSAXLoadString()     - Load a string into an XML node tree
- *                             using a SAX callback.
- *   mxmlSetCustomHandlers() - Set the handling functions for custom data.
- *   mxmlSetErrorCallback()  - Set the error message callback.
- *   mxmlSetWrapMargin()     - Set the wrap margin when saving XML data.
- *   mxml_add_char()         - Add a character to a buffer, expanding as needed.
- *   mxml_fd_getc()          - Read a character from a file descriptor.
- *   mxml_fd_putc()          - Write a character to a file descriptor.
- *   mxml_fd_read()          - Read a buffer of data from a file descriptor.
- *   mxml_fd_write()         - Write a buffer of data to a file descriptor.
- *   mxml_file_getc()        - Get a character from a file.
- *   mxml_file_putc()        - Write a character to a file.
- *   mxml_get_entity()       - Get the character corresponding to an entity...
- *   mxml_load_data()        - Load data into an XML node tree.
- *   mxml_parse_element()    - Parse an element for any attributes...
- *   mxml_string_getc()      - Get a character from a string.
- *   mxml_string_putc()      - Write a character to a string.
- *   mxml_write_name()       - Write a name string.
- *   mxml_write_node()       - Save an XML node to a file.
- *   mxml_write_string()     - Write a string, escaping & and < as needed.
- *   mxml_write_ws()         - Do whitespace callback...
+ *     http://www.msweet.org/projects.php/Mini-XML
  */
 
+/*** This file modified by ARM on 25 Aug 2014 to avoid pointer overflow when checking if the write position is beyond the end of the buffer in mxmlSaveString and mxml_string_putc ***/
+
 /*
  * Include necessary headers...
  */
@@ -128,7 +95,7 @@ static int		mxml_write_node(mxml_node_t *node, void *p,
 					_mxml_global_t *global);
 static int		mxml_write_string(const char *s, void *p,
 					  _mxml_putc_cb_t putc_cb);
-static int		mxml_write_ws(mxml_node_t *node, void *p, 
+static int		mxml_write_ws(mxml_node_t *node, void *p,
 			              mxml_save_cb_t cb, int ws,
 				      int col, _mxml_putc_cb_t putc_cb);
 
@@ -400,7 +367,7 @@ mxmlSaveString(mxml_node_t    *node,	/* I - Node to write */
                mxml_save_cb_t cb)	/* I - Whitespace callback or MXML_NO_CALLBACK */
 {
   int	col;				/* Final column */
-  char	*ptr[2];			/* Pointers for putc_cb */
+  char	*ptr[3];			/* Pointers for putc_cb */
   _mxml_global_t *global = _mxml_global();
 					/* Global data */
 
@@ -411,6 +378,7 @@ mxmlSaveString(mxml_node_t    *node,	/* I - Node to write */
 
   ptr[0] = buffer;
   ptr[1] = buffer + bufsize;
+  ptr[2] = 0;
 
   if ((col = mxml_write_node(node, ptr, cb, 0, mxml_string_putc, global)) < 0)
     return (-1);
@@ -422,7 +390,7 @@ mxmlSaveString(mxml_node_t    *node,	/* I - Node to write */
   * Nul-terminate the buffer...
   */
 
-  if (ptr[0] >= ptr[1])
+  if (ptr[2] != 0)
     buffer[bufsize - 1] = '\0';
   else
     ptr[0][0] = '\0';
@@ -567,7 +535,7 @@ mxmlSAXLoadString(
  *
  * The save function accepts a node pointer and must return a malloc'd
  * string on success and NULL on error.
- * 
+ *
  */
 
 void
@@ -756,7 +724,7 @@ mxml_fd_getc(void *p,			/* I  - File descriptor buffer */
 	      return (EOF);
 
 	  ch = *(buf->current)++;
-          
+
 	  if (ch != 0xff)
 	    return (EOF);
 
@@ -775,7 +743,7 @@ mxml_fd_getc(void *p,			/* I  - File descriptor buffer */
 	      return (EOF);
 
 	  ch = *(buf->current)++;
-          
+
 	  if (ch != 0xfe)
 	    return (EOF);
 
@@ -1287,8 +1255,8 @@ mxml_file_getc(void *p,			/* I  - Pointer to file */
 	  * Multi-word UTF-16 char...
 	  */
 
-          int lch = (getc(fp) << 8);
-          lch |= getc(fp);
+          int lch = getc(fp);
+          lch = (lch << 8) | getc(fp);
 
           if (lch < 0xdc00 || lch >= 0xdfff)
 	    return (EOF);
@@ -1317,7 +1285,7 @@ mxml_file_getc(void *p,			/* I  - Pointer to file */
 	  */
 
           int lch = getc(fp);
-		  lch |= (getc(fp) << 8);
+          lch |= (getc(fp) << 8);
 
           if (lch < 0xdc00 || lch >= 0xdfff)
 	    return (EOF);
@@ -1463,8 +1431,10 @@ mxml_load_data(
 
   if (cb && parent)
     type = (*cb)(parent);
-  else
+  else if (parent)
     type = MXML_TEXT;
+  else
+    type = MXML_IGNORE;
 
   while ((ch = (*getc_cb)(p, &encoding)) != EOF)
   {
@@ -1518,7 +1488,7 @@ mxml_load_data(
         default : /* Ignore... */
 	    node = NULL;
 	    break;
-      }	  
+      }
 
       if (*bufptr)
       {
@@ -1661,9 +1631,9 @@ mxml_load_data(
 	  * There can only be one root element!
 	  */
 
-	  mxml_error("<%s> cannot be a second root node after <%s>", 
+	  mxml_error("<%s> cannot be a second root node after <%s>",
 	             buffer, first->value.element.name);
-          goto error; 		     
+          goto error;
 	}
 
 	if ((node = mxmlNewElement(parent, buffer)) == NULL)
@@ -1729,9 +1699,9 @@ mxml_load_data(
 	  * There can only be one root element!
 	  */
 
-	  mxml_error("<%s> cannot be a second root node after <%s>", 
+	  mxml_error("<%s> cannot be a second root node after <%s>",
 	             buffer, first->value.element.name);
-          goto error; 		     
+          goto error;
 	}
 
 	if ((node = mxmlNewElement(parent, buffer)) == NULL)
@@ -1796,9 +1766,9 @@ mxml_load_data(
 	  * There can only be one root element!
 	  */
 
-	  mxml_error("<%s> cannot be a second root node after <%s>", 
+	  mxml_error("<%s> cannot be a second root node after <%s>",
 	             buffer, first->value.element.name);
-          goto error; 		     
+          goto error;
 	}
 
 	if ((node = mxmlNewElement(parent, buffer)) == NULL)
@@ -1882,9 +1852,9 @@ mxml_load_data(
 	  * There can only be one root element!
 	  */
 
-	  mxml_error("<%s> cannot be a second root node after <%s>", 
+	  mxml_error("<%s> cannot be a second root node after <%s>",
 	             buffer, first->value.element.name);
-          goto error; 		     
+          goto error;
 	}
 
 	if ((node = mxmlNewElement(parent, buffer)) == NULL)
@@ -1974,9 +1944,9 @@ mxml_load_data(
 	  * There can only be one root element!
 	  */
 
-	  mxml_error("<%s> cannot be a second root node after <%s>", 
+	  mxml_error("<%s> cannot be a second root node after <%s>",
 	             buffer, first->value.element.name);
-          goto error; 		     
+          goto error;
 	}
 
         if ((node = mxmlNewElement(parent, buffer)) == NULL)
@@ -2076,7 +2046,7 @@ mxml_load_data(
   {
     node = parent;
 
-    while (parent->parent != top && parent->parent)
+    while (parent != top && parent->parent)
       parent = parent->parent;
 
     if (node != parent)
@@ -2286,7 +2256,7 @@ mxml_parse_element(
 	    if (ch == '&')
 	      if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
 	        goto error;
-	      
+
 	    if (mxml_add_char(ch, &ptr, &value, &valsize))
 	      goto error;
 	  }
@@ -2310,7 +2280,7 @@ mxml_parse_element(
 	    if (ch == '&')
 	      if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
 	        goto error;
-	      
+
 	    if (mxml_add_char(ch, &ptr, &value, &valsize))
 	      goto error;
 	  }
@@ -2643,8 +2613,12 @@ mxml_string_putc(int  ch,		/* I - Character to write */
 
   pp = (char **)p;
 
-  if (pp[0] < pp[1])
-    pp[0][0] = ch;
+  if (pp[2] == 0) {
+    if (pp[0] < pp[1])
+      pp[0][0] = ch;
+    else
+      pp[2] = (char *)1;
+  }
 
   pp[0] ++;
 
@@ -3078,5 +3052,5 @@ mxml_write_ws(mxml_node_t     *node,	/* I - Current node */
 
 
 /*
- * End of "$Id: mxml-file.c 438 2011-03-24 05:47:51Z mike $".
+ * End of "$Id: mxml-file.c 455 2014-01-05 03:28:03Z msweet $".
  */
diff --git a/tools/gator/daemon/mxml/mxml-get.c b/tools/gator/daemon/mxml/mxml-get.c
index a5356d57e186..40ed3d0839b4 100644
--- a/tools/gator/daemon/mxml/mxml-get.c
+++ b/tools/gator/daemon/mxml/mxml-get.c
@@ -1,9 +1,9 @@
 /*
- * "$Id: mxml-get.c 427 2011-01-03 02:03:29Z mike $"
+ * "$Id: mxml-get.c 451 2014-01-04 21:50:06Z msweet $"
  *
  * Node get functions for Mini-XML, a small XML-like file parsing library.
  *
- * Copyright 2011 by Michael R Sweet.
+ * Copyright 2014 by Michael R Sweet.
  *
  * These coded instructions, statements, and computer programs are the
  * property of Michael R Sweet and are protected by Federal copyright
@@ -11,26 +11,7 @@
  * which should have been included with this file.  If this file is
  * missing or damaged, see the license at:
  *
- *     http://www.minixml.org/
- *
- * Contents:
- *
- *   mxmlGetCDATA()       - Get the value for a CDATA node.
- *   mxmlGetCustom()      - Get the value for a custom node.
- *   mxmlGetElement()     - Get the name for an element node.
- *   mxmlGetFirstChild()  - Get the first child of an element node.
- *   mxmlGetInteger()     - Get the integer value from the specified node or its
- *                          first child.
- *   mxmlGetLastChild()   - Get the last child of an element node.
- *   mxmlGetNextSibling() - Get the next node for the current parent.
- *   mxmlGetOpaque()      - Get an opaque string value for a node or its first
- *                          child.
- *   mxmlGetParent()      - Get the parent node.
- *   mxmlGetPrevSibling() - Get the previous node for the current parent.
- *   mxmlGetReal()        - Get the real value for a node or its first child.
- *   mxmlGetText()        - Get the text value for a node or its first child.
- *   mxmlGetType()        - Get the node type.
- *   mxmlGetUserData()    - Get the user data pointer for a node.
+ *     http://www.msweet.org/projects.php/Mini-XML
  */
 
 /*
@@ -467,5 +448,5 @@ mxmlGetUserData(mxml_node_t *node)	/* I - Node to get */
 
 
 /*
- * End of "$Id: mxml-get.c 427 2011-01-03 02:03:29Z mike $".
+ * End of "$Id: mxml-get.c 451 2014-01-04 21:50:06Z msweet $".
  */
diff --git a/tools/gator/daemon/mxml/mxml-index.c b/tools/gator/daemon/mxml/mxml-index.c
index b6efc66f055c..10814390d3a0 100644
--- a/tools/gator/daemon/mxml/mxml-index.c
+++ b/tools/gator/daemon/mxml/mxml-index.c
@@ -1,9 +1,9 @@
 /*
- * "$Id: mxml-index.c 426 2011-01-01 23:42:17Z mike $"
+ * "$Id: mxml-index.c 451 2014-01-04 21:50:06Z msweet $"
  *
  * Index support code for Mini-XML, a small XML-like file parsing library.
  *
- * Copyright 2003-2011 by Michael R Sweet.
+ * Copyright 2003-2014 by Michael R Sweet.
  *
  * These coded instructions, statements, and computer programs are the
  * property of Michael R Sweet and are protected by Federal copyright
@@ -11,10 +11,7 @@
  * which should have been included with this file.  If this file is
  * missing or damaged, see the license at:
  *
- *     http://www.minixml.org/
- *
- * Contents:
- *
+ *     http://www.msweet.org/projects.php/Mini-XML
  */
 
 /*
@@ -658,5 +655,5 @@ index_sort(mxml_index_t *ind,		/* I - Index to sort */
 
 
 /*
- * End of "$Id: mxml-index.c 426 2011-01-01 23:42:17Z mike $".
+ * End of "$Id: mxml-index.c 451 2014-01-04 21:50:06Z msweet $".
  */
diff --git a/tools/gator/daemon/mxml/mxml-node.c b/tools/gator/daemon/mxml/mxml-node.c
index 44af759f9de3..128cda1a4cf2 100644
--- a/tools/gator/daemon/mxml/mxml-node.c
+++ b/tools/gator/daemon/mxml/mxml-node.c
@@ -1,9 +1,9 @@
 /*
- * "$Id: mxml-node.c 436 2011-01-22 01:02:05Z mike $"
+ * "$Id: mxml-node.c 451 2014-01-04 21:50:06Z msweet $"
  *
  * Node support code for Mini-XML, a small XML-like file parsing library.
  *
- * Copyright 2003-2011 by Michael R Sweet.
+ * Copyright 2003-2014 by Michael R Sweet.
  *
  * These coded instructions, statements, and computer programs are the
  * property of Michael R Sweet and are protected by Federal copyright
@@ -11,26 +11,7 @@
  * which should have been included with this file.  If this file is
  * missing or damaged, see the license at:
  *
- *     http://www.minixml.org/
- *
- * Contents:
- *
- *   mxmlAdd()         - Add a node to a tree.
- *   mxmlDelete()      - Delete a node and all of its children.
- *   mxmlGetRefCount() - Get the current reference (use) count for a node.
- *   mxmlNewCDATA()    - Create a new CDATA node.
- *   mxmlNewCustom()   - Create a new custom data node.
- *   mxmlNewElement()  - Create a new element node.
- *   mxmlNewInteger()  - Create a new integer node.
- *   mxmlNewOpaque()   - Create a new opaque string.
- *   mxmlNewReal()     - Create a new real number node.
- *   mxmlNewText()     - Create a new text fragment node.
- *   mxmlNewTextf()    - Create a new formatted text fragment node.
- *   mxmlRemove()      - Remove a node from its parent.
- *   mxmlNewXML()      - Create a new XML document tree.
- *   mxmlRelease()     - Release a node.
- *   mxmlRetain()      - Retain a node.
- *   mxml_new()        - Create a new node.
+ *     http://www.msweet.org/projects.php/Mini-XML
  */
 
 /*
@@ -529,7 +510,7 @@ mxmlNewReal(mxml_node_t *parent,	/* I - Parent node or MXML_NO_PARENT */
  * list. The constant MXML_NO_PARENT can be used to specify that the new
  * text node has no parent. The whitespace parameter is used to specify
  * whether leading whitespace is present before the node. The text
- * string must be nul-terminated and is copied into the new node.  
+ * string must be nul-terminated and is copied into the new node.
  */
 
 mxml_node_t *				/* O - New node */
@@ -573,7 +554,7 @@ mxmlNewText(mxml_node_t *parent,	/* I - Parent node or MXML_NO_PARENT */
  * list. The constant MXML_NO_PARENT can be used to specify that the new
  * text node has no parent. The whitespace parameter is used to specify
  * whether leading whitespace is present before the node. The format
- * string must be nul-terminated and is formatted into the new node.  
+ * string must be nul-terminated and is formatted into the new node.
  */
 
 mxml_node_t *				/* O - New node */
@@ -803,5 +784,5 @@ mxml_new(mxml_node_t *parent,		/* I - Parent node */
 
 
 /*
- * End of "$Id: mxml-node.c 436 2011-01-22 01:02:05Z mike $".
+ * End of "$Id: mxml-node.c 451 2014-01-04 21:50:06Z msweet $".
  */
diff --git a/tools/gator/daemon/mxml/mxml-private.c b/tools/gator/daemon/mxml/mxml-private.c
index 72f3e2320c7c..bec4bbfbf378 100644
--- a/tools/gator/daemon/mxml/mxml-private.c
+++ b/tools/gator/daemon/mxml/mxml-private.c
@@ -1,9 +1,9 @@
 /*
- * "$Id: mxml-private.c 422 2010-11-07 22:55:11Z mike $"
+ * "$Id: mxml-private.c 451 2014-01-04 21:50:06Z msweet $"
  *
  * Private functions for Mini-XML, a small XML-like file parsing library.
  *
- * Copyright 2003-2010 by Michael R Sweet.
+ * Copyright 2003-2014 by Michael R Sweet.
  *
  * These coded instructions, statements, and computer programs are the
  * property of Michael R Sweet and are protected by Federal copyright
@@ -11,15 +11,7 @@
  * which should have been included with this file.  If this file is
  * missing or damaged, see the license at:
  *
- *     http://www.minixml.org/
- *
- * Contents:
- *
- *   mxml_error()      - Display an error message.
- *   mxml_integer_cb() - Default callback for integer values.
- *   mxml_opaque_cb()  - Default callback for opaque values.
- *   mxml_real_cb()    - Default callback for real number values.
- *   _mxml_global()    - Get global data.
+ *     http://www.msweet.org/projects.php/Mini-XML
  */
 
 /*
@@ -238,7 +230,7 @@ static DWORD _mxml_tls_index;		/* Index for global storage */
 /*
  * 'DllMain()' - Main entry for library.
  */
- 
+
 BOOL WINAPI				/* O - Success/failure */
 DllMain(HINSTANCE hinst,		/* I - DLL module handle */
         DWORD     reason,		/* I - Reason */
@@ -250,28 +242,28 @@ DllMain(HINSTANCE hinst,		/* I - DLL module handle */
   (void)hinst;
   (void)reserved;
 
-  switch (reason) 
-  { 
+  switch (reason)
+  {
     case DLL_PROCESS_ATTACH :		/* Called on library initialization */
-        if ((_mxml_tls_index = TlsAlloc()) == TLS_OUT_OF_INDEXES) 
-          return (FALSE); 
-        break; 
+        if ((_mxml_tls_index = TlsAlloc()) == TLS_OUT_OF_INDEXES)
+          return (FALSE);
+        break;
 
     case DLL_THREAD_DETACH :		/* Called when a thread terminates */
         if ((global = (_mxml_global_t *)TlsGetValue(_mxml_tls_index)) != NULL)
           free(global);
-        break; 
+        break;
 
     case DLL_PROCESS_DETACH :		/* Called when library is unloaded */
         if ((global = (_mxml_global_t *)TlsGetValue(_mxml_tls_index)) != NULL)
           free(global);
 
-        TlsFree(_mxml_tls_index); 
-        break; 
+        TlsFree(_mxml_tls_index);
+        break;
 
-    default: 
-        break; 
-  } 
+    default:
+        break;
+  }
 
   return (TRUE);
 }
@@ -295,7 +287,7 @@ _mxml_global(void)
     global->entity_cbs[0]  = _mxml_entity_cb;
     global->wrap           = 72;
 
-    TlsSetValue(_mxml_tls_index, (LPVOID)global); 
+    TlsSetValue(_mxml_tls_index, (LPVOID)global);
   }
 
   return (global);
@@ -327,5 +319,5 @@ _mxml_global(void)
 
 
 /*
- * End of "$Id: mxml-private.c 422 2010-11-07 22:55:11Z mike $".
+ * End of "$Id: mxml-private.c 451 2014-01-04 21:50:06Z msweet $".
  */
diff --git a/tools/gator/daemon/mxml/mxml-private.h b/tools/gator/daemon/mxml/mxml-private.h
index 8789e6c52cbd..c5e4e6b6f27a 100644
--- a/tools/gator/daemon/mxml/mxml-private.h
+++ b/tools/gator/daemon/mxml/mxml-private.h
@@ -1,9 +1,9 @@
 /*
- * "$Id: mxml-private.h 408 2010-09-19 05:26:46Z mike $"
+ * "$Id: mxml-private.h 451 2014-01-04 21:50:06Z msweet $"
  *
  * Private definitions for Mini-XML, a small XML-like file parsing library.
  *
- * Copyright 2003-2010 by Michael R Sweet.
+ * Copyright 2003-2014 by Michael R Sweet.
  *
  * These coded instructions, statements, and computer programs are the
  * property of Michael R Sweet and are protected by Federal copyright
@@ -11,7 +11,7 @@
  * which should have been included with this file.  If this file is
  * missing or damaged, see the license at:
  *
- *     http://www.minixml.org/
+ *     http://www.msweet.org/projects.php/Mini-XML
  */
 
 /*
@@ -46,5 +46,5 @@ extern int		_mxml_entity_cb(const char *name);
 
 
 /*
- * End of "$Id: mxml-private.h 408 2010-09-19 05:26:46Z mike $".
+ * End of "$Id: mxml-private.h 451 2014-01-04 21:50:06Z msweet $".
  */
diff --git a/tools/gator/daemon/mxml/mxml-search.c b/tools/gator/daemon/mxml/mxml-search.c
index f975af1543ca..313a52f0ce2f 100644
--- a/tools/gator/daemon/mxml/mxml-search.c
+++ b/tools/gator/daemon/mxml/mxml-search.c
@@ -1,10 +1,10 @@
 /*
- * "$Id: mxml-search.c 427 2011-01-03 02:03:29Z mike $"
+ * "$Id: mxml-search.c 451 2014-01-04 21:50:06Z msweet $"
  *
  * Search/navigation functions for Mini-XML, a small XML-like file
  * parsing library.
  *
- * Copyright 2003-2010 by Michael R Sweet.
+ * Copyright 2003-2014 by Michael R Sweet.
  *
  * These coded instructions, statements, and computer programs are the
  * property of Michael R Sweet and are protected by Federal copyright
@@ -12,14 +12,7 @@
  * which should have been included with this file.  If this file is
  * missing or damaged, see the license at:
  *
- *     http://www.minixml.org/
- *
- * Contents:
- *
- *   mxmlFindElement() - Find the named element.
- *   mxmlFindValue()   - Find a value with the given path.
- *   mxmlWalkNext()    - Walk to the next logical node in the tree.
- *   mxmlWalkPrev()    - Walk to the previous logical node in the tree.
+ *     http://www.msweet.org/projects.php/Mini-XML
  */
 
 /*
@@ -126,7 +119,7 @@ mxmlFindElement(mxml_node_t *node,	/* I - Current node */
  *
  * The first child node of the found node is returned if the given node has
  * children and the first child is a value node.
- * 
+ *
  * @since Mini-XML 2.7@
  */
 
@@ -283,5 +276,5 @@ mxmlWalkPrev(mxml_node_t *node,		/* I - Current node */
 
 
 /*
- * End of "$Id: mxml-search.c 427 2011-01-03 02:03:29Z mike $".
+ * End of "$Id: mxml-search.c 451 2014-01-04 21:50:06Z msweet $".
  */
diff --git a/tools/gator/daemon/mxml/mxml-set.c b/tools/gator/daemon/mxml/mxml-set.c
index b0bd52790b2e..16d4bf1050dd 100644
--- a/tools/gator/daemon/mxml/mxml-set.c
+++ b/tools/gator/daemon/mxml/mxml-set.c
@@ -1,9 +1,9 @@
 /*
- * "$Id: mxml-set.c 441 2011-12-09 23:49:00Z mike $"
+ * "$Id: mxml-set.c 451 2014-01-04 21:50:06Z msweet $"
  *
  * Node set functions for Mini-XML, a small XML-like file parsing library.
  *
- * Copyright 2003-2011 by Michael R Sweet.
+ * Copyright 2003-2014 by Michael R Sweet.
  *
  * These coded instructions, statements, and computer programs are the
  * property of Michael R Sweet and are protected by Federal copyright
@@ -11,19 +11,7 @@
  * which should have been included with this file.  If this file is
  * missing or damaged, see the license at:
  *
- *     http://www.minixml.org/
- *
- * Contents:
- *
- *   mxmlSetCDATA()    - Set the element name of a CDATA node.
- *   mxmlSetCustom()   - Set the data and destructor of a custom data node.
- *   mxmlSetElement()  - Set the name of an element node.
- *   mxmlSetInteger()  - Set the value of an integer node.
- *   mxmlSetOpaque()   - Set the value of an opaque node.
- *   mxmlSetReal()     - Set the value of a real number node.
- *   mxmlSetText()     - Set the value of a text node.
- *   mxmlSetTextf()    - Set the value of a text node to a formatted string.
- *   mxmlSetUserData() - Set the user data pointer for a node.
+ *     http://www.msweet.org/projects.php/Mini-XML
  */
 
 /*
@@ -345,5 +333,5 @@ mxmlSetUserData(mxml_node_t *node,	/* I - Node to set */
 
 
 /*
- * End of "$Id: mxml-set.c 441 2011-12-09 23:49:00Z mike $".
+ * End of "$Id: mxml-set.c 451 2014-01-04 21:50:06Z msweet $".
  */
diff --git a/tools/gator/daemon/mxml/mxml-string.c b/tools/gator/daemon/mxml/mxml-string.c
index 6be42523f95c..9d5b58e6adb7 100644
--- a/tools/gator/daemon/mxml/mxml-string.c
+++ b/tools/gator/daemon/mxml/mxml-string.c
@@ -1,9 +1,9 @@
 /*
- * "$Id: mxml-string.c 424 2010-12-25 16:21:50Z mike $"
+ * "$Id: mxml-string.c 454 2014-01-05 03:25:07Z msweet $"
  *
  * String functions for Mini-XML, a small XML-like file parsing library.
  *
- * Copyright 2003-2010 by Michael R Sweet.
+ * Copyright 2003-2014 by Michael R Sweet.
  *
  * These coded instructions, statements, and computer programs are the
  * property of Michael R Sweet and are protected by Federal copyright
@@ -11,15 +11,7 @@
  * which should have been included with this file.  If this file is
  * missing or damaged, see the license at:
  *
- *     http://www.minixml.org/
- *
- * Contents:
- *
- *   _mxml_snprintf()  - Format a string.
- *   _mxml_strdup()    - Duplicate a string.
- *   _mxml_strdupf()   - Format and duplicate a string.
- *   _mxml_vsnprintf() - Format a string into a fixed size buffer.
- *   _mxml_vstrdupf()  - Format and duplicate a string.
+ *     http://www.msweet.org/projects.php/Mini-XML
  */
 
 /*
@@ -38,7 +30,7 @@
 #  ifdef __va_copy
 #    define va_copy(dst,src) __va_copy(dst,src)
 #  else
-#    define va_copy(dst,src) memcpy(&dst, &src, sizeof(va_list))
+#    define va_copy(dst,src) memcpy(&dst, src, sizeof(va_list))
 #  endif /* __va_copy */
 #endif /* va_copy */
 
@@ -157,7 +149,8 @@ _mxml_vsnprintf(char       *buffer,	/* O - Output buffer */
 
       if (*format == '%')
       {
-        if (bufptr && bufptr < bufend) *bufptr++ = *format;
+        if (bufptr && bufptr < bufend)
+          *bufptr++ = *format;
         bytes ++;
         format ++;
 	continue;
@@ -472,5 +465,5 @@ _mxml_vstrdupf(const char *format,	/* I - Printf-style format string */
 
 
 /*
- * End of "$Id: mxml-string.c 424 2010-12-25 16:21:50Z mike $".
+ * End of "$Id: mxml-string.c 454 2014-01-05 03:25:07Z msweet $".
  */
diff --git a/tools/gator/daemon/mxml/mxml.h b/tools/gator/daemon/mxml/mxml.h
index 79c711f4c80f..bba5fd23a67b 100644
--- a/tools/gator/daemon/mxml/mxml.h
+++ b/tools/gator/daemon/mxml/mxml.h
@@ -1,9 +1,9 @@
 /*
- * "$Id: mxml.h 427 2011-01-03 02:03:29Z mike $"
+ * "$Id: mxml.h 451 2014-01-04 21:50:06Z msweet $"
  *
  * Header file for Mini-XML, a small XML-like file parsing library.
  *
- * Copyright 2003-2011 by Michael R Sweet.
+ * Copyright 2003-2014 by Michael R Sweet.
  *
  * These coded instructions, statements, and computer programs are the
  * property of Michael R Sweet and are protected by Federal copyright
@@ -11,7 +11,7 @@
  * which should have been included with this file.  If this file is
  * missing or damaged, see the license at:
  *
- *     http://www.minixml.org/
+ *     http://www.msweet.org/projects.php/Mini-XML
  */
 
 /*
@@ -36,6 +36,9 @@
  * Constants...
  */
 
+#  define MXML_MAJOR_VERSION	2	/* Major version number */
+#  define MXML_MINOR_VERSION	8	/* Minor version number */
+
 #  define MXML_TAB		8	/* Tabs every N columns */
 
 #  define MXML_NO_CALLBACK	0	/* Don't use a type callback */
@@ -93,7 +96,7 @@ typedef enum mxml_type_e		/**** The XML node type. ****/
 typedef void (*mxml_custom_destroy_cb_t)(void *);
 					/**** Custom data destructor ****/
 
-typedef void (*mxml_error_cb_t)(const char *);  
+typedef void (*mxml_error_cb_t)(const char *);
 					/**** Error callback function ****/
 
 typedef struct mxml_attr_s		/**** An XML element attribute value. @private@ ****/
@@ -161,7 +164,7 @@ typedef struct mxml_index_s mxml_index_t;
 typedef int (*mxml_custom_load_cb_t)(mxml_node_t *, const char *);
 					/**** Custom data load callback function ****/
 
-typedef char *(*mxml_custom_save_cb_t)(mxml_node_t *);  
+typedef char *(*mxml_custom_save_cb_t)(mxml_node_t *);
 					/**** Custom data save callback function ****/
 
 typedef int (*mxml_entity_cb_t)(const char *);
@@ -173,7 +176,7 @@ typedef mxml_type_t (*mxml_load_cb_t)(mxml_node_t *);
 typedef const char *(*mxml_save_cb_t)(mxml_node_t *, int);
 					/**** Save callback function ****/
 
-typedef void (*mxml_sax_cb_t)(mxml_node_t *, mxml_sax_event_t, void *);  
+typedef void (*mxml_sax_cb_t)(mxml_node_t *, mxml_sax_event_t, void *);
 					/**** SAX callback function ****/
 
 
@@ -325,5 +328,5 @@ extern mxml_type_t	mxml_real_cb(mxml_node_t *node);
 
 
 /*
- * End of "$Id: mxml.h 427 2011-01-03 02:03:29Z mike $".
+ * End of "$Id: mxml.h 451 2014-01-04 21:50:06Z msweet $".
  */

From 560679f5923495a049796b7501038911bd8eb2cb Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@linaro.org>
Date: Thu, 10 May 2012 17:35:03 +0100
Subject: [PATCH 0976/1185] gator: Add config for building the module in-tree

Signed-off-by: Jon Medhurst <tixy@linaro.org>
---
 drivers/Kconfig  | 2 ++
 drivers/Makefile | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/Kconfig b/drivers/Kconfig
index 9953a42809ec..d27feb5460f3 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -166,4 +166,6 @@ source "drivers/ipack/Kconfig"
 
 source "drivers/reset/Kconfig"
 
+source "drivers/gator/Kconfig"
+
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 130abc1dfd65..092a62e79688 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -152,3 +152,5 @@ obj-$(CONFIG_IIO)		+= iio/
 obj-$(CONFIG_VME_BUS)		+= vme/
 obj-$(CONFIG_IPACK_BUS)		+= ipack/
 obj-$(CONFIG_NTB)		+= ntb/
+
+obj-$(CONFIG_GATOR)		+= gator/

From f141171d7d56f603a43742af594b1d5549c73f34 Mon Sep 17 00:00:00 2001
From: Dmitry Shmidt <dimitrysh@google.com>
Date: Fri, 31 Oct 2014 16:05:46 -0700
Subject: [PATCH 0977/1185] power: Add check_wakeup_reason() to verify wakeup
 source irq

Wakeup reason is set before driver resume handlers are called.
It is cleared before driver suspend handlers are called, on
PM_SUSPEND_PREPARE.

Change-Id: I04218c9b0c115a7877e8029c73e6679ff82e0aa4
Signed-off-by: Dmitry Shmidt <dimitrysh@google.com>
---
 include/linux/wakeup_reason.h |  2 ++
 kernel/power/wakeup_reason.c  | 19 +++++++++++++++++--
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h
index 5f095da2c977..ad8b76936c7f 100644
--- a/include/linux/wakeup_reason.h
+++ b/include/linux/wakeup_reason.h
@@ -22,4 +22,6 @@
 
 void log_wakeup_reason(int irq);
 void log_suspend_abort_reason(const char *fmt, ...);
+int check_wakeup_reason(int irq);
+
 #endif /* _LINUX_WAKEUP_REASON_H */
diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c
index 2aacc34ef17c..085c99edca06 100644
--- a/kernel/power/wakeup_reason.c
+++ b/kernel/power/wakeup_reason.c
@@ -34,7 +34,7 @@ static int irqcount;
 static bool suspend_abort;
 static char abort_reason[MAX_SUSPEND_ABORT_LEN];
 static struct kobject *wakeup_reason;
-static spinlock_t resume_reason_lock;
+static DEFINE_SPINLOCK(resume_reason_lock);
 
 static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr,
 		char *buf)
@@ -95,6 +95,21 @@ void log_wakeup_reason(int irq)
 	spin_unlock(&resume_reason_lock);
 }
 
+int check_wakeup_reason(int irq)
+{
+	int irq_no;
+	int ret = false;
+
+	spin_lock(&resume_reason_lock);
+	for (irq_no = 0; irq_no < irqcount; irq_no++)
+		if (irq_list[irq_no] == irq) {
+			ret = true;
+			break;
+	}
+	spin_unlock(&resume_reason_lock);
+	return ret;
+}
+
 void log_suspend_abort_reason(const char *fmt, ...)
 {
 	va_list args;
@@ -141,7 +156,7 @@ static struct notifier_block wakeup_reason_pm_notifier_block = {
 int __init wakeup_reason_init(void)
 {
 	int retval;
-	spin_lock_init(&resume_reason_lock);
+
 	retval = register_pm_notifier(&wakeup_reason_pm_notifier_block);
 	if (retval)
 		printk(KERN_WARNING "[%s] failed to register PM notifier %d\n",

From 2ce95507d5ce6a5d3fd7993c35667a98c2f11f3b Mon Sep 17 00:00:00 2001
From: Erik Kline <ek@google.com>
Date: Tue, 28 Oct 2014 18:11:14 +0900
Subject: [PATCH 0978/1185] net: ipv6: Add a sysctl to make optimistic
 addresses useful candidates

Add a sysctl that causes an interface's optimistic addresses
to be considered equivalent to other non-deprecated addresses
for source address selection purposes.  Preferred addresses
will still take precedence over optimistic addresses, subject
to other ranking in the source address selection algorithm.

This is useful where different interfaces are connected to
different networks from different ISPs (e.g., a cell network
and a home wifi network).

The current behaviour complies with RFC 3484/6724, and it
makes sense if the host has only one interface, or has
multiple interfaces on the same network (same or cooperating
administrative domain(s)), but not in the multiple distinct
networks case.

For example, if a mobile device has an IPv6 address on an LTE
network and then connects to IPv6-enabled wifi, while the wifi
IPv6 address is undergoing DAD, IPv6 connections will try to use
the wifi default route with the LTE IPv6 address, and will get
stuck until they time out.

Also, because optimistic nodes can receive frames, issue
an RTM_NEWADDR as soon as DAD starts (with the IFA_F_OPTIMISTIC
flag appropriately set).  A second RTM_NEWADDR is sent if DAD
completes (the address flags have changed), otherwise an
RTM_DELADDR is sent.

Also: add an entry in ip-sysctl.txt for optimistic_dad.

[cherry-pick of net-next 7fd2561e4ebdd070ebba6d3326c4c5b13942323f]

Signed-off-by: Erik Kline <ek@google.com>
Acked-by: Lorenzo Colitti <lorenzo@google.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Bug: 17769720
Change-Id: Ic7e50781c607e1f3a492d9ce7395946efb95c533
---
 Documentation/networking/ip-sysctl.txt | 13 ++++++++
 include/linux/ipv6.h                   |  1 +
 include/uapi/linux/ipv6.h              |  1 +
 net/ipv6/addrconf.c                    | 46 ++++++++++++++++++++++++--
 4 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 6e5c7c7333bd..74b49ba13b1f 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1333,6 +1333,19 @@ ndisc_notify - BOOLEAN
 	1 - Generate unsolicited neighbour advertisements when device is brought
 	    up or hardware address changes.
 
+optimistic_dad - BOOLEAN
+	Whether to perform Optimistic Duplicate Address Detection (RFC 4429).
+		0: disabled (default)
+		1: enabled
+
+use_optimistic - BOOLEAN
+	If enabled, do not classify optimistic addresses as deprecated during
+	source address selection.  Preferred addresses will still be chosen
+	before optimistic addresses, subject to other ranking in the source
+	address selection algorithm.
+		0: disabled (default)
+		1: enabled
+
 icmp/*:
 ratelimit - INTEGER
 	Limit the maximal rates for sending ICMPv6 packets.
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 867833ba6bd1..76b5114e9d82 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -41,6 +41,7 @@ struct ipv6_devconf {
 	__s32		accept_source_route;
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 	__s32		optimistic_dad;
+	__s32		use_optimistic;
 #endif
 #ifdef CONFIG_IPV6_MROUTE
 	__s32		mc_forwarding;
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 4214fac1bf4f..e9d0f7efde3b 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -161,6 +161,7 @@ enum {
 	DEVCONF_FORCE_TLLAO,
 	DEVCONF_NDISC_NOTIFY,
 	DEVCONF_ACCEPT_RA_RT_TABLE,
+	DEVCONF_USE_OPTIMISTIC,
 	DEVCONF_MAX
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index cec8cb4d292d..e1381119a6d8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1174,6 +1174,9 @@ enum {
 #endif
 	IPV6_SADDR_RULE_ORCHID,
 	IPV6_SADDR_RULE_PREFIX,
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	IPV6_SADDR_RULE_NOT_OPTIMISTIC,
+#endif
 	IPV6_SADDR_RULE_MAX
 };
 
@@ -1201,6 +1204,15 @@ static inline int ipv6_saddr_preferred(int type)
 	return 0;
 }
 
+static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev)
+{
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic;
+#else
+	return false;
+#endif
+}
+
 static int ipv6_get_saddr_eval(struct net *net,
 			       struct ipv6_saddr_score *score,
 			       struct ipv6_saddr_dst *dst,
@@ -1261,10 +1273,16 @@ static int ipv6_get_saddr_eval(struct net *net,
 		score->scopedist = ret;
 		break;
 	case IPV6_SADDR_RULE_PREFERRED:
+	    {
 		/* Rule 3: Avoid deprecated and optimistic addresses */
+		u8 avoid = IFA_F_DEPRECATED;
+
+		if (!ipv6_use_optimistic_addr(score->ifa->idev))
+			avoid |= IFA_F_OPTIMISTIC;
 		ret = ipv6_saddr_preferred(score->addr_type) ||
-		      !(score->ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC));
+		      !(score->ifa->flags & avoid);
 		break;
+	    }
 #ifdef CONFIG_IPV6_MIP6
 	case IPV6_SADDR_RULE_HOA:
 	    {
@@ -1312,6 +1330,14 @@ static int ipv6_get_saddr_eval(struct net *net,
 			ret = score->ifa->prefix_len;
 		score->matchlen = ret;
 		break;
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	case IPV6_SADDR_RULE_NOT_OPTIMISTIC:
+		/* Optimistic addresses still have lower precedence than other
+		 * preferred addresses.
+		 */
+		ret = !(score->ifa->flags & IFA_F_OPTIMISTIC);
+		break;
+#endif
 	default:
 		ret = 0;
 	}
@@ -3245,8 +3271,15 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp)
 	 * Optimistic nodes can start receiving
 	 * Frames right away
 	 */
-	if (ifp->flags & IFA_F_OPTIMISTIC)
+	if (ifp->flags & IFA_F_OPTIMISTIC) {
 		ip6_ins_rt(ifp->rt);
+		if (ipv6_use_optimistic_addr(idev)) {
+			/* Because optimistic nodes can use this address,
+			 * notify listeners. If DAD fails, RTM_DELADDR is sent.
+			 */
+			ipv6_ifa_notify(RTM_NEWADDR, ifp);
+		}
+	}
 
 	addrconf_dad_kick(ifp);
 out:
@@ -4192,6 +4225,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 	array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
+	array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic;
 #endif
 #ifdef CONFIG_IPV6_MROUTE
 	array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
@@ -4926,6 +4960,14 @@ static struct addrconf_sysctl_table
 			.proc_handler   = proc_dointvec,
 
 		},
+		{
+			.procname       = "use_optimistic",
+			.data           = &ipv6_devconf.use_optimistic,
+			.maxlen         = sizeof(int),
+			.mode           = 0644,
+			.proc_handler   = proc_dointvec,
+
+		},
 #endif
 #ifdef CONFIG_IPV6_MROUTE
 		{

From 9aed3d0f495e8d1c53c8041cd4f18cab81affd7b Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@intel.com>
Date: Mon, 16 Dec 2013 14:55:34 +0530
Subject: [PATCH 0979/1185] ALSA: compress: change the way sample rates are
 sent to kernel

The usage of SNDRV_RATES is not effective as we can have rates like 12000 or
some other ones used by decoders. This changes the interface so that the raw
Hz values are sent to the kernel.

Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
(cherry picked from commit f0e9c08065dc31210fc4cf313c4ecaa088187dc5)
Signed-off-by: Yuchen Song <yuchens@nvidia.com>
Change-Id: Ia4c67405b9cf9aef9c641bce9b02a994939eae00
---
 include/uapi/sound/compress_params.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h
index 602dc6c45d1a..1114e380aecd 100644
--- a/include/uapi/sound/compress_params.h
+++ b/include/uapi/sound/compress_params.h
@@ -324,7 +324,7 @@ union snd_codec_options {
 
 /** struct snd_codec_desc - description of codec capabilities
  * @max_ch: Maximum number of audio channels
- * @sample_rates: Sampling rates in Hz, use SNDRV_PCM_RATE_xxx for this
+ * @sample_rates: Sampling rates in Hz, use values like 48000 for this
  * @bit_rate: Indexed array containing supported bit rates
  * @num_bitrates: Number of valid values in bit_rate array
  * @rate_control: value is specified by SND_RATECONTROLMODE defines.

From 083799f1d4a337e55ce9bdfaf40e4ba9ba37e0c8 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@intel.com>
Date: Sat, 4 Jan 2014 16:59:11 +0530
Subject: [PATCH 0980/1185] ALSA: compress: remove the sample rate check

commit f0e9c080 - "ALSA: compress: change the way sample rates are sent to
kernel" changed the way sample rates are sent. So now we don't need to check for
PCM_RATE_xxx in the kernel.

Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
(cherry picked from commit 2aac06f787940543fb37bfdb982eb99431bc6094)
Signed-off-by: Yuchen Song <yuchens@nvidia.com>
Change-Id: I6448d844fb31097bf33e52c23a7e38d6b089ce69
---
 sound/core/compress_offload.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c
index 99db892d7299..fe399f8c18f4 100644
--- a/sound/core/compress_offload.c
+++ b/sound/core/compress_offload.c
@@ -490,9 +490,6 @@ static int snd_compress_check_input(struct snd_compr_params *params)
 	if (params->codec.ch_in == 0 || params->codec.ch_out == 0)
 		return -EINVAL;
 
-	if (!(params->codec.sample_rate & SNDRV_PCM_RATE_8000_192000))
-		return -EINVAL;
-
 	return 0;
 }
 

From 55fc15e33d13dd6f230fd86c6c9f9837c30b8bc6 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@intel.com>
Date: Sat, 4 Jan 2014 16:59:12 +0530
Subject: [PATCH 0981/1185] ALSA: compress: update comment for sample rate in
 snd_codec

Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
(cherry picked from commit d9afee6904caa7cf3c7f417f02e765db89d2b5dc)
Signed-off-by: Yuchen Song <yuchens@nvidia.com>
Change-Id: I7608d924613611222766a898a97c856a64c2eb68
---
 include/uapi/sound/compress_params.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h
index 1114e380aecd..8c23aebc82a5 100644
--- a/include/uapi/sound/compress_params.h
+++ b/include/uapi/sound/compress_params.h
@@ -364,7 +364,8 @@ struct snd_codec_desc {
  * @ch_out: Number of output channels. In case of contradiction between
  *		this field and the channelMode field, the channelMode field
  *		overrides.
- * @sample_rate: Audio sample rate of input data
+ * @sample_rate: Audio sample rate of input data in Hz, use values like 48000
+ *		for this.
  * @bit_rate: Bitrate of encoded data. May be ignored by decoders
  * @rate_control: Encoding rate control. See SND_RATECONTROLMODE defines.
  *               Encoders may rely on profiles for quality levels.

From 371e4108dd8b345b0144fa3ccc83d84d19918c20 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@intel.com>
Date: Sat, 4 Jan 2014 16:59:13 +0530
Subject: [PATCH 0982/1185] ALSA: compress: update struct snd_codec_desc for
 sample rate

Now that we don't use SNDRV_PCM_RATE_xxx bit fields for sample rate, we need to
change the description to an array for describing the sample rates supported by
the sink/source

Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
(cherry picked from commit b8bab04829ab190f71921d4180bda438ba6124ae)
Signed-off-by: Yuchen Song <yuchens@nvidia.com>
Change-Id: I6c2fa5a5034ec749e9d7a71c49a1108af2416848
---
 include/uapi/sound/compress_params.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h
index 8c23aebc82a5..b62b24b7f834 100644
--- a/include/uapi/sound/compress_params.h
+++ b/include/uapi/sound/compress_params.h
@@ -57,6 +57,7 @@
 #define MAX_NUM_CODECS 32
 #define MAX_NUM_CODEC_DESCRIPTORS 32
 #define MAX_NUM_BITRATES 32
+#define MAX_NUM_SAMPLE_RATES 32
 
 /* Codecs are listed linearly to allow for extensibility */
 #define SND_AUDIOCODEC_PCM                   ((__u32) 0x00000001)
@@ -346,7 +347,7 @@ union snd_codec_options {
 
 struct snd_codec_desc {
 	__u32 max_ch;
-	__u32 sample_rates;
+	__u32 sample_rates[MAX_NUM_SAMPLE_RATES];
 	__u32 bit_rate[MAX_NUM_BITRATES];
 	__u32 num_bitrates;
 	__u32 rate_control;

From c5400a9083607af9852cf2c0ac0093a128cd3c18 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@intel.com>
Date: Tue, 7 Jan 2014 21:55:42 +0530
Subject: [PATCH 0983/1185] ALSA: compress: add num_sample_rates in
 snd_codec_desc

This gives the ability to convey how many entries of the sample_rates array
hold valid supported rates.

Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
(cherry picked from commit 929559be6d2c494e25bb58b730da4a78c1459e7b)
Signed-off-by: Yuchen Song <yuchens@nvidia.com>
Change-Id: Icfbb6d272a70c0a94719613c00bac18c5a0e3f87
---
 include/uapi/sound/compress_params.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h
index b62b24b7f834..165e7059de75 100644
--- a/include/uapi/sound/compress_params.h
+++ b/include/uapi/sound/compress_params.h
@@ -326,6 +326,7 @@ union snd_codec_options {
 /** struct snd_codec_desc - description of codec capabilities
  * @max_ch: Maximum number of audio channels
  * @sample_rates: Sampling rates in Hz, use values like 48000 for this
+ * @num_sample_rates: Number of valid values in sample_rates array
  * @bit_rate: Indexed array containing supported bit rates
  * @num_bitrates: Number of valid values in bit_rate array
  * @rate_control: value is specified by SND_RATECONTROLMODE defines.
@@ -348,6 +349,7 @@ union snd_codec_options {
 struct snd_codec_desc {
 	__u32 max_ch;
 	__u32 sample_rates[MAX_NUM_SAMPLE_RATES];
+	__u32 num_sample_rates;
 	__u32 bit_rate[MAX_NUM_BITRATES];
 	__u32 num_bitrates;
 	__u32 rate_control;

From 57114e95e8c4f5035c993fc74bbe94cd9573f1bb Mon Sep 17 00:00:00 2001
From: Rom Lemarchand <romlem@android.com>
Date: Fri, 7 Nov 2014 12:48:17 -0800
Subject: [PATCH 0984/1185] cgroup: refactor allow_attach function into common
 code

move cpu_cgroup_allow_attach to a common subsys_cgroup_allow_attach.
This allows any process with CAP_SYS_NICE to move tasks across cgroups if
they use this function as their allow_attach handler.

Bug: 18260435
Change-Id: I6bb4933d07e889d0dc39e33b4e71320c34a2c90f
Signed-off-by: Rom Lemarchand <romlem@android.com>
---
 include/linux/cgroup.h | 16 ++++++++++++++++
 kernel/cgroup.c        | 19 +++++++++++++++++++
 kernel/sched/core.c    | 19 +------------------
 3 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 8f73d835d4d5..a2bcbd2e0f9a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -831,6 +831,17 @@ unsigned short css_id(struct cgroup_subsys_state *css);
 unsigned short css_depth(struct cgroup_subsys_state *css);
 struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
 
+/*
+ * Default Android check for whether the current process is allowed to move a
+ * task across cgroups, either because CAP_SYS_NICE is set or because the uid
+ * of the calling process is the same as the moved task or because we are
+ * running as root.
+ * Returns 0 if this is allowed, or -EACCES otherwise.
+ */
+int subsys_cgroup_allow_attach(struct cgroup *cgrp,
+			       struct cgroup_taskset *tset);
+
+
 #else /* !CONFIG_CGROUPS */
 
 static inline int cgroup_init_early(void) { return 0; }
@@ -854,6 +865,11 @@ static inline int cgroup_attach_task_all(struct task_struct *from,
 	return 0;
 }
 
+static inline int subsys_cgroup_allow_attach(struct cgroup *cgrp,
+					     struct cgroup_taskset *tset)
+{
+	return 0;
+}
 #endif /* !CONFIG_CGROUPS */
 
 #endif /* _LINUX_CGROUP_H */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1f5338773862..8dc7ec1de429 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2116,6 +2116,25 @@ static int cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 	return 0;
 }
 
+int subsys_cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+{
+	const struct cred *cred = current_cred(), *tcred;
+	struct task_struct *task;
+
+	if (capable(CAP_SYS_NICE))
+		return 0;
+
+	cgroup_taskset_for_each(task, cgrp, tset) {
+		tcred = __task_cred(task);
+
+		if (current != task && cred->euid != tcred->uid &&
+		    cred->euid != tcred->suid)
+			return -EACCES;
+	}
+
+	return 0;
+}
+
 /*
  * Find the task_struct of the task to attach by vpid and pass it along to the
  * function to attach either it or all tasks in its threadgroup. Will lock
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 014040fa3d21..d5c5c9824511 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7716,23 +7716,6 @@ static void cpu_cgroup_css_offline(struct cgroup *cgrp)
 	sched_offline_group(tg);
 }
 
-static int
-cpu_cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
-{
-	const struct cred *cred = current_cred(), *tcred;
-	struct task_struct *task;
-
-	cgroup_taskset_for_each(task, cgrp, tset) {
-		tcred = __task_cred(task);
-
-		if ((current != task) && !capable(CAP_SYS_NICE) &&
-		    cred->euid != tcred->uid && cred->euid != tcred->suid)
-			return -EACCES;
-	}
-
-	return 0;
-}
-
 static int cpu_cgroup_can_attach(struct cgroup *cgrp,
 				 struct cgroup_taskset *tset)
 {
@@ -8092,7 +8075,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.css_offline	= cpu_cgroup_css_offline,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
-	.allow_attach	= cpu_cgroup_allow_attach,
+	.allow_attach	= subsys_cgroup_allow_attach,
 	.exit		= cpu_cgroup_exit,
 	.subsys_id	= cpu_cgroup_subsys_id,
 	.base_cftypes	= cpu_files,

From cce78bc02ff0ea2d21e88e3438d65272b898aa35 Mon Sep 17 00:00:00 2001
From: Rom Lemarchand <romlem@android.com>
Date: Fri, 7 Nov 2014 09:42:40 -0800
Subject: [PATCH 0985/1185] memcg: add permission check

Use the 'allow_attach' handler for the 'mem' cgroup to allow
non-root processes to add arbitrary processes to a 'mem' cgroup
if they have the CAP_SYS_NICE capability set.

Bug: 18260435
Change-Id: If7d37bf90c1544024c4db53351adba6a64966250
Signed-off-by: Rom Lemarchand <romlem@android.com>
---
 mm/memcontrol.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 194721839cf5..338d62a05200 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6753,6 +6753,12 @@ static int mem_cgroup_can_attach(struct cgroup *cgroup,
 	return ret;
 }
 
+static int mem_cgroup_allow_attach(struct cgroup *cgroup,
+				   struct cgroup_taskset *tset)
+{
+	return subsys_cgroup_allow_attach(cgroup, tset);
+}
+
 static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
 				     struct cgroup_taskset *tset)
 {
@@ -6921,6 +6927,11 @@ static int mem_cgroup_can_attach(struct cgroup *cgroup,
 {
 	return 0;
 }
+static int mem_cgroup_allow_attach(struct cgroup *cgroup,
+				   struct cgroup_taskset *tset)
+{
+	return 0;
+}
 static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
 				     struct cgroup_taskset *tset)
 {
@@ -6956,6 +6967,7 @@ struct cgroup_subsys mem_cgroup_subsys = {
 	.can_attach = mem_cgroup_can_attach,
 	.cancel_attach = mem_cgroup_cancel_attach,
 	.attach = mem_cgroup_move_task,
+	.allow_attach = mem_cgroup_allow_attach,
 	.bind = mem_cgroup_bind,
 	.base_cftypes = mem_cgroup_files,
 	.early_init = 0,

From 3ad3add775181f56f51ed14324ed4e7f1c9d3d1e Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin@rab.in>
Date: Wed, 29 Oct 2014 23:06:58 +0100
Subject: [PATCH 0986/1185] tracing/syscalls: Ignore numbers outside
 NR_syscalls' range

commit 086ba77a6db00ed858ff07451bedee197df868c9 upstream.

ARM has some private syscalls (for example, set_tls(2)) which lie
outside the range of NR_syscalls.  If any of these are called while
syscall tracing is being performed, out-of-bounds array access will
occur in the ftrace and perf sys_{enter,exit} handlers.

 # trace-cmd record -e raw_syscalls:* true && trace-cmd report
 ...
 true-653   [000]   384.675777: sys_enter:            NR 192 (0, 1000, 3, 4000022, ffffffff, 0)
 true-653   [000]   384.675812: sys_exit:             NR 192 = 1995915264
 true-653   [000]   384.675971: sys_enter:            NR 983045 (76f74480, 76f74000, 76f74b28, 76f74480, 76f76f74, 1)
 true-653   [000]   384.675988: sys_exit:             NR 983045 = 0
 ...

 # trace-cmd record -e syscalls:* true
 [   17.289329] Unable to handle kernel paging request at virtual address aaaaaace
 [   17.289590] pgd = 9e71c000
 [   17.289696] [aaaaaace] *pgd=00000000
 [   17.289985] Internal error: Oops: 5 [#1] PREEMPT SMP ARM
 [   17.290169] Modules linked in:
 [   17.290391] CPU: 0 PID: 704 Comm: true Not tainted 3.18.0-rc2+ #21
 [   17.290585] task: 9f4dab00 ti: 9e710000 task.ti: 9e710000
 [   17.290747] PC is at ftrace_syscall_enter+0x48/0x1f8
 [   17.290866] LR is at syscall_trace_enter+0x124/0x184

Fix this by ignoring out-of-NR_syscalls-bounds syscall numbers.

Commit cd0980fc8add "tracing: Check invalid syscall nr while tracing syscalls"
added the check for less than zero, but it should have also checked
for greater than NR_syscalls.

Link: http://lkml.kernel.org/p/1414620418-29472-1-git-send-email-rabin@rab.in

Fixes: cd0980fc8add "tracing: Check invalid syscall nr while tracing syscalls"
Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace_syscalls.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 322e16461072..bdb9ee0af991 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -312,7 +312,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
 	int size;
 
 	syscall_nr = trace_get_syscall_nr(current, regs);
-	if (syscall_nr < 0)
+	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
 		return;
 	if (!test_bit(syscall_nr, tr->enabled_enter_syscalls))
 		return;
@@ -354,7 +354,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
 	int syscall_nr;
 
 	syscall_nr = trace_get_syscall_nr(current, regs);
-	if (syscall_nr < 0)
+	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
 		return;
 	if (!test_bit(syscall_nr, tr->enabled_exit_syscalls))
 		return;
@@ -557,7 +557,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 	int size;
 
 	syscall_nr = trace_get_syscall_nr(current, regs);
-	if (syscall_nr < 0)
+	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
 		return;
 	if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
 		return;
@@ -633,7 +633,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 	int size;
 
 	syscall_nr = trace_get_syscall_nr(current, regs);
-	if (syscall_nr < 0)
+	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
 		return;
 	if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
 		return;

From 4449361e6feca3caa91749ad3f8333f9ac5502ab Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Mon, 13 Oct 2014 16:34:10 +0200
Subject: [PATCH 0987/1185] ipv4: fix nexthop attlen check in fib_nh_match

[ Upstream commit f76936d07c4eeb36d8dbb64ebd30ab46ff85d9f7 ]

fib_nh_match does not match nexthops correctly. Example:

ip route add 172.16.10/24 nexthop via 192.168.122.12 dev eth0 \
                          nexthop via 192.168.122.13 dev eth0
ip route del 172.16.10/24 nexthop via 192.168.122.14 dev eth0 \
                          nexthop via 192.168.122.15 dev eth0

Del command is successful and route is removed. After this patch
applied, the route is correctly matched and result is:
RTNETLINK answers: No such process

Please consider this for stable trees as well.

Fixes: 4e902c57417c4 ("[IPv4]: FIB configuration using struct fib_config")
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/fib_semantics.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 9c3979a50804..bc773a10dca6 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -533,7 +533,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
 			return 1;
 
 		attrlen = rtnh_attrlen(rtnh);
-		if (attrlen < 0) {
+		if (attrlen > 0) {
 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
 
 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);

From 9744aac69371ecac7ac6bef8a7a4a65d4e949dfa Mon Sep 17 00:00:00 2001
From: Ian Morgan <imorgan@primordial.ca>
Date: Sun, 19 Oct 2014 08:05:13 -0400
Subject: [PATCH 0988/1185] ax88179_178a: fix bonding failure

[ Upstream commit 95ff88688781db2f64042e69bd499e518bbb36e5 ]

The following patch fixes a bug which causes the ax88179_178a driver to be
incapable of being added to a bond.

When I brought up the issue with the bonding maintainers, they indicated
that the real problem was with the NIC driver which must return zero for
success (of setting the MAC address). I see that several other NIC drivers
follow that pattern by either simply always returning zero, or by passing
through a negative (error) result while rewriting any positive return code
to zero. With that same philosophy applied to the ax88179_178a driver, it
allows it to work correctly with the bonding driver.

I believe this is suitable for queuing in -stable, as it's a small, simple,
and obvious fix that corrects a defect with no other known workaround.

This patch is against vanilla 3.17(.0).

Signed-off-by: Ian Morgan <imorgan@primordial.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/ax88179_178a.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index d33c3ae2fcea..3b449c4ecf72 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -695,6 +695,7 @@ static int ax88179_set_mac_addr(struct net_device *net, void *p)
 {
 	struct usbnet *dev = netdev_priv(net);
 	struct sockaddr *addr = p;
+	int ret;
 
 	if (netif_running(net))
 		return -EBUSY;
@@ -704,8 +705,12 @@ static int ax88179_set_mac_addr(struct net_device *net, void *p)
 	memcpy(net->dev_addr, addr->sa_data, ETH_ALEN);
 
 	/* Set the MAC address */
-	return ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN,
+	ret = ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN,
 				 ETH_ALEN, net->dev_addr);
+	if (ret < 0)
+		return ret;
+
+	return 0;
 }
 
 static const struct net_device_ops ax88179_netdev_ops = {

From 9a8955adfba0821f5b354d400303436b6e4b2e13 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@parallels.com>
Date: Wed, 15 Oct 2014 16:24:02 +0400
Subject: [PATCH 0989/1185] ipv4: dst_entry leak in ip_send_unicast_reply()

[ Upstream commit 4062090e3e5caaf55bed4523a69f26c3265cc1d2 ]

ip_setup_cork(), called inside ip_append_data(), steals the dst entry from rt
to cork, and in case of errors in __ip_append_data() nobody frees the stolen
dst entry.

Fixes: 2e77d89b2fa8 ("net: avoid a pair of dst_hold()/dst_release() in ip_append_data()")
Signed-off-by: Vasily Averin <vvs@parallels.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_output.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5afbbbe03b0e..22fa05e041ea 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1481,6 +1481,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
 	struct sk_buff *nskb;
 	struct sock *sk;
 	struct inet_sock *inet;
+	int err;
 
 	if (ip_options_echo(&replyopts.opt.opt, skb))
 		return;
@@ -1517,8 +1518,13 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
 	sock_net_set(sk, net);
 	__skb_queue_head_init(&sk->sk_write_queue);
 	sk->sk_sndbuf = sysctl_wmem_default;
-	ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
-		       &ipc, &rt, MSG_DONTWAIT);
+	err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
+			     len, 0, &ipc, &rt, MSG_DONTWAIT);
+	if (unlikely(err)) {
+		ip_flush_pending_frames(sk);
+		goto out;
+	}
+
 	nskb = skb_peek(&sk->sk_write_queue);
 	if (nskb) {
 		if (arg->csumoffset >= 0)
@@ -1530,7 +1536,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
 		skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
 		ip_push_pending_frames(sk, &fl4);
 	}
-
+out:
 	put_cpu_var(unicast_sock);
 
 	ip_rt_put(rt);

From f6ca437d7c4e250e454ed29e0e66d47fdc430d1d Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Fri, 31 Oct 2014 03:10:31 +0000
Subject: [PATCH 0990/1185] drivers/net: macvtap and tun depend on INET

[ Upstream commit de11b0e8c569b96c2cf6a811e3805b7aeef498a3 ]

These drivers now call ipv6_proxy_select_ident(), which is defined
only if CONFIG_INET is enabled.  However, they have really depended
on CONFIG_INET for as long as they have allowed sending GSO packets
from userland.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Fixes: f43798c27684 ("tun: Allow GSO using virtio_net_hdr")
Fixes: b9fb9ee07e67 ("macvtap: add GSO/csum offload support")
Fixes: 5188cd44c55d ("drivers/net, ipv6: Select IPv6 fragment idents for virtio UFO packets")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 3835321b8cf3..3bc3ebc0882f 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -139,6 +139,7 @@ config MACVLAN
 config MACVTAP
 	tristate "MAC-VLAN based tap driver"
 	depends on MACVLAN
+	depends on INET
 	help
 	  This adds a specialized tap character device driver that is based
 	  on the MAC-VLAN network interface, called macvtap. A macvtap device
@@ -209,6 +210,7 @@ config RIONET_RX_SIZE
 
 config TUN
 	tristate "Universal TUN/TAP device driver support"
+	depends on INET
 	select CRC32
 	---help---
 	  TUN/TAP provides packet reception and transmission for user space

From 2d8912138519b31539cf5b6a4471d02dbcf2bd1a Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Tue, 23 Sep 2014 12:26:20 -0400
Subject: [PATCH 0991/1185] lockd: Try to reconnect if statd has moved

commit 173b3afceebe76fa2205b2c8808682d5b541fe3c upstream.

If rpc.statd is restarted, upcalls to monitor hosts can fail with
ECONNREFUSED.  In that case force a lookup of statd's new port and retry the
upcall.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/lockd/mon.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 1812f026960c..6ae664b489af 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -159,6 +159,12 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
 
 	msg.rpc_proc = &clnt->cl_procinfo[proc];
 	status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN);
+	if (status == -ECONNREFUSED) {
+		dprintk("lockd:	NSM upcall RPC failed, status=%d, forcing rebind\n",
+				status);
+		rpc_force_rebind(clnt);
+		status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN);
+	}
 	if (status < 0)
 		dprintk("lockd: NSM upcall RPC failed, status=%d\n",
 				status);

From 8eef30a98711910beba01de2cc382d124b2cb7a9 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 21 Sep 2014 15:04:53 -0700
Subject: [PATCH 0992/1185] Revert "percpu: free percpu allocation info for
 uniprocessor system"

commit bb2e226b3bef596dd56be97df655d857b4603923 upstream.

This reverts commit 3189eddbcafc ("percpu: free percpu allocation info for
uniprocessor system").

The commit causes a hang with a crisv32 image. This may be an architecture
problem, but at least for now the revert is necessary to be able to boot a
crisv32 image.

Cc: Tejun Heo <tj@kernel.org>
Cc: Honggang Li <enjoymindful@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: 3189eddbcafc ("percpu: free percpu allocation info for uniprocessor system")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/percpu.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/mm/percpu.c b/mm/percpu.c
index 9bc1bf914cc8..25e2ea52db82 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1910,8 +1910,6 @@ void __init setup_per_cpu_areas(void)
 
 	if (pcpu_setup_first_chunk(ai, fc) < 0)
 		panic("Failed to initialize percpu areas.");
-
-	pcpu_free_alloc_info(ai);
 }
 
 #endif	/* CONFIG_SMP */

From dd1e981a08ca5da000d7fb2c384a6b02e9b5b08b Mon Sep 17 00:00:00 2001
From: Scott Carter <ccscott@funsoft.com>
Date: Wed, 24 Sep 2014 18:13:09 -0700
Subject: [PATCH 0993/1185] pata_serverworks: disable 64-KB DMA transfers on
 Broadcom OSB4 IDE Controller

commit 37017ac6849e772e67dd187ba2fbd056c4afa533 upstream.

The Broadcom OSB4 IDE Controller (vendor and device IDs: 1166:0211)
does not support 64-KB DMA transfers.
Whenever a 64-KB DMA transfer is attempted,
the transfer fails and messages similar to the following
are written to the console log:

   [ 2431.851125] sr 0:0:0:0: [sr0] Unhandled sense code
   [ 2431.851139] sr 0:0:0:0: [sr0]  Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE
   [ 2431.851152] sr 0:0:0:0: [sr0]  Sense Key : Hardware Error [current]
   [ 2431.851166] sr 0:0:0:0: [sr0]  Add. Sense: Logical unit communication time-out
   [ 2431.851182] sr 0:0:0:0: [sr0] CDB: Read(10): 28 00 00 00 76 f4 00 00 40 00
   [ 2431.851210] end_request: I/O error, dev sr0, sector 121808

When the libata and pata_serverworks modules
are recompiled with ATA_DEBUG and ATA_VERBOSE_DEBUG defined in libata.h,
the 64-KB transfer size in the scatter-gather list can be seen
in the console log:

   [ 2664.897267] sr 9:0:0:0: [sr0] Send:
   [ 2664.897274] 0xf63d85e0
   [ 2664.897283] sr 9:0:0:0: [sr0] CDB:
   [ 2664.897288] Read(10): 28 00 00 00 7f b4 00 00 40 00
   [ 2664.897319] buffer = 0xf6d6fbc0, bufflen = 131072, queuecommand 0xf81b7700
   [ 2664.897331] ata_scsi_dump_cdb: CDB (1:0,0,0) 28 00 00 00 7f b4 00 00 40
   [ 2664.897338] ata_scsi_translate: ENTER
   [ 2664.897345] ata_sg_setup: ENTER, ata1
   [ 2664.897356] ata_sg_setup: 3 sg elements mapped
   [ 2664.897364] ata_bmdma_fill_sg: PRD[0] = (0x66FD2000, 0xE000)
   [ 2664.897371] ata_bmdma_fill_sg: PRD[1] = (0x65000000, 0x10000)
   ------------------------------------------------------> =======
   [ 2664.897378] ata_bmdma_fill_sg: PRD[2] = (0x66A10000, 0x2000)
   [ 2664.897386] ata1: ata_dev_select: ENTER, device 0, wait 1
   [ 2664.897422] ata_sff_tf_load: feat 0x1 nsect 0x0 lba 0x0 0x0 0xFC
   [ 2664.897428] ata_sff_tf_load: device 0xA0
   [ 2664.897448] ata_sff_exec_command: ata1: cmd 0xA0
   [ 2664.897457] ata_scsi_translate: EXIT
   [ 2664.897462] leaving scsi_dispatch_cmnd()
   [ 2664.897497] Doing sr request, dev = sr0, block = 0
   [ 2664.897507] sr0 : reading 64/256 512 byte blocks.
   [ 2664.897553] ata_sff_hsm_move: ata1: protocol 7 task_state 1 (dev_stat 0x58)
   [ 2664.897560] atapi_send_cdb: send cdb
   [ 2666.910058] ata_bmdma_port_intr: ata1: host_stat 0x64
   [ 2666.910079] __ata_sff_port_intr: ata1: protocol 7 task_state 3
   [ 2666.910093] ata_sff_hsm_move: ata1: protocol 7 task_state 3 (dev_stat 0x51)
   [ 2666.910101] ata_sff_hsm_move: ata1: protocol 7 task_state 4 (dev_stat 0x51)
   [ 2666.910129] sr 9:0:0:0: [sr0] Done:
   [ 2666.910136] 0xf63d85e0 TIMEOUT

lspci shows that the driver used for the Broadcom OSB4 IDE Controller is
pata_serverworks:

   00:0f.1 IDE interface: Broadcom OSB4 IDE Controller (prog-if 8e [Master SecP SecO PriP])
           Flags: bus master, medium devsel, latency 64
           [virtual] Memory at 000001f0 (32-bit, non-prefetchable) [size=8]
           [virtual] Memory at 000003f0 (type 3, non-prefetchable) [size=1]
           I/O ports at 0170 [size=8]
           I/O ports at 0374 [size=4]
           I/O ports at 1440 [size=16]
           Kernel driver in use: pata_serverworks

The pata_serverworks driver supports five distinct device IDs,
one being the OSB4 and the other four belonging to the CSB series.
The CSB series appears to support 64-KB DMA transfers,
as tests on a machine with an SAI2 motherboard
containing a Broadcom CSB5 IDE Controller (vendor and device IDs: 1166:0212)
showed no problems with 64-KB DMA transfers.

This problem was first discovered when attempting to install openSUSE
from a DVD on a machine with an STL2 motherboard.
Using the pata_serverworks module,
older releases of openSUSE will not install at all due to the timeouts.
Releases of openSUSE prior to 11.3 can be installed by disabling
the pata_serverworks module using the brokenmodules boot parameter,
which causes the serverworks module to be used instead.
Recent releases of openSUSE (12.2 and later) include better error recovery and
will install, though very slowly.
On all openSUSE releases, the problem can be recreated
on a machine containing a Broadcom OSB4 IDE Controller
by mounting an install DVD and running a command similar to the following:

   find /mnt -type f -print | xargs cat > /dev/null

The patch below corrects the problem.
Similar to the other ATA drivers that do not support 64-KB DMA transfers,
the patch changes the ata_port_operations qc_prep vector to point to a routine
that breaks any 64-KB segment into two 32-KB segments and
changes the scsi_host_template sg_tablesize element to reduce by half
the number of scatter/gather elements allowed.
These two changes affect only the OSB4.

Signed-off-by: Scott Carter <ccscott@funsoft.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/pata_serverworks.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/pata_serverworks.c b/drivers/ata/pata_serverworks.c
index f3febbce6c46..34c91ac3a814 100644
--- a/drivers/ata/pata_serverworks.c
+++ b/drivers/ata/pata_serverworks.c
@@ -252,12 +252,18 @@ static void serverworks_set_dmamode(struct ata_port *ap, struct ata_device *adev
 	pci_write_config_byte(pdev, 0x54, ultra_cfg);
 }
 
-static struct scsi_host_template serverworks_sht = {
+static struct scsi_host_template serverworks_osb4_sht = {
+	ATA_BMDMA_SHT(DRV_NAME),
+	.sg_tablesize	= LIBATA_DUMB_MAX_PRD,
+};
+
+static struct scsi_host_template serverworks_csb_sht = {
 	ATA_BMDMA_SHT(DRV_NAME),
 };
 
 static struct ata_port_operations serverworks_osb4_port_ops = {
 	.inherits	= &ata_bmdma_port_ops,
+	.qc_prep	= ata_bmdma_dumb_qc_prep,
 	.cable_detect	= serverworks_cable_detect,
 	.mode_filter	= serverworks_osb4_filter,
 	.set_piomode	= serverworks_set_piomode,
@@ -266,6 +272,7 @@ static struct ata_port_operations serverworks_osb4_port_ops = {
 
 static struct ata_port_operations serverworks_csb_port_ops = {
 	.inherits	= &serverworks_osb4_port_ops,
+	.qc_prep	= ata_bmdma_qc_prep,
 	.mode_filter	= serverworks_csb_filter,
 };
 
@@ -405,6 +412,7 @@ static int serverworks_init_one(struct pci_dev *pdev, const struct pci_device_id
 		}
 	};
 	const struct ata_port_info *ppi[] = { &info[id->driver_data], NULL };
+	struct scsi_host_template *sht = &serverworks_csb_sht;
 	int rc;
 
 	rc = pcim_enable_device(pdev);
@@ -418,6 +426,7 @@ static int serverworks_init_one(struct pci_dev *pdev, const struct pci_device_id
 		/* Select non UDMA capable OSB4 if we can't do fixups */
 		if (rc < 0)
 			ppi[0] = &info[1];
+		sht = &serverworks_osb4_sht;
 	}
 	/* setup CSB5/CSB6 : South Bridge and IDE option RAID */
 	else if ((pdev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE) ||
@@ -434,7 +443,7 @@ static int serverworks_init_one(struct pci_dev *pdev, const struct pci_device_id
 			ppi[1] = &ata_dummy_port_info;
 	}
 
-	return ata_pci_bmdma_init_one(pdev, ppi, &serverworks_sht, NULL, 0);
+	return ata_pci_bmdma_init_one(pdev, ppi, sht, NULL, 0);
 }
 
 #ifdef CONFIG_PM

From fa6709b913588a07cc0286c9ca52f17e5276dd6b Mon Sep 17 00:00:00 2001
From: Ondrej Zary <linux@rainbow-software.org>
Date: Sat, 27 Sep 2014 00:04:46 +0200
Subject: [PATCH 0994/1185] libata-sff: Fix controllers with no ctl port

commit 6d8ca28fa688a9354bc9fbc935bdaeb3651b6677 upstream.

Currently, ata_sff_softreset is skipped for controllers with no ctl port.
But that also skips ata_sff_dev_classify required for device detection.
This means that libata is currently broken on controllers with no ctl port.

No device connected:
[    1.872480] pata_isapnp 01:01.02: activated
[    1.889823] scsi2 : pata_isapnp
[    1.890109] ata3: PATA max PIO0 cmd 0x1e8 ctl 0x0 irq 11
[    6.888110] ata3.01: qc timeout (cmd 0xec)
[    6.888179] ata3.01: failed to IDENTIFY (I/O error, err_mask=0x5)
[   16.888085] ata3.01: qc timeout (cmd 0xec)
[   16.888147] ata3.01: failed to IDENTIFY (I/O error, err_mask=0x5)
[   46.888086] ata3.01: qc timeout (cmd 0xec)
[   46.888148] ata3.01: failed to IDENTIFY (I/O error, err_mask=0x5)
[   51.888100] ata3.00: qc timeout (cmd 0xec)
[   51.888160] ata3.00: failed to IDENTIFY (I/O error, err_mask=0x5)
[   61.888079] ata3.00: qc timeout (cmd 0xec)
[   61.888141] ata3.00: failed to IDENTIFY (I/O error, err_mask=0x5)
[   91.888089] ata3.00: qc timeout (cmd 0xec)
[   91.888152] ata3.00: failed to IDENTIFY (I/O error, err_mask=0x5)

ATAPI device connected:
[    1.882061] pata_isapnp 01:01.02: activated
[    1.893430] scsi2 : pata_isapnp
[    1.893719] ata3: PATA max PIO0 cmd 0x1e8 ctl 0x0 irq 11
[    6.892107] ata3.01: qc timeout (cmd 0xec)
[    6.892171] ata3.01: failed to IDENTIFY (I/O error, err_mask=0x5)
[   16.892079] ata3.01: qc timeout (cmd 0xec)
[   16.892138] ata3.01: failed to IDENTIFY (I/O error, err_mask=0x5)
[   46.892079] ata3.01: qc timeout (cmd 0xec)
[   46.892138] ata3.01: failed to IDENTIFY (I/O error, err_mask=0x5)
[   46.908586] ata3.00: ATAPI: ACER CD-767E/O, V1.5X, max PIO2, CDB intr
[   46.924570] ata3.00: configured for PIO0 (device error ignored)
[   46.926295] scsi 2:0:0:0: CD-ROM            ACER     CD-767E/O        1.5X PQ: 0 ANSI: 5
[   46.984519] sr0: scsi3-mmc drive: 6x/6x xa/form2 tray
[   46.984592] cdrom: Uniform CD-ROM driver Revision: 3.20

So don't skip ata_sff_softreset, just skip the reset part of ata_bus_softreset
if the ctl port is not available.

This makes IDE port on ES968 behave correctly:

No device connected:
[    4.670888] pata_isapnp 01:01.02: activated
[    4.673207] scsi host2: pata_isapnp
[    4.673675] ata3: PATA max PIO0 cmd 0x1e8 ctl 0x0 irq 11
[    7.081840] Adding 2541652k swap on /dev/sda2.  Priority:-1 extents:1 across:2541652k

ATAPI device connected:
[    4.704362] pata_isapnp 01:01.02: activated
[    4.706620] scsi host2: pata_isapnp
[    4.706877] ata3: PATA max PIO0 cmd 0x1e8 ctl 0x0 irq 11
[    4.872782] ata3.00: ATAPI: ACER CD-767E/O, V1.5X, max PIO2, CDB intr
[    4.888673] ata3.00: configured for PIO0 (device error ignored)
[    4.893984] scsi 2:0:0:0: CD-ROM            ACER     CD-767E/O        1.5X PQ: 0 ANSI: 5
[    7.015578] Adding 2541652k swap on /dev/sda2.  Priority:-1 extents:1 across:2541652k

Signed-off-by: Ondrej Zary <linux@rainbow-software.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/libata-sff.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index b603720b877d..37acda6fa7e4 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -2008,13 +2008,15 @@ static int ata_bus_softreset(struct ata_port *ap, unsigned int devmask,
 
 	DPRINTK("ata%u: bus reset via SRST\n", ap->print_id);
 
-	/* software reset.  causes dev0 to be selected */
-	iowrite8(ap->ctl, ioaddr->ctl_addr);
-	udelay(20);	/* FIXME: flush */
-	iowrite8(ap->ctl | ATA_SRST, ioaddr->ctl_addr);
-	udelay(20);	/* FIXME: flush */
-	iowrite8(ap->ctl, ioaddr->ctl_addr);
-	ap->last_ctl = ap->ctl;
+	if (ap->ioaddr.ctl_addr) {
+		/* software reset.  causes dev0 to be selected */
+		iowrite8(ap->ctl, ioaddr->ctl_addr);
+		udelay(20);	/* FIXME: flush */
+		iowrite8(ap->ctl | ATA_SRST, ioaddr->ctl_addr);
+		udelay(20);	/* FIXME: flush */
+		iowrite8(ap->ctl, ioaddr->ctl_addr);
+		ap->last_ctl = ap->ctl;
+	}
 
 	/* wait the port to become ready */
 	return ata_sff_wait_after_reset(&ap->link, devmask, deadline);
@@ -2215,10 +2217,6 @@ void ata_sff_error_handler(struct ata_port *ap)
 
 	spin_unlock_irqrestore(ap->lock, flags);
 
-	/* ignore ata_sff_softreset if ctl isn't accessible */
-	if (softreset == ata_sff_softreset && !ap->ioaddr.ctl_addr)
-		softreset = NULL;
-
 	/* ignore built-in hardresets if SCR access is not available */
 	if ((hardreset == sata_std_hardreset ||
 	     hardreset == sata_sff_hardreset) && !sata_scr_valid(&ap->link))

From b6e03bbd143af13a8bff1322c07da2d2ef894815 Mon Sep 17 00:00:00 2001
From: Roger Tseng <rogerable@realtek.com>
Date: Fri, 15 Aug 2014 14:06:00 +0800
Subject: [PATCH 0995/1185] mmc: rtsx_pci_sdmmc: fix incorrect last byte in R2
 response

commit d1419d50c1bf711e9fd27b516a739c86b23f7cf9 upstream.

Current code erroneously fills the last byte of R2 response with an undefined
value. In addition, the controller actually 'offloads' the last byte
(CRC7, end bit) while receiving R2 response and thus it's impossible to get the
actual value. This could cause mmc stack to obtain inconsistent CID from the
same card after resume and misidentify it as a different card.

Fix by assigning dummy CRC and end bit: {7'b0, 1} = 0x1 to the last byte of R2.

Fixes: ff984e57d36e ("mmc: Add realtek pcie sdmmc host driver")
Signed-off-by: Roger Tseng <rogerable@realtek.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mmc/host/rtsx_pci_sdmmc.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
index 7ffb5cba30a9..4c65a5a4d8f4 100644
--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
+++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
@@ -341,6 +341,13 @@ static void sd_send_cmd_get_rsp(struct realtek_pci_sdmmc *host,
 	}
 
 	if (rsp_type == SD_RSP_TYPE_R2) {
+		/*
+		 * The controller offloads the last byte {CRC-7, end bit 1'b1}
+		 * of response type R2. Assign dummy CRC, 0, and end bit to the
+		 * byte(ptr[16], goes into the LSB of resp[3] later).
+		 */
+		ptr[16] = 1;
+
 		for (i = 0; i < 4; i++) {
 			cmd->resp[i] = get_unaligned_be32(ptr + 1 + i * 4);
 			dev_dbg(sdmmc_dev(host), "cmd->resp[%d] = 0x%08x\n",

From f83813a8aff1f5af9f4a02d5ce0a29be40f45a41 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Sun, 27 Jul 2014 13:00:41 -0400
Subject: [PATCH 0996/1185] fs: make cont_expand_zero interruptible

commit c2ca0fcd202863b14bd041a7fece2e789926c225 upstream.

This patch makes it possible to kill a process looping in
cont_expand_zero. A process may spend a lot of time in this function, so
it is desirable to be able to kill it.

It happened to me that I wanted to copy a piece of data from the disk to a
file. By mistake, I used the "seek" parameter to dd instead of "skip". Due
to the "seek" parameter, dd attempted to extend the file and became stuck
doing so - the only possibility was to reset the machine or wait many
hours until the filesystem runs out of space and cont_expand_zero fails.
We need this patch to be able to terminate the process.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/buffer.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/buffer.c b/fs/buffer.c
index 10fca21ee8aa..d0b4646dc8c8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2254,6 +2254,11 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
 		err = 0;
 
 		balance_dirty_pages_ratelimited(mapping);
+
+		if (unlikely(fatal_signal_pending(current))) {
+			err = -EINTR;
+			goto out;
+		}
 	}
 
 	/* page covers the boundary, find the boundary offset */

From d016a08a18158fd7002ad24aea8a0224ce2a3d0c Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sat, 17 May 2014 20:56:38 +0900
Subject: [PATCH 0997/1185] fs: Fix theoretical division by 0 in
 super_cache_scan().

commit 475d0db742e3755c6b267f48577ff7cbb7dfda0d upstream.

total_objects could be 0 and is used as a denom.

While total_objects is a "long", total_objects == 0 is unlikely to happen for
3.12 and later kernels because 32-bit architectures would not be able to
hold (1 << 32) objects. However, total_objects == 0 may happen for kernels
between 3.1 and 3.11 because total_objects in prune_super() was an "int"
and (e.g.) x86_64 architecture might be able to hold (1 << 32) objects.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/super.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/super.c b/fs/super.c
index 68307c029228..e028b508db25 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -76,6 +76,8 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
 
 	total_objects = sb->s_nr_dentry_unused +
 			sb->s_nr_inodes_unused + fs_objects + 1;
+	if (!total_objects)
+		total_objects = 1;
 
 	if (sc->nr_to_scan) {
 		int	dentries;

From c4e70e76860cc84cebd719fbd89637fdd226cf94 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Sun, 29 Jun 2014 16:55:02 +0300
Subject: [PATCH 0998/1185] UBIFS: remove mst_mutex

commit 07e19dff63e3d5d6500d831e36554ac9b1b0560e upstream.

The 'mst_mutex' is not needed because 'ubifs_write_master()' is only
called on the mount path and commit path. The mount path is sequential and
there is no parallelism, and the commit path is also serialized - there is only
one commit going on at a time.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ubifs/commit.c | 2 --
 fs/ubifs/master.c | 7 +++----
 fs/ubifs/super.c  | 1 -
 fs/ubifs/ubifs.h  | 2 --
 4 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index ff8229340cd5..aa13ad053b14 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -174,7 +174,6 @@ static int do_commit(struct ubifs_info *c)
 	if (err)
 		goto out;
 
-	mutex_lock(&c->mst_mutex);
 	c->mst_node->cmt_no      = cpu_to_le64(c->cmt_no);
 	c->mst_node->log_lnum    = cpu_to_le32(new_ltail_lnum);
 	c->mst_node->root_lnum   = cpu_to_le32(zroot.lnum);
@@ -204,7 +203,6 @@ static int do_commit(struct ubifs_info *c)
 	else
 		c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS);
 	err = ubifs_write_master(c);
-	mutex_unlock(&c->mst_mutex);
 	if (err)
 		goto out;
 
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index ab83ace9910a..1a4bb9e8b3b8 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -352,10 +352,9 @@ int ubifs_read_master(struct ubifs_info *c)
  * ubifs_write_master - write master node.
  * @c: UBIFS file-system description object
  *
- * This function writes the master node. The caller has to take the
- * @c->mst_mutex lock before calling this function. Returns zero in case of
- * success and a negative error code in case of failure. The master node is
- * written twice to enable recovery.
+ * This function writes the master node. Returns zero in case of success and a
+ * negative error code in case of failure. The master node is written twice to
+ * enable recovery.
  */
 int ubifs_write_master(struct ubifs_info *c)
 {
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 879b9976c12b..05115d719408 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1970,7 +1970,6 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
 		mutex_init(&c->lp_mutex);
 		mutex_init(&c->tnc_mutex);
 		mutex_init(&c->log_mutex);
-		mutex_init(&c->mst_mutex);
 		mutex_init(&c->umount_mutex);
 		mutex_init(&c->bu_mutex);
 		mutex_init(&c->write_reserve_mutex);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index b2babce4d70f..bd51277f6fe1 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1042,7 +1042,6 @@ struct ubifs_debug_info;
  *
  * @mst_node: master node
  * @mst_offs: offset of valid master node
- * @mst_mutex: protects the master node area, @mst_node, and @mst_offs
  *
  * @max_bu_buf_len: maximum bulk-read buffer length
  * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
@@ -1282,7 +1281,6 @@ struct ubifs_info {
 
 	struct ubifs_mst_node *mst_node;
 	int mst_offs;
-	struct mutex mst_mutex;
 
 	int max_bu_buf_len;
 	struct mutex bu_mutex;

From 918ecf66a11bb3bdc818a264319dcaf984c11a3f Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Sun, 29 Jun 2014 17:00:45 +0300
Subject: [PATCH 0999/1185] UBIFS: fix a race condition

commit 052c28073ff26f771d44ef33952a41d18dadd255 upstream.

Hu (hujianyang@huawei.com) discovered a race condition which may lead to a
situation when UBIFS is unable to mount the file-system after an unclean
reboot. The problem is theoretical, though.

In UBIFS, we have the log, which is basically a set of LEBs in a certain area.
The log has the tail and the head.

Every time user writes data to the file-system, the UBIFS journal grows, and
the log grows as well, because we append new reference nodes to the head of the
log. So the head moves forward all the time, while the log tail stays at the
same position.

At any time, the UBIFS master node points to the tail of the log. When we mount
the file-system, we scan the log, and we always start from its tail, because
this is where the master node points to. The only occasion when the tail of the
log changes is the commit operation.

The commit operation has 2 phases - "commit start" and "commit end". The former
is relatively short, and does not involve much I/O. During this phase we mostly
just build various in-memory lists of the things which have to be written to
the flash media during "commit end" phase.

During the commit start phase, what we do is we "clean" the log. Indeed, the
commit operation will index all the data in the journal, so the entire journal
"disappears", and therefore the data in the log become unneeded. So we just
move the head of the log to the next LEB, and write the CS node there. This LEB
will be the tail of the new log when the commit operation finishes.

When the "commit start" phase finishes, users may write more data to the
file-system, in parallel with the ongoing "commit end" operation. At this point
the log tail was not changed yet, it is the same as it had been before we
started the commit. The log head keeps moving forward, though.

The commit operation now needs to write the new master node, and the new master
node should point to the new log tail. After this the LEBs between the old log
tail and the new log tail can be unmapped and re-used again.

And here is the possible problem. We do 2 operations: (a) We first update the
log tail position in memory (see 'ubifs_log_end_commit()'). (b) And then we
write the master node (see the big block of code in 'do_commit()').

But nothing prevents the log head from moving forward between (a) and (b), and
the log head may "wrap" now to the old log tail. And when the "wrap" happens,
the contents of the log tail get erased. Now a power cut happens and we are in
trouble. We end up with the old master node pointing to the old tail, which was
erased. And replay fails because it expects the master node to point to the
correct log tail at all times.

This patch merges the abovementioned (a) and (b) operations by moving the master
node change code to the 'ubifs_log_end_commit()' function, so that it runs with
the log mutex locked, which will prevent the log from being changed between
operations (a) and (b).

Reported-by: hujianyang <hujianyang@huawei.com>
Tested-by: hujianyang <hujianyang@huawei.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ubifs/commit.c |  8 +++-----
 fs/ubifs/log.c    | 11 ++++++++---
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index aa13ad053b14..26b69b2d4a45 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -164,10 +164,6 @@ static int do_commit(struct ubifs_info *c)
 	if (err)
 		goto out;
 	err = ubifs_orphan_end_commit(c);
-	if (err)
-		goto out;
-	old_ltail_lnum = c->ltail_lnum;
-	err = ubifs_log_end_commit(c, new_ltail_lnum);
 	if (err)
 		goto out;
 	err = dbg_check_old_index(c, &zroot);
@@ -202,7 +198,9 @@ static int do_commit(struct ubifs_info *c)
 		c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
 	else
 		c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS);
-	err = ubifs_write_master(c);
+
+	old_ltail_lnum = c->ltail_lnum;
+	err = ubifs_log_end_commit(c, new_ltail_lnum);
 	if (err)
 		goto out;
 
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 36bd4efd0819..be67120fb919 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -447,9 +447,9 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
  * @ltail_lnum: new log tail LEB number
  *
  * This function is called on when the commit operation was finished. It
- * moves log tail to new position and unmaps LEBs which contain obsolete data.
- * Returns zero in case of success and a negative error code in case of
- * failure.
+ * moves log tail to new position and updates the master node so that it stores
+ * the new log tail LEB number. Returns zero in case of success and a negative
+ * error code in case of failure.
  */
 int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum)
 {
@@ -477,7 +477,12 @@ int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum)
 	spin_unlock(&c->buds_lock);
 
 	err = dbg_check_bud_bytes(c);
+	if (err)
+		goto out;
 
+	err = ubifs_write_master(c);
+
+out:
 	mutex_unlock(&c->log_mutex);
 	return err;
 }

From 6f1aec53eded9399e6b44cab8c9aa36c65a8f402 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 16 Jul 2014 15:22:29 +0300
Subject: [PATCH 1000/1185] UBIFS: fix free log space calculation

commit ba29e721eb2df6df8f33c1f248388bb037a47914 upstream.

Hu (hujianyang <hujianyang@huawei.com>) discovered an issue in the
'empty_log_bytes()' function, which calculates how many bytes are left in the
log:

"
If 'c->lhead_lnum + 1 == c->ltail_lnum' and 'c->lhead_offs == c->leb_size', 'h'
would equalent to 't' and 'empty_log_bytes()' would return 'c->log_bytes'
instead of 0.
"

At this point it is not clear what would be the consequences of this, and
whether this may lead to any problems, but this patch addresses the issue just
in case.

Tested-by: hujianyang <hujianyang@huawei.com>
Reported-by: hujianyang <hujianyang@huawei.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ubifs/log.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index be67120fb919..06649d21b056 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -106,10 +106,14 @@ static inline long long empty_log_bytes(const struct ubifs_info *c)
 	h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs;
 	t = (long long)c->ltail_lnum * c->leb_size;
 
-	if (h >= t)
+	if (h > t)
 		return c->log_bytes - h + t;
-	else
+	else if (h != t)
 		return t - h;
+	else if (c->lhead_lnum != c->ltail_lnum)
+		return 0;
+	else
+		return c->log_bytes;
 }
 
 /**

From 6cbdf1151168e44f93866f6af751442b937f8989 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 1 Oct 2014 21:49:18 -0400
Subject: [PATCH 1001/1185] vfs: fix data corruption when blocksize < pagesize
 for mmaped data

commit 90a8020278c1598fafd071736a0846b38510309c upstream.

->page_mkwrite() is used by filesystems to allocate blocks under a page
which is becoming writeably mmapped in some process' address space. This
allows a filesystem to return a page fault if there is not enough space
available, user exceeds quota or similar problem happens, rather than
silently discarding data later when writepage is called.

However VFS fails to call ->page_mkwrite() in all the cases where
filesystems need it when blocksize < pagesize. For example when
blocksize = 1024, pagesize = 4096 the following is problematic:
  ftruncate(fd, 0);
  pwrite(fd, buf, 1024, 0);
  map = mmap(NULL, 1024, PROT_WRITE, MAP_SHARED, fd, 0);
  map[0] = 'a';       ----> page_mkwrite() for index 0 is called
  ftruncate(fd, 10000); /* or even pwrite(fd, buf, 1, 10000) */
  mremap(map, 1024, 10000, 0);
  map[4095] = 'a';    ----> no page_mkwrite() called

At the moment ->page_mkwrite() is called, filesystem can allocate only
one block for the page because i_size == 1024. Otherwise it would create
blocks beyond i_size which is generally undesirable. But later at
->writepage() time, we also need to store data at offset 4095 but we
don't have block allocated for it.

This patch introduces a helper function filesystems can use to have
->page_mkwrite() called at all the necessary moments.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/buffer.c        |  3 +++
 include/linux/mm.h |  1 +
 mm/truncate.c      | 59 +++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index d0b4646dc8c8..83fedaa53b55 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2018,6 +2018,7 @@ int generic_write_end(struct file *file, struct address_space *mapping,
 			struct page *page, void *fsdata)
 {
 	struct inode *inode = mapping->host;
+	loff_t old_size = inode->i_size;
 	int i_size_changed = 0;
 
 	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
@@ -2037,6 +2038,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
 	unlock_page(page);
 	page_cache_release(page);
 
+	if (old_size < pos)
+		pagecache_isize_extended(inode, old_size, pos);
 	/*
 	 * Don't mark the inode dirty under page lock. First, it unnecessarily
 	 * makes the holding time of page lock longer. Second, it forces lock
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a9a48309f045..7da14357aa76 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1004,6 +1004,7 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 
 extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
+void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
 void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
 int truncate_inode_page(struct address_space *mapping, struct page *page);
 int generic_error_remove_page(struct address_space *mapping, struct page *page);
diff --git a/mm/truncate.c b/mm/truncate.c
index c75b736e54b7..2f03c3ac7ab7 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -20,6 +20,7 @@
 #include <linux/buffer_head.h>	/* grr. try_to_release_page,
 				   do_invalidatepage */
 #include <linux/cleancache.h>
+#include <linux/rmap.h>
 #include "internal.h"
 
 
@@ -567,15 +568,67 @@ EXPORT_SYMBOL(truncate_pagecache);
  */
 void truncate_setsize(struct inode *inode, loff_t newsize)
 {
-	loff_t oldsize;
+	loff_t oldsize = inode->i_size;
 
-	oldsize = inode->i_size;
 	i_size_write(inode, newsize);
-
+	if (newsize > oldsize)
+		pagecache_isize_extended(inode, oldsize, newsize);
 	truncate_pagecache(inode, oldsize, newsize);
 }
 EXPORT_SYMBOL(truncate_setsize);
 
+/**
+ * pagecache_isize_extended - update pagecache after extension of i_size
+ * @inode:	inode for which i_size was extended
+ * @from:	original inode size
+ * @to:		new inode size
+ *
+ * Handle extension of inode size either caused by extending truncate or by
+ * write starting after current i_size. We mark the page straddling current
+ * i_size RO so that page_mkwrite() is called on the nearest write access to
+ * the page.  This way filesystem can be sure that page_mkwrite() is called on
+ * the page before user writes to the page via mmap after the i_size has been
+ * changed.
+ *
+ * The function must be called after i_size is updated so that page fault
+ * coming after we unlock the page will already see the new i_size.
+ * The function must be called while we still hold i_mutex - this not only
+ * makes sure i_size is stable but also that userspace cannot observe new
+ * i_size value before we are prepared to store mmap writes at new inode size.
+ */
+void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
+{
+	int bsize = 1 << inode->i_blkbits;
+	loff_t rounded_from;
+	struct page *page;
+	pgoff_t index;
+
+	WARN_ON(!mutex_is_locked(&inode->i_mutex));
+	WARN_ON(to > inode->i_size);
+
+	if (from >= to || bsize == PAGE_CACHE_SIZE)
+		return;
+	/* Page straddling @from will not have any hole block created? */
+	rounded_from = round_up(from, bsize);
+	if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1)))
+		return;
+
+	index = from >> PAGE_CACHE_SHIFT;
+	page = find_lock_page(inode->i_mapping, index);
+	/* Page not cached? Nothing to do */
+	if (!page)
+		return;
+	/*
+	 * See clear_page_dirty_for_io() for details why set_page_dirty()
+	 * is needed.
+	 */
+	if (page_mkclean(page))
+		set_page_dirty(page);
+	unlock_page(page);
+	page_cache_release(page);
+}
+EXPORT_SYMBOL(pagecache_isize_extended);
+
 /**
  * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
  * @inode: inode

From 6507b92a66c9551b0555edc5a92e27027c3b990e Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sun, 7 Sep 2014 21:05:05 +0100
Subject: [PATCH 1002/1185] x86: Reject x32 executables if x32 ABI not
 supported

commit 0e6d3112a4e95d55cf6dca88f298d5f4b8f29bd1 upstream.

It is currently possible to execve() an x32 executable on an x86_64
kernel that has only ia32 compat enabled.  However all its syscalls
will fail, even _exit().  This usually causes it to segfault.

Change the ELF compat architecture check so that x32 executables are
rejected if we don't support the x32 ABI.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Link: http://lkml.kernel.org/r/1410120305.6822.9.camel@decadent.org.uk
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/elf.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 9c999c1674fa..01f15b227d7e 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -155,8 +155,9 @@ do {						\
 #define elf_check_arch(x)			\
 	((x)->e_machine == EM_X86_64)
 
-#define compat_elf_check_arch(x)		\
-	(elf_check_arch_ia32(x) || (x)->e_machine == EM_X86_64)
+#define compat_elf_check_arch(x)					\
+	(elf_check_arch_ia32(x) ||					\
+	 (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64))
 
 #if __USER32_DS != __USER_DS
 # error "The following code assumes __USER32_DS == __USER_DS"

From b888e3d442069e3107d9b4a43c1321e4d555b6cd Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 2 Sep 2014 19:57:17 +0200
Subject: [PATCH 1003/1185] x86, fpu:
 __restore_xstate_sig()->math_state_restore() needs preempt_disable()

commit df24fb859a4e200d9324e2974229fbb7adf00aef upstream.

Add preempt_disable() + preempt_enable() around math_state_restore() in
__restore_xstate_sig(). Otherwise __switch_to() after __thread_fpu_begin()
can overwrite fpu->state we are going to restore.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Link: http://lkml.kernel.org/r/20140902175717.GA21649@redhat.com
Reviewed-by: Suresh Siddha <sbsiddha@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/xsave.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index ada87a329edc..92a099fabd53 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -400,8 +400,11 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 			set_used_math();
 		}
 
-		if (use_eager_fpu())
+		if (use_eager_fpu()) {
+			preempt_disable();
 			math_state_restore();
+			preempt_enable();
+		}
 
 		return err;
 	} else {

From fb5b6e7ecfefa65efd7280f4824741ac76e10c4b Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 2 Sep 2014 19:57:13 +0200
Subject: [PATCH 1004/1185] x86, fpu: shift drop_init_fpu() from
 save_xstate_sig() to handle_signal()

commit 66463db4fc5605d51c7bb81d009d5bf30a783a2c upstream.

save_xstate_sig()->drop_init_fpu() doesn't look right. setup_rt_frame()
can fail after that, in this case the next setup_rt_frame() triggered
by SIGSEGV won't save fpu simply because the old state was lost. This
obviously means that fpu won't be restored after sys_rt_sigreturn() from
SIGSEGV handler.

Shift drop_init_fpu() into !failed branch in handle_signal().

Test-case (needs -O2):

	#include <stdio.h>
	#include <signal.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <sys/mman.h>
	#include <pthread.h>
	#include <assert.h>

	volatile double D;

	void test(double d)
	{
		int pid = getpid();

		for (D = d; D == d; ) {
			/* sys_tkill(pid, SIGHUP); asm to avoid save/reload
			 * fp regs around "C" call */
			asm ("" : : "a"(200), "D"(pid), "S"(1));
			asm ("syscall" : : : "ax");
		}

		printf("ERR!!\n");
	}

	void sigh(int sig)
	{
	}

	char altstack[4096 * 10] __attribute__((aligned(4096)));

	void *tfunc(void *arg)
	{
		for (;;) {
			mprotect(altstack, sizeof(altstack), PROT_READ);
			mprotect(altstack, sizeof(altstack), PROT_READ|PROT_WRITE);
		}
	}

	int main(void)
	{
		stack_t st = {
			.ss_sp = altstack,
			.ss_size = sizeof(altstack),
			.ss_flags = SS_ONSTACK,
		};

		struct sigaction sa = {
			.sa_handler = sigh,
		};

		pthread_t pt;

		sigaction(SIGSEGV, &sa, NULL);
		sigaltstack(&st, NULL);
		sa.sa_flags = SA_ONSTACK;
		sigaction(SIGHUP, &sa, NULL);

		pthread_create(&pt, NULL, tfunc, NULL);

		test(123.456);
		return 0;
	}

Reported-by: Bean Anderson <bean@azulsystems.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Link: http://lkml.kernel.org/r/20140902175713.GA21646@redhat.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/signal.c | 5 +++++
 arch/x86/kernel/xsave.c  | 2 --
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 087ab2af381a..66deef41512f 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -677,6 +677,11 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 		 * handler too.
 		 */
 		regs->flags &= ~X86_EFLAGS_TF;
+		/*
+		 * Ensure the signal handler starts with the new fpu state.
+		 */
+		if (used_math())
+			drop_init_fpu(current);
 	}
 	signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
 }
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 92a099fabd53..1ee723298e90 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -268,8 +268,6 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 	if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
 		return -1;
 
-	drop_init_fpu(tsk);	/* trigger finit */
-
 	return 0;
 }
 

From 9f03d6fef32533a2d62d65656afba90ccd3a57d6 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Sat, 27 Apr 2013 16:10:11 -0700
Subject: [PATCH 1005/1185] x86, flags: Rename X86_EFLAGS_BIT1 to
 X86_EFLAGS_FIXED

commit 1adfa76a95fe4444124a502f7cc858a39d5b8e01 upstream.

Bit 1 in the x86 EFLAGS is always set.  Name the macro something that
actually tries to explain what it is all about, rather than being a
tautology.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Link: http://lkml.kernel.org/n/tip-f10rx5vjjm6tfnt8o1wseb3v@git.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/uapi/asm/processor-flags.h | 2 +-
 arch/x86/kernel/entry_64.S                  | 2 +-
 arch/x86/kernel/process_32.c                | 2 +-
 arch/x86/kernel/process_64.c                | 2 +-
 arch/x86/kvm/vmx.c                          | 2 +-
 drivers/lguest/x86/core.c                   | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 54991a746043..b16e6d28f149 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -6,7 +6,7 @@
  * EFLAGS bits
  */
 #define X86_EFLAGS_CF	0x00000001 /* Carry Flag */
-#define X86_EFLAGS_BIT1	0x00000002 /* Bit 1 - always on */
+#define X86_EFLAGS_FIXED 0x00000002 /* Bit 1 - always on */
 #define X86_EFLAGS_PF	0x00000004 /* Parity Flag */
 #define X86_EFLAGS_AF	0x00000010 /* Auxiliary carry Flag */
 #define X86_EFLAGS_ZF	0x00000040 /* Zero Flag */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 39ba6914bbc6..8c6b5c2284c7 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -366,7 +366,7 @@ ENDPROC(native_usergs_sysret64)
 	/*CFI_REL_OFFSET	ss,0*/
 	pushq_cfi %rax /* rsp */
 	CFI_REL_OFFSET	rsp,0
-	pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
+	pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) /* eflags - interrupts on */
 	/*CFI_REL_OFFSET	rflags,0*/
 	pushq_cfi $__KERNEL_CS /* cs */
 	/*CFI_REL_OFFSET	cs,0*/
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7305f7dfc7ab..0339f5c14bf9 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -147,7 +147,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		childregs->bp = arg;
 		childregs->orig_ax = -1;
 		childregs->cs = __KERNEL_CS | get_kernel_rpl();
-		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
 		p->fpu_counter = 0;
 		p->thread.io_bitmap_ptr = NULL;
 		memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 355ae06dbf94..f99a242730e9 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -176,7 +176,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		childregs->bp = arg;
 		childregs->orig_ax = -1;
 		childregs->cs = __KERNEL_CS | get_kernel_rpl();
-		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
 		return 0;
 	}
 	*childregs = *current_pt_regs();
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7cdafb6dc705..8d9d37ff8250 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7949,7 +7949,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 
 	kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
 	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
-	vmx_set_rflags(vcpu, X86_EFLAGS_BIT1);
+	vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
 	/*
 	 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
 	 * actually changed, because it depends on the current state of
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index f0a3347b6441..516923926335 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -700,7 +700,7 @@ void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start)
 	 * interrupts are enabled.  We always leave interrupts enabled while
 	 * running the Guest.
 	 */
-	regs->eflags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+	regs->eflags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
 
 	/*
 	 * The "Extended Instruction Pointer" register says where the Guest is

From b1a9c1e7969403e9d31ac335f4ebb43c9461ab59 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Wed, 1 Oct 2014 11:49:04 -0700
Subject: [PATCH 1006/1185] x86_64, entry: Filter RFLAGS.NT on entry from
 userspace

commit 8c7aa698baca5e8f1ba9edb68081f1e7a1abf455 upstream.

The NT flag doesn't do anything in long mode other than causing IRET
to #GP.  Oddly, CPL3 code can still set NT using popf.

Entry via hardware or software interrupt clears NT automatically, so
the only relevant entries are fast syscalls.

If user code causes kernel code to run with NT set, then there's at
least some (small) chance that it could cause trouble.  For example,
user code could cause a call to EFI code with NT set, and who knows
what would happen?  Apparently some games on Wine sometimes do
this (!), and, if an IRET return happens, they will segfault.  That
segfault cannot be handled, because signal delivery fails, too.

This patch programs the CPU to clear NT on entry via SYSCALL (both
32-bit and 64-bit, by my reading of the AMD APM), and it clears NT
in software on entry via SYSENTER.

To save a few cycles, this borrows a trick from Jan Beulich in Xen:
it checks whether NT is set before trying to clear it.  As a result,
it seems to have very little effect on SYSENTER performance on my
machine.

There's another minor bug fix in here: it looks like the CFI
annotations were wrong if CONFIG_AUDITSYSCALL=n.

Testers beware: on Xen, SYSENTER with NT set turns into a GPF.

I haven't touched anything on 32-bit kernels.

The syscall mask change comes from a variant of this patch by Anish
Bhatt.

Note to stable maintainers: there is no known security issue here.
A misguided program can set NT and cause the kernel to try and fail
to deliver SIGSEGV, crashing the program.  This patch fixes Far Cry
on Wine: https://bugs.winehq.org/show_bug.cgi?id=33275

Reported-by: Anish Bhatt <anish@chelsio.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/395749a5d39a29bd3e4b35899cf3a3c1340e5595.1412189265.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/ia32/ia32entry.S    | 18 +++++++++++++++++-
 arch/x86/kernel/cpu/common.c |  2 +-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 474dc1b59f72..22417f6fc3ab 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -151,6 +151,16 @@ ENTRY(ia32_sysenter_target)
 1:	movl	(%rbp),%ebp
 	_ASM_EXTABLE(1b,ia32_badarg)
 	ASM_CLAC
+
+	/*
+	 * Sysenter doesn't filter flags, so we need to clear NT
+	 * ourselves.  To save a few cycles, we can check whether
+	 * NT was set instead of doing an unconditional popfq.
+	 */
+	testl $X86_EFLAGS_NT,EFLAGS(%rsp)	/* saved EFLAGS match cpu */
+	jnz sysenter_fix_flags
+sysenter_flags_fixed:
+
 	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	CFI_REMEMBER_STATE
@@ -184,6 +194,8 @@ sysexit_from_sys_call:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS_SYSEXIT32
 
+	CFI_RESTORE_STATE
+
 #ifdef CONFIG_AUDITSYSCALL
 	.macro auditsys_entry_common
 	movl %esi,%r9d			/* 6th arg: 4th syscall arg */
@@ -226,7 +238,6 @@ sysexit_from_sys_call:
 	.endm
 
 sysenter_auditsys:
-	CFI_RESTORE_STATE
 	auditsys_entry_common
 	movl %ebp,%r9d			/* reload 6th syscall arg */
 	jmp sysenter_dispatch
@@ -235,6 +246,11 @@ sysexit_audit:
 	auditsys_exit sysexit_from_sys_call
 #endif
 
+sysenter_fix_flags:
+	pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED)
+	popfq_cfi
+	jmp sysenter_flags_fixed
+
 sysenter_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
 	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index deeb48d9459b..6a7e3e9cffc3 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1134,7 +1134,7 @@ void syscall_init(void)
 	/* Flags to clear on syscall */
 	wrmsrl(MSR_SYSCALL_MASK,
 	       X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|
-	       X86_EFLAGS_IOPL|X86_EFLAGS_AC);
+	       X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
 }
 
 /*

From 621be26198eba8272e702ccbd27648c6aae01bc0 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Fri, 31 Oct 2014 18:08:45 -0700
Subject: [PATCH 1007/1185] x86_64, entry: Fix out of bounds read on sysenter

commit 653bc77af60911ead1f423e588f54fc2547c4957 upstream.

Rusty noticed a Really Bad Bug (tm) in my NT fix.  The entry code
reads out of bounds, causing the NT fix to be unreliable.  But, and
this is much, much worse, if your stack is somehow just below the
top of the direct map (or a hole), you read out of bounds and crash.

Excerpt from the crash:

[    1.129513] RSP: 0018:ffff88001da4bf88  EFLAGS: 00010296

  2b:*    f7 84 24 90 00 00 00     testl  $0x4000,0x90(%rsp)

That read is deterministically above the top of the stack.  I
thought I even single-stepped through this code when I wrote it to
check the offset, but I clearly screwed it up.

Fixes: 8c7aa698baca ("x86_64, entry: Filter RFLAGS.NT on entry from userspace")
Reported-by: Rusty Russell <rusty@ozlabs.org>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/ia32/ia32entry.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 22417f6fc3ab..c9305ef1d411 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -157,7 +157,7 @@ ENTRY(ia32_sysenter_target)
 	 * ourselves.  To save a few cycles, we can check whether
 	 * NT was set instead of doing an unconditional popfq.
 	 */
-	testl $X86_EFLAGS_NT,EFLAGS(%rsp)	/* saved EFLAGS match cpu */
+	testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp)
 	jnz sysenter_fix_flags
 sysenter_flags_fixed:
 

From 04157ab004f712e514bdfbd4f59c3a45f562496c Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Wed, 29 Oct 2014 03:53:37 -0700
Subject: [PATCH 1008/1185] x86, pageattr: Prevent overflow in
 slow_virt_to_phys() for X86_PAE

commit d1cd1210834649ce1ca6bafe5ac25d2f40331343 upstream.

pte_pfn() returns a PFN of long (32 bits in 32-PAE), so "long <<
PAGE_SHIFT" will overflow for PFNs above 4GB.

Due to this issue, some Linux 32-PAE distros, running as guests on Hyper-V,
with 5GB memory assigned, can't load the netvsc driver successfully and
hence the synthetic network device can't work (we can use the kernel parameter
mem=3000M to work around the issue).

Cast pte_pfn() to phys_addr_t before shifting.

Fixes: "commit d76565344512: x86, mm: Create slow_virt_to_phys()"
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: gregkh@linuxfoundation.org
Cc: linux-mm@kvack.org
Cc: olaf@aepfle.de
Cc: apw@canonical.com
Cc: jasowang@redhat.com
Cc: dave.hansen@intel.com
Cc: riel@redhat.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1414580017-27444-1-git-send-email-decui@microsoft.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/pageattr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index bb32480c2d71..aabdf762f592 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -389,7 +389,7 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr)
 	psize = page_level_size(level);
 	pmask = page_level_mask(level);
 	offset = virt_addr & ~pmask;
-	phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
+	phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
 	return (phys_addr | offset);
 }
 EXPORT_SYMBOL_GPL(slow_virt_to_phys);

From bcaf8f4d1aa458626c5563e60b3005ceea2327e5 Mon Sep 17 00:00:00 2001
From: Dmitry Kasatkin <d.kasatkin@samsung.com>
Date: Tue, 28 Oct 2014 14:28:49 +0200
Subject: [PATCH 1009/1185] evm: check xattr value length and type in
 evm_inode_setxattr()

commit 3b1deef6b1289a99505858a3b212c5b50adf0c2f upstream.

evm_inode_setxattr() can be called with no value. The function does not
check the length, so the following command can be used to produce a
kernel oops: setfattr -n security.evm FOO. This patch fixes it.

Changes in v3:
* there is no reason to return different error codes for EVM_XATTR_HMAC
  and non EVM_XATTR_HMAC. Remove unnecessary test then.

Changes in v2:
* testing for validity of xattr type

[ 1106.396921] BUG: unable to handle kernel NULL pointer dereference at           (null)
[ 1106.398192] IP: [<ffffffff812af7b8>] evm_inode_setxattr+0x2a/0x48
[ 1106.399244] PGD 29048067 PUD 290d7067 PMD 0
[ 1106.399953] Oops: 0000 [#1] SMP
[ 1106.400020] Modules linked in: bridge stp llc evdev serio_raw i2c_piix4 button fuse
[ 1106.400020] CPU: 0 PID: 3635 Comm: setxattr Not tainted 3.16.0-kds+ #2936
[ 1106.400020] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[ 1106.400020] task: ffff8800291a0000 ti: ffff88002917c000 task.ti: ffff88002917c000
[ 1106.400020] RIP: 0010:[<ffffffff812af7b8>]  [<ffffffff812af7b8>] evm_inode_setxattr+0x2a/0x48
[ 1106.400020] RSP: 0018:ffff88002917fd50  EFLAGS: 00010246
[ 1106.400020] RAX: 0000000000000000 RBX: ffff88002917fdf8 RCX: 0000000000000000
[ 1106.400020] RDX: 0000000000000000 RSI: ffffffff818136d3 RDI: ffff88002917fdf8
[ 1106.400020] RBP: ffff88002917fd68 R08: 0000000000000000 R09: 00000000003ec1df
[ 1106.400020] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800438a0a00
[ 1106.400020] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[ 1106.400020] FS:  00007f7dfa7d7740(0000) GS:ffff88005da00000(0000) knlGS:0000000000000000
[ 1106.400020] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1106.400020] CR2: 0000000000000000 CR3: 000000003763e000 CR4: 00000000000006f0
[ 1106.400020] Stack:
[ 1106.400020]  ffff8800438a0a00 ffff88002917fdf8 0000000000000000 ffff88002917fd98
[ 1106.400020]  ffffffff812a1030 ffff8800438a0a00 ffff88002917fdf8 0000000000000000
[ 1106.400020]  0000000000000000 ffff88002917fde0 ffffffff8116d08a ffff88002917fdc8
[ 1106.400020] Call Trace:
[ 1106.400020]  [<ffffffff812a1030>] security_inode_setxattr+0x5d/0x6a
[ 1106.400020]  [<ffffffff8116d08a>] vfs_setxattr+0x6b/0x9f
[ 1106.400020]  [<ffffffff8116d1e0>] setxattr+0x122/0x16c
[ 1106.400020]  [<ffffffff811687e8>] ? mnt_want_write+0x21/0x45
[ 1106.400020]  [<ffffffff8114d011>] ? __sb_start_write+0x10f/0x143
[ 1106.400020]  [<ffffffff811687e8>] ? mnt_want_write+0x21/0x45
[ 1106.400020]  [<ffffffff811687c0>] ? __mnt_want_write+0x48/0x4f
[ 1106.400020]  [<ffffffff8116d3e6>] SyS_setxattr+0x6e/0xb0
[ 1106.400020]  [<ffffffff81529da9>] system_call_fastpath+0x16/0x1b
[ 1106.400020] Code: c3 0f 1f 44 00 00 55 48 89 e5 41 55 49 89 d5 41 54 49 89 fc 53 48 89 f3 48 c7 c6 d3 36 81 81 48 89 df e8 18 22 04 00 85 c0 75 07 <41> 80 7d 00 02 74 0d 48 89 de 4c 89 e7 e8 5a fe ff ff eb 03 83
[ 1106.400020] RIP  [<ffffffff812af7b8>] evm_inode_setxattr+0x2a/0x48
[ 1106.400020]  RSP <ffff88002917fd50>
[ 1106.400020] CR2: 0000000000000000
[ 1106.428061] ---[ end trace ae08331628ba3050 ]---

Reported-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dmitry Kasatkin <d.kasatkin@samsung.com>
Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 security/integrity/evm/evm_main.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index b9b2bebeb350..b980a6ce5c79 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -286,9 +286,12 @@ int evm_inode_setxattr(struct dentry *dentry, const char *xattr_name,
 {
 	const struct evm_ima_xattr_data *xattr_data = xattr_value;
 
-	if ((strcmp(xattr_name, XATTR_NAME_EVM) == 0)
-	    && (xattr_data->type == EVM_XATTR_HMAC))
-		return -EPERM;
+	if (strcmp(xattr_name, XATTR_NAME_EVM) == 0) {
+		if (!xattr_value_len)
+			return -EINVAL;
+		if (xattr_data->type != EVM_IMA_XATTR_DIGSIG)
+			return -EPERM;
+	}
 	return evm_protect_xattr(dentry, xattr_name, xattr_value,
 				 xattr_value_len);
 }

From a58d9ee3e1b064e8b449177dedab4d2da162fdd0 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 28 Oct 2014 12:42:19 +0100
Subject: [PATCH 1010/1185] ALSA: pcm: Zero-clear reserved fields of PCM status
 ioctl in compat mode

commit 317168d0c766defd14b3d0e9c2c4a9a258b803ee upstream.

In compat mode, we copy each field of snd_pcm_status struct but don't
touch the reserved fields, and this leaves uninitialized values
there.  Meanwhile the native ioctl does zero-clear the whole
structure, so we should follow the same rule in compat mode, too.

Reported-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/core/pcm_compat.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c
index af49721ba0e3..c4ac3c1e19af 100644
--- a/sound/core/pcm_compat.c
+++ b/sound/core/pcm_compat.c
@@ -206,6 +206,8 @@ static int snd_pcm_status_user_compat(struct snd_pcm_substream *substream,
 	if (err < 0)
 		return err;
 
+	if (clear_user(src, sizeof(*src)))
+		return -EFAULT;
 	if (put_user(status.state, &src->state) ||
 	    compat_put_timespec(&status.trigger_tstamp, &src->trigger_tstamp) ||
 	    compat_put_timespec(&status.tstamp, &src->tstamp) ||

From 5f73dee2222e4b602c88419a19dd02ef896513f8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Fri, 1 Aug 2014 20:13:40 +0100
Subject: [PATCH 1011/1185] kill wbuf_queued/wbuf_dwork_lock

commit 99358a1ca53e8e6ce09423500191396f0e6584d2 upstream.

schedule_delayed_work() happening when the work is already pending is
a cheap no-op.  Don't bother with ->wbuf_queued logic - it's both
broken (cancelling ->wbuf_dwork leaves it set, as spotted by Jeff Harris)
and pointless.  It's cheaper to let schedule_delayed_work() handle that
case.

Reported-by: Jeff Harris <jefftharris@gmail.com>
Tested-by: Jeff Harris <jefftharris@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/jffs2/jffs2_fs_sb.h |  2 --
 fs/jffs2/wbuf.c        | 17 ++---------------
 2 files changed, 2 insertions(+), 17 deletions(-)

diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 413ef89c2d1b..046fee8b6e9b 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -134,8 +134,6 @@ struct jffs2_sb_info {
 	struct rw_semaphore wbuf_sem;	/* Protects the write buffer */
 
 	struct delayed_work wbuf_dwork; /* write-buffer write-out work */
-	int wbuf_queued;                /* non-zero delayed work is queued */
-	spinlock_t wbuf_dwork_lock;     /* protects wbuf_dwork and and wbuf_queued */
 
 	unsigned char *oobbuf;
 	int oobavail; /* How many bytes are available for JFFS2 in OOB */
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index a6597d60d76d..09ed55190ee2 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1162,10 +1162,6 @@ static void delayed_wbuf_sync(struct work_struct *work)
 	struct jffs2_sb_info *c = work_to_sb(work);
 	struct super_block *sb = OFNI_BS_2SFFJ(c);
 
-	spin_lock(&c->wbuf_dwork_lock);
-	c->wbuf_queued = 0;
-	spin_unlock(&c->wbuf_dwork_lock);
-
 	if (!(sb->s_flags & MS_RDONLY)) {
 		jffs2_dbg(1, "%s()\n", __func__);
 		jffs2_flush_wbuf_gc(c, 0);
@@ -1180,14 +1176,9 @@ void jffs2_dirty_trigger(struct jffs2_sb_info *c)
 	if (sb->s_flags & MS_RDONLY)
 		return;
 
-	spin_lock(&c->wbuf_dwork_lock);
-	if (!c->wbuf_queued) {
+	delay = msecs_to_jiffies(dirty_writeback_interval * 10);
+	if (queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay))
 		jffs2_dbg(1, "%s()\n", __func__);
-		delay = msecs_to_jiffies(dirty_writeback_interval * 10);
-		queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay);
-		c->wbuf_queued = 1;
-	}
-	spin_unlock(&c->wbuf_dwork_lock);
 }
 
 int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
@@ -1211,7 +1202,6 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
 
 	/* Initialise write buffer */
 	init_rwsem(&c->wbuf_sem);
-	spin_lock_init(&c->wbuf_dwork_lock);
 	INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync);
 	c->wbuf_pagesize = c->mtd->writesize;
 	c->wbuf_ofs = 0xFFFFFFFF;
@@ -1251,7 +1241,6 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) {
 
 	/* Initialize write buffer */
 	init_rwsem(&c->wbuf_sem);
-	spin_lock_init(&c->wbuf_dwork_lock);
 	INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync);
 	c->wbuf_pagesize =  c->mtd->erasesize;
 
@@ -1311,7 +1300,6 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) {
 
 	/* Initialize write buffer */
 	init_rwsem(&c->wbuf_sem);
-	spin_lock_init(&c->wbuf_dwork_lock);
 	INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync);
 
 	c->wbuf_pagesize = c->mtd->writesize;
@@ -1346,7 +1334,6 @@ int jffs2_ubivol_setup(struct jffs2_sb_info *c) {
 		return 0;
 
 	init_rwsem(&c->wbuf_sem);
-	spin_lock_init(&c->wbuf_dwork_lock);
 	INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync);
 
 	c->wbuf_pagesize =  c->mtd->writesize;

From 0af0e1dba97dc53b9f14795ca49b0b1b24aa0ce1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 8 Oct 2014 23:44:00 -0400
Subject: [PATCH 1012/1185] fix misuses of f_count() in ppp and netlink

commit 24dff96a37a2ca319e75a74d3929b2de22447ca6 upstream.

we used to check for "nobody else could start doing anything with
that opened file" by checking that refcount was 2 or less - one
for descriptor table and one we'd acquired in fget() on the way to
wherever we are.  That was race-prone (somebody else might have
had a reference to descriptor table and do fget() just as we'd
been checking) and it had become flat-out incorrect back when
we switched to fget_light() on those codepaths - unlike fget(),
it doesn't grab an extra reference unless the descriptor table
is shared.  The same change allowed a race-free check, though -
we are safe exactly when refcount is less than 2.

It was a long time ago; pre-2.6.12 for ioctl() (the codepath leading
to ppp one) and 2.6.17 for sendmsg() (netlink one).  OTOH,
netlink hadn't grown that check until 3.9 and ppp used to live
in drivers/net, not drivers/net/ppp until 3.1.  The bug existed
well before that, though, and the same fix used to apply in old
location of file.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ppp/ppp_generic.c | 2 +-
 net/netlink/af_netlink.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 72ff14b811c6..5a1897d86e94 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -601,7 +601,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			if (file == ppp->owner)
 				ppp_shutdown_interface(ppp);
 		}
-		if (atomic_long_read(&file->f_count) <= 2) {
+		if (atomic_long_read(&file->f_count) < 2) {
 			ppp_release(NULL, file);
 			err = 0;
 		} else
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5ed562dfe743..afe41178c9fb 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -571,7 +571,7 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
 	 * after validation, the socket and the ring may only be used by a
 	 * single process, otherwise we fall back to copying.
 	 */
-	if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 ||
+	if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 ||
 	    atomic_read(&nlk->mapped) > 1)
 		excl = false;
 

From 620c41147d873223d8aac0aa64793882b6217f09 Mon Sep 17 00:00:00 2001
From: Cesar Eduardo Barros <cesarb@cesarb.eti.br>
Date: Mon, 25 Nov 2013 22:00:41 -0200
Subject: [PATCH 1013/1185] crypto: more robust crypto_memneq

commit fe8c8a126806fea4465c43d62a1f9d273a572bf5 upstream.

[Only use the compiler.h portion of this patch, to get the
OPTIMIZER_HIDE_VAR() macro, which we need for other -stable patches
- gregkh]

Disabling compiler optimizations can be fragile, since a new
optimization could be added to -O0 or -Os that breaks the assumptions
the code is making.

Instead of disabling compiler optimizations, use a dummy inline assembly
(based on RELOC_HIDE) to block the problematic kinds of optimization,
while still allowing other optimizations to be applied to the code.

The dummy inline assembly is added after every OR, and has the
accumulator variable as its input and output. The compiler is forced to
assume that the dummy inline assembly could both depend on the
accumulator variable and change the accumulator variable, so it is
forced to compute the value correctly before the inline assembly, and
cannot assume anything about its value after the inline assembly.

This change should be enough to make crypto_memneq work correctly (with
data-independent timing) even if it is inlined at its call sites. That
can be done later in a followup patch.

Compile-tested on x86_64.

Signed-off-by: Cesar Eduardo Barros <cesarb@cesarb.eti.br>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/compiler-gcc.h   | 3 +++
 include/linux/compiler-intel.h | 7 +++++++
 include/linux/compiler.h       | 4 ++++
 3 files changed, 14 insertions(+)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 24545cd90a25..02ae99e8e6d3 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -37,6 +37,9 @@
     __asm__ ("" : "=r"(__ptr) : "0"(ptr));		\
     (typeof(ptr)) (__ptr + (off)); })
 
+/* Make the optimizer believe the variable can be manipulated arbitrarily. */
+#define OPTIMIZER_HIDE_VAR(var) __asm__ ("" : "=r" (var) : "0" (var))
+
 #ifdef __CHECKER__
 #define __must_be_array(arr) 0
 #else
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index dc1bd3dcf11f..5529c5239421 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -15,6 +15,7 @@
  */
 #undef barrier
 #undef RELOC_HIDE
+#undef OPTIMIZER_HIDE_VAR
 
 #define barrier() __memory_barrier()
 
@@ -23,6 +24,12 @@
      __ptr = (unsigned long) (ptr);				\
     (typeof(ptr)) (__ptr + (off)); })
 
+/* This should act as an optimization barrier on var.
+ * Given that this compiler does not have inline assembly, a compiler barrier
+ * is the best we can do.
+ */
+#define OPTIMIZER_HIDE_VAR(var) barrier()
+
 /* Intel ECC compiler doesn't support __builtin_types_compatible_p() */
 #define __must_be_array(a) 0
 
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 92669cd182a6..a2329c5e6206 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -170,6 +170,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
     (typeof(ptr)) (__ptr + (off)); })
 #endif
 
+#ifndef OPTIMIZER_HIDE_VAR
+#define OPTIMIZER_HIDE_VAR(var) barrier()
+#endif
+
 /* Not-quite-unique ID. */
 #ifndef __UNIQUE_ID
 # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)

From 25e1465ac3cfeafce34b3a47e773c4bc950054a3 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Tue, 26 Aug 2014 23:16:35 -0400
Subject: [PATCH 1014/1185] random: add and use memzero_explicit() for clearing
 data

commit d4c5efdb97773f59a2b711754ca0953f24516739 upstream.

zatimend has reported that in his environment (3.16/gcc4.8.3/corei7)
memset() calls which clear out sensitive data in extract_{buf,entropy,
entropy_user}() in random driver are being optimized away by gcc.

Add a helper memzero_explicit() (similar to explicit_bzero() variants)
that can be used in such cases where a variable with sensitive data is
being cleared out in the end. Other use cases might also be in crypto
code. [ I have put this into lib/string.c though, as it's always built-in
and doesn't need any dependencies then. ]

Fixes kernel bugzilla: 82041

Reported-by: zatimend@hotmail.co.uk
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/random.c  | 10 +++++-----
 include/linux/string.h |  5 +++--
 lib/string.c           | 16 ++++++++++++++++
 3 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 81eefa1c0d3f..aee3464a5bdc 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -933,8 +933,8 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
 	 * pool while mixing, and hash one final time.
 	 */
 	sha_transform(hash.w, extract, workspace);
-	memset(extract, 0, sizeof(extract));
-	memset(workspace, 0, sizeof(workspace));
+	memzero_explicit(extract, sizeof(extract));
+	memzero_explicit(workspace, sizeof(workspace));
 
 	/*
 	 * In case the hash function has some recognizable output
@@ -957,7 +957,7 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
 	}
 
 	memcpy(out, &hash, EXTRACT_SIZE);
-	memset(&hash, 0, sizeof(hash));
+	memzero_explicit(&hash, sizeof(hash));
 }
 
 static ssize_t extract_entropy(struct entropy_store *r, void *buf,
@@ -1005,7 +1005,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf,
 	}
 
 	/* Wipe data just returned from memory */
-	memset(tmp, 0, sizeof(tmp));
+	memzero_explicit(tmp, sizeof(tmp));
 
 	return ret;
 }
@@ -1043,7 +1043,7 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
 	}
 
 	/* Wipe data just returned from memory */
-	memset(tmp, 0, sizeof(tmp));
+	memzero_explicit(tmp, sizeof(tmp));
 
 	return ret;
 }
diff --git a/include/linux/string.h b/include/linux/string.h
index ac889c5ea11b..0ed878d0465c 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -129,7 +129,7 @@ int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4);
 #endif
 
 extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
-			const void *from, size_t available);
+				       const void *from, size_t available);
 
 /**
  * strstarts - does @str start with @prefix?
@@ -141,7 +141,8 @@ static inline bool strstarts(const char *str, const char *prefix)
 	return strncmp(str, prefix, strlen(prefix)) == 0;
 }
 
-extern size_t memweight(const void *ptr, size_t bytes);
+size_t memweight(const void *ptr, size_t bytes);
+void memzero_explicit(void *s, size_t count);
 
 /**
  * kbasename - return the last part of a pathname.
diff --git a/lib/string.c b/lib/string.c
index e5878de4f101..43d0781daf47 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -586,6 +586,22 @@ void *memset(void *s, int c, size_t count)
 EXPORT_SYMBOL(memset);
 #endif
 
+/**
+ * memzero_explicit - Fill a region of memory (e.g. sensitive
+ *		      keying data) with 0s.
+ * @s: Pointer to the start of the area.
+ * @count: The size of the area.
+ *
+ * memzero_explicit() doesn't need an arch-specific version as
+ * it just invokes the one of memset() implicitly.
+ */
+void memzero_explicit(void *s, size_t count)
+{
+	memset(s, 0, count);
+	OPTIMIZER_HIDE_VAR(s);
+}
+EXPORT_SYMBOL(memzero_explicit);
+
 #ifndef __HAVE_ARCH_MEMCPY
 /**
  * memcpy - Copy one area of memory to another

From 7a6f66a916d51ac1210c9658b223cd154d92e676 Mon Sep 17 00:00:00 2001
From: Richard Genoud <richard.genoud@gmail.com>
Date: Tue, 9 Sep 2014 14:25:01 +0200
Subject: [PATCH 1015/1185] UBI: add missing kmem_cache_free() in
 process_pool_aeb error path

commit 1bf1890e86869032099b539bc83b098be12fc5a7 upstream.

I ran into this error after a ubiupdatevol, because I forgot to backport
e9110361a9a4 UBI: fix the volumes tree sorting criteria.

UBI error: process_pool_aeb: orphaned volume in fastmap pool
UBI error: ubi_scan_fastmap: Attach by fastmap failed, doing a full scan!
kmem_cache_destroy ubi_ainf_peb_slab: Slab cache still has objects
CPU: 0 PID: 1 Comm: swapper Not tainted 3.14.18-00053-gf05cac8dbf85 #1
[<c000d298>] (unwind_backtrace) from [<c000baa8>] (show_stack+0x10/0x14)
[<c000baa8>] (show_stack) from [<c01b7a68>] (destroy_ai+0x230/0x244)
[<c01b7a68>] (destroy_ai) from [<c01b8fd4>] (ubi_attach+0x98/0x1ec)
[<c01b8fd4>] (ubi_attach) from [<c01ade90>] (ubi_attach_mtd_dev+0x2b8/0x868)
[<c01ade90>] (ubi_attach_mtd_dev) from [<c038b510>] (ubi_init+0x1dc/0x2ac)
[<c038b510>] (ubi_init) from [<c0008860>] (do_one_initcall+0x94/0x140)
[<c0008860>] (do_one_initcall) from [<c037aadc>] (kernel_init_freeable+0xe8/0x1b0)
[<c037aadc>] (kernel_init_freeable) from [<c02730ac>] (kernel_init+0x8/0xe4)
[<c02730ac>] (kernel_init) from [<c00093f0>] (ret_from_fork+0x14/0x24)
UBI: scanning is finished

Freeing the cache in the error path fixes the Slab error.

Tested on at91sam9g35 (3.14.18+fastmap backports)

Signed-off-by: Richard Genoud <richard.genoud@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/ubi/fastmap.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c
index 0648c6996d43..bf8108d65b73 100644
--- a/drivers/mtd/ubi/fastmap.c
+++ b/drivers/mtd/ubi/fastmap.c
@@ -330,6 +330,7 @@ static int process_pool_aeb(struct ubi_device *ubi, struct ubi_attach_info *ai,
 		av = tmp_av;
 	else {
 		ubi_err("orphaned volume in fastmap pool!");
+		kmem_cache_free(ai->aeb_slab_cache, new_aeb);
 		return UBI_BAD_FASTMAP;
 	}
 

From 315a75ea5d19a4cbc68b96024de8e36eb1db68b0 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 8 Oct 2014 10:42:27 -0700
Subject: [PATCH 1016/1185] mnt: Prevent pivot_root from creating a loop in the
 mount tree

commit 0d0826019e529f21c84687521d03f60cd241ca7d upstream.

Andy Lutomirski recently demonstrated that when chroot is used to set
the root path below the path for the new ``root'' passed to pivot_root
the pivot_root system call succeeds and leaks mounts.

In examining the code I see that starting with a new root that is
below the current root in the mount tree will result in a loop in the
mount tree after the mounts are detached and then reattached to one
another.  This results in all kinds of ugliness, including a leak of the
mounts involved in the mount loop.

Prevent this problem by ensuring that the new mount is reachable from
the current root of the mount tree.

[Added stable cc.  Fixes CVE-2014-7970.  --Andy]

Reported-by: Andy Lutomirski <luto@amacapital.net>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/87bnpmihks.fsf@x220.int.ebiederm.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/namespace.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/namespace.c b/fs/namespace.c
index 7f6a9348c589..154822397780 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2696,6 +2696,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	/* make sure we can reach put_old from new_root */
 	if (!is_path_reachable(old_mnt, old.dentry, &new))
 		goto out4;
+	/* make certain new is below the root */
+	if (!is_path_reachable(new_mnt, new.dentry, &root))
+		goto out4;
 	root_mp->m_count++; /* pin it so it won't go away */
 	br_write_lock(&vfsmount_lock);
 	detach_mnt(new_mnt, &parent_path);

From 003f558269ba36d7b3b84c7dde9a77dd3646d0a3 Mon Sep 17 00:00:00 2001
From: Chris Ball <chris@printf.net>
Date: Thu, 4 Sep 2014 17:11:53 +0100
Subject: [PATCH 1017/1185] mfd: rtsx_pcr: Fix MSI enable error handling

commit 5152970538a5e16c03bbcb9f1c780489a795ed40 upstream.

pci_enable_msi() can return failure with both positive and negative
integers -- it returns 0 for success -- but is only tested here for
"if (ret < 0)".  This causes us to try to use MSI on the RTS5249 SD
reader in the Dell XPS 11 when enabling MSI failed, causing:

[    1.737110] rtsx_pci: probe of 0000:05:00.0 failed with error -110

Reported-by: D. Jared Dominguez <Jared_Dominguez@Dell.com>
Tested-by: D. Jared Dominguez <Jared_Dominguez@Dell.com>
Signed-off-by: Chris Ball <chris@printf.net>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mfd/rtsx_pcr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index 45f26be359ea..7e28bd0de554 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -1137,7 +1137,7 @@ static int rtsx_pci_probe(struct pci_dev *pcidev,
 	pcr->msi_en = msi_en;
 	if (pcr->msi_en) {
 		ret = pci_enable_msi(pcidev);
-		if (ret < 0)
+		if (ret)
 			pcr->msi_en = false;
 	}
 

From 1cde964e47f70c89e84618884a1258e325f9b856 Mon Sep 17 00:00:00 2001
From: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Date: Sun, 12 Oct 2014 23:09:08 -0400
Subject: [PATCH 1018/1185] pstore: Fix duplicate {console,ftrace}-efi entries

commit d4bf205da618bbd0b038e404d646f14e76915718 upstream.

The pstore filesystem still creates duplicate filename/inode pairs for
some pstore types.  Add the id to the filename to prevent that.

Before patch:

[/sys/fs/pstore] ls -li
total 0
1250 -r--r--r--. 1 root root 67 Sep 29 17:09 console-efi
1250 -r--r--r--. 1 root root 67 Sep 29 17:09 console-efi
1250 -r--r--r--. 1 root root 67 Sep 29 17:09 console-efi
1250 -r--r--r--. 1 root root 67 Sep 29 17:09 console-efi
1250 -r--r--r--. 1 root root 67 Sep 29 17:09 console-efi
1250 -r--r--r--. 1 root root 67 Sep 29 17:09 console-efi
1250 -r--r--r--. 1 root root 67 Sep 29 17:09 console-efi
1250 -r--r--r--. 1 root root 67 Sep 29 17:09 console-efi
1250 -r--r--r--. 1 root root 67 Sep 29 17:09 console-efi

After:

[/sys/fs/pstore] ls -li
total 0
1232 -r--r--r--. 1 root root 148 Sep 29 17:09 console-efi-141202499100000
1231 -r--r--r--. 1 root root  67 Sep 29 17:09 console-efi-141202499200000
1230 -r--r--r--. 1 root root 148 Sep 29 17:44 console-efi-141202705400000
1229 -r--r--r--. 1 root root  67 Sep 29 17:44 console-efi-141202705500000
1228 -r--r--r--. 1 root root  67 Sep 29 20:42 console-efi-141203772600000
1227 -r--r--r--. 1 root root 148 Sep 29 23:42 console-efi-141204854900000
1226 -r--r--r--. 1 root root  67 Sep 29 23:42 console-efi-141204855000000
1225 -r--r--r--. 1 root root 148 Sep 29 23:59 console-efi-141204954200000
1224 -r--r--r--. 1 root root  67 Sep 29 23:59 console-efi-141204954400000

Signed-off-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/pstore/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index e4bcb2cf055a..3ba30825f387 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -316,10 +316,10 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
 		sprintf(name, "dmesg-%s-%lld", psname, id);
 		break;
 	case PSTORE_TYPE_CONSOLE:
-		sprintf(name, "console-%s", psname);
+		sprintf(name, "console-%s-%lld", psname, id);
 		break;
 	case PSTORE_TYPE_FTRACE:
-		sprintf(name, "ftrace-%s", psname);
+		sprintf(name, "ftrace-%s-%lld", psname, id);
 		break;
 	case PSTORE_TYPE_MCE:
 		sprintf(name, "mce-%s-%lld", psname, id);

From e38e049b0eaa19affedf3be7f3b569ebe3b5e72b Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Mon, 6 Oct 2014 16:32:52 -0400
Subject: [PATCH 1019/1185] selinux: fix inode security list corruption

commit 923190d32de4428afbea5e5773be86bea60a9925 upstream.

sb_finish_set_opts() can race with inode_free_security()
when initializing inode security structures for inodes
created prior to initial policy load or by the filesystem
during ->mount().   This appears to have always been
a possible race, but commit 3dc91d4 ("SELinux:  Fix possible
NULL pointer dereference in selinux_inode_permission()")
made it more evident by immediately reusing the unioned
list/rcu element  of the inode security structure for call_rcu()
upon an inode_free_security().  But the underlying issue
was already present before that commit as a possible use-after-free
of isec.

Shivnandan Kumar reported the list corruption and proposed
a patch to split the list and rcu elements out of the union
as separate fields of the inode_security_struct so that setting
the rcu element would not affect the list element.  However,
this would merely hide the issue and not truly fix the code.

This patch instead moves up the deletion of the list entry
prior to dropping the sbsec->isec_lock initially.  Then,
if the inode is dropped subsequently, there will be no further
references to the isec.

Reported-by: Shivnandan Kumar <shivnandan.k@samsung.com>
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <pmoore@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 security/selinux/hooks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 70d4a8a7f21c..fdd6e4f8be39 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -437,6 +437,7 @@ static int sb_finish_set_opts(struct super_block *sb)
 				list_entry(sbsec->isec_head.next,
 					   struct inode_security_struct, list);
 		struct inode *inode = isec->inode;
+		list_del_init(&isec->list);
 		spin_unlock(&sbsec->isec_lock);
 		inode = igrab(inode);
 		if (inode) {
@@ -445,7 +446,6 @@ static int sb_finish_set_opts(struct super_block *sb)
 			iput(inode);
 		}
 		spin_lock(&sbsec->isec_lock);
-		list_del_init(&isec->list);
 		goto next_inode;
 	}
 	spin_unlock(&sbsec->isec_lock);

From d96f66edf7efb2b7e396d20891a13e677624c085 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 14 Oct 2014 10:40:29 +1030
Subject: [PATCH 1020/1185] virtio_pci: fix virtio spec compliance on restore

commit 6fbc198cf623944ab60a1db6d306a4d55cdd820d upstream.

On restore, virtio pci does the following:
+ set features
+ init vqs etc - device can be used at this point!
+ set ACKNOWLEDGE,DRIVER and DRIVER_OK status bits

This is in violation of the virtio spec, which
requires the following order:
- ACKNOWLEDGE
- DRIVER
- init vqs
- DRIVER_OK

This behaviour will break with hypervisors that assume spec compliant
behaviour.  It seems like a good idea to have this patch applied to
stable branches to reduce the support burden for the hypervisors.

Cc: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/virtio/virtio_pci.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index a7ce73029f59..933241a6ab10 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -791,6 +791,7 @@ static int virtio_pci_restore(struct device *dev)
 	struct pci_dev *pci_dev = to_pci_dev(dev);
 	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
 	struct virtio_driver *drv;
+	unsigned status = 0;
 	int ret;
 
 	drv = container_of(vp_dev->vdev.dev.driver,
@@ -801,14 +802,40 @@ static int virtio_pci_restore(struct device *dev)
 		return ret;
 
 	pci_set_master(pci_dev);
+	/* We always start by resetting the device, in case a previous
+	 * driver messed it up. */
+	vp_reset(&vp_dev->vdev);
+
+	/* Acknowledge that we've seen the device. */
+	status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
+	vp_set_status(&vp_dev->vdev, status);
+
+	/* Maybe driver failed before freeze.
+	 * Restore the failed status, for debugging. */
+	status |= vp_dev->saved_status & VIRTIO_CONFIG_S_FAILED;
+	vp_set_status(&vp_dev->vdev, status);
+
+	if (!drv)
+		return 0;
+
+	/* We have a driver! */
+	status |= VIRTIO_CONFIG_S_DRIVER;
+	vp_set_status(&vp_dev->vdev, status);
+
 	vp_finalize_features(&vp_dev->vdev);
 
-	if (drv && drv->restore)
+	if (drv->restore) {
 		ret = drv->restore(&vp_dev->vdev);
+		if (ret) {
+			status |= VIRTIO_CONFIG_S_FAILED;
+			vp_set_status(&vp_dev->vdev, status);
+			return ret;
+		}
+	}
 
 	/* Finally, tell the device we're all set */
-	if (!ret)
-		vp_set_status(&vp_dev->vdev, vp_dev->saved_status);
+	status |= VIRTIO_CONFIG_S_DRIVER_OK;
+	vp_set_status(&vp_dev->vdev, status);
 
 	return ret;
 }

From 3f626317894a923edea99de4f18590c008375890 Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Tue, 30 Sep 2014 09:32:46 +0100
Subject: [PATCH 1021/1185] dm bufio: update last_accessed when relinking a
 buffer

commit eb76faf53b1ff7a77ce3f78cc98ad392ac70c2a0 upstream.

The 'last_accessed' member of the dm_buffer structure was only set when
the buffer was created.  This led to each buffer being discarded
after dm_bufio_max_age time even if it was used recently.  In practice
this resulted in all thinp metadata being evicted soon after being read
-- this is particularly problematic for metadata intensive workloads
like multithreaded small random IO.

'last_accessed' is now updated each time the buffer is moved to the head
of the LRU list, so the buffer is now properly discarded if it was not
used in dm_bufio_max_age time.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-bufio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index a6e985fcceb8..c9b4ca9e0696 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -462,6 +462,7 @@ static void __relink_lru(struct dm_buffer *b, int dirty)
 	c->n_buffers[dirty]++;
 	b->list_mode = dirty;
 	list_move(&b->lru_list, &c->lru[dirty]);
+	b->last_accessed = jiffies;
 }
 
 /*----------------------------------------------------------------

From 29772e3e199f2c13cd5f00694e633fb80bbb0415 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Thu, 18 Sep 2014 16:49:41 +0200
Subject: [PATCH 1022/1185] drbd: compute the end before rb_insert_augmented()

commit 82cfb90bc99d7b7e0ec62d0505b9d4f06805d5db upstream.

Commit 98683650 "Merge branch 'drbd-8.4_ed6' into
for-3.8-drivers-drbd-8.4_ed6" switches to the new augment API, but the
new API requires that the tree is augmented before rb_insert_augmented()
is called, which is missing.

So we add the augment-code to drbd_insert_interval() when it travels the
tree up to down before rb_insert_augmented().  See the example in
include/linux/interval_tree_generic.h or Documentation/rbtree.txt.

drbd_insert_interval() may cancel the insertion when traveling, in this
case, the just added augment-code does nothing before cancel since the
@this node is already in the subtrees in this case.

CC: Michel Lespinasse <walken@google.com>
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Andreas Gruenbacher <agruen@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/drbd/drbd_interval.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c
index 89c497c630b4..04a14e0f8878 100644
--- a/drivers/block/drbd/drbd_interval.c
+++ b/drivers/block/drbd/drbd_interval.c
@@ -79,6 +79,7 @@ bool
 drbd_insert_interval(struct rb_root *root, struct drbd_interval *this)
 {
 	struct rb_node **new = &root->rb_node, *parent = NULL;
+	sector_t this_end = this->sector + (this->size >> 9);
 
 	BUG_ON(!IS_ALIGNED(this->size, 512));
 
@@ -87,6 +88,8 @@ drbd_insert_interval(struct rb_root *root, struct drbd_interval *this)
 			rb_entry(*new, struct drbd_interval, rb);
 
 		parent = *new;
+		if (here->end < this_end)
+			here->end = this_end;
 		if (this->sector < here->sector)
 			new = &(*new)->rb_left;
 		else if (this->sector > here->sector)
@@ -99,6 +102,7 @@ drbd_insert_interval(struct rb_root *root, struct drbd_interval *this)
 			return false;
 	}
 
+	this->end = this_end;
 	rb_link_node(&this->rb, parent, new);
 	rb_insert_augmented(&this->rb, root, &augment_callbacks);
 	return true;

From a63bea06c1617175c68677da4810dfd120da2bd5 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Wed, 8 Oct 2014 18:26:13 -0400
Subject: [PATCH 1023/1185] block: fix alignment_offset math that assumes
 io_min is a power-of-2

commit b8839b8c55f3fdd60dc36abcda7e0266aff7985c upstream.

The math in both blk_stack_limits() and queue_limit_alignment_offset()
assume that a block device's io_min (aka minimum_io_size) is always a
power-of-2.  Fix the math such that it works for non-power-of-2 io_min.

This issue (of alignment_offset != 0) became apparent when testing
dm-thinp with a thinp blocksize that matches a RAID6 stripesize of
1280K.  Commit fdfb4c8c1 ("dm thin: set minimum_io_size to pool's data
block size") unlocked the potential for alignment_offset != 0 due to
the dm-thin-pool's io_min possibly being a non-power-of-2.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 block/blk-settings.c   | 4 ++--
 include/linux/blkdev.h | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 53309333c2f0..ec00a0f75212 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -553,7 +553,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		bottom = max(b->physical_block_size, b->io_min) + alignment;
 
 		/* Verify that top and bottom intervals line up */
-		if (max(top, bottom) & (min(top, bottom) - 1)) {
+		if (max(top, bottom) % min(top, bottom)) {
 			t->misaligned = 1;
 			ret = -1;
 		}
@@ -594,7 +594,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 
 	/* Find lowest common alignment_offset */
 	t->alignment_offset = lcm(t->alignment_offset, alignment)
-		& (max(t->physical_block_size, t->io_min) - 1);
+		% max(t->physical_block_size, t->io_min);
 
 	/* Verify that new alignment_offset is on a logical block boundary */
 	if (t->alignment_offset & (t->logical_block_size - 1)) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2fdb4a451b49..494d228a91dd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1187,10 +1187,9 @@ static inline int queue_alignment_offset(struct request_queue *q)
 static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector)
 {
 	unsigned int granularity = max(lim->physical_block_size, lim->io_min);
-	unsigned int alignment = (sector << 9) & (granularity - 1);
+	unsigned int alignment = sector_div(sector, granularity >> 9) << 9;
 
-	return (granularity + lim->alignment_offset - alignment)
-		& (granularity - 1);
+	return (granularity + lim->alignment_offset - alignment) % granularity;
 }
 
 static inline int bdev_alignment_offset(struct block_device *bdev)

From fdef68fb1be8fe1e02f2029b25e4a87211d884e5 Mon Sep 17 00:00:00 2001
From: Alexey Khoroshilov <khoroshilov@ispras.ru>
Date: Wed, 1 Oct 2014 22:58:35 +0200
Subject: [PATCH 1024/1185] dm log userspace: fix memory leak in
 dm_ulog_tfr_init failure path

commit 56ec16cb1e1ce46354de8511eef962a417c32c92 upstream.

If cn_add_callback() fails in dm_ulog_tfr_init(), it does not
deallocate prealloced memory but calls cn_del_callback().

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Reviewed-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-log-userspace-transfer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
index 08d9a207259a..c69d0b787746 100644
--- a/drivers/md/dm-log-userspace-transfer.c
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -272,7 +272,7 @@ int dm_ulog_tfr_init(void)
 
 	r = cn_add_callback(&ulog_cn_id, "dmlogusr", cn_ulog_callback);
 	if (r) {
-		cn_del_callback(&ulog_cn_id);
+		kfree(prealloced_cn_msg);
 		return r;
 	}
 

From 5ade16953067e758d0ea86f133f74319a851328e Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Tue, 14 Oct 2014 02:51:39 +1030
Subject: [PATCH 1025/1185] modules, lock around setting of
 MODULE_STATE_UNFORMED

commit d3051b489aa81ca9ba62af366149ef42b8dae97c upstream.

A panic was seen in the following situation.

There are two threads running on the system. The first thread is a system
monitoring thread that is reading /proc/modules. The second thread is
loading and unloading a module (in this example I'm using my simple
dummy-module.ko).  Note, in the "real world" this occurred with the qlogic
driver module.

When doing this, the following panic occurred:

 ------------[ cut here ]------------
 kernel BUG at kernel/module.c:3739!
 invalid opcode: 0000 [#1] SMP
 Modules linked in: binfmt_misc sg nfsv3 rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache intel_powerclamp coretemp kvm_intel kvm crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel lrw igb gf128mul glue_helper iTCO_wdt iTCO_vendor_support ablk_helper ptp sb_edac cryptd pps_core edac_core shpchp i2c_i801 pcspkr wmi lpc_ich ioatdma mfd_core dca ipmi_si nfsd ipmi_msghandler auth_rpcgss nfs_acl lockd sunrpc xfs libcrc32c sr_mod cdrom sd_mod crc_t10dif crct10dif_common mgag200 syscopyarea sysfillrect sysimgblt i2c_algo_bit drm_kms_helper ttm isci drm libsas ahci libahci scsi_transport_sas libata i2c_core dm_mirror dm_region_hash dm_log dm_mod [last unloaded: dummy_module]
 CPU: 37 PID: 186343 Comm: cat Tainted: GF          O--------------   3.10.0+ #7
 Hardware name: Intel Corporation S2600CP/S2600CP, BIOS RMLSDP.86I.00.29.D696.1311111329 11/11/2013
 task: ffff8807fd2d8000 ti: ffff88080fa7c000 task.ti: ffff88080fa7c000
 RIP: 0010:[<ffffffff810d64c5>]  [<ffffffff810d64c5>] module_flags+0xb5/0xc0
 RSP: 0018:ffff88080fa7fe18  EFLAGS: 00010246
 RAX: 0000000000000003 RBX: ffffffffa03b5200 RCX: 0000000000000000
 RDX: 0000000000001000 RSI: ffff88080fa7fe38 RDI: ffffffffa03b5000
 RBP: ffff88080fa7fe28 R08: 0000000000000010 R09: 0000000000000000
 R10: 0000000000000000 R11: 000000000000000f R12: ffffffffa03b5000
 R13: ffffffffa03b5008 R14: ffffffffa03b5200 R15: ffffffffa03b5000
 FS:  00007f6ae57ef740(0000) GS:ffff88101e7a0000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000404f70 CR3: 0000000ffed48000 CR4: 00000000001407e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
 Stack:
  ffffffffa03b5200 ffff8810101e4800 ffff88080fa7fe70 ffffffff810d666c
  ffff88081e807300 000000002e0f2fbf 0000000000000000 ffff88100f257b00
  ffffffffa03b5008 ffff88080fa7ff48 ffff8810101e4800 ffff88080fa7fee0
 Call Trace:
  [<ffffffff810d666c>] m_show+0x19c/0x1e0
  [<ffffffff811e4d7e>] seq_read+0x16e/0x3b0
  [<ffffffff812281ed>] proc_reg_read+0x3d/0x80
  [<ffffffff811c0f2c>] vfs_read+0x9c/0x170
  [<ffffffff811c1a58>] SyS_read+0x58/0xb0
  [<ffffffff81605829>] system_call_fastpath+0x16/0x1b
 Code: 48 63 c2 83 c2 01 c6 04 03 29 48 63 d2 eb d9 0f 1f 80 00 00 00 00 48 63 d2 c6 04 13 2d 41 8b 0c 24 8d 50 02 83 f9 01 75 b2 eb cb <0f> 0b 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41
 RIP  [<ffffffff810d64c5>] module_flags+0xb5/0xc0
  RSP <ffff88080fa7fe18>

    Consider the two processes running on the system.

    CPU 0 (/proc/modules reader)
    CPU 1 (loading/unloading module)

    CPU 0 opens /proc/modules, and starts displaying data for each module by
    traversing the modules list via fs/seq_file.c:seq_open() and
    fs/seq_file.c:seq_read().  For each module in the modules list, seq_read
    does

            op->start()  <-- this is a pointer to m_start()
            op->show()   <- this is a pointer to m_show()
            op->stop()   <-- this is a pointer to m_stop()

    The m_start(), m_show(), and m_stop() module functions are defined in
    kernel/module.c. The m_start() and m_stop() functions acquire and release
    the module_mutex respectively.

    ie) When reading /proc/modules, the module_mutex is acquired and released
    for each module.

    m_show() is called with the module_mutex held.  It accesses the module
    struct data and attempts to write out module data.  It is in this code
    path that the above BUG_ON() warning is encountered, specifically m_show()
    calls

    static char *module_flags(struct module *mod, char *buf)
    {
            int bx = 0;

            BUG_ON(mod->state == MODULE_STATE_UNFORMED);
    ...

    The other thread, CPU 1, in unloading the module calls the syscall
    delete_module() defined in kernel/module.c.  The module_mutex is acquired
    for a short time, and then released.  free_module() is called without the
    module_mutex.  free_module() then sets mod->state = MODULE_STATE_UNFORMED,
    also without the module_mutex.  Some additional code is called and then the
    module_mutex is reacquired to remove the module from the modules list:

        /* Now we can delete it from the lists */
        mutex_lock(&module_mutex);
        stop_machine(__unlink_module, mod, NULL);
        mutex_unlock(&module_mutex);

This is the sequence of events that leads to the panic.

CPU 1 is removing dummy_module via delete_module().  It acquires the
module_mutex, and then releases it.  CPU 1 has NOT set dummy_module->state to
MODULE_STATE_UNFORMED yet.

CPU 0, which is reading the /proc/modules, acquires the module_mutex and
acquires a pointer to the dummy_module which is still in the modules list.
CPU 0 calls m_show for dummy_module.  The check in m_show() for
MODULE_STATE_UNFORMED passed for dummy_module even though it is being
torn down.

Meanwhile CPU 1, which has been continuing to remove dummy_module without
holding the module_mutex, now calls free_module() and sets
dummy_module->state to MODULE_STATE_UNFORMED.

CPU 0 now calls module_flags() with dummy_module and ...

static char *module_flags(struct module *mod, char *buf)
{
        int bx = 0;

        BUG_ON(mod->state == MODULE_STATE_UNFORMED);

and BOOM.

Acquire and release the module_mutex lock around the setting of
MODULE_STATE_UNFORMED in the teardown path, which should resolve the
problem.

Testing: In the unpatched kernel I can panic the system within 1 minute by
doing

while (true) do insmod dummy_module.ko; rmmod dummy_module.ko; done

and

while (true) do cat /proc/modules; done

in separate terminals.

In the patched kernel I was able to run just over one hour without seeing
any issues.  I also verified the output of panic via sysrq-c and the output
of /proc/modules looks correct for all three states for the dummy_module.

        dummy_module 12661 0 - Unloading 0xffffffffa03a5000 (OE-)
        dummy_module 12661 0 - Live 0xffffffffa03bb000 (OE)
        dummy_module 14015 1 - Loading 0xffffffffa03a5000 (OE+)

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/module.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/module.c b/kernel/module.c
index 10a3af821d28..61fb677211cb 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1866,7 +1866,9 @@ static void free_module(struct module *mod)
 
 	/* We leave it in list to prevent duplicate loads, but make sure
 	 * that noone uses it while it's being deconstructed. */
+	mutex_lock(&module_mutex);
 	mod->state = MODULE_STATE_UNFORMED;
+	mutex_unlock(&module_mutex);
 
 	/* Remove dynamic debug info */
 	ddebug_remove_module(mod->name);

From a276227fc44ed806f6e3f5d6e27339a526838876 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 16 Sep 2014 12:40:26 -0400
Subject: [PATCH 1026/1185] framebuffer: fix border color

commit f74a289b9480648a654e5afd8458c2263c03a1e1 upstream.

The framebuffer code uses the current background color to fill the border
when switching consoles, however, this results in inconsistent behavior.
For example:
- start Midnight Commander
- the border is black
- switch to another console and switch back
- the border is cyan
- type something into the command line in mc
- the border is cyan
- switch to another console and switch back
- the border is black
- press F9 to go to menu
- the border is black
- switch to another console and switch back
- the border is dark blue

When switching to a console with Midnight Commander, the border is a random
color that was left selected by the slang subsystem.

This patch fixes this inconsistency by always using black as the
background color when switching consoles.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/console/bitblit.c   | 3 +--
 drivers/video/console/fbcon_ccw.c | 3 +--
 drivers/video/console/fbcon_cw.c  | 3 +--
 drivers/video/console/fbcon_ud.c  | 3 +--
 4 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/video/console/bitblit.c b/drivers/video/console/bitblit.c
index 61b182bf32a2..dbfe4eecf12e 100644
--- a/drivers/video/console/bitblit.c
+++ b/drivers/video/console/bitblit.c
@@ -205,7 +205,6 @@ static void bit_putcs(struct vc_data *vc, struct fb_info *info,
 static void bit_clear_margins(struct vc_data *vc, struct fb_info *info,
 			      int bottom_only)
 {
-	int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
 	unsigned int cw = vc->vc_font.width;
 	unsigned int ch = vc->vc_font.height;
 	unsigned int rw = info->var.xres - (vc->vc_cols*cw);
@@ -214,7 +213,7 @@ static void bit_clear_margins(struct vc_data *vc, struct fb_info *info,
 	unsigned int bs = info->var.yres - bh;
 	struct fb_fillrect region;
 
-	region.color = attr_bgcol_ec(bgshift, vc, info);
+	region.color = 0;
 	region.rop = ROP_COPY;
 
 	if (rw && !bottom_only) {
diff --git a/drivers/video/console/fbcon_ccw.c b/drivers/video/console/fbcon_ccw.c
index 41b32ae23dac..5a3cbf6dff4d 100644
--- a/drivers/video/console/fbcon_ccw.c
+++ b/drivers/video/console/fbcon_ccw.c
@@ -197,9 +197,8 @@ static void ccw_clear_margins(struct vc_data *vc, struct fb_info *info,
 	unsigned int bh = info->var.xres - (vc->vc_rows*ch);
 	unsigned int bs = vc->vc_rows*ch;
 	struct fb_fillrect region;
-	int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
 
-	region.color = attr_bgcol_ec(bgshift,vc,info);
+	region.color = 0;
 	region.rop = ROP_COPY;
 
 	if (rw && !bottom_only) {
diff --git a/drivers/video/console/fbcon_cw.c b/drivers/video/console/fbcon_cw.c
index a93670ef7f89..e7ee44db4e98 100644
--- a/drivers/video/console/fbcon_cw.c
+++ b/drivers/video/console/fbcon_cw.c
@@ -180,9 +180,8 @@ static void cw_clear_margins(struct vc_data *vc, struct fb_info *info,
 	unsigned int bh = info->var.xres - (vc->vc_rows*ch);
 	unsigned int rs = info->var.yres - rw;
 	struct fb_fillrect region;
-	int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
 
-	region.color = attr_bgcol_ec(bgshift,vc,info);
+	region.color = 0;
 	region.rop = ROP_COPY;
 
 	if (rw && !bottom_only) {
diff --git a/drivers/video/console/fbcon_ud.c b/drivers/video/console/fbcon_ud.c
index ff0872c0498b..19e3714abfe8 100644
--- a/drivers/video/console/fbcon_ud.c
+++ b/drivers/video/console/fbcon_ud.c
@@ -227,9 +227,8 @@ static void ud_clear_margins(struct vc_data *vc, struct fb_info *info,
 	unsigned int rw = info->var.xres - (vc->vc_cols*cw);
 	unsigned int bh = info->var.yres - (vc->vc_rows*ch);
 	struct fb_fillrect region;
-	int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
 
-	region.color = attr_bgcol_ec(bgshift,vc,info);
+	region.color = 0;
 	region.rop = ROP_COPY;
 
 	if (rw && !bottom_only) {

From 3b8bb8fbc67135d8fc5f63e4547241d7f342e747 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 11 Oct 2014 11:27:37 -0700
Subject: [PATCH 1027/1185] Input: i8042 - add noloop quirk for Asus X750LN

commit 9ff84a17302aeb8913ff244ecc0d8f9d219fecb5 upstream.

Without this the aux port does not get detected, and consequently the
touchpad will not work.

https://bugzilla.redhat.com/show_bug.cgi?id=1110011

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/serio/i8042-x86ia64io.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 1291673bd57e..40ff49489ff5 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -99,6 +99,12 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = {
 			DMI_MATCH(DMI_BOARD_VERSION, "REV 2.X"),
 		},
 	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X750LN"),
+		},
+	},
 	{
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),

From 55a72275d9c026e9ceae4260b42db595f50215f8 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 24 Oct 2014 14:55:24 -0700
Subject: [PATCH 1028/1185] Input: i8042 - quirks for Fujitsu Lifebook A544 and
 Lifebook AH544

commit 993b3a3f80a7842a48cd46c2b41e1b3ef6302468 upstream.

These models need i8042.notimeout, otherwise the touchpad will not work.

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=69731
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1111138
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/serio/i8042-x86ia64io.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 40ff49489ff5..ce715b1bee46 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -614,6 +614,22 @@ static const struct dmi_system_id __initconst i8042_dmi_notimeout_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv4 Notebook PC"),
 		},
 	},
+	{
+		/* Fujitsu A544 laptop */
+		/* https://bugzilla.redhat.com/show_bug.cgi?id=1111138 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK A544"),
+		},
+	},
+	{
+		/* Fujitsu AH544 laptop */
+		/* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK AH544"),
+		},
+	},
 	{
 		/* Fujitsu U574 laptop */
 		/* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */

From da1185cf703a9709fa83842d978e14fbf1b10a69 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 7 Oct 2014 19:04:58 +1100
Subject: [PATCH 1029/1185] drm/ast: Fix HW cursor image

commit 1e99cfa8de0f0879091e33cd65fd60418d006ad9 upstream.

The translation from the X driver to the KMS one typo'ed a couple
of array indices, causing the HW cursor to look weird (blocky with
leaking edge colors). This fixes it.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/ast/ast_mode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index 7fc9f7272b56..e8f6418b6dec 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -1012,8 +1012,8 @@ static u32 copy_cursor_image(u8 *src, u8 *dst, int width, int height)
 			srcdata32[1].ul = *((u32 *)(srcxor + 4)) & 0xf0f0f0f0;
 			data32.b[0] = srcdata32[0].b[1] | (srcdata32[0].b[0] >> 4);
 			data32.b[1] = srcdata32[0].b[3] | (srcdata32[0].b[2] >> 4);
-			data32.b[2] = srcdata32[0].b[1] | (srcdata32[1].b[0] >> 4);
-			data32.b[3] = srcdata32[0].b[3] | (srcdata32[1].b[2] >> 4);
+			data32.b[2] = srcdata32[1].b[1] | (srcdata32[1].b[0] >> 4);
+			data32.b[3] = srcdata32[1].b[3] | (srcdata32[1].b[2] >> 4);
 
 			writel(data32.ul, dstxor);
 			csum += data32.ul;

From a3cb445501d51791746af4066e480650d85d2f97 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
Date: Tue, 2 Sep 2014 09:51:15 -0300
Subject: [PATCH 1030/1185] drm/tilcdc: Fix the error path in tilcdc_load()

commit b478e336b3e75505707a11e78ef8b964ef0a03af upstream.

The current error path calls tilcdc_unload() in case of an error to release
the resources. However, this is wrong because not all resources have been
allocated by the time an error occurs in tilcdc_load().

To fix it, this commit adds proper labels to bail out at the different
stages in the load function, and release only the resources actually allocated.

Tested-by: Darren Etheridge <detheridge@ti.com>
Tested-by: Johannes Pointner <johannes.pointner@br-automation.com>
Signed-off-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Fixes: 3a49012224ca ("drm/tilcdc: panel: fix leak when unloading the module")
Signed-off-by: Matwey V. Kornilov <matwey.kornilov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/tilcdc/tilcdc_drv.c | 60 ++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
index f060b7487c34..f5ddd3550796 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
@@ -78,6 +78,7 @@ static int modeset_init(struct drm_device *dev)
 	if ((priv->num_encoders == 0) || (priv->num_connectors == 0)) {
 		/* oh nos! */
 		dev_err(dev->dev, "no encoders/connectors found\n");
+		drm_mode_config_cleanup(dev);
 		return -ENXIO;
 	}
 
@@ -170,33 +171,37 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags)
 	dev->dev_private = priv;
 
 	priv->wq = alloc_ordered_workqueue("tilcdc", 0);
+	if (!priv->wq) {
+		ret = -ENOMEM;
+		goto fail_free_priv;
+	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res) {
 		dev_err(dev->dev, "failed to get memory resource\n");
 		ret = -EINVAL;
-		goto fail;
+		goto fail_free_wq;
 	}
 
 	priv->mmio = ioremap_nocache(res->start, resource_size(res));
 	if (!priv->mmio) {
 		dev_err(dev->dev, "failed to ioremap\n");
 		ret = -ENOMEM;
-		goto fail;
+		goto fail_free_wq;
 	}
 
 	priv->clk = clk_get(dev->dev, "fck");
 	if (IS_ERR(priv->clk)) {
 		dev_err(dev->dev, "failed to get functional clock\n");
 		ret = -ENODEV;
-		goto fail;
+		goto fail_iounmap;
 	}
 
 	priv->disp_clk = clk_get(dev->dev, "dpll_disp_ck");
 	if (IS_ERR(priv->clk)) {
 		dev_err(dev->dev, "failed to get display clock\n");
 		ret = -ENODEV;
-		goto fail;
+		goto fail_put_clk;
 	}
 
 #ifdef CONFIG_CPU_FREQ
@@ -206,7 +211,7 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags)
 			CPUFREQ_TRANSITION_NOTIFIER);
 	if (ret) {
 		dev_err(dev->dev, "failed to register cpufreq notifier\n");
-		goto fail;
+		goto fail_put_disp_clk;
 	}
 #endif
 
@@ -238,13 +243,13 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags)
 	ret = modeset_init(dev);
 	if (ret < 0) {
 		dev_err(dev->dev, "failed to initialize mode setting\n");
-		goto fail;
+		goto fail_cpufreq_unregister;
 	}
 
 	ret = drm_vblank_init(dev, 1);
 	if (ret < 0) {
 		dev_err(dev->dev, "failed to initialize vblank\n");
-		goto fail;
+		goto fail_mode_config_cleanup;
 	}
 
 	pm_runtime_get_sync(dev->dev);
@@ -252,7 +257,7 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags)
 	pm_runtime_put_sync(dev->dev);
 	if (ret < 0) {
 		dev_err(dev->dev, "failed to install IRQ handler\n");
-		goto fail;
+		goto fail_vblank_cleanup;
 	}
 
 	platform_set_drvdata(pdev, dev);
@@ -260,13 +265,48 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags)
 	priv->fbdev = drm_fbdev_cma_init(dev, 16,
 			dev->mode_config.num_crtc,
 			dev->mode_config.num_connector);
+	if (IS_ERR(priv->fbdev)) {
+		ret = PTR_ERR(priv->fbdev);
+		goto fail_irq_uninstall;
+	}
 
 	drm_kms_helper_poll_init(dev);
 
 	return 0;
 
-fail:
-	tilcdc_unload(dev);
+fail_irq_uninstall:
+	pm_runtime_get_sync(dev->dev);
+	drm_irq_uninstall(dev);
+	pm_runtime_put_sync(dev->dev);
+
+fail_vblank_cleanup:
+	drm_vblank_cleanup(dev);
+
+fail_mode_config_cleanup:
+	drm_mode_config_cleanup(dev);
+
+fail_cpufreq_unregister:
+	pm_runtime_disable(dev->dev);
+#ifdef CONFIG_CPU_FREQ
+	cpufreq_unregister_notifier(&priv->freq_transition,
+			CPUFREQ_TRANSITION_NOTIFIER);
+fail_put_disp_clk:
+	clk_put(priv->disp_clk);
+#endif
+
+fail_put_clk:
+	clk_put(priv->clk);
+
+fail_iounmap:
+	iounmap(priv->mmio);
+
+fail_free_wq:
+	flush_workqueue(priv->wq);
+	destroy_workqueue(priv->wq);
+
+fail_free_priv:
+	dev->dev_private = NULL;
+	kfree(priv);
 	return ret;
 }
 

From 3b163cb4c566c773d049648ec8da752d37f5c200 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Mon, 8 Sep 2014 10:33:32 +1000
Subject: [PATCH 1031/1185] drm/nouveau/bios: memset dcb struct to zero before
 parsing

commit 595d373f1e9c9ce0fc946457fdb488e8a58972cd upstream.

Fixes type/mask calculation being based on uninitialised data for VGA
outputs.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c b/drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c
index 2d9b9d7a7992..f3edd2841f2d 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c
@@ -124,6 +124,7 @@ dcb_outp_parse(struct nouveau_bios *bios, u8 idx, u8 *ver, u8 *len,
 	       struct dcb_output *outp)
 {
 	u16 dcb = dcb_outp(bios, idx, ver, len);
+	memset(outp, 0x00, sizeof(*outp));
 	if (dcb) {
 		if (*ver >= 0x20) {
 			u32 conn = nv_ro32(bios, dcb + 0x00);

From ed070066305551c2bf71b0737fcc5fe3e136aed6 Mon Sep 17 00:00:00 2001
From: Maciej Matraszek <m.matraszek@samsung.com>
Date: Mon, 15 Sep 2014 05:14:48 -0300
Subject: [PATCH 1032/1185] media: v4l2-common: fix overflow in
 v4l_bound_align_image()

commit 3bacc10cd4a85bc70bc0b6c001d3bf995c7fe04c upstream.

Fix clamp_align() used in v4l_bound_align_image() to prevent overflow
when passed large value like UINT32_MAX.

 In the current implementation:
    clamp_align(UINT32_MAX, 8, 8192, 3)

returns 8, because in line:

    x = (x + (1 << (align - 1))) & mask;

x overflows to (-1 + 4) & 0x7 = 3, while expected value is 8192.

v4l_bound_align_image() is heavily used in VIDIOC_S_FMT and
VIDIOC_SUBDEV_S_FMT ioctls handlers, and documentation of the latter
explicitly states that:

"The modified format should be as close as possible to the original
request."
  -- http://linuxtv.org/downloads/v4l-dvb-apis/vidioc-subdev-g-fmt.html

Thus one would expect, that passing UINT32_MAX as format width and
height will result in setting maximum possible resolution for the
device. Particularly, when the driver doesn't support
VIDIOC_ENUM_FRAMESIZES ioctl, which is common in the codebase.

Fixes changeset: b0d3159be9a3

Signed-off-by: Maciej Matraszek <m.matraszek@samsung.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/v4l2-core/v4l2-common.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/media/v4l2-core/v4l2-common.c b/drivers/media/v4l2-core/v4l2-common.c
index 3fed63f4e026..ec9a4fa3bc86 100644
--- a/drivers/media/v4l2-core/v4l2-common.c
+++ b/drivers/media/v4l2-core/v4l2-common.c
@@ -485,16 +485,13 @@ static unsigned int clamp_align(unsigned int x, unsigned int min,
 	/* Bits that must be zero to be aligned */
 	unsigned int mask = ~((1 << align) - 1);
 
+	/* Clamp to aligned min and max */
+	x = clamp(x, (min + ~mask) & mask, max & mask);
+
 	/* Round to nearest aligned value */
 	if (align)
 		x = (x + (1 << (align - 1))) & mask;
 
-	/* Clamp to aligned value of min and max */
-	if (x < min)
-		x = (min + ~mask) & mask;
-	else if (x > max)
-		x = max & mask;
-
 	return x;
 }
 

From 7dbdd9018603c417a64736414262a4e9b6203390 Mon Sep 17 00:00:00 2001
From: Frank Schaefer <fschaefer.oss@googlemail.com>
Date: Sat, 9 Aug 2014 06:37:20 -0300
Subject: [PATCH 1033/1185] media: em28xx-v4l: give back all active video
 buffers to the vb2 core properly on streaming stop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 627530c32a43283474e9dd3e954519410ffa033a upstream.

When a new video frame is started, the driver takes the next video buffer from
the list of active buffers and moves it to dev->usb_ctl.vid_buf / dev->usb_ctl.vbi_buf
for further processing.

On streaming stop we currently only give back the pending buffers from the list
but not the ones which are currently processed.

This causes the following warning from the vb2 core since kernel 3.15:

...
 ------------[ cut here ]------------
 WARNING: CPU: 1 PID: 2284 at drivers/media/v4l2-core/videobuf2-core.c:2115 __vb2_queue_cancel+0xed/0x150 [videobuf2_core]()
 [...]
 Call Trace:
  [<c0769c46>] dump_stack+0x48/0x69
  [<c0245b69>] warn_slowpath_common+0x79/0x90
  [<f925e4ad>] ? __vb2_queue_cancel+0xed/0x150 [videobuf2_core]
  [<f925e4ad>] ? __vb2_queue_cancel+0xed/0x150 [videobuf2_core]
  [<c0245bfd>] warn_slowpath_null+0x1d/0x20
  [<f925e4ad>] __vb2_queue_cancel+0xed/0x150 [videobuf2_core]
  [<f925fa35>] vb2_internal_streamoff+0x35/0x90 [videobuf2_core]
  [<f925fac5>] vb2_streamoff+0x35/0x60 [videobuf2_core]
  [<f925fb27>] vb2_ioctl_streamoff+0x37/0x40 [videobuf2_core]
  [<f8e45895>] v4l_streamoff+0x15/0x20 [videodev]
  [<f8e4925d>] __video_do_ioctl+0x23d/0x2d0 [videodev]
  [<f8e49020>] ? video_ioctl2+0x20/0x20 [videodev]
  [<f8e48c63>] video_usercopy+0x203/0x5a0 [videodev]
  [<f8e49020>] ? video_ioctl2+0x20/0x20 [videodev]
  [<c039d0e7>] ? fsnotify+0x1e7/0x2b0
  [<f8e49012>] video_ioctl2+0x12/0x20 [videodev]
  [<f8e49020>] ? video_ioctl2+0x20/0x20 [videodev]
  [<f8e4461e>] v4l2_ioctl+0xee/0x130 [videodev]
  [<f8e44530>] ? v4l2_open+0xf0/0xf0 [videodev]
  [<c0378de2>] do_vfs_ioctl+0x2e2/0x4d0
  [<c0368eec>] ? vfs_write+0x13c/0x1c0
  [<c0369a8f>] ? vfs_writev+0x2f/0x50
  [<c0379028>] SyS_ioctl+0x58/0x80
  [<c076fff3>] sysenter_do_call+0x12/0x12
 ---[ end trace 5545f934409f13f4 ]---
...

Many thanks to Hans Verkuil, whose recently added check in the vb2 core unveiled
this long standing issue and who has investigated it further.

Signed-off-by: Frank Schäfer <fschaefer.oss@googlemail.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/em28xx/em28xx-video.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/media/usb/em28xx/em28xx-video.c b/drivers/media/usb/em28xx/em28xx-video.c
index 32d60e5546bc..a2737b4b090b 100644
--- a/drivers/media/usb/em28xx/em28xx-video.c
+++ b/drivers/media/usb/em28xx/em28xx-video.c
@@ -696,13 +696,16 @@ static int em28xx_stop_streaming(struct vb2_queue *vq)
 	}
 
 	spin_lock_irqsave(&dev->slock, flags);
+	if (dev->usb_ctl.vid_buf != NULL) {
+		vb2_buffer_done(&dev->usb_ctl.vid_buf->vb, VB2_BUF_STATE_ERROR);
+		dev->usb_ctl.vid_buf = NULL;
+	}
 	while (!list_empty(&vidq->active)) {
 		struct em28xx_buffer *buf;
 		buf = list_entry(vidq->active.next, struct em28xx_buffer, list);
 		list_del(&buf->list);
 		vb2_buffer_done(&buf->vb, VB2_BUF_STATE_ERROR);
 	}
-	dev->usb_ctl.vid_buf = NULL;
 	spin_unlock_irqrestore(&dev->slock, flags);
 
 	return 0;
@@ -724,13 +727,16 @@ int em28xx_stop_vbi_streaming(struct vb2_queue *vq)
 	}
 
 	spin_lock_irqsave(&dev->slock, flags);
+	if (dev->usb_ctl.vbi_buf != NULL) {
+		vb2_buffer_done(&dev->usb_ctl.vbi_buf->vb, VB2_BUF_STATE_ERROR);
+		dev->usb_ctl.vbi_buf = NULL;
+	}
 	while (!list_empty(&vbiq->active)) {
 		struct em28xx_buffer *buf;
 		buf = list_entry(vbiq->active.next, struct em28xx_buffer, list);
 		list_del(&buf->list);
 		vb2_buffer_done(&buf->vb, VB2_BUF_STATE_ERROR);
 	}
-	dev->usb_ctl.vbi_buf = NULL;
 	spin_unlock_irqrestore(&dev->slock, flags);
 
 	return 0;

From f879c8cce0b17a7939a96bb6d0967cc5aafc8dee Mon Sep 17 00:00:00 2001
From: Ulrich Eckhardt <uli-lirc@uli-eckhardt.de>
Date: Fri, 10 Oct 2014 14:19:12 -0300
Subject: [PATCH 1034/1185] media: ds3000: fix LNB supply voltage on Tevii S480
 on initialization

commit 8c5bcded11cb607b1bb5920de3b9c882136d27db upstream.

The Tevii S480 outputs 18V on startup for the LNB supply voltage and does not
automatically power down. This blocks other receivers connected
to a satellite channel router (EN50494), since the receivers can not send the
required DiSEqC sequences when the Tevii card is connected to the same SCR.

This patch switches off the LNB supply voltage on initialization of the frontend.

[mchehab@osg.samsung.com: add a comment about why we're explicitly
 turning off voltage at device init]
Signed-off-by: Ulrich Eckhardt <uli@uli-eckhardt.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/dvb-frontends/ds3000.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/media/dvb-frontends/ds3000.c b/drivers/media/dvb-frontends/ds3000.c
index 1e344b033277..22e8c2032f6d 100644
--- a/drivers/media/dvb-frontends/ds3000.c
+++ b/drivers/media/dvb-frontends/ds3000.c
@@ -864,6 +864,13 @@ struct dvb_frontend *ds3000_attach(const struct ds3000_config *config,
 	memcpy(&state->frontend.ops, &ds3000_ops,
 			sizeof(struct dvb_frontend_ops));
 	state->frontend.demodulator_priv = state;
+
+	/*
+	 * Some devices like T480 starts with voltage on. Be sure
+	 * to turn voltage off during init, as this can otherwise
+	 * interfere with Unicable SCR systems.
+	 */
+	ds3000_set_voltage(&state->frontend, SEC_VOLTAGE_OFF);
 	return &state->frontend;
 
 error3:

From 6033d64297fac0c26284fe9b7ca8751906dcf277 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Fri, 8 Aug 2014 10:32:56 -0300
Subject: [PATCH 1035/1185] media: tda7432: Fix setting TDA7432_MUTE bit for
 TDA7432_RF register

commit 91ba0e59babdb3c7aca836a65f1095b3eaff7b06 upstream.

Fix a copy-paste bug when converting to the control framework.

Fixes: commit 5d478e0de871 ("[media] tda7432: convert to the control framework")

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/i2c/tda7432.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/i2c/tda7432.c b/drivers/media/i2c/tda7432.c
index 28b5121881f5..09f4387dbc49 100644
--- a/drivers/media/i2c/tda7432.c
+++ b/drivers/media/i2c/tda7432.c
@@ -293,7 +293,7 @@ static int tda7432_s_ctrl(struct v4l2_ctrl *ctrl)
 		if (t->mute->val) {
 			lf |= TDA7432_MUTE;
 			lr |= TDA7432_MUTE;
-			lf |= TDA7432_MUTE;
+			rf |= TDA7432_MUTE;
 			rr |= TDA7432_MUTE;
 		}
 		/* Mute & update balance*/

From 3ea61129fec62fbb7fba38e60d00e4f9d776cfa5 Mon Sep 17 00:00:00 2001
From: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Date: Fri, 17 Oct 2014 22:55:59 +0200
Subject: [PATCH 1036/1185] kvm: fix excessive pages un-pinning in
 kvm_iommu_map error path.

commit 3d32e4dbe71374a6780eaf51d719d76f9a9bf22f upstream.

The third parameter of kvm_unpin_pages() when called from
kvm_iommu_map_pages() is wrong, it should be the number of pages to un-pin
and not the page size.

This error was facilitated with an inconsistent API: kvm_pin_pages() takes
a size, but kvm_unpin_pages() takes a number of pages, so fix the problem
by matching the two.

This was introduced by commit 350b8bd ("kvm: iommu: fix the third parameter
of kvm_iommu_put_pages (CVE-2014-3601)"), which fixes the lack of
un-pinning for pages intended to be un-pinned (i.e. memory leak) but
unfortunately potentially aggravated the number of pages we un-pin that
should have stayed pinned. As far as I understand though, the same
practical mitigations apply.

This issue was found during review of Red Hat 6.6 patches to prepare
Ksplice rebootless updates.

Thanks to Vegard for his time on a late Friday evening to help me in
understanding this code.

Fixes: 350b8bd ("kvm: iommu: fix the third parameter of... (CVE-2014-3601)")
Signed-off-by: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Jamie Iles <jamie.iles@oracle.com>
Reviewed-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 virt/kvm/iommu.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index dec997188dfb..a650aa48c786 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -43,13 +43,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 				gfn_t base_gfn, unsigned long npages);
 
 static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
-			   unsigned long size)
+			   unsigned long npages)
 {
 	gfn_t end_gfn;
 	pfn_t pfn;
 
 	pfn     = gfn_to_pfn_memslot(slot, gfn);
-	end_gfn = gfn + (size >> PAGE_SHIFT);
+	end_gfn = gfn + npages;
 	gfn    += 1;
 
 	if (is_error_noslot_pfn(pfn))
@@ -119,7 +119,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 		 * Pin all pages we are about to map in memory. This is
 		 * important because we unmap and unpin in 4kb steps later.
 		 */
-		pfn = kvm_pin_pages(slot, gfn, page_size);
+		pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
 		if (is_error_noslot_pfn(pfn)) {
 			gfn += 1;
 			continue;
@@ -131,7 +131,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
 			       "iommu failed to map pfn=%llx\n", pfn);
-			kvm_unpin_pages(kvm, pfn, page_size);
+			kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
 			goto unmap_pages;
 		}
 

From 1bea37d63c16c5988d83ac2431c38e0f0a55cf37 Mon Sep 17 00:00:00 2001
From: Andy Honig <ahonig@google.com>
Date: Wed, 27 Aug 2014 11:16:44 -0700
Subject: [PATCH 1037/1185] KVM: x86: Prevent host from panicking on shared MSR
 writes.

commit 8b3c3104c3f4f706e99365c3e0d2aa61b95f969f upstream.

The previous patch blocked invalid writes directly when the MSR
is written.  As a precaution, prevent future similar mistakes by
gracefully handling GPs caused by writes to shared MSRs.

Signed-off-by: Andrew Honig <ahonig@google.com>
[Remove parts obsoleted by Nadav's patch. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/vmx.c              |  7 +++++--
 arch/x86/kvm/x86.c              | 11 ++++++++---
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 373058c9b75d..0312876eadb3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1011,7 +1011,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
 
 void kvm_define_shared_msr(unsigned index, u32 msr);
-void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8d9d37ff8250..882d6a95fa1b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2493,12 +2493,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			break;
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
+			u64 old_msr_data = msr->data;
 			msr->data = data;
 			if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
 				preempt_disable();
-				kvm_set_shared_msr(msr->index, msr->data,
-						   msr->mask);
+				ret = kvm_set_shared_msr(msr->index, msr->data,
+							 msr->mask);
 				preempt_enable();
+				if (ret)
+					msr->data = old_msr_data;
 			}
 			break;
 		}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e8753555f144..33ea3d07005f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -225,20 +225,25 @@ static void kvm_shared_msr_cpu_online(void)
 		shared_msr_update(i, shared_msrs_global.msrs[i]);
 }
 
-void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
+int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 {
 	unsigned int cpu = smp_processor_id();
 	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+	int err;
 
 	if (((value ^ smsr->values[slot].curr) & mask) == 0)
-		return;
+		return 0;
 	smsr->values[slot].curr = value;
-	wrmsrl(shared_msrs_global.msrs[slot], value);
+	err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
+	if (err)
+		return 1;
+
 	if (!smsr->registered) {
 		smsr->urn.on_user_return = kvm_on_user_return;
 		user_return_notifier_register(&smsr->urn);
 		smsr->registered = true;
 	}
+	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
 

From ca09be78c8d5d2a4fe38ec97a61b3c7fc3463794 Mon Sep 17 00:00:00 2001
From: Andy Honig <ahonig@google.com>
Date: Wed, 27 Aug 2014 14:42:54 -0700
Subject: [PATCH 1038/1185] KVM: x86: Improve thread safety in pit

commit 2febc839133280d5a5e8e1179c94ea674489dae2 upstream.

There's a race condition in the PIT emulation code in KVM.  In
__kvm_migrate_pit_timer the pit_timer object is accessed without
synchronization.  If the race condition occurs at the wrong time this
can crash the host kernel.

This fixes CVE-2014-3611.

Signed-off-by: Andrew Honig <ahonig@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/i8254.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 518d86471b76..298781d4cfb4 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 		return;
 
 	timer = &pit->pit_state.timer;
+	mutex_lock(&pit->pit_state.lock);
 	if (hrtimer_cancel(timer))
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+	mutex_unlock(&pit->pit_state.lock);
 }
 
 static void destroy_pit_timer(struct kvm_pit *pit)

From ea30614738b5faf98a1a695f78ce11447d4eb072 Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@cs.technion.ac.il>
Date: Tue, 16 Sep 2014 03:24:05 +0300
Subject: [PATCH 1039/1185] KVM: x86: Check non-canonical addresses upon WRMSR

commit 854e8bb1aa06c578c2c9145fa6bfe3680ef63b23 upstream.

Upon WRMSR, the CPU should inject #GP if a non-canonical value (address) is
written to certain MSRs. The behavior is "almost" identical for AMD and Intel
(ignoring MSRs that are not implemented in either architecture since they would
anyhow #GP). However, IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
non-canonical address is written on Intel but not on AMD (which ignores the top
32-bits).

Accordingly, this patch injects a #GP on the MSRs which behave identically on
Intel and AMD.  To eliminate the differences between the architectures, the
value which is written to IA32_SYSENTER_ESP and IA32_SYSENTER_EIP is turned to
canonical value before writing instead of injecting a #GP.

Some references from Intel and AMD manuals:

According to Intel SDM description of WRMSR instruction #GP is expected on
WRMSR "If the source register contains a non-canonical address and ECX
specifies one of the following MSRs: IA32_DS_AREA, IA32_FS_BASE, IA32_GS_BASE,
IA32_KERNEL_GS_BASE, IA32_LSTAR, IA32_SYSENTER_EIP, IA32_SYSENTER_ESP."

According to the AMD instruction manual:
LSTAR/CSTAR (SYSCALL): "The WRMSR instruction loads the target RIP into the
LSTAR and CSTAR registers.  If an RIP written by WRMSR is not in canonical
form, a general-protection exception (#GP) occurs."
IA32_GS_BASE and IA32_FS_BASE (WRFSBASE/WRGSBASE): "The address written to the
base field must be in canonical form or a #GP fault will occur."
IA32_KERNEL_GS_BASE (SWAPGS): "The address stored in the KernelGSbase MSR must
be in canonical form."

This patch fixes CVE-2014-3610.

Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/kvm_host.h | 14 ++++++++++++++
 arch/x86/kvm/svm.c              |  2 +-
 arch/x86/kvm/vmx.c              |  2 +-
 arch/x86/kvm/x86.c              | 27 ++++++++++++++++++++++++++-
 4 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0312876eadb3..4c481e751e8e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -953,6 +953,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }
 
+static inline u64 get_canonical(u64 la)
+{
+	return ((int64_t)la << 16) >> 16;
+}
+
+static inline bool is_noncanonical_address(u64 la)
+{
+#ifdef CONFIG_X86_64
+	return get_canonical(la) != la;
+#else
+	return false;
+#endif
+}
+
 #define TSS_IOPB_BASE_OFFSET 0x66
 #define TSS_BASE_SIZE 0x68
 #define TSS_IOPB_SIZE (65536 / 8)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 765210d4d925..f8ada7867443 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3196,7 +3196,7 @@ static int wrmsr_interception(struct vcpu_svm *svm)
 	msr.host_initiated = false;
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-	if (svm_set_msr(&svm->vcpu, &msr)) {
+	if (kvm_set_msr(&svm->vcpu, &msr)) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(&svm->vcpu, 0);
 	} else {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 882d6a95fa1b..e89f887d9f40 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5065,7 +5065,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
 	msr.data = data;
 	msr.index = ecx;
 	msr.host_initiated = false;
-	if (vmx_set_msr(vcpu, &msr) != 0) {
+	if (kvm_set_msr(vcpu, &msr) != 0) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(vcpu, 0);
 		return 1;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 33ea3d07005f..684f46dc87de 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -925,7 +925,6 @@ void kvm_enable_efer_bits(u64 mask)
 }
 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
 
-
 /*
  * Writes msr value into into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
@@ -933,8 +932,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
  */
 int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
+	switch (msr->index) {
+	case MSR_FS_BASE:
+	case MSR_GS_BASE:
+	case MSR_KERNEL_GS_BASE:
+	case MSR_CSTAR:
+	case MSR_LSTAR:
+		if (is_noncanonical_address(msr->data))
+			return 1;
+		break;
+	case MSR_IA32_SYSENTER_EIP:
+	case MSR_IA32_SYSENTER_ESP:
+		/*
+		 * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
+		 * non-canonical address is written on Intel but not on
+		 * AMD (which ignores the top 32-bits, because it does
+		 * not implement 64-bit SYSENTER).
+		 *
+		 * 64-bit code should hence be able to write a non-canonical
+		 * value on AMD.  Making the address canonical ensures that
+		 * vmentry does not fail on Intel after writing a non-canonical
+		 * value, and that something deterministic happens if the guest
+		 * invokes 64-bit SYSENTER.
+		 */
+		msr->data = get_canonical(msr->data);
+	}
 	return kvm_x86_ops->set_msr(vcpu, msr);
 }
+EXPORT_SYMBOL_GPL(kvm_set_msr);
 
 /*
  * Adapt set_msr() to msr_io()'s calling convention

From e56b9c47d05e4d18e9ddc0cdf8b2716f4de17a25 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 18 Sep 2014 16:21:16 +0300
Subject: [PATCH 1040/1185] kvm: x86: don't kill guest on unknown exit reason

commit 2bc19dc3754fc066c43799659f0d848631c44cfe upstream.

KVM_EXIT_UNKNOWN is a kvm bug, we don't really know whether it was
triggered by a privileged application.  Let's not kill the guest: WARN
and inject #UD instead.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/svm.c | 6 +++---
 arch/x86/kvm/vmx.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f8ada7867443..8bf40a243d75 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3478,9 +3478,9 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 
 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
 	    || !svm_exit_handlers[exit_code]) {
-		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-		kvm_run->hw.hardware_exit_reason = exit_code;
-		return 0;
+		WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
 	}
 
 	return svm_exit_handlers[exit_code](svm);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e89f887d9f40..51139ff34917 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6654,10 +6654,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu);
 	else {
-		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
-		vcpu->run->hw.hardware_exit_reason = exit_reason;
+		WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
 	}
-	return 0;
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)

From d092975d028dcb428f926511e0129705bf714d5c Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@cs.technion.ac.il>
Date: Thu, 18 Sep 2014 22:39:37 +0300
Subject: [PATCH 1041/1185] KVM: x86: Fix wrong masking on relative jump/call

commit 05c83ec9b73c8124555b706f6af777b10adf0862 upstream.

Relative jumps and calls do the masking according to the operand size, and not
according to the address size as the KVM emulator does today.

This patch fixes KVM behavior.

Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/emulate.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index fb3fddc322f8..450a8cba2795 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -663,11 +663,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
 	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
 }
 
-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
-	register_address_increment(ctxt, &ctxt->_eip, rel);
-}
-
 static u32 desc_limit_scaled(struct desc_struct *desc)
 {
 	u32 limit = get_desc_limit(desc);
@@ -741,6 +736,28 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
 	return emulate_exception(ctxt, NM_VECTOR, 0, false);
 }
 
+static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+	switch (ctxt->op_bytes) {
+	case 2:
+		ctxt->_eip = (u16)dst;
+		break;
+	case 4:
+		ctxt->_eip = (u32)dst;
+		break;
+	case 8:
+		ctxt->_eip = dst;
+		break;
+	default:
+		WARN(1, "unsupported eip assignment size\n");
+	}
+}
+
+static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+	assign_eip_near(ctxt, ctxt->_eip + rel);
+}
+
 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
 {
 	u16 selector;

From 0efb0baaa7d349bce883bbcaed45319baa33a309 Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@cs.technion.ac.il>
Date: Thu, 18 Sep 2014 22:39:38 +0300
Subject: [PATCH 1042/1185] KVM: x86: Emulator fixes for eip canonical checks
 on near branches

commit 234f3ce485d54017f15cf5e0699cff4100121601 upstream.

Before changing rip (during jmp, call, ret, etc.) the target should be checked
to be canonical, as real CPUs do.  During sysret, both the target rsp and rip
should be canonical. If any of these values is noncanonical, a #GP exception
should occur.  The exceptions to this rule are the syscall and sysenter
instructions, in which the assigned rip is checked during the assignment to
the relevant MSRs.

This patch fixes the emulator to behave as real CPUs do for near branches.
Far branches are handled by the next patch.

This fixes CVE-2014-3647.

Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/emulate.c | 80 +++++++++++++++++++++++++++++-------------
 1 file changed, 55 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 450a8cba2795..4c01f022c6ac 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -736,7 +736,8 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
 	return emulate_exception(ctxt, NM_VECTOR, 0, false);
 }
 
-static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+			       int cs_l)
 {
 	switch (ctxt->op_bytes) {
 	case 2:
@@ -746,16 +747,25 @@ static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
 		ctxt->_eip = (u32)dst;
 		break;
 	case 8:
+		if ((cs_l && is_noncanonical_address(dst)) ||
+		    (!cs_l && (dst & ~(u32)-1)))
+			return emulate_gp(ctxt, 0);
 		ctxt->_eip = dst;
 		break;
 	default:
 		WARN(1, "unsupported eip assignment size\n");
 	}
+	return X86EMUL_CONTINUE;
 }
 
-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
 {
-	assign_eip_near(ctxt, ctxt->_eip + rel);
+	return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+	return assign_eip_near(ctxt, ctxt->_eip + rel);
 }
 
 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
@@ -2178,13 +2188,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
 	case 2: /* call near abs */ {
 		long int old_eip;
 		old_eip = ctxt->_eip;
-		ctxt->_eip = ctxt->src.val;
+		rc = assign_eip_near(ctxt, ctxt->src.val);
+		if (rc != X86EMUL_CONTINUE)
+			break;
 		ctxt->src.val = old_eip;
 		rc = em_push(ctxt);
 		break;
 	}
 	case 4: /* jmp abs */
-		ctxt->_eip = ctxt->src.val;
+		rc = assign_eip_near(ctxt, ctxt->src.val);
 		break;
 	case 5: /* jmp far */
 		rc = em_jmp_far(ctxt);
@@ -2216,10 +2228,14 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 
 static int em_ret(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->dst.type = OP_REG;
-	ctxt->dst.addr.reg = &ctxt->_eip;
-	ctxt->dst.bytes = ctxt->op_bytes;
-	return em_pop(ctxt);
+	int rc;
+	unsigned long eip;
+
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	return assign_eip_near(ctxt, eip);
 }
 
 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
@@ -2486,7 +2502,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 {
 	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct cs, ss;
-	u64 msr_data;
+	u64 msr_data, rcx, rdx;
 	int usermode;
 	u16 cs_sel = 0, ss_sel = 0;
 
@@ -2502,6 +2518,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	else
 		usermode = X86EMUL_MODE_PROT32;
 
+	rcx = reg_read(ctxt, VCPU_REGS_RCX);
+	rdx = reg_read(ctxt, VCPU_REGS_RDX);
+
 	cs.dpl = 3;
 	ss.dpl = 3;
 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
@@ -2519,6 +2538,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 		ss_sel = cs_sel + 8;
 		cs.d = 0;
 		cs.l = 1;
+		if (is_noncanonical_address(rcx) ||
+		    is_noncanonical_address(rdx))
+			return emulate_gp(ctxt, 0);
 		break;
 	}
 	cs_sel |= SELECTOR_RPL_MASK;
@@ -2527,8 +2549,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
-	ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
-	*reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
+	ctxt->_eip = rdx;
+	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
 
 	return X86EMUL_CONTINUE;
 }
@@ -3067,10 +3089,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
 
 static int em_call(struct x86_emulate_ctxt *ctxt)
 {
+	int rc;
 	long rel = ctxt->src.val;
 
 	ctxt->src.val = (unsigned long)ctxt->_eip;
-	jmp_rel(ctxt, rel);
+	rc = jmp_rel(ctxt, rel);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
 	return em_push(ctxt);
 }
 
@@ -3102,11 +3127,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
+	unsigned long eip;
 
-	ctxt->dst.type = OP_REG;
-	ctxt->dst.addr.reg = &ctxt->_eip;
-	ctxt->dst.bytes = ctxt->op_bytes;
-	rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rc = assign_eip_near(ctxt, eip);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	rsp_increment(ctxt, ctxt->src.val);
@@ -3396,20 +3422,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
 
 static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
+	int rc = X86EMUL_CONTINUE;
+
 	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
 	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
 	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 
-	return X86EMUL_CONTINUE;
+	return rc;
 }
 
 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
 {
-	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
-		jmp_rel(ctxt, ctxt->src.val);
+	int rc = X86EMUL_CONTINUE;
 
-	return X86EMUL_CONTINUE;
+	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
+		rc = jmp_rel(ctxt, ctxt->src.val);
+
+	return rc;
 }
 
 static int em_in(struct x86_emulate_ctxt *ctxt)
@@ -4738,7 +4768,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 		break;
 	case 0x70 ... 0x7f: /* jcc (short) */
 		if (test_cc(ctxt->b, ctxt->eflags))
-			jmp_rel(ctxt, ctxt->src.val);
+			rc = jmp_rel(ctxt, ctxt->src.val);
 		break;
 	case 0x8d: /* lea r16/r32, m */
 		ctxt->dst.val = ctxt->src.addr.mem.ea;
@@ -4767,7 +4797,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 		break;
 	case 0xe9: /* jmp rel */
 	case 0xeb: /* jmp rel short */
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 		ctxt->dst.type = OP_NONE; /* Disable writeback. */
 		break;
 	case 0xf4:              /* hlt */
@@ -4879,7 +4909,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 		break;
 	case 0x80 ... 0x8f: /* jnz rel, etc*/
 		if (test_cc(ctxt->b, ctxt->eflags))
-			jmp_rel(ctxt, ctxt->src.val);
+			rc = jmp_rel(ctxt, ctxt->src.val);
 		break;
 	case 0x90 ... 0x9f:     /* setcc r/m8 */
 		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);

From e27cd7561593ffa85ab51083b27e2bcbd5f62318 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 20 Jun 2014 16:24:49 +0530
Subject: [PATCH 1043/1185] ARC: [nsimosci] Allow "headless" models to boot

commit 5c05483e2db91890faa9a7be0a831701a3f442d6 upstream.

There are certain test configurations of the virtual platform which don't
have any real console device (uart/pgu). So add tty0 as a fallback console
device to allow the system to boot and be accessible via telnet.

Otherwise with ttyS0 as only console, but 8250 disabled in kernel build,
init chokes.

Reported-by: Anton Kolesov <akolesov@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/boot/dts/nsimosci.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts
index 4f31b2eb5cdf..398064cef746 100644
--- a/arch/arc/boot/dts/nsimosci.dts
+++ b/arch/arc/boot/dts/nsimosci.dts
@@ -20,7 +20,7 @@ chosen {
 		/* this is for console on PGU */
 		/* bootargs = "console=tty0 consoleblank=0"; */
 		/* this is for console on serial */
-		bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=ttyS0,115200n8 consoleblank=0 debug";
+		bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
 	};
 
 	aliases {

From 73213e6c423392a828cab912ddbd986b454283e2 Mon Sep 17 00:00:00 2001
From: Anton Kolesov <Anton.Kolesov@synopsys.com>
Date: Thu, 25 Sep 2014 13:23:24 +0400
Subject: [PATCH 1044/1185] ARC: Update order of registers in KGDB to match GDB
 7.5

commit ebc0c74e76cec9c4dd860eb0ca1c0b39dc63c482 upstream.

Order of registers has changed in GDB moving from 6.8 to 7.5. This patch
updates KGDB to work properly with GDB 7.5, though makes it incompatible
with 6.8.

Signed-off-by: Anton Kolesov <Anton.Kolesov@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/include/asm/kgdb.h | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/arch/arc/include/asm/kgdb.h b/arch/arc/include/asm/kgdb.h
index 4930957ca3d3..e897610c657a 100644
--- a/arch/arc/include/asm/kgdb.h
+++ b/arch/arc/include/asm/kgdb.h
@@ -19,7 +19,7 @@
  * register API yet */
 #undef DBG_MAX_REG_NUM
 
-#define GDB_MAX_REGS		39
+#define GDB_MAX_REGS		87
 
 #define BREAK_INSTR_SIZE	2
 #define CACHE_FLUSH_IS_SAFE	1
@@ -33,23 +33,27 @@ static inline void arch_kgdb_breakpoint(void)
 
 extern void kgdb_trap(struct pt_regs *regs, int param);
 
-enum arc700_linux_regnums {
+/* This is the numbering of registers according to the GDB. See GDB's
+ * arc-tdep.h for details.
+ *
+ * Registers are ordered for GDB 7.5. It is incompatible with GDB 6.8. */
+enum arc_linux_regnums {
 	_R0		= 0,
 	_R1, _R2, _R3, _R4, _R5, _R6, _R7, _R8, _R9, _R10, _R11, _R12, _R13,
 	_R14, _R15, _R16, _R17, _R18, _R19, _R20, _R21, _R22, _R23, _R24,
 	_R25, _R26,
-	_BTA		= 27,
-	_LP_START	= 28,
-	_LP_END		= 29,
-	_LP_COUNT	= 30,
-	_STATUS32	= 31,
-	_BLINK		= 32,
-	_FP		= 33,
-	__SP		= 34,
-	_EFA		= 35,
-	_RET		= 36,
-	_ORIG_R8	= 37,
-	_STOP_PC	= 38
+	_FP		= 27,
+	__SP		= 28,
+	_R30		= 30,
+	_BLINK		= 31,
+	_LP_COUNT	= 60,
+	_STOP_PC	= 64,
+	_RET		= 64,
+	_LP_START	= 65,
+	_LP_END		= 66,
+	_STATUS32	= 67,
+	_ECR		= 76,
+	_BTA		= 82,
 };
 
 #else

From e80395d5cdc7c193ac7cc7faff8b7bf313b9164f Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Fri, 3 Oct 2014 14:35:56 -0700
Subject: [PATCH 1045/1185] qla_target: don't delete changed nacls

commit f4c24db1b7ad0ce84409e15744d26c6f86a96840 upstream.

The code is currently riddled with "drop the hardware_lock to avoid a
deadlock" bugs that expose races.  One of those races seems to expose a
valid warning in tcm_qla2xxx_clear_nacl_from_fcport_map.  Add some
bandaid to it.

Signed-off-by: Joern Engel <joern@logfs.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/qla2xxx/tcm_qla2xxx.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 66b0b26a1381..cfd49eca67aa 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -762,7 +762,16 @@ static void tcm_qla2xxx_clear_nacl_from_fcport_map(struct qla_tgt_sess *sess)
 	pr_debug("fc_rport domain: port_id 0x%06x\n", nacl->nport_id);
 
 	node = btree_remove32(&lport->lport_fcport_map, nacl->nport_id);
-	WARN_ON(node && (node != se_nacl));
+	if (WARN_ON(node && (node != se_nacl))) {
+		/*
+		 * The nacl no longer matches what we think it should be.
+		 * Most likely a new dynamic acl has been added while
+		 * someone dropped the hardware lock.  It clearly is a
+		 * bug elsewhere, but this bit can't make things worse.
+		 */
+		btree_insert32(&lport->lport_fcport_map, nacl->nport_id,
+			       node, GFP_ATOMIC);
+	}
 
 	pr_debug("Removed from fcport_map: %p for WWNN: 0x%016LX, port_id: 0x%06x\n",
 	    se_nacl, nacl->nport_wwnn, nacl->nport_id);

From b036d398754a2b7045735a8839c7314c08eaedd7 Mon Sep 17 00:00:00 2001
From: Quinn Tran <quinn.tran@qlogic.com>
Date: Thu, 25 Sep 2014 06:22:28 -0400
Subject: [PATCH 1046/1185] target: Fix queue full status NULL pointer for
 SCF_TRANSPORT_TASK_SENSE

commit 082f58ac4a48d3f5cb4597232cb2ac6823a96f43 upstream.

During temporary resource starvation at the lower transport layer, a command
is placed on the queue full retry path, which exposes this problem.  The TCM
queue full handling of SCF_TRANSPORT_TASK_SENSE currently sends the same
cmd twice to the lower layer.  The 1st time leads to the normal cmd free path.
The 2nd time causes a NULL pointer access.

This regression bug was originally introduced in v3.1-rc code in the
following commit:

commit e057f53308a5f071556ee80586b99ee755bf07f5
Author: Christoph Hellwig <hch@infradead.org>
Date:   Mon Oct 17 13:56:41 2011 -0400

    target: remove the transport_qf_callback se_cmd callback

Signed-off-by: Quinn Tran <quinn.tran@qlogic.com>
Signed-off-by: Saurav Kashyap <saurav.kashyap@qlogic.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/target/target_core_transport.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 6866d86e8663..12342695ed79 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1788,8 +1788,7 @@ static void transport_complete_qf(struct se_cmd *cmd)
 
 	if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) {
 		ret = cmd->se_tfo->queue_status(cmd);
-		if (ret)
-			goto out;
+		goto out;
 	}
 
 	switch (cmd->data_direction) {

From ec0f40e8d1e3660c1595e2fc5cb295cb3127ee40 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sat, 4 Oct 2014 04:23:15 +0000
Subject: [PATCH 1047/1185] target: Fix APTPL metadata handling for dynamic
 MappedLUNs

commit e24805637d2d270d7975502e9024d473de86afdb upstream.

This patch fixes a bug in handling of SPC-3 PR Activate Persistence
across Target Power Loss (APTPL) logic where re-creation of state for
MappedLUNs from dynamically generated NodeACLs did not occur during
I_T Nexus establishment.

It adds the missing core_scsi3_check_aptpl_registration() call during
core_tpg_check_initiator_node_acl() -> core_tpg_add_node_to_devs() in
order to replay any pre-loaded APTPL metadata state associated with
the newly connected SCSI Initiator Port.

Cc: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/target/target_core_device.c | 3 ++-
 drivers/target/target_core_pr.c     | 6 +++---
 drivers/target/target_core_pr.h     | 2 +-
 drivers/target/target_core_tpg.c    | 8 ++++++++
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 68398753eb82..2be407e22eb4 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -1293,7 +1293,8 @@ int core_dev_add_initiator_node_lun_acl(
 	 * Check to see if there are any existing persistent reservation APTPL
 	 * pre-registrations that need to be enabled for this LUN ACL..
 	 */
-	core_scsi3_check_aptpl_registration(lun->lun_se_dev, tpg, lun, lacl);
+	core_scsi3_check_aptpl_registration(lun->lun_se_dev, tpg, lun, nacl,
+					    lacl->mapped_lun);
 	return 0;
 }
 
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 04a74938bb43..27ec6e4d1c7c 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -945,10 +945,10 @@ int core_scsi3_check_aptpl_registration(
 	struct se_device *dev,
 	struct se_portal_group *tpg,
 	struct se_lun *lun,
-	struct se_lun_acl *lun_acl)
+	struct se_node_acl *nacl,
+	u32 mapped_lun)
 {
-	struct se_node_acl *nacl = lun_acl->se_lun_nacl;
-	struct se_dev_entry *deve = nacl->device_list[lun_acl->mapped_lun];
+	struct se_dev_entry *deve = nacl->device_list[mapped_lun];
 
 	if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS)
 		return 0;
diff --git a/drivers/target/target_core_pr.h b/drivers/target/target_core_pr.h
index b4a004247ab2..ea9220de1dff 100644
--- a/drivers/target/target_core_pr.h
+++ b/drivers/target/target_core_pr.h
@@ -55,7 +55,7 @@ extern int core_scsi3_alloc_aptpl_registration(
 			unsigned char *, u16, u32, int, int, u8);
 extern int core_scsi3_check_aptpl_registration(struct se_device *,
 			struct se_portal_group *, struct se_lun *,
-			struct se_lun_acl *);
+			struct se_node_acl *, u32);
 extern void core_scsi3_free_pr_reg_from_nacl(struct se_device *,
 					     struct se_node_acl *);
 extern void core_scsi3_free_all_registrations(struct se_device *);
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index aac9d2727e3c..8572207e3d4d 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -40,6 +40,7 @@
 #include <target/target_core_fabric.h>
 
 #include "target_core_internal.h"
+#include "target_core_pr.h"
 
 extern struct se_device *g_lun0_dev;
 
@@ -165,6 +166,13 @@ void core_tpg_add_node_to_devs(
 
 		core_enable_device_list_for_node(lun, NULL, lun->unpacked_lun,
 				lun_access, acl, tpg);
+		/*
+		 * Check to see if there are any existing persistent reservation
+		 * APTPL pre-registrations that need to be enabled for this dynamic
+		 * LUN ACL now..
+		 */
+		core_scsi3_check_aptpl_registration(dev, tpg, lun, acl,
+						    lun->unpacked_lun);
 		spin_lock(&tpg->tpg_lun_lock);
 	}
 	spin_unlock(&tpg->tpg_lun_lock);

From b0de6ef3648d201f714bf769347705d74ecca34e Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Mon, 20 Oct 2014 15:34:23 -0700
Subject: [PATCH 1048/1185] MIPS: tlbex: Properly fix HUGE TLB Refill exception
 handler

commit 9e0f162a36914937a937358fcb45e0609ef2bfc4 upstream.

In commit 8393c524a25609 (MIPS: tlbex: Fix a missing statement for
HUGETLB), the TLB Refill handler was fixed so that non-OCTEON targets
would work properly with huge pages.  The change was incorrect in that
it broke the OCTEON case.

The problem is shown here:

    xxx0:	df7a0000 	ld	k0,0(k1)
    .
    .
    .
    xxxc0:	df610000 	ld	at,0(k1)
    xxxc4:	335a0ff0 	andi	k0,k0,0xff0
    xxxc8:	e825ffcd 	bbit1	at,0x5,0x0
    xxxcc:	003ad82d 	daddu	k1,at,k0
    .
    .
    .

In the non-octeon case there is a destructive test for the huge PTE
bit, and then at 0, $k0 is reloaded (that is what the 8393c524a25609
patch added).

In the octeon case, we modify k1 in the branch delay slot, but we
never need k0 again, so the new load is not needed, but since k1 is
modified, if we do the load, we load from a garbage location and then
get a nested TLB Refill, which is seen in userspace as either SIGBUS
or SIGSEGV (depending on the garbage).

The real fix is to only do this reloading if it is needed, and never
where it is harmful.

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: Huacai Chen <chenhc@lemote.com>
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/8151/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/mm/tlbex.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 0e17e1352718..a91a7a99f70f 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -1091,6 +1091,7 @@ static void __cpuinit build_update_entries(u32 **p, unsigned int tmp,
 struct mips_huge_tlb_info {
 	int huge_pte;
 	int restore_scratch;
+	bool need_reload_pte;
 };
 
 static struct mips_huge_tlb_info __cpuinit
@@ -1105,6 +1106,7 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
 
 	rv.huge_pte = scratch;
 	rv.restore_scratch = 0;
+	rv.need_reload_pte = false;
 
 	if (check_for_high_segbits) {
 		UASM_i_MFC0(p, tmp, C0_BADVADDR);
@@ -1293,6 +1295,7 @@ static void __cpuinit build_r4000_tlb_refill_handler(void)
 	} else {
 		htlb_info.huge_pte = K0;
 		htlb_info.restore_scratch = 0;
+		htlb_info.need_reload_pte = true;
 		vmalloc_mode = refill_noscratch;
 		/*
 		 * create the plain linear handler
@@ -1329,7 +1332,8 @@ static void __cpuinit build_r4000_tlb_refill_handler(void)
 	}
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 	uasm_l_tlb_huge_update(&l, p);
-	UASM_i_LW(&p, K0, 0, K1);
+	if (htlb_info.need_reload_pte)
+		UASM_i_LW(&p, htlb_info.huge_pte, 0, K1);
 	build_huge_update_entries(&p, htlb_info.huge_pte, K1);
 	build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random,
 				   htlb_info.restore_scratch);

From c38e36f1966284360a229c7c6e9f2ba601869c2f Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 16 Sep 2014 14:43:09 -0400
Subject: [PATCH 1049/1185] jbd2: free bh when descriptor block checksum fails

commit 064d83892e9ba547f7d4eae22cbca066d95210ce upstream.

Free the buffer head if the journal descriptor block fails checksum
verification.

This is the jbd2 port of the e2fsprogs patch "e2fsck: free bh on csum
verify error in do_one_pass".

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/jbd2/recovery.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 3bacc1909ddb..6e2fb5cbacde 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -522,6 +522,7 @@ static int do_one_pass(journal_t *journal,
 			    !jbd2_descr_block_csum_verify(journal,
 							  bh->b_data)) {
 				err = -EIO;
+				brelse(bh);
 				goto failed;
 			}
 

From cfcc2239096d692a3993b8462594b494da20eddf Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 16 Sep 2014 14:34:59 -0400
Subject: [PATCH 1050/1185] ext4: check EA value offset when loading

commit a0626e75954078cfacddb00a4545dde821170bc5 upstream.

When loading extended attributes, check each entry's value offset to
make sure it doesn't collide with the entries.

Without this check it is easy to crash the kernel by mounting a
malicious FS containing a file with an EA wherein e_value_offs = 0 and
e_value_size > 0 and then deleting the EA, which corrupts the name
list.

(See the f_ea_value_crash test's FS image in e2fsprogs for an example.)

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/xattr.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 298e9c8da364..a20816e7eb3a 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -189,14 +189,28 @@ ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
 }
 
 static int
-ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end)
+ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,
+		       void *value_start)
 {
-	while (!IS_LAST_ENTRY(entry)) {
-		struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry);
+	struct ext4_xattr_entry *e = entry;
+
+	while (!IS_LAST_ENTRY(e)) {
+		struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
 		if ((void *)next >= end)
 			return -EIO;
-		entry = next;
+		e = next;
 	}
+
+	while (!IS_LAST_ENTRY(entry)) {
+		if (entry->e_value_size != 0 &&
+		    (value_start + le16_to_cpu(entry->e_value_offs) <
+		     (void *)e + sizeof(__u32) ||
+		     value_start + le16_to_cpu(entry->e_value_offs) +
+		    le32_to_cpu(entry->e_value_size) > end))
+			return -EIO;
+		entry = EXT4_XATTR_NEXT(entry);
+	}
+
 	return 0;
 }
 
@@ -213,7 +227,8 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
 		return -EIO;
 	if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
 		return -EIO;
-	error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
+	error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
+				       bh->b_data);
 	if (!error)
 		set_buffer_verified(bh);
 	return error;
@@ -329,7 +344,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
 	header = IHDR(inode, raw_inode);
 	entry = IFIRST(header);
 	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
-	error = ext4_xattr_check_names(entry, end);
+	error = ext4_xattr_check_names(entry, end, entry);
 	if (error)
 		goto cleanup;
 	error = ext4_xattr_find_entry(&entry, name_index, name,
@@ -457,7 +472,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 	raw_inode = ext4_raw_inode(&iloc);
 	header = IHDR(inode, raw_inode);
 	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
-	error = ext4_xattr_check_names(IFIRST(header), end);
+	error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header));
 	if (error)
 		goto cleanup;
 	error = ext4_xattr_list_entries(dentry, IFIRST(header),
@@ -972,7 +987,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
 	is->s.here = is->s.first;
 	is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
 	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
-		error = ext4_xattr_check_names(IFIRST(header), is->s.end);
+		error = ext4_xattr_check_names(IFIRST(header), is->s.end,
+					       IFIRST(header));
 		if (error)
 			return error;
 		/* Find the named attribute. */

From b69805f848234c9dd5ffd53d33d1bb264c91697e Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 18 Sep 2014 01:12:15 -0400
Subject: [PATCH 1051/1185] ext4: don't check quota format when there are no
 quota files

commit 279bf6d390933d5353ab298fcc306c391a961469 upstream.

The check whether quota format is set even though there are no
quota files with journalled quota is pointless and it actually
makes it impossible to turn off journalled quotas (as there's
no way to unset journalled quota format). Just remove the check.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/super.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1fc14f7a08b2..4a33907c3c8a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1632,13 +1632,6 @@ static int parse_options(char *options, struct super_block *sb,
 					"not specified");
 			return 0;
 		}
-	} else {
-		if (sbi->s_jquota_fmt) {
-			ext4_msg(sb, KERN_ERR, "journaled quota format "
-					"specified with no journaling "
-					"enabled");
-			return 0;
-		}
 	}
 #endif
 	if (test_opt(sb, DIOREAD_NOLOCK)) {

From 744539f1d7e5349ff8084432ccb67f02e015fcf7 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Fri, 3 Oct 2014 12:47:23 -0400
Subject: [PATCH 1052/1185] ext4: grab missed write_count for
 EXT4_IOC_SWAP_BOOT

commit 3e67cfad22230ebed85c56cbe413876f33fea82b upstream.

Otherwise this provokes a complaint like the following:
WARNING: CPU: 12 PID: 5795 at fs/ext4/ext4_jbd2.c:48 ext4_journal_check_start+0x4e/0xa0()
Modules linked in: brd iTCO_wdt lpc_ich mfd_core igb ptp dm_mirror dm_region_hash dm_log dm_mod
CPU: 12 PID: 5795 Comm: python Not tainted 3.17.0-rc2-00175-gae5344f #158
Hardware name: Intel Corporation W2600CR/W2600CR, BIOS SE5C600.86B.99.99.x028.061320111235 06/13/2011
 0000000000000030 ffff8808116cfd28 ffffffff815c7dfc 0000000000000030
 0000000000000000 ffff8808116cfd68 ffffffff8106ce8c ffff8808116cfdc8
 ffff880813b16000 ffff880806ad6ae8 ffffffff81202008 0000000000000000
Call Trace:
 [<ffffffff815c7dfc>] dump_stack+0x51/0x6d
 [<ffffffff8106ce8c>] warn_slowpath_common+0x8c/0xc0
 [<ffffffff81202008>] ? ext4_ioctl+0x9e8/0xeb0
 [<ffffffff8106ceda>] warn_slowpath_null+0x1a/0x20
 [<ffffffff8122867e>] ext4_journal_check_start+0x4e/0xa0
 [<ffffffff81228c10>] __ext4_journal_start_sb+0x90/0x110
 [<ffffffff81202008>] ext4_ioctl+0x9e8/0xeb0
 [<ffffffff8107b0bd>] ? ptrace_stop+0x24d/0x2f0
 [<ffffffff81088530>] ? alloc_pid+0x480/0x480
 [<ffffffff8107b1f2>] ? ptrace_do_notify+0x92/0xb0
 [<ffffffff81186545>] do_vfs_ioctl+0x4e5/0x550
 [<ffffffff815cdbcb>] ? _raw_spin_unlock_irq+0x2b/0x40
 [<ffffffff81186603>] SyS_ioctl+0x53/0x80
 [<ffffffff815ce2ce>] tracesys+0xd0/0xd5

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ioctl.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 42624a995b00..d4fd81c44f55 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -549,9 +549,17 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	}
 
 	case EXT4_IOC_SWAP_BOOT:
+	{
+		int err;
 		if (!(filp->f_mode & FMODE_WRITE))
 			return -EBADF;
-		return swap_inode_boot_loader(sb, inode);
+		err = mnt_want_write_file(filp);
+		if (err)
+			return err;
+		err = swap_inode_boot_loader(sb, inode);
+		mnt_drop_write_file(filp);
+		return err;
+	}
 
 	case EXT4_IOC_RESIZE_FS: {
 		ext4_fsblk_t n_blocks_count;

From 65f2579916b6224c06514382d2bfc5841d43f291 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 5 Oct 2014 22:56:00 -0400
Subject: [PATCH 1053/1185] ext4: add ext4_iget_normal() which is to be used
 for dir tree lookups

commit f4bb2981024fc91b23b4d09a8817c415396dbabb upstream.

If there is a corrupted file system which has directory entries that
point at reserved, metadata inodes, prohibit them from being used by
treating them the same way we treat Boot Loader inodes --- that is,
mark them to be bad inodes.  This prohibits them from being opened,
deleted, or modified via chmod, chown, utimes, etc.

In particular, this prevents a corrupted file system which has a
directory entry which points at the journal inode from being deleted
and its blocks released, after which point Much Hilarity Ensues.

Reported-by: Sami Liedes <sami.liedes@iki.fi>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ext4.h  | 1 +
 fs/ext4/inode.c | 7 +++++++
 fs/ext4/namei.c | 4 ++--
 fs/ext4/super.c | 2 +-
 4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 790b14c5f262..3891475e22f7 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2088,6 +2088,7 @@ int do_journal_get_write_access(handle_t *handle,
 #define CONVERT_INLINE_DATA	 2
 
 extern struct inode *ext4_iget(struct super_block *, unsigned long);
+extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
 extern int  ext4_write_inode(struct inode *, struct writeback_control *);
 extern int  ext4_setattr(struct dentry *, struct iattr *);
 extern int  ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f9e11df768d5..c04f7adc7b58 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4351,6 +4351,13 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 	return ERR_PTR(ret);
 }
 
+struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino)
+{
+	if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
+		return ERR_PTR(-EIO);
+	return ext4_iget(sb, ino);
+}
+
 static int ext4_inode_blocks_set(handle_t *handle,
 				struct ext4_inode *raw_inode,
 				struct ext4_inode_info *ei)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index ab2f6dc44b3a..f1312173fa90 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1430,7 +1430,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
 					 dentry->d_name.name);
 			return ERR_PTR(-EIO);
 		}
-		inode = ext4_iget(dir->i_sb, ino);
+		inode = ext4_iget_normal(dir->i_sb, ino);
 		if (inode == ERR_PTR(-ESTALE)) {
 			EXT4_ERROR_INODE(dir,
 					 "deleted inode referenced: %u",
@@ -1461,7 +1461,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
 		return ERR_PTR(-EIO);
 	}
 
-	return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
+	return d_obtain_alias(ext4_iget_normal(child->d_inode->i_sb, ino));
 }
 
 /*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4a33907c3c8a..a1b780abdf18 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -964,7 +964,7 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
 	 * Currently we don't know the generation for parent directory, so
 	 * a generation of 0 means "accept any"
 	 */
-	inode = ext4_iget(sb, ino);
+	inode = ext4_iget_normal(sb, ino);
 	if (IS_ERR(inode))
 		return ERR_CAST(inode);
 	if (generation && inode->i_generation != generation) {

From 1f1ccdde66ed867e2a71297bdb6cf1b7c6a32351 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Sat, 11 Oct 2014 19:51:17 -0400
Subject: [PATCH 1054/1185] ext4: fix reservation overflow in
 ext4_da_write_begin

commit 0ff8947fc5f700172b37cbca811a38eb9cb81e08 upstream.

Delalloc write journal reservations only reserve 1 credit,
to update the inode if necessary.  However, it may happen
once in a filesystem's lifetime that a file will cross
the 2G threshold, and require the LARGE_FILE feature to
be set in the superblock as well, if it was not set already.

This overruns the transaction reservation, and can be
demonstrated simply on any ext4 filesystem without the LARGE_FILE
feature already set:

dd if=/dev/zero of=testfile bs=1 seek=2147483646 count=1 \
	conv=notrunc of=testfile
sync
dd if=/dev/zero of=testfile bs=1 seek=2147483647 count=1 \
	conv=notrunc of=testfile

leads to:

EXT4-fs: ext4_do_update_inode:4296: aborting transaction: error 28 in __ext4_handle_dirty_super
EXT4-fs error (device loop0) in ext4_do_update_inode:4301: error 28
EXT4-fs error (device loop0) in ext4_reserve_inode_write:4757: Readonly filesystem
EXT4-fs error (device loop0) in ext4_dirty_inode:4876: error 28
EXT4-fs error (device loop0) in ext4_da_write_end:2685: error 28

Adjust the number of credits based on whether the flag is
already set, and whether the current write may extend past the
LARGE_FILE limit.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/inode.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c04f7adc7b58..e48bd5a1814b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2647,6 +2647,20 @@ static int ext4_nonda_switch(struct super_block *sb)
 	return 0;
 }
 
+/* We always reserve for an inode update; the superblock could be there too */
+static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len)
+{
+	if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+				EXT4_FEATURE_RO_COMPAT_LARGE_FILE)))
+		return 1;
+
+	if (pos + len <= 0x7fffffffULL)
+		return 1;
+
+	/* We might need to update the superblock to set LARGE_FILE */
+	return 2;
+}
+
 static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 			       loff_t pos, unsigned len, unsigned flags,
 			       struct page **pagep, void **fsdata)
@@ -2697,7 +2711,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 	 * of file which has an already mapped buffer.
 	 */
 retry_journal:
-	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
+	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+				ext4_da_write_credits(inode, pos, len));
 	if (IS_ERR(handle)) {
 		page_cache_release(page);
 		return PTR_ERR(handle);

From 209f5484ef126134f2d2f322246b0e4faf3c1fbd Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 14 Oct 2014 02:35:49 -0400
Subject: [PATCH 1055/1185] ext4: check s_chksum_driver when looking for bg
 csum presence

commit 813d32f91333e4c33d5a19b67167c4bae42dae75 upstream.

Convert the ext4_has_group_desc_csum predicate to look for a checksum
driver instead of the metadata_csum flag and change the bg checksum
calculation function to look for GDT_CSUM before taking the crc16
path.

Without this patch, if we mount with ^uninit_bg,^metadata_csum and
later metadata_csum gets turned on by accident, the block group
checksum functions will incorrectly assume that checksumming is
enabled (metadata_csum) but that crc16 should be used
(!s_chksum_driver).  This is totally wrong, so fix the predicate
and the checksum formula selection.

(Granted, if the metadata_csum feature bit gets enabled on a live FS
then something underhanded is going on, but we could at least avoid
writing garbage into the on-disk fields.)

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ext4.h  | 4 ++--
 fs/ext4/super.c | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3891475e22f7..e4c4ac07cc32 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2261,8 +2261,8 @@ extern int ext4_register_li_request(struct super_block *sb,
 static inline int ext4_has_group_desc_csum(struct super_block *sb)
 {
 	return EXT4_HAS_RO_COMPAT_FEATURE(sb,
-					  EXT4_FEATURE_RO_COMPAT_GDT_CSUM |
-					  EXT4_FEATURE_RO_COMPAT_METADATA_CSUM);
+					  EXT4_FEATURE_RO_COMPAT_GDT_CSUM) ||
+	       (EXT4_SB(sb)->s_chksum_driver != NULL);
 }
 
 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a1b780abdf18..21a0b43a7d31 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1950,6 +1950,10 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
 	}
 
 	/* old crc16 code */
+	if (!(sbi->s_es->s_feature_ro_compat &
+	      cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
+		return 0;
+
 	offset = offsetof(struct ext4_group_desc, bg_checksum);
 
 	crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));

From 57ce0ed4fba064145d497dc901bee4f74cfc5c25 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 30 Oct 2014 10:52:57 -0400
Subject: [PATCH 1056/1185] ext4: fix overflow when updating superblock backups
 after resize

commit 9378c6768e4fca48971e7b6a9075bc006eda981d upstream.

When there are no meta block groups, update_backups() will compute the
backup block in 32-bit arithmetic, thus possibly overflowing the block
number and corrupting the filesystem. OTOH filesystems without meta
block groups larger than 16 TB should be rare. Fix the problem by doing
the counting in 64-bit arithmetic.

Coverity-id: 741252
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/resize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index c503850a61a8..a69bd74ed390 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1066,7 +1066,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
 			break;
 
 		if (meta_bg == 0)
-			backup_block = group * bpg + blk_off;
+			backup_block = ((ext4_fsblk_t)group) * bpg + blk_off;
 		else
 			backup_block = (ext4_group_first_block_no(sb, group) +
 					ext4_bg_has_super(sb, group));

From 4bf70f9f0280e5f396e4338967982c4325167259 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali.rohar@gmail.com>
Date: Thu, 16 Oct 2014 01:16:51 +0200
Subject: [PATCH 1057/1185] cpufreq: intel_pstate: Fix setting max_perf_pct in
 performance policy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 36b4bed5cd8f6e17019fa7d380e0836872c7b367 upstream.

Code which changes the policy to powersave also changes max_policy_pct based
on max_freq. Code which changes max_perf_pct has an upper limit based on the
value of max_policy_pct. When the policy is changed from powersave back to
performance, max_policy_pct is not changed. This means that max_perf_pct
cannot be set to high values if max_freq was too low in the powersave policy.

Test case:

$ cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq
800000
$ cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
3300000
$ cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
performance
$ cat /sys/devices/system/cpu/intel_pstate/max_perf_pct
100

$ echo powersave > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
$ echo 800000 > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
$ echo 20 > /sys/devices/system/cpu/intel_pstate/max_perf_pct

$ cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
powersave
$ cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
800000
$ cat /sys/devices/system/cpu/intel_pstate/max_perf_pct
20

$ echo performance > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
$ echo 3300000 > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
$ echo 100 > /sys/devices/system/cpu/intel_pstate/max_perf_pct

$ cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
performance
$ cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
3300000
$ cat /sys/devices/system/cpu/intel_pstate/max_perf_pct
24

Now the intel_pstate driver caps the maximal value of max_perf_pct at
max_policy_pct, which is still 24 from the previous powersave max_freq of
800000.

This patch sets max_policy_pct back to its default value when the policy is set
to performance, so that max_perf_pct can be set to its maximum value again.

Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Acked-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cpufreq/intel_pstate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 34d19b1984a1..decf84e71943 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -599,6 +599,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
 		limits.min_perf_pct = 100;
 		limits.min_perf = int_tofp(1);
+		limits.max_policy_pct = 100;
 		limits.max_perf_pct = 100;
 		limits.max_perf = int_tofp(1);
 		limits.no_turbo = 0;

From 6ce75ebb5334c12febaf520682926ce34c3cfe2b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 30 Oct 2014 10:53:16 -0400
Subject: [PATCH 1058/1185] ext4: fix oops when loading block bitmap failed

commit 599a9b77ab289d85c2d5c8607624efbe1f552b0f upstream.

When we fail to load block bitmap in __ext4_new_inode() we will
dereference NULL pointer in ext4_journal_get_write_access(). So check
for error from ext4_read_block_bitmap().

Coverity-id: 989065
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ialloc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 1ecd3a8c2444..4d4718cf25ab 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -793,6 +793,10 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
 		struct buffer_head *block_bitmap_bh;
 
 		block_bitmap_bh = ext4_read_block_bitmap(sb, group);
+		if (!block_bitmap_bh) {
+			err = -EIO;
+			goto out;
+		}
 		BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
 		err = ext4_journal_get_write_access(handle, block_bitmap_bh);
 		if (err) {

From f95ad6ed20948dee0a7c1472250b530136f75db3 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 21 Oct 2014 09:27:12 +0200
Subject: [PATCH 1059/1185] freezer: Do not freeze tasks killed by OOM killer

commit 51fae6da640edf9d266c94f36bc806c63c301991 upstream.

Since f660daac474c6f (oom: thaw threads if oom killed thread is frozen
before deferring) OOM killer relies on being able to thaw a frozen task
to handle OOM situation but a3201227f803 (freezer: make freezing() test
freeze conditions in effect instead of TIF_FREEZE) has reorganized the
code and stopped clearing freeze flag in __thaw_task. This means that
the target task only wakes up and goes into the fridge again because the
freezing condition hasn't changed for it. This reintroduces the bug
fixed by f660daac474c6f.

Fix the issue by checking for TIF_MEMDIE thread flag in
freezing_slow_path and exclude the task from freezing completely. If a
task was already frozen it would get woken by __thaw_task from OOM killer
and get out of freezer after rechecking freezing().

Changes since v1
- put TIF_MEMDIE check into freezing_slow_path rather than in __refrigerator
  as per Oleg
- return __thaw_task into oom_scan_process_thread because
  oom_kill_process will not wake task in the fridge because it is
  sleeping uninterruptible

[mhocko@suse.cz: rewrote the changelog]
Fixes: a3201227f803 (freezer: make freezing() test freeze conditions in effect instead of TIF_FREEZE)
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/freezer.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/freezer.c b/kernel/freezer.c
index 78758512b1e1..bd733f6e610d 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -42,6 +42,9 @@ bool freezing_slow_path(struct task_struct *p)
 	if (p->flags & PF_NOFREEZE)
 		return false;
 
+	if (test_thread_flag(TIF_MEMDIE))
+		return false;
+
 	if (pm_nosig_freezing || cgroup_freezing(p))
 		return true;
 

From e033782a2669ae60db31d28127974bac18c63e33 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.cz>
Date: Mon, 20 Oct 2014 18:12:32 +0200
Subject: [PATCH 1060/1185] OOM, PM: OOM killed task shouldn't escape PM
 suspend

commit 5695be142e203167e3cb515ef86a88424f3524eb upstream.

PM freezer relies on having all tasks frozen by the time devices are
getting frozen so that no task will touch them while they are getting
frozen. But OOM killer is allowed to kill an already frozen task in
order to handle OOM situation. In order to protect from late wake ups
OOM killer is disabled after all tasks are frozen. This, however, still
keeps a window open when a killed task didn't manage to die by the time
freeze_processes finishes.

Reduce the race window by checking all tasks after OOM killer has been
disabled. This is still not race free completely unfortunately because
oom_killer_disable cannot stop an already ongoing OOM killer so a task
might still wake up from the fridge and get killed without
freeze_processes noticing. Full synchronization of OOM and freezer is,
however, too heavy weight for this highly unlikely case.

Introduce and check oom_kills counter which gets incremented early when
the allocator enters __alloc_pages_may_oom path and only check all the
tasks if the counter changes during the freezing attempt. The counter
is updated so early to reduce the race window since allocator checked
oom_killer_disabled which is set by PM-freezing code. A false positive
will push the PM-freezer into a slow path but that is not a big deal.

Changes since v1
- push the re-check loop out of freeze_processes into
  check_frozen_processes and invert the condition to make the code more
  readable as per Rafael

Fixes: f660daac474c6f (oom: thaw threads if oom killed thread is frozen before deferring)
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/oom.h    |  3 +++
 kernel/power/process.c | 40 +++++++++++++++++++++++++++++++++++++++-
 mm/oom_kill.c          | 17 +++++++++++++++++
 mm/page_alloc.c        |  8 ++++++++
 4 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/include/linux/oom.h b/include/linux/oom.h
index da60007075b5..297cda528855 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -50,6 +50,9 @@ static inline bool oom_task_origin(const struct task_struct *p)
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
 		unsigned long totalpages);
+
+extern int oom_kills_count(void);
+extern void note_oom_kill(void);
 extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 			     unsigned int points, unsigned long totalpages,
 			     struct mem_cgroup *memcg, nodemask_t *nodemask,
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 1b212bee1510..0695319b5fde 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -103,6 +103,28 @@ static int try_to_freeze_tasks(bool user_only)
 	return todo ? -EBUSY : 0;
 }
 
+/*
+ * Returns true if all freezable tasks (except for current) are frozen already
+ */
+static bool check_frozen_processes(void)
+{
+	struct task_struct *g, *p;
+	bool ret = true;
+
+	read_lock(&tasklist_lock);
+	for_each_process_thread(g, p) {
+		if (p != current && !freezer_should_skip(p) &&
+		    !frozen(p)) {
+			ret = false;
+			goto done;
+		}
+	}
+done:
+	read_unlock(&tasklist_lock);
+
+	return ret;
+}
+
 /**
  * freeze_processes - Signal user space processes to enter the refrigerator.
  *
@@ -111,6 +133,7 @@ static int try_to_freeze_tasks(bool user_only)
 int freeze_processes(void)
 {
 	int error;
+	int oom_kills_saved;
 
 	error = __usermodehelper_disable(UMH_FREEZING);
 	if (error)
@@ -121,12 +144,27 @@ int freeze_processes(void)
 
 	printk("Freezing user space processes ... ");
 	pm_freezing = true;
+	oom_kills_saved = oom_kills_count();
 	error = try_to_freeze_tasks(true);
 	if (!error) {
-		printk("done.");
 		__usermodehelper_set_disable_depth(UMH_DISABLED);
 		oom_killer_disable();
+
+		/*
+		 * There might have been an OOM kill while we were
+		 * freezing tasks and the killed task might be still
+		 * on the way out so we have to double check for race.
+		 */
+		if (oom_kills_count() != oom_kills_saved &&
+				!check_frozen_processes()) {
+			__usermodehelper_set_disable_depth(UMH_ENABLED);
+			printk("OOM in progress.");
+			error = -EBUSY;
+			goto done;
+		}
+		printk("done.");
 	}
+done:
 	printk("\n");
 	BUG_ON(in_atomic());
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 8e40908e724a..f104c7e9f61e 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -402,6 +402,23 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 		dump_tasks(memcg, nodemask);
 }
 
+/*
+ * Number of OOM killer invocations (including memcg OOM killer).
+ * Primarily used by PM freezer to check for potential races with
+ * OOM killed frozen task.
+ */
+static atomic_t oom_kills = ATOMIC_INIT(0);
+
+int oom_kills_count(void)
+{
+	return atomic_read(&oom_kills);
+}
+
+void note_oom_kill(void)
+{
+	atomic_inc(&oom_kills);
+}
+
 #define K(x) ((x) << (PAGE_SHIFT-10))
 /*
  * Must be called while holding a reference to p, which will be released upon
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 71305c6aba5b..494a081ec5e4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2119,6 +2119,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 		return NULL;
 	}
 
+	/*
+	 * PM-freezer should be notified that there might be an OOM killer on
+	 * its way to kill and wake somebody up. This is too early and we might
+	 * end up not killing anything but false positives are acceptable.
+	 * See freeze_processes.
+	 */
+	note_oom_kill();
+
 	/*
 	 * Go through the zonelist yet one more time, keep very high watermark
 	 * here, this is only to catch a parallel oom killing, we must fail if

From 81548735844bf3de7ee6d5a5afde9841de697a7e Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Thu, 25 Sep 2014 15:27:00 +0100
Subject: [PATCH 1061/1185] staging:iio:ad5933: Fix NULL pointer deref when
 enabling buffer

commit 824269c5868d2a7a26417e5ef3841a27d42c6139 upstream.

In older versions of the IIO framework it was possible to pass a
completely different set of channels to iio_buffer_register() than the one
that is assigned to the IIO device. Commit 959d2952d124 ("staging:iio: make
iio_sw_buffer_preenable much more general.") introduced a restriction that
requires that the set of channels that is passed to iio_buffer_register() is
a subset of the channels assigned to the IIO device as the IIO core will use
the list of channels that is assigned to the device to lookup a channel by
scan index in iio_compute_scan_bytes(). If it can not find the channel the
function will crash. This patch fixes the issue by making sure that the same
set of channels is assigned to the IIO device and passed to
iio_buffer_register().

Fixes the following NULL pointer dereference kernel crash:
	Unable to handle kernel NULL pointer dereference at virtual address 00000016
	pgd = d53d0000
	[00000016] *pgd=1534e831, *pte=00000000, *ppte=00000000
	Internal error: Oops: 17 [#1] PREEMPT SMP ARM
	Modules linked in:
	CPU: 1 PID: 1626 Comm: bash Not tainted 3.15.0-19969-g2a180eb-dirty #9545
	task: d6c124c0 ti: d539a000 task.ti: d539a000
	PC is at iio_compute_scan_bytes+0x34/0xa8
	LR is at iio_compute_scan_bytes+0x34/0xa8
	pc : [<c03052e4>]    lr : [<c03052e4>]    psr: 60070013
	sp : d539beb8  ip : 00000001  fp : 00000000
	r10: 00000002  r9 : 00000000  r8 : 00000001
	r7 : 00000000  r6 : d6dc8800  r5 : d7571000  r4 : 00000002
	r3 : d7571000  r2 : 00000044  r1 : 00000001  r0 : 00000000
	Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
	Control: 18c5387d  Table: 153d004a  DAC: 00000015
	Process bash (pid: 1626, stack limit = 0xd539a240)
	Stack: (0xd539beb8 to 0xd539c000)
	bea0:                                                       c02fc0e4 d7571000
	bec0: d76c1640 d6dc8800 d757117c 00000000 d757112c c0305b04 d76c1690 d76c1640
	bee0: d7571188 00000002 00000000 d7571000 d539a000 00000000 000dd1c8 c0305d54
	bf00: d7571010 0160b868 00000002 c69d3900 d7573278 d7573308 c69d3900 c01ece90
	bf20: 00000002 c0103fac c0103f6c d539bf88 00000002 c69d3b00 c69d3b0c c0103468
	bf40: 00000000 00000000 d7694a00 00000002 000af408 d539bf88 c000dd84 c00b2f94
	bf60: d7694a00 000af408 00000002 d7694a00 d7694a00 00000002 000af408 c000dd84
	bf80: 00000000 c00b32d0 00000000 00000000 00000002 b6f1aa78 00000002 000af408
	bfa0: 00000004 c000dc00 b6f1aa78 00000002 00000001 000af408 00000002 00000000
	bfc0: b6f1aa78 00000002 000af408 00000004 be806a4c 000a6094 00000000 000dd1c8
	bfe0: 00000000 be8069cc b6e8ab77 b6ec125c 40070010 00000001 22940489 154a5007
	[<c03052e4>] (iio_compute_scan_bytes) from [<c0305b04>] (__iio_update_buffers+0x248/0x438)
	[<c0305b04>] (__iio_update_buffers) from [<c0305d54>] (iio_buffer_store_enable+0x60/0x7c)
	[<c0305d54>] (iio_buffer_store_enable) from [<c01ece90>] (dev_attr_store+0x18/0x24)
	[<c01ece90>] (dev_attr_store) from [<c0103fac>] (sysfs_kf_write+0x40/0x4c)
	[<c0103fac>] (sysfs_kf_write) from [<c0103468>] (kernfs_fop_write+0x110/0x154)
	[<c0103468>] (kernfs_fop_write) from [<c00b2f94>] (vfs_write+0xd0/0x160)
	[<c00b2f94>] (vfs_write) from [<c00b32d0>] (SyS_write+0x40/0x78)
	[<c00b32d0>] (SyS_write) from [<c000dc00>] (ret_fast_syscall+0x0/0x30)
	Code: ea00000e e1a01008 e1a00005 ebfff6fc (e5d0a016)

Fixes: 959d2952d124 ("staging:iio: make iio_sw_buffer_preenable much more general.")
Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/iio/impedance-analyzer/ad5933.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c
index 6330af656a0f..9d5f205807c5 100644
--- a/drivers/staging/iio/impedance-analyzer/ad5933.c
+++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
@@ -115,6 +115,7 @@ static const struct iio_chan_spec ad5933_channels[] = {
 		.channel = 0,
 		.info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
 		.address = AD5933_REG_TEMP_DATA,
+		.scan_index = -1,
 		.scan_type = {
 			.sign = 's',
 			.realbits = 14,
@@ -125,8 +126,6 @@ static const struct iio_chan_spec ad5933_channels[] = {
 		.indexed = 1,
 		.channel = 0,
 		.extend_name = "real_raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
-		BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD5933_REG_REAL_DATA,
 		.scan_index = 0,
 		.scan_type = {
@@ -139,8 +138,6 @@ static const struct iio_chan_spec ad5933_channels[] = {
 		.indexed = 1,
 		.channel = 0,
 		.extend_name = "imag_raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
-		BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD5933_REG_IMAG_DATA,
 		.scan_index = 1,
 		.scan_type = {
@@ -746,14 +743,14 @@ static int ad5933_probe(struct i2c_client *client,
 	indio_dev->name = id->name;
 	indio_dev->modes = INDIO_DIRECT_MODE;
 	indio_dev->channels = ad5933_channels;
-	indio_dev->num_channels = 1; /* only register temp0_input */
+	indio_dev->num_channels = ARRAY_SIZE(ad5933_channels);
 
 	ret = ad5933_register_ring_funcs_and_init(indio_dev);
 	if (ret)
 		goto error_disable_reg;
 
-	/* skip temp0_input, register in0_(real|imag)_raw */
-	ret = iio_buffer_register(indio_dev, &ad5933_channels[1], 2);
+	ret = iio_buffer_register(indio_dev, ad5933_channels,
+		ARRAY_SIZE(ad5933_channels));
 	if (ret)
 		goto error_unreg_ring;
 

From af29aab040d017a634dd626ad14e35526213633b Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Thu, 25 Sep 2014 15:27:00 +0100
Subject: [PATCH 1062/1185] staging:iio:ad5933: Drop "raw" from channel names

commit 6822ee34ad57b29a3b44df2c2829910f03c34fa4 upstream.

"raw" is the name of a channel property, but should not be part of the
channel name itself.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/iio/impedance-analyzer/ad5933.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c
index 9d5f205807c5..bc23d66a7a1e 100644
--- a/drivers/staging/iio/impedance-analyzer/ad5933.c
+++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
@@ -125,7 +125,7 @@ static const struct iio_chan_spec ad5933_channels[] = {
 		.type = IIO_VOLTAGE,
 		.indexed = 1,
 		.channel = 0,
-		.extend_name = "real_raw",
+		.extend_name = "real",
 		.address = AD5933_REG_REAL_DATA,
 		.scan_index = 0,
 		.scan_type = {
@@ -137,7 +137,7 @@ static const struct iio_chan_spec ad5933_channels[] = {
 		.type = IIO_VOLTAGE,
 		.indexed = 1,
 		.channel = 0,
-		.extend_name = "imag_raw",
+		.extend_name = "imag",
 		.address = AD5933_REG_IMAG_DATA,
 		.scan_index = 1,
 		.scan_type = {

From 035bc79c51a0c5cfca17f1692f9ad9de0d3ae829 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Tue, 4 Nov 2014 18:03:14 +0100
Subject: [PATCH 1063/1185] staging:iio:ade7758: Fix NULL pointer deref when
 enabling buffer

commit e10554738cab4224e097c2f9d975ea781a4fcde4 upstream.

In older versions of the IIO framework it was possible to pass a completely
different set of channels to iio_buffer_register() than the one that is
assigned to the IIO device. Commit 959d2952d124 ("staging:iio: make
iio_sw_buffer_preenable much more general.") introduced a restriction that
requires that the set of channels that is passed to iio_buffer_register() is
a subset of the channels assigned to the IIO device as the IIO core will use
the list of channels that is assigned to the device to lookup a channel by
scan index in iio_compute_scan_bytes(). If it can not find the channel the
function will crash. This patch fixes the issue by making sure that the same
set of channels is assigned to the IIO device and passed to
iio_buffer_register().

Note that we need to remove the IIO_CHAN_INFO_RAW and IIO_CHAN_INFO_SCALE
info attributes from the channels since we don't actually want those to be
registered.

Fixes the following crash:
	Unable to handle kernel NULL pointer dereference at virtual address 00000016
	pgd = d2094000
	[00000016] *pgd=16e39831, *pte=00000000, *ppte=00000000
	Internal error: Oops: 17 [#1] PREEMPT SMP ARM
	Modules linked in:
	CPU: 1 PID: 1695 Comm: bash Not tainted 3.17.0-06329-g29461ee #9686
	task: d7768040 ti: d5bd4000 task.ti: d5bd4000
	PC is at iio_compute_scan_bytes+0x38/0xc0
	LR is at iio_compute_scan_bytes+0x34/0xc0
	pc : [<c0316de8>]    lr : [<c0316de4>]    psr: 60070013
	sp : d5bd5ec0  ip : 00000000  fp : 00000000
	r10: d769f934  r9 : 00000000  r8 : 00000001
	r7 : 00000000  r6 : c8fc6240  r5 : d769f800  r4 : 00000000
	r3 : d769f800  r2 : 00000000  r1 : ffffffff  r0 : 00000000
	Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
	Control: 18c5387d  Table: 1209404a  DAC: 00000015
	Process bash (pid: 1695, stack limit = 0xd5bd4240)
	Stack: (0xd5bd5ec0 to 0xd5bd6000)
	5ec0: d769f800 d7435640 c8fc6240 d769f984 00000000 c03175a4 d7435690 d7435640
	5ee0: d769f990 00000002 00000000 d769f800 d5bd4000 00000000 000b43a8 c03177f4
	5f00: d769f810 0162b8c8 00000002 c8fc7e00 d77f1d08 d77f1da8 c8fc7e00 c01faf1c
	5f20: 00000002 c010694c c010690c d5bd5f88 00000002 c8fc6840 c8fc684c c0105e08
	5f40: 00000000 00000000 d20d1580 00000002 000af408 d5bd5f88 c000de84 c00b76d4
	5f60: d20d1580 000af408 00000002 d20d1580 d20d1580 00000002 000af408 c000de84
	5f80: 00000000 c00b7a44 00000000 00000000 00000002 b6ebea78 00000002 000af408
	5fa0: 00000004 c000dd00 b6ebea78 00000002 00000001 000af408 00000002 00000000
	5fc0: b6ebea78 00000002 000af408 00000004 bee96a4c 000a6094 00000000 000b43a8
	5fe0: 00000000 bee969cc b6e2eb77 b6e6525c 40070010 00000001 00000000 00000000
	[<c0316de8>] (iio_compute_scan_bytes) from [<c03175a4>] (__iio_update_buffers+0x248/0x438)
	[<c03175a4>] (__iio_update_buffers) from [<c03177f4>] (iio_buffer_store_enable+0x60/0x7c)
	[<c03177f4>] (iio_buffer_store_enable) from [<c01faf1c>] (dev_attr_store+0x18/0x24)
	[<c01faf1c>] (dev_attr_store) from [<c010694c>] (sysfs_kf_write+0x40/0x4c)
	[<c010694c>] (sysfs_kf_write) from [<c0105e08>] (kernfs_fop_write+0x110/0x154)
	[<c0105e08>] (kernfs_fop_write) from [<c00b76d4>] (vfs_write+0xbc/0x170)
	[<c00b76d4>] (vfs_write) from [<c00b7a44>] (SyS_write+0x40/0x78)
	[<c00b7a44>] (SyS_write) from [<c000dd00>] (ret_fast_syscall+0x0/0x30)

Fixes: 959d2952d124 ("staging:iio: make iio_sw_buffer_preenable much more general.")
Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/iio/meter/ade7758.h      |  1 -
 drivers/staging/iio/meter/ade7758_core.c | 33 ++----------------------
 drivers/staging/iio/meter/ade7758_ring.c |  3 +--
 3 files changed, 3 insertions(+), 34 deletions(-)

diff --git a/drivers/staging/iio/meter/ade7758.h b/drivers/staging/iio/meter/ade7758.h
index 07318203a836..e8c98cf57070 100644
--- a/drivers/staging/iio/meter/ade7758.h
+++ b/drivers/staging/iio/meter/ade7758.h
@@ -119,7 +119,6 @@ struct ade7758_state {
 	u8			*tx;
 	u8			*rx;
 	struct mutex		buf_lock;
-	const struct iio_chan_spec *ade7758_ring_channels;
 	struct spi_transfer	ring_xfer[4];
 	struct spi_message	ring_msg;
 	/*
diff --git a/drivers/staging/iio/meter/ade7758_core.c b/drivers/staging/iio/meter/ade7758_core.c
index 8f5bcfab3563..847576fc4f36 100644
--- a/drivers/staging/iio/meter/ade7758_core.c
+++ b/drivers/staging/iio/meter/ade7758_core.c
@@ -649,8 +649,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.indexed = 1,
 		.channel = 0,
 		.extend_name = "raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD7758_WT(AD7758_PHASE_A, AD7758_VOLTAGE),
 		.scan_index = 0,
 		.scan_type = {
@@ -663,8 +661,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.indexed = 1,
 		.channel = 0,
 		.extend_name = "raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD7758_WT(AD7758_PHASE_A, AD7758_CURRENT),
 		.scan_index = 1,
 		.scan_type = {
@@ -677,8 +673,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.indexed = 1,
 		.channel = 0,
 		.extend_name = "apparent_raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD7758_WT(AD7758_PHASE_A, AD7758_APP_PWR),
 		.scan_index = 2,
 		.scan_type = {
@@ -691,8 +685,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.indexed = 1,
 		.channel = 0,
 		.extend_name = "active_raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD7758_WT(AD7758_PHASE_A, AD7758_ACT_PWR),
 		.scan_index = 3,
 		.scan_type = {
@@ -705,8 +697,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.indexed = 1,
 		.channel = 0,
 		.extend_name = "reactive_raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD7758_WT(AD7758_PHASE_A, AD7758_REACT_PWR),
 		.scan_index = 4,
 		.scan_type = {
@@ -719,8 +709,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.indexed = 1,
 		.channel = 1,
 		.extend_name = "raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD7758_WT(AD7758_PHASE_B, AD7758_VOLTAGE),
 		.scan_index = 5,
 		.scan_type = {
@@ -733,8 +721,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.indexed = 1,
 		.channel = 1,
 		.extend_name = "raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD7758_WT(AD7758_PHASE_B, AD7758_CURRENT),
 		.scan_index = 6,
 		.scan_type = {
@@ -747,8 +733,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.indexed = 1,
 		.channel = 1,
 		.extend_name = "apparent_raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD7758_WT(AD7758_PHASE_B, AD7758_APP_PWR),
 		.scan_index = 7,
 		.scan_type = {
@@ -761,8 +745,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.indexed = 1,
 		.channel = 1,
 		.extend_name = "active_raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD7758_WT(AD7758_PHASE_B, AD7758_ACT_PWR),
 		.scan_index = 8,
 		.scan_type = {
@@ -775,8 +757,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.indexed = 1,
 		.channel = 1,
 		.extend_name = "reactive_raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD7758_WT(AD7758_PHASE_B, AD7758_REACT_PWR),
 		.scan_index = 9,
 		.scan_type = {
@@ -789,8 +769,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.indexed = 1,
 		.channel = 2,
 		.extend_name = "raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD7758_WT(AD7758_PHASE_C, AD7758_VOLTAGE),
 		.scan_index = 10,
 		.scan_type = {
@@ -803,8 +781,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.indexed = 1,
 		.channel = 2,
 		.extend_name = "raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD7758_WT(AD7758_PHASE_C, AD7758_CURRENT),
 		.scan_index = 11,
 		.scan_type = {
@@ -817,8 +793,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.indexed = 1,
 		.channel = 2,
 		.extend_name = "apparent_raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD7758_WT(AD7758_PHASE_C, AD7758_APP_PWR),
 		.scan_index = 12,
 		.scan_type = {
@@ -831,8 +805,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.indexed = 1,
 		.channel = 2,
 		.extend_name = "active_raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD7758_WT(AD7758_PHASE_C, AD7758_ACT_PWR),
 		.scan_index = 13,
 		.scan_type = {
@@ -845,8 +817,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.indexed = 1,
 		.channel = 2,
 		.extend_name = "reactive_raw",
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.address = AD7758_WT(AD7758_PHASE_C, AD7758_REACT_PWR),
 		.scan_index = 14,
 		.scan_type = {
@@ -890,13 +860,14 @@ static int ade7758_probe(struct spi_device *spi)
 		goto error_free_rx;
 	}
 	st->us = spi;
-	st->ade7758_ring_channels = &ade7758_channels[0];
 	mutex_init(&st->buf_lock);
 
 	indio_dev->name = spi->dev.driver->name;
 	indio_dev->dev.parent = &spi->dev;
 	indio_dev->info = &ade7758_info;
 	indio_dev->modes = INDIO_DIRECT_MODE;
+	indio_dev->channels = ade7758_channels;
+	indio_dev->num_channels = ARRAY_SIZE(ade7758_channels);
 
 	ret = ade7758_configure_ring(indio_dev);
 	if (ret)
diff --git a/drivers/staging/iio/meter/ade7758_ring.c b/drivers/staging/iio/meter/ade7758_ring.c
index b29e2d5d9937..25e1b45b6c40 100644
--- a/drivers/staging/iio/meter/ade7758_ring.c
+++ b/drivers/staging/iio/meter/ade7758_ring.c
@@ -89,7 +89,6 @@ static irqreturn_t ade7758_trigger_handler(int irq, void *p)
  **/
 static int ade7758_ring_preenable(struct iio_dev *indio_dev)
 {
-	struct ade7758_state *st = iio_priv(indio_dev);
 	unsigned channel;
 	int ret;
 
@@ -104,7 +103,7 @@ static int ade7758_ring_preenable(struct iio_dev *indio_dev)
 				 indio_dev->masklength);
 
 	ade7758_write_waveform_type(&indio_dev->dev,
-		st->ade7758_ring_channels[channel].address);
+		indio_dev->channels[channel].address);
 
 	return 0;
 }

From 4fdbedf5569be1dbd53cb6d87743e155b1eb032a Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Tue, 4 Nov 2014 18:03:15 +0100
Subject: [PATCH 1064/1185] staging:iio:ade7758: Fix check if channels are
 enabled in preenable

commit 79fa64eb2ee8ccb4bcad7f54caa2699730b10b22 upstream.

We should check if a channel is enabled, not if no channels are enabled.

Fixes: 550268ca1111 ("staging:iio: scrap scan_count and ensure all drivers use active_scan_mask")
Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/iio/meter/ade7758_ring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/iio/meter/ade7758_ring.c b/drivers/staging/iio/meter/ade7758_ring.c
index 25e1b45b6c40..6a0ef97e9146 100644
--- a/drivers/staging/iio/meter/ade7758_ring.c
+++ b/drivers/staging/iio/meter/ade7758_ring.c
@@ -92,7 +92,7 @@ static int ade7758_ring_preenable(struct iio_dev *indio_dev)
 	unsigned channel;
 	int ret;
 
-	if (!bitmap_empty(indio_dev->active_scan_mask, indio_dev->masklength))
+	if (bitmap_empty(indio_dev->active_scan_mask, indio_dev->masklength))
 		return -EINVAL;
 
 	ret = iio_sw_buffer_preenable(indio_dev);

From a316af4f979bb8825a4db9cf77be89e645231b3a Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Tue, 4 Nov 2014 18:03:16 +0100
Subject: [PATCH 1065/1185] staging:iio:ade7758: Remove "raw" from channel name

commit b598aacc29331e7e638cd509108600e916c6331b upstream.

"raw" is a property of a channel, but should not be part of the name of
the channel.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/iio/meter/ade7758_core.c | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/drivers/staging/iio/meter/ade7758_core.c b/drivers/staging/iio/meter/ade7758_core.c
index 847576fc4f36..75d9fe6a1bc1 100644
--- a/drivers/staging/iio/meter/ade7758_core.c
+++ b/drivers/staging/iio/meter/ade7758_core.c
@@ -648,7 +648,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.type = IIO_VOLTAGE,
 		.indexed = 1,
 		.channel = 0,
-		.extend_name = "raw",
 		.address = AD7758_WT(AD7758_PHASE_A, AD7758_VOLTAGE),
 		.scan_index = 0,
 		.scan_type = {
@@ -660,7 +659,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.type = IIO_CURRENT,
 		.indexed = 1,
 		.channel = 0,
-		.extend_name = "raw",
 		.address = AD7758_WT(AD7758_PHASE_A, AD7758_CURRENT),
 		.scan_index = 1,
 		.scan_type = {
@@ -672,7 +670,7 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.type = IIO_POWER,
 		.indexed = 1,
 		.channel = 0,
-		.extend_name = "apparent_raw",
+		.extend_name = "apparent",
 		.address = AD7758_WT(AD7758_PHASE_A, AD7758_APP_PWR),
 		.scan_index = 2,
 		.scan_type = {
@@ -684,7 +682,7 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.type = IIO_POWER,
 		.indexed = 1,
 		.channel = 0,
-		.extend_name = "active_raw",
+		.extend_name = "active",
 		.address = AD7758_WT(AD7758_PHASE_A, AD7758_ACT_PWR),
 		.scan_index = 3,
 		.scan_type = {
@@ -696,7 +694,7 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.type = IIO_POWER,
 		.indexed = 1,
 		.channel = 0,
-		.extend_name = "reactive_raw",
+		.extend_name = "reactive",
 		.address = AD7758_WT(AD7758_PHASE_A, AD7758_REACT_PWR),
 		.scan_index = 4,
 		.scan_type = {
@@ -708,7 +706,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.type = IIO_VOLTAGE,
 		.indexed = 1,
 		.channel = 1,
-		.extend_name = "raw",
 		.address = AD7758_WT(AD7758_PHASE_B, AD7758_VOLTAGE),
 		.scan_index = 5,
 		.scan_type = {
@@ -720,7 +717,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.type = IIO_CURRENT,
 		.indexed = 1,
 		.channel = 1,
-		.extend_name = "raw",
 		.address = AD7758_WT(AD7758_PHASE_B, AD7758_CURRENT),
 		.scan_index = 6,
 		.scan_type = {
@@ -732,7 +728,7 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.type = IIO_POWER,
 		.indexed = 1,
 		.channel = 1,
-		.extend_name = "apparent_raw",
+		.extend_name = "apparent",
 		.address = AD7758_WT(AD7758_PHASE_B, AD7758_APP_PWR),
 		.scan_index = 7,
 		.scan_type = {
@@ -744,7 +740,7 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.type = IIO_POWER,
 		.indexed = 1,
 		.channel = 1,
-		.extend_name = "active_raw",
+		.extend_name = "active",
 		.address = AD7758_WT(AD7758_PHASE_B, AD7758_ACT_PWR),
 		.scan_index = 8,
 		.scan_type = {
@@ -756,7 +752,7 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.type = IIO_POWER,
 		.indexed = 1,
 		.channel = 1,
-		.extend_name = "reactive_raw",
+		.extend_name = "reactive",
 		.address = AD7758_WT(AD7758_PHASE_B, AD7758_REACT_PWR),
 		.scan_index = 9,
 		.scan_type = {
@@ -768,7 +764,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.type = IIO_VOLTAGE,
 		.indexed = 1,
 		.channel = 2,
-		.extend_name = "raw",
 		.address = AD7758_WT(AD7758_PHASE_C, AD7758_VOLTAGE),
 		.scan_index = 10,
 		.scan_type = {
@@ -780,7 +775,6 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.type = IIO_CURRENT,
 		.indexed = 1,
 		.channel = 2,
-		.extend_name = "raw",
 		.address = AD7758_WT(AD7758_PHASE_C, AD7758_CURRENT),
 		.scan_index = 11,
 		.scan_type = {
@@ -792,7 +786,7 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.type = IIO_POWER,
 		.indexed = 1,
 		.channel = 2,
-		.extend_name = "apparent_raw",
+		.extend_name = "apparent",
 		.address = AD7758_WT(AD7758_PHASE_C, AD7758_APP_PWR),
 		.scan_index = 12,
 		.scan_type = {
@@ -804,7 +798,7 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.type = IIO_POWER,
 		.indexed = 1,
 		.channel = 2,
-		.extend_name = "active_raw",
+		.extend_name = "active",
 		.address = AD7758_WT(AD7758_PHASE_C, AD7758_ACT_PWR),
 		.scan_index = 13,
 		.scan_type = {
@@ -816,7 +810,7 @@ static const struct iio_chan_spec ade7758_channels[] = {
 		.type = IIO_POWER,
 		.indexed = 1,
 		.channel = 2,
-		.extend_name = "reactive_raw",
+		.extend_name = "reactive",
 		.address = AD7758_WT(AD7758_PHASE_C, AD7758_REACT_PWR),
 		.scan_index = 14,
 		.scan_type = {

From a8fb82d5f59da4d42a722043566376a82a3b4644 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Thu, 16 Oct 2014 13:46:38 -0400
Subject: [PATCH 1066/1185] serial: Fix divide-by-zero fault in
 uart_get_divisor()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 547039ec502076e60034eeb79611df3433a99b7d upstream.

uart_get_baud_rate() will return baud == 0 if the max rate is set
to the "magic" 38400 rate and the SPD_* flags are also specified.
On the first iteration, if the current baud rate is higher than the
max, the baud rate is clamped at the max (which in the degenerate
case is 38400). On the second iteration, the now-"magic" 38400 baud
rate selects the possibly higher alternate baud rate indicated by
the SPD_* flag. Since only two loop iterations are performed, the
loop is exited, a kernel WARNING is generated and a baud rate of
0 is returned.

Reproducible with:
 setserial /dev/ttyS0 spd_hi base_baud 38400

Only perform the "magic" 38400 -> SPD_* baud transform on the first
loop iteration, which prevents the degenerate case from recognizing
the clamped baud rate as the "magic" 38400 value.

Reported-by: Robert Święcki <robert@swiecki.net>
Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/serial_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 0f1cc2c8c22a..1fabb22ae615 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -359,7 +359,7 @@ uart_get_baud_rate(struct uart_port *port, struct ktermios *termios,
 		 * The spd_hi, spd_vhi, spd_shi, spd_warp kludge...
 		 * Die! Die! Die!
 		 */
-		if (baud == 38400)
+		if (try == 0 && baud == 38400)
 			baud = altbaud;
 
 		/*

From 6cc7e9f66ba2a8cd0eb1cd4987df44d30eac649e Mon Sep 17 00:00:00 2001
From: Nathaniel Ting <nathaniel.ting@silabs.com>
Date: Fri, 3 Oct 2014 12:01:20 -0400
Subject: [PATCH 1067/1185] USB: serial: cp210x: add Silicon Labs 358x VID and
 PID

commit 35cc83eab097e5720a9cc0ec12bdc3a726f58381 upstream.

Enable Silicon Labs Ember VID chips to enumerate with the cp210x usb serial
driver. EM358x devices operating with the Ember Z-Net 5.1.2 stack may now
connect to host PCs over a USB serial link.

Signed-off-by: Nathaniel Ting <nathaniel.ting@silabs.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index b22a4bc308e2..e9183eda39e0 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -155,6 +155,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
 	{ USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */
 	{ USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */
+	{ USB_DEVICE(0x1BA4, 0x0002) },	/* Silicon Labs 358x factory default */
 	{ USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */
 	{ USB_DEVICE(0x1D6F, 0x0010) }, /* Seluxit ApS RF Dongle */
 	{ USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */

From 652fe31da6f78f2cdb4e92fff646aed03b50fdf8 Mon Sep 17 00:00:00 2001
From: Frans Klaver <frans.klaver@xsens.com>
Date: Fri, 10 Oct 2014 11:52:08 +0200
Subject: [PATCH 1068/1185] usb: serial: ftdi_sio: add Awinda Station and
 Dongle products

commit edd74ffab1f6909eee400c7de8ce621870aacac9 upstream.

Add new IDs for the Xsens Awinda Station and Awinda Dongle.

While at it, order the definitions by PID and add a logical separation
between devices using Xsens' VID and those using FTDI's VID.

Signed-off-by: Frans Klaver <frans.klaver@xsens.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/ftdi_sio.c     | 2 ++
 drivers/usb/serial/ftdi_sio_ids.h | 6 +++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 4235693ba2f7..1cd759534f3e 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -677,6 +677,8 @@ static struct usb_device_id id_table_combined [] = {
 	{ USB_DEVICE(FTDI_VID, XSENS_CONVERTER_5_PID) },
 	{ USB_DEVICE(FTDI_VID, XSENS_CONVERTER_6_PID) },
 	{ USB_DEVICE(FTDI_VID, XSENS_CONVERTER_7_PID) },
+	{ USB_DEVICE(XSENS_VID, XSENS_AWINDA_DONGLE_PID) },
+	{ USB_DEVICE(XSENS_VID, XSENS_AWINDA_STATION_PID) },
 	{ USB_DEVICE(XSENS_VID, XSENS_CONVERTER_PID) },
 	{ USB_DEVICE(XSENS_VID, XSENS_MTW_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_OMNI1509) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 8927a5c39b00..933ff1f8fa9a 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -143,8 +143,12 @@
  * Xsens Technologies BV products (http://www.xsens.com).
  */
 #define XSENS_VID		0x2639
-#define XSENS_CONVERTER_PID	0xD00D	/* Xsens USB-serial converter */
+#define XSENS_AWINDA_STATION_PID 0x0101
+#define XSENS_AWINDA_DONGLE_PID 0x0102
 #define XSENS_MTW_PID		0x0200	/* Xsens MTw */
+#define XSENS_CONVERTER_PID	0xD00D	/* Xsens USB-serial converter */
+
+/* Xsens devices using FTDI VID */
 #define XSENS_CONVERTER_0_PID	0xD388	/* Xsens USB converter */
 #define XSENS_CONVERTER_1_PID	0xD389	/* Xsens Wireless Receiver */
 #define XSENS_CONVERTER_2_PID	0xD38A

From 066ef018e21e253ee96eea26f1b8b1cde4a68ca2 Mon Sep 17 00:00:00 2001
From: Perry Hung <iperry@gmail.com>
Date: Wed, 22 Oct 2014 23:31:34 -0400
Subject: [PATCH 1069/1185] usb: serial: ftdi_sio: add "bricked" FTDI device
 PID

commit 7f2719f0003da1ad13124ef00f48d7514c79e30d upstream.

An official recent Windows driver from FTDI detects counterfeit devices
and reprograms the internal EEPROM containing the USB PID to 0, effectively
bricking the device.

Add support for this VID/PID pair to correctly bind the driver on these
devices.

See:
http://hackaday.com/2014/10/22/watch-that-windows-update-ftdi-drivers-are-killing-fake-chips/

Signed-off-by: Perry Hung <iperry@gmail.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/ftdi_sio.c     | 1 +
 drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 1cd759534f3e..768c2b4722d1 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -148,6 +148,7 @@ static struct ftdi_sio_quirk ftdi_8u2232c_quirk = {
  * /sys/bus/usb/ftdi_sio/new_id, then send patch/report!
  */
 static struct usb_device_id id_table_combined [] = {
+	{ USB_DEVICE(FTDI_VID, FTDI_BRICK_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_ZEITCONTROL_TAGTRACE_MIFARE_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_CTI_MINI_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_CTI_NANO_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 933ff1f8fa9a..302ab9a71f06 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -30,6 +30,12 @@
 
 /*** third-party PIDs (using FTDI_VID) ***/
 
+/*
+ * Certain versions of the official Windows FTDI driver reprogrammed
+ * counterfeit FTDI devices to PID 0. Support these devices anyway.
+ */
+#define FTDI_BRICK_PID		0x0000
+
 #define FTDI_LUMEL_PD12_PID	0x6002
 
 /*

From 1e2dedaa4d57a7aa681a542358dcbada0cf7d896 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 27 Oct 2014 18:34:33 +0100
Subject: [PATCH 1070/1185] USB: cdc-acm: add device id for GW Instek AFG-2225

commit cf84a691a61606a2e7269907d3727e2d9fa148ee upstream.

Add device-id entry for GW Instek AFG-2225, which has a byte swapped
bInterfaceSubClass (0x20).

Reported-by: Karl Palsson <karlp@tweak.net.au>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-acm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index fbf3f11aed2c..537e1d32f92b 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1588,6 +1588,7 @@ static const struct usb_device_id acm_ids[] = {
 	{ USB_DEVICE(0x0572, 0x1328), /* Shiro / Aztech USB MODEM UM-3100 */
 	.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
 	},
+	{ USB_DEVICE(0x2184, 0x001c) },	/* GW Instek AFG-2225 */
 	{ USB_DEVICE(0x22b8, 0x6425), /* Motorola MOTOMAGX phones */
 	},
 	/* Motorola H24 HSPA module: */

From 2a18a875f45d56777394323e725ea8be378b068c Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 5 Nov 2014 18:41:59 +0100
Subject: [PATCH 1071/1185] USB: cdc-acm: only raise DTR on transitions from B0

commit 4473d054ceb572557954f9536731d39b20937b0c upstream.

Make sure to only raise DTR on transitions from B0 in set_termios.

Also allow set_termios to be called from open with a termios_old of
NULL. Note that DTR will not be raised prematurely in this case.

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-acm.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 537e1d32f92b..1e71f918eb9f 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -883,11 +883,12 @@ static void acm_tty_set_termios(struct tty_struct *tty,
 	/* FIXME: Needs to clear unsupported bits in the termios */
 	acm->clocal = ((termios->c_cflag & CLOCAL) != 0);
 
-	if (!newline.dwDTERate) {
+	if (C_BAUD(tty) == B0) {
 		newline.dwDTERate = acm->line.dwDTERate;
 		newctrl &= ~ACM_CTRL_DTR;
-	} else
+	} else if (termios_old && (termios_old->c_cflag & CBAUD) == B0) {
 		newctrl |=  ACM_CTRL_DTR;
+	}
 
 	if (newctrl != acm->ctrlout)
 		acm_set_control(acm, acm->ctrlout = newctrl);

From 0f4a15711a844a835a28a46ec7ab9e6f1c957561 Mon Sep 17 00:00:00 2001
From: Daniele Palmas <dnlplm@gmail.com>
Date: Tue, 14 Oct 2014 10:47:37 +0200
Subject: [PATCH 1072/1185] usb: option: add support for Telit LE910

commit 2d0eb862dd477c3c4f32b201254ca0b40e6f465c upstream.

Add VID/PID for Telit LE910 modem. The interface description is almost the
same as for the LE920, except that the QMI interface is number 2 (instead
of 5).

Signed-off-by: Daniele Palmas <dnlplm@gmail.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index e47aabe0c760..900e3ad541e3 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -269,6 +269,7 @@ static void option_instat_callback(struct urb *urb);
 #define TELIT_PRODUCT_DE910_DUAL		0x1010
 #define TELIT_PRODUCT_UE910_V2			0x1012
 #define TELIT_PRODUCT_LE920			0x1200
+#define TELIT_PRODUCT_LE910			0x1201
 
 /* ZTE PRODUCTS */
 #define ZTE_VENDOR_ID				0x19d2
@@ -588,6 +589,11 @@ static const struct option_blacklist_info zte_1255_blacklist = {
 	.reserved = BIT(3) | BIT(4),
 };
 
+static const struct option_blacklist_info telit_le910_blacklist = {
+	.sendsetup = BIT(0),
+	.reserved = BIT(1) | BIT(2),
+};
+
 static const struct option_blacklist_info telit_le920_blacklist = {
 	.sendsetup = BIT(0),
 	.reserved = BIT(1) | BIT(5),
@@ -1137,6 +1143,8 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_CC864_SINGLE) },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_DE910_DUAL) },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UE910_V2) },
+	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
+		.driver_info = (kernel_ulong_t)&telit_le910_blacklist },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),
 		.driver_info = (kernel_ulong_t)&telit_le920_blacklist },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */

From f9e91f45dac49e401eff3fd1d63413d0fd2107e6 Mon Sep 17 00:00:00 2001
From: Dan Williams <dcbw@redhat.com>
Date: Tue, 14 Oct 2014 11:10:41 -0500
Subject: [PATCH 1073/1185] USB: option: add Haier CE81B CDMA modem

commit 012eee1522318b5ccd64d277d50ac32f7e9974fe upstream.

Port layout:

0: QCDM/DIAG
1: NMEA
2: AT
3: AT/PPP

Signed-off-by: Dan Williams <dcbw@redhat.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 900e3ad541e3..8b3484134ab0 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -362,6 +362,7 @@ static void option_instat_callback(struct urb *urb);
 
 /* Haier products */
 #define HAIER_VENDOR_ID				0x201e
+#define HAIER_PRODUCT_CE81B			0x10f8
 #define HAIER_PRODUCT_CE100			0x2009
 
 /* Cinterion (formerly Siemens) products */
@@ -1620,6 +1621,7 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) },
 	{ USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) },
 	{ USB_DEVICE(HAIER_VENDOR_ID, HAIER_PRODUCT_CE100) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(HAIER_VENDOR_ID, HAIER_PRODUCT_CE81B, 0xff, 0xff, 0xff) },
 	/* Pirelli  */
 	{ USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_C100_1, 0xff) },
 	{ USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_C100_2, 0xff) },

From 9add88d00d8d86a2721d489d6acc7621c0747972 Mon Sep 17 00:00:00 2001
From: Cyril Brulebois <kibi@debian.org>
Date: Tue, 28 Oct 2014 16:42:41 +0100
Subject: [PATCH 1074/1185] wireless: rt2x00: add new rt2800usb device

commit 664d6a792785cc677c2091038ce10322c8d04ae1 upstream.

0x1b75 0xa200 AirLive WN-200USB wireless 11b/g/n dongle

References: https://bugs.debian.org/766802
Reported-by: Martin Mokrejs <mmokrejs@fold.natur.cuni.cz>
Signed-off-by: Cyril Brulebois <kibi@debian.org>
Acked-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/rt2x00/rt2800usb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c
index 9ef0711a5cc1..400b8679796a 100644
--- a/drivers/net/wireless/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/rt2x00/rt2800usb.c
@@ -1091,6 +1091,7 @@ static struct usb_device_id rt2800usb_device_table[] = {
 	/* Ovislink */
 	{ USB_DEVICE(0x1b75, 0x3071) },
 	{ USB_DEVICE(0x1b75, 0x3072) },
+	{ USB_DEVICE(0x1b75, 0xa200) },
 	/* Para */
 	{ USB_DEVICE(0x20b8, 0x8888) },
 	/* Pegatron */

From 6b93e3669cb10c50dd9441f3eabc63b8d43749d0 Mon Sep 17 00:00:00 2001
From: Jack Pham <jackp@codeaurora.org>
Date: Tue, 21 Oct 2014 16:31:10 -0700
Subject: [PATCH 1075/1185] usb: dwc3: gadget: Properly initialize LINK TRB

commit 1200a82a59b6aa65758ccc92c3447b98c53cd7a2 upstream.

On ISOC endpoints the last trb_pool entry used as a
LINK TRB is not getting zeroed out correctly due to
memset being called incorrectly and in the wrong place.
If pool allocated from DMA was not zero-initialized
to begin with this will result in the size and ctrl
values being random garbage. Call memset correctly after
assignment of the trb_link pointer.

Fixes: f6bafc6a1c ("usb: dwc3: convert TRBs into bitshifts")
Signed-off-by: Jack Pham <jackp@codeaurora.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index d868b62c1a16..ec4cb05291d7 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -550,12 +550,11 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep,
 		if (!usb_endpoint_xfer_isoc(desc))
 			return 0;
 
-		memset(&trb_link, 0, sizeof(trb_link));
-
 		/* Link TRB for ISOC. The HWO bit is never reset */
 		trb_st_hw = &dep->trb_pool[0];
 
 		trb_link = &dep->trb_pool[DWC3_TRB_NUM - 1];
+		memset(trb_link, 0, sizeof(*trb_link));
 
 		trb_link->bpl = lower_32_bits(dwc3_trb_dma_offset(dep, trb_st_hw));
 		trb_link->bph = upper_32_bits(dwc3_trb_dma_offset(dep, trb_st_hw));

From a8be23c660a778cdab39bcde9cd3696ebc097f6f Mon Sep 17 00:00:00 2001
From: Ray Jui <rjui@broadcom.com>
Date: Thu, 9 Oct 2014 11:44:54 -0700
Subject: [PATCH 1076/1185] spi: pl022: Fix incorrect dma_unmap_sg

commit 3ffa6158f002e096d28ede71be4e0ee8ab20baa2 upstream.

When mapped RX DMA entries are unmapped in an error condition while DMA
is first being configured in the driver, the number of TX DMA entries was
passed in, which is incorrect. Pass the number of RX DMA entries instead.

Signed-off-by: Ray Jui <rjui@broadcom.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/spi/spi-pl022.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index 371cc66f1a0e..5266c89fc989 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -1080,7 +1080,7 @@ static int configure_dma(struct pl022 *pl022)
 		     pl022->sgt_tx.nents, DMA_TO_DEVICE);
 err_tx_sgmap:
 	dma_unmap_sg(rxchan->device->dev, pl022->sgt_rx.sgl,
-		     pl022->sgt_tx.nents, DMA_FROM_DEVICE);
+		     pl022->sgt_rx.nents, DMA_FROM_DEVICE);
 err_rx_sgmap:
 	sg_free_table(&pl022->sgt_tx);
 err_alloc_tx_sg:

From c822fb57ba12fbf0b989c201e400a5f71c9fade5 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Thu, 6 Nov 2014 14:08:29 +0300
Subject: [PATCH 1077/1185] spi: pxa2xx: toggle clocks on suspend if not
 disabled by runtime PM

commit 2b9375b91bef65b837bed61a05fb387159b38ddf upstream.

If PM_RUNTIME is enabled, it is easy to trigger the following backtrace
on pxa2xx hosts:

------------[ cut here ]------------
WARNING: CPU: 0 PID: 1 at /home/lumag/linux/arch/arm/mach-pxa/clock.c:35 clk_disable+0xa0/0xa8()
Modules linked in:
CPU: 0 PID: 1 Comm: swapper Not tainted 3.17.0-00007-g1b3d2ee-dirty #104
[<c000de68>] (unwind_backtrace) from [<c000c078>] (show_stack+0x10/0x14)
[<c000c078>] (show_stack) from [<c001d75c>] (warn_slowpath_common+0x6c/0x8c)
[<c001d75c>] (warn_slowpath_common) from [<c001d818>] (warn_slowpath_null+0x1c/0x24)
[<c001d818>] (warn_slowpath_null) from [<c0015e80>] (clk_disable+0xa0/0xa8)
[<c0015e80>] (clk_disable) from [<c02507f8>] (pxa2xx_spi_suspend+0x2c/0x34)
[<c02507f8>] (pxa2xx_spi_suspend) from [<c0200360>] (platform_pm_suspend+0x2c/0x54)
[<c0200360>] (platform_pm_suspend) from [<c0207fec>] (dpm_run_callback.isra.14+0x2c/0x74)
[<c0207fec>] (dpm_run_callback.isra.14) from [<c0209254>] (__device_suspend+0x120/0x2f8)
[<c0209254>] (__device_suspend) from [<c0209a94>] (dpm_suspend+0x50/0x208)
[<c0209a94>] (dpm_suspend) from [<c00455ac>] (suspend_devices_and_enter+0x8c/0x3a0)
[<c00455ac>] (suspend_devices_and_enter) from [<c0045ad4>] (pm_suspend+0x214/0x2a8)
[<c0045ad4>] (pm_suspend) from [<c04b5c34>] (test_suspend+0x14c/0x1dc)
[<c04b5c34>] (test_suspend) from [<c000880c>] (do_one_initcall+0x8c/0x1fc)
[<c000880c>] (do_one_initcall) from [<c04aecfc>] (kernel_init_freeable+0xf4/0x1b4)
[<c04aecfc>] (kernel_init_freeable) from [<c0378078>] (kernel_init+0x8/0xec)
[<c0378078>] (kernel_init) from [<c0009590>] (ret_from_fork+0x14/0x24)
---[ end trace 46524156d8faa4f6 ]---

This happens because the suspend function tries to disable a clock that is
already disabled by the runtime_suspend callback. Add
if (!pm_runtime_suspended()) checks to the suspend/resume path.

Fixes: 7d94a505858 (spi/pxa2xx: add support for runtime PM)
Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Reported-by: Andrea Adami <andrea.adami@gmail.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/spi/spi-pxa2xx.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 48b396fced0a..d26a2d195d21 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1324,7 +1324,9 @@ static int pxa2xx_spi_suspend(struct device *dev)
 	if (status != 0)
 		return status;
 	write_SSCR0(0, drv_data->ioaddr);
-	clk_disable_unprepare(ssp->clk);
+
+	if (!pm_runtime_suspended(dev))
+		clk_disable_unprepare(ssp->clk);
 
 	return 0;
 }
@@ -1338,7 +1340,8 @@ static int pxa2xx_spi_resume(struct device *dev)
 	pxa2xx_spi_dma_resume(drv_data);
 
 	/* Enable the SSP clock */
-	clk_prepare_enable(ssp->clk);
+	if (!pm_runtime_suspended(dev))
+		clk_prepare_enable(ssp->clk);
 
 	/* Start the queue running */
 	status = spi_master_resume(drv_data->master);

From b0476c87764ac62d79390678cefb3821308a5d08 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 31 Oct 2014 14:49:47 -0400
Subject: [PATCH 1078/1185] usb-storage: handle a skipped data phase

commit 93c9bf4d1838d5851a18ca398b0ad66397f05056 upstream.

Sometimes mass-storage devices using the Bulk-only transport will
mistakenly skip the data phase of a command.  Rather than sending the
data expected by the host or sending a zero-length packet, they go
directly to the status phase and send the CSW.

This causes problems for usb-storage, for obvious reasons.  The driver
will interpret the CSW as a short data transfer and will wait to
receive a CSW.  The device won't have anything left to send, so the
command eventually times out.

The SCSI layer doesn't retry commands after they time out (this is a
relatively recent change).  Therefore we should do our best to detect
a skipped data phase and handle it promptly.

This patch adds code to do that.  If usb-storage receives a short
13-byte data transfer from the device, and if the first four bytes of
the data match the CSW signature, the driver will set the residue to
the full transfer length and interpret the data as a CSW.

This fixes Bugzilla #86611.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Matthew Dharm <mdharm-usb@one-eyed-alien.net>
Tested-by: Paul Osmialowski <newchief@king.net.pl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/transport.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c
index 22c7d4360fa2..b1d815eb6d0b 100644
--- a/drivers/usb/storage/transport.c
+++ b/drivers/usb/storage/transport.c
@@ -1118,6 +1118,31 @@ int usb_stor_Bulk_transport(struct scsi_cmnd *srb, struct us_data *us)
 		 */
 		if (result == USB_STOR_XFER_LONG)
 			fake_sense = 1;
+
+		/*
+		 * Sometimes a device will mistakenly skip the data phase
+		 * and go directly to the status phase without sending a
+		 * zero-length packet.  If we get a 13-byte response here,
+		 * check whether it really is a CSW.
+		 */
+		if (result == USB_STOR_XFER_SHORT &&
+				srb->sc_data_direction == DMA_FROM_DEVICE &&
+				transfer_length - scsi_get_resid(srb) ==
+					US_BULK_CS_WRAP_LEN) {
+			struct scatterlist *sg = NULL;
+			unsigned int offset = 0;
+
+			if (usb_stor_access_xfer_buf((unsigned char *) bcs,
+					US_BULK_CS_WRAP_LEN, srb, &sg,
+					&offset, FROM_XFER_BUF) ==
+						US_BULK_CS_WRAP_LEN &&
+					bcs->Signature ==
+						cpu_to_le32(US_BULK_CS_SIGN)) {
+				usb_stor_dbg(us, "Device skipped data phase\n");
+				scsi_set_resid(srb, transfer_length);
+				goto skipped_data_phase;
+			}
+		}
 	}
 
 	/* See flow chart on pg 15 of the Bulk Only Transport spec for
@@ -1153,6 +1178,7 @@ int usb_stor_Bulk_transport(struct scsi_cmnd *srb, struct us_data *us)
 	if (result != USB_STOR_XFER_GOOD)
 		return USB_STOR_TRANSPORT_ERROR;
 
+ skipped_data_phase:
 	/* check bulk status */
 	residue = le32_to_cpu(bcs->Residue);
 	usb_stor_dbg(us, "Bulk Status S 0x%x T 0x%x R %u Stat 0x%x\n",

From 547d682d568271cf537af2850a066a287a51b62b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 29 Oct 2014 09:07:31 +0100
Subject: [PATCH 1079/1185] USB: opticon: fix non-atomic allocation in write
 path

commit e681286de221af78fc85db9222b6a203148c005a upstream.

Write may be called from interrupt context so make sure to use
GFP_ATOMIC for all allocations in write.

Fixes: 0d930e51cfe6 ("USB: opticon: Add Opticon OPN2001 write support")
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/opticon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c
index 5f4b0cd0f6e9..b0eb1dfc601a 100644
--- a/drivers/usb/serial/opticon.c
+++ b/drivers/usb/serial/opticon.c
@@ -219,7 +219,7 @@ static int opticon_write(struct tty_struct *tty, struct usb_serial_port *port,
 
 	/* The conncected devices do not have a bulk write endpoint,
 	 * to transmit data to de barcode device the control endpoint is used */
-	dr = kmalloc(sizeof(struct usb_ctrlrequest), GFP_NOIO);
+	dr = kmalloc(sizeof(struct usb_ctrlrequest), GFP_ATOMIC);
 	if (!dr) {
 		dev_err(&port->dev, "out of memory\n");
 		count = -ENOMEM;

From e0daafbbf3cb9ea341f72e5e26f1a8fcea977c0e Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 1 Oct 2014 11:29:14 +0200
Subject: [PATCH 1080/1185] usb: Do not allow usb_alloc_streams on unconfigured
 devices

commit 90a646c770c50cc206ceba0d7b50453c46c13c36 upstream.

This commit fixes the following oops:

[10238.622067] scsi host3: uas_eh_bus_reset_handler start
[10240.766164] usb 3-4: reset SuperSpeed USB device number 3 using xhci_hcd
[10245.779365] usb 3-4: device descriptor read/8, error -110
[10245.883331] usb 3-4: reset SuperSpeed USB device number 3 using xhci_hcd
[10250.897603] usb 3-4: device descriptor read/8, error -110
[10251.058200] BUG: unable to handle kernel NULL pointer dereference at  0000000000000040
[10251.058244] IP: [<ffffffff815ac6e1>] xhci_check_streams_endpoint+0x91/0x140
<snip>
[10251.059473] Call Trace:
[10251.059487]  [<ffffffff815aca6c>] xhci_calculate_streams_and_bitmask+0xbc/0x130
[10251.059520]  [<ffffffff815aeb5f>] xhci_alloc_streams+0x10f/0x5a0
[10251.059548]  [<ffffffff810a4685>] ? check_preempt_curr+0x75/0xa0
[10251.059575]  [<ffffffff810a46dc>] ? ttwu_do_wakeup+0x2c/0x100
[10251.059601]  [<ffffffff810a49e6>] ? ttwu_do_activate.constprop.111+0x66/0x70
[10251.059635]  [<ffffffff815779ab>] usb_alloc_streams+0xab/0xf0
[10251.059662]  [<ffffffffc0616b48>] uas_configure_endpoints+0x128/0x150 [uas]
[10251.059694]  [<ffffffffc0616bac>] uas_post_reset+0x3c/0xb0 [uas]
[10251.059722]  [<ffffffff815727d9>] usb_reset_device+0x1b9/0x2a0
[10251.059749]  [<ffffffffc0616f42>] uas_eh_bus_reset_handler+0xb2/0x190 [uas]
[10251.059781]  [<ffffffff81514293>] scsi_try_bus_reset+0x53/0x110
[10251.059808]  [<ffffffff815163b7>] scsi_eh_bus_reset+0xf7/0x270
<snip>

The problem is the following call sequence (simplified):

1) usb_reset_device
2)  usb_reset_and_verify_device
2)   hub_port_init
3)    hub_port_finish_reset
3)     xhci_discover_or_reset_device
        This frees xhci->devs[slot_id]->eps[ep_index].ring for all eps but 0
4)    usb_get_device_descriptor
       This fails
5)   hub_port_init fails
6)  usb_reset_and_verify_device fails, does not restore device config
7)  uas_post_reset
8)   xhci_alloc_streams
      NULL deref on the free-ed ring

This commit fixes this by not allowing usb_alloc_streams to continue if
the device is not configured.

Note that we do allow usb_free_streams to continue after a (logical)
disconnect, as it is necessary to explicitly free the streams at the xhci
controller level.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index d53547d2e4c7..f6e5ceb03afb 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1947,6 +1947,8 @@ int usb_alloc_streams(struct usb_interface *interface,
 		return -EINVAL;
 	if (dev->speed != USB_SPEED_SUPER)
 		return -EINVAL;
+	if (dev->state < USB_STATE_CONFIGURED)
+		return -ENODEV;
 
 	/* Streams only apply to bulk endpoints. */
 	for (i = 0; i < num_eps; i++)

From 6c8ad60fef55353a5add4d9e5aabecc1d01968c3 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Wed, 29 Oct 2014 14:50:18 -0700
Subject: [PATCH 1081/1185] cgroup/kmemleak: add kmemleak_free() for cgroup
 deallocations.

commit 401507d67d5c2854f5a88b3f93f64fc6f267bca5 upstream.

Commit ff7ee93f4715 ("cgroup/kmemleak: Annotate alloc_page() for cgroup
allocations") introduces kmemleak_alloc() for alloc_page_cgroup(), but
corresponding kmemleak_free() is missing, which makes kmemleak be
wrongly disabled after memory offlining.  Log is pasted at the end of
this commit message.

This patch adds kmemleak_free() to free_page_cgroup().  During page
offlining, this patch removes corresponding entries in kmemleak rbtree.
After that, the freed memory can be allocated again by other subsystems
without killing kmemleak.

  bash # for x in 1 2 3 4; do echo offline > /sys/devices/system/memory/memory$x/state ; sleep 1; done ; dmesg | grep leak

  Offlined Pages 32768
  kmemleak: Cannot insert 0xffff880016969000 into the object search tree (overlaps existing)
  CPU: 0 PID: 412 Comm: sleep Not tainted 3.17.0-rc5+ #86
  Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
  Call Trace:
    dump_stack+0x46/0x58
    create_object+0x266/0x2c0
    kmemleak_alloc+0x26/0x50
    kmem_cache_alloc+0xd3/0x160
    __sigqueue_alloc+0x49/0xd0
    __send_signal+0xcb/0x410
    send_signal+0x45/0x90
    __group_send_sig_info+0x13/0x20
    do_notify_parent+0x1bb/0x260
    do_exit+0x767/0xa40
    do_group_exit+0x44/0xa0
    SyS_exit_group+0x17/0x20
    system_call_fastpath+0x16/0x1b

  kmemleak: Kernel memory leak detector disabled
  kmemleak: Object 0xffff880016900000 (size 524288):
  kmemleak:   comm "swapper/0", pid 0, jiffies 4294667296
  kmemleak:   min_count = 0
  kmemleak:   count = 0
  kmemleak:   flags = 0x1
  kmemleak:   checksum = 0
  kmemleak:   backtrace:
        log_early+0x63/0x77
        kmemleak_alloc+0x4b/0x50
        init_section_page_cgroup+0x7f/0xf5
        page_cgroup_init+0xc5/0xd0
        start_kernel+0x333/0x408
        x86_64_start_reservations+0x2a/0x2c
        x86_64_start_kernel+0xf5/0xfc

Fixes: ff7ee93f4715 (cgroup/kmemleak: Annotate alloc_page() for cgroup allocations)
Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/page_cgroup.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 6d757e3a872a..e007236f345a 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -170,6 +170,7 @@ static void free_page_cgroup(void *addr)
 			sizeof(struct page_cgroup) * PAGES_PER_SECTION;
 
 		BUG_ON(PageReserved(page));
+		kmemleak_free(addr);
 		free_pages_exact(addr, table_size);
 	}
 }

From 8b080e3470f4c01a27cc5fc8b73676300c6be12d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 29 Oct 2014 14:50:44 -0700
Subject: [PATCH 1082/1185] lib/bitmap.c: fix undefined shift in
 __bitmap_shift_{left|right}()

commit ea5d05b34aca25c066e0699512d0ffbd8ee6ac3e upstream.

If __bitmap_shift_left() or __bitmap_shift_right() are asked to shift by
a multiple of BITS_PER_LONG, they will try to shift a long value by
BITS_PER_LONG bits which is undefined.  Change the functions to avoid
the undefined shift.

Coverity id: 1192175
Coverity id: 1192174
Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/bitmap.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/bitmap.c b/lib/bitmap.c
index 06f7e4fe8d2d..e5c4ebe586ba 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -131,7 +131,9 @@ void __bitmap_shift_right(unsigned long *dst,
 		lower = src[off + k];
 		if (left && off + k == lim - 1)
 			lower &= mask;
-		dst[k] = upper << (BITS_PER_LONG - rem) | lower >> rem;
+		dst[k] = lower >> rem;
+		if (rem)
+			dst[k] |= upper << (BITS_PER_LONG - rem);
 		if (left && k == lim - 1)
 			dst[k] &= mask;
 	}
@@ -172,7 +174,9 @@ void __bitmap_shift_left(unsigned long *dst,
 		upper = src[k];
 		if (left && k == lim - 1)
 			upper &= (1UL << left) - 1;
-		dst[k + off] = lower  >> (BITS_PER_LONG - rem) | upper << rem;
+		dst[k + off] = upper << rem;
+		if (rem)
+			dst[k + off] |= lower >> (BITS_PER_LONG - rem);
 		if (left && k + off == lim - 1)
 			dst[k + off] &= (1UL << left) - 1;
 	}

From 7d3a9bd9615685af8858e208aecccbf285022b99 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 22 Oct 2014 20:13:39 -0600
Subject: [PATCH 1083/1185] scsi: Fix error handling in SCSI_IOCTL_SEND_COMMAND

commit 84ce0f0e94ac97217398b3b69c21c7a62ebeed05 upstream.

When sg_scsi_ioctl() fails to prepare the request for submission in
blk_rq_map_kern(), we jump to a label where we just end up copying a
(luckily zeroed-out) kernel buffer to userspace instead of reporting
the error. Fix the problem by jumping to the right label.

CC: Jens Axboe <axboe@kernel.dk>
CC: linux-scsi@vger.kernel.org
Coverity-id: 1226871
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Removed the now-unused "out" label.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/scsi_ioctl.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index a5ffcc988f0b..1b4988b4bc11 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -506,7 +506,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 
 	if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_WAIT)) {
 		err = DRIVER_ERROR << 24;
-		goto out;
+		goto error;
 	}
 
 	memset(sense, 0, sizeof(sense));
@@ -516,7 +516,6 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 
 	blk_execute_rq(q, disk, rq, 0);
 
-out:
 	err = rq->errors & 0xff;	/* only 8 bit SCSI status */
 	if (err) {
 		if (rq->sense_len && rq->sense) {

From 0d280a6fa05199651ed7134a394999a3f434d07f Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Wed, 15 Oct 2014 20:47:24 +0000
Subject: [PATCH 1084/1185] i82860_edac: Report CE events properly

commit ab0543de6ff0877474f57a5aafbb51a61e88676f upstream.

Fix CE event being reported as HW_EVENT_ERR_UNCORRECTED.

Signed-off-by: Jason Baron <jbaron@akamai.com>
Link: http://lkml.kernel.org/r/7aee8e244a32ff86b399a8f966c4aae70296aae0.1413405053.git.jbaron@akamai.com
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/edac/i82860_edac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c
index 3e3e431c8301..b93b0d006ebb 100644
--- a/drivers/edac/i82860_edac.c
+++ b/drivers/edac/i82860_edac.c
@@ -124,7 +124,7 @@ static int i82860_process_error_info(struct mem_ctl_info *mci,
 				     dimm->location[0], dimm->location[1], -1,
 				     "i82860 UE", "");
 	else
-		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 				     info->eap, 0, info->derrsyn,
 				     dimm->location[0], dimm->location[1], -1,
 				     "i82860 CE", "");

From 10069449db937d0f1df94ec165dd0691100a5987 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Wed, 15 Oct 2014 20:47:21 +0000
Subject: [PATCH 1085/1185] i3200_edac: Report CE events properly

commit 8a3f075d6c9b3612b4a5fb2af8db82b38b20caf0 upstream.

Fix CE event being reported as HW_EVENT_ERR_UNCORRECTED.

Signed-off-by: Jason Baron <jbaron@akamai.com>
Link: http://lkml.kernel.org/r/d02465b4f30314b390c12c061502eda5e9d29c52.1413405053.git.jbaron@akamai.com
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/edac/i3200_edac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
index aa44c1718f50..71b26513b93b 100644
--- a/drivers/edac/i3200_edac.c
+++ b/drivers/edac/i3200_edac.c
@@ -242,11 +242,11 @@ static void i3200_process_error_info(struct mem_ctl_info *mci,
 					     -1, -1,
 					     "i3000 UE", "");
 		} else if (log & I3200_ECCERRLOG_CE) {
-			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 					     0, 0, eccerrlog_syndrome(log),
 					     eccerrlog_row(channel, log),
 					     -1, -1,
-					     "i3000 UE", "");
+					     "i3000 CE", "");
 		}
 	}
 }

From 7fbba82d28916b7f639915d10a1d7da97afc75b7 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Sat, 18 Oct 2014 16:06:32 +0200
Subject: [PATCH 1086/1185] e7xxx_edac: Report CE events properly

commit 8030122a9ccf939186f8db96c318dbb99b5463f6 upstream.

Fix CE event being reported as HW_EVENT_ERR_UNCORRECTED.

Signed-off-by: Jason Baron <jbaron@akamai.com>
Link: http://lkml.kernel.org/r/e6dd616f2cd51583a7e77af6f639b86313c74144.1413405053.git.jbaron@akamai.com
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/edac/e7xxx_edac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c
index 1c4056a50383..2697deae3ab7 100644
--- a/drivers/edac/e7xxx_edac.c
+++ b/drivers/edac/e7xxx_edac.c
@@ -226,7 +226,7 @@ static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
 static void process_ce_no_info(struct mem_ctl_info *mci)
 {
 	edac_dbg(3, "\n");
-	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
+	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
 			     "e7xxx CE log register overflow", "");
 }
 

From 770600e703e72e27b66149550ea683b53b758b0c Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Wed, 15 Oct 2014 20:47:28 +0000
Subject: [PATCH 1087/1185] cpc925_edac: Report UE events properly

commit fa19ac4b92bc2b5024af3e868f41f81fa738567a upstream.

Fix UE event being reported as HW_EVENT_ERR_CORRECTED.

Signed-off-by: Jason Baron <jbaron@akamai.com>
Link: http://lkml.kernel.org/r/8beb13803500076fef827eab33d523e355d83759.1413405053.git.jbaron@akamai.com
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/edac/cpc925_edac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c
index 7f3c57113ba1..1e08ce765f0c 100644
--- a/drivers/edac/cpc925_edac.c
+++ b/drivers/edac/cpc925_edac.c
@@ -562,7 +562,7 @@ static void cpc925_mc_check(struct mem_ctl_info *mci)
 
 	if (apiexcp & UECC_EXCP_DETECTED) {
 		cpc925_mc_printk(mci, KERN_INFO, "DRAM UECC Fault\n");
-		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
 				     pfn, offset, 0,
 				     csrow, -1, -1,
 				     mci->ctl_name, "");

From b51649f0c028f5cecd2bc55ee5b5901515f2ffc6 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 22 Oct 2014 14:46:29 -0400
Subject: [PATCH 1088/1185] nfsd4: fix crash on unknown operation number

commit 51904b08072a8bf2b9ed74d1bd7a5300a614471d upstream.

Unknown operation numbers are caught in nfsd4_decode_compound() which
sets op->opnum to OP_ILLEGAL and op->status to nfserr_op_illegal.  The
error causes the main loop in nfsd4_proc_compound() to skip most
processing.  But nfsd4_proc_compound also peeks ahead at the next
operation in one case and doesn't take similar precautions there.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfsd/nfs4proc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 0f9ce13972d0..9240dd1678da 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1191,7 +1191,8 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp)
 	 */
 	if (argp->opcnt == resp->opcnt)
 		return false;
-
+	if (next->opnum == OP_ILLEGAL)
+		return false;
 	nextd = OPDESC(next);
 	/*
 	 * Rest of 2.6.3.1.1: certain operations will return WRONGSEC

From b57203e1fdfb7fa7e7b00d777fb6c83d73f8ce36 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 16 Sep 2014 22:23:10 +0200
Subject: [PATCH 1089/1185] ext3: Don't check quota format when there are no
 quota files

commit 7938db449bbc55bbeb164bec7af406212e7e98f1 upstream.

The check whether quota format is set even though there are no
quota files with journalled quota is pointless and it actually
makes it impossible to turn off journalled quotas (as there's
no way to unset journalled quota format). Just remove the check.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext3/super.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 6356665a74bb..882d4bdfd428 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1300,13 +1300,6 @@ static int parse_options (char *options, struct super_block *sb,
 					"not specified.");
 			return 0;
 		}
-	} else {
-		if (sbi->s_jquota_fmt) {
-			ext3_msg(sb, KERN_ERR, "error: journaled quota format "
-					"specified with no journaling "
-					"enabled.");
-			return 0;
-		}
 	}
 #endif
 	return 1;

From 33d90ee4d0318fb1ec5bba6a8b9f4fa2d5333d3c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 22 Oct 2014 09:06:49 +0200
Subject: [PATCH 1090/1185] quota: Properly return errors from
 dquot_writeback_dquots()

commit 474d2605d119479e5aa050f738632e63589d4bb5 upstream.

Because the left and right sides of an assignment were swapped,
dquot_writeback_dquots() never returned an error. As a result, errors
during quota writeback might not be reported to userspace properly.
Fix it.

Coverity-id: 1226884
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/quota/dquot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 38802d683969..7a10e047bc33 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -637,7 +637,7 @@ int dquot_writeback_dquots(struct super_block *sb, int type)
 			dqstats_inc(DQST_LOOKUPS);
 			err = sb->dq_op->write_dquot(dquot);
 			if (!ret && err)
-				err = ret;
+				ret = err;
 			dqput(dquot);
 			spin_lock(&dq_list_lock);
 		}

From a88f5eaa6802026c97ebe34c2b961d60ad9c5bfa Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Thu, 16 Oct 2014 13:51:30 -0400
Subject: [PATCH 1091/1185] tty: Fix high cpu load if tty is unreleaseable

commit 37b164578826406a173ca7c20d9ba7430134d23e upstream.

Kernel oops can cause the tty to be unreleaseable (for example, if
n_tty_read() crashes while on the read_wait queue). This will cause
tty_release() to endlessly loop without sleeping.

Use a killable sleep timeout which grows by 2n+1 jiffies over the interval
[0, 120 secs.) and then jumps to forever (but still killable).

NB: killable just allows for the task to be rewoken manually, not
to be terminated.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_io.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 3723c0ebb316..d35afccdb6c9 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -1698,6 +1698,7 @@ int tty_release(struct inode *inode, struct file *filp)
 	int	pty_master, tty_closing, o_tty_closing, do_sleep;
 	int	idx;
 	char	buf[64];
+	long	timeout = 0;
 
 	if (tty_paranoia_check(tty, inode, __func__))
 		return 0;
@@ -1782,7 +1783,11 @@ int tty_release(struct inode *inode, struct file *filp)
 				__func__, tty_name(tty, buf));
 		tty_unlock_pair(tty, o_tty);
 		mutex_unlock(&tty_mutex);
-		schedule();
+		schedule_timeout_killable(timeout);
+		if (timeout < 120 * HZ)
+			timeout = 2 * timeout + 1;
+		else
+			timeout = MAX_SCHEDULE_TIMEOUT;
 	}
 
 	/*

From 1eaaef74ada050d820e5dd2132a56fd3c66b1549 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Fri, 24 Oct 2014 20:29:10 +0300
Subject: [PATCH 1092/1185] PM / Sleep: fix recovery during resuming from
 hibernation

commit 94fb823fcb4892614f57e59601bb9d4920f24711 upstream.

If a device's dev_pm_ops::freeze callback fails during the QUIESCE
phase, we don't roll things back correctly by calling the thaw and
complete callbacks. This could leave some devices in a suspended state
in case of an error while resuming from hibernation.

Signed-off-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/power/hibernate.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index b26f5f1e773e..1634dc6e2fe7 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -491,8 +491,14 @@ int hibernation_restore(int platform_mode)
 	error = dpm_suspend_start(PMSG_QUIESCE);
 	if (!error) {
 		error = resume_target_kernel(platform_mode);
-		dpm_resume_end(PMSG_RECOVER);
+		/*
+		 * The above should either succeed and jump to the new kernel,
+		 * or return with an error. Otherwise things are just
+		 * undefined, so let's be paranoid.
+		 */
+		BUG_ON(!error);
 	}
+	dpm_resume_end(PMSG_RECOVER);
 	pm_restore_gfp_mask();
 	ftrace_start();
 	resume_console();

From 9c31f4eca5d2633c78fb06be7d781583d4bf1cfa Mon Sep 17 00:00:00 2001
From: Karl Beldan <karl.beldan@rivierawaves.com>
Date: Mon, 13 Oct 2014 14:34:41 +0200
Subject: [PATCH 1093/1185] mac80211: fix typo in starting baserate for
 rts_cts_rate_idx

commit c7abf25af0f41be4b50d44c5b185d52eea360cb8 upstream.

It affects non-(V)HT rates and can lead to selecting an rts_cts rate
that is not a basic rate or way superior to the reference rate (ATM
rates[0] used for the 1st attempt of the protected frame data).

E.g, assuming drivers register growing (bitrate) sorted tables of
ieee80211_rate-s, having :
- rates[0].idx == d'2 and basic_rates == b'10100
will select rts_cts idx b'10011 & ~d'(BIT(2)-1), i.e. 1, likewise
- rates[0].idx == d'2 and basic_rates == b'10001
will select rts_cts idx b'10000
The first is not a basic rate and the second is > rates[0].

Also, wrt severity of the addressed misbehavior, ATM we only have one
rts_cts_rate_idx rather than one per rate table entry, so this idx might
still point to bitrates > rates[1..MAX_RATES].

Fixes: 5253ffb8c9e1 ("mac80211: always pick a basic rate to tx RTS/CTS for pre-HT rates")
Signed-off-by: Karl Beldan <karl.beldan@rivierawaves.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/rate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index a02bef35b134..d68d6cfac3b5 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -448,7 +448,7 @@ static void rate_fixup_ratelist(struct ieee80211_vif *vif,
 	 */
 	if (!(rates[0].flags & IEEE80211_TX_RC_MCS)) {
 		u32 basic_rates = vif->bss_conf.basic_rates;
-		s8 baserate = basic_rates ? ffs(basic_rates - 1) : 0;
+		s8 baserate = basic_rates ? ffs(basic_rates) - 1 : 0;
 
 		rate = &sband->bitrates[rates[0].idx];
 

From f9b6264a0fdf9268e61a065177dde1c8f823dfc0 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Sat, 4 Oct 2014 23:06:39 +0200
Subject: [PATCH 1094/1185] posix-timers: Fix stack info leak in timer_create()

commit 6891c4509c792209c44ced55a60f13954cb50ef4 upstream.

If userland creates a timer without specifying a sigevent info, we'll
create one ourselves, using a stack-local variable. In particular, we
use the timer ID as sival_int. But as sigev_value is a union containing
a pointer and an int, that assignment will only partially initialize
sigev_value on systems where the size of a pointer is bigger than the
size of an int. On such systems we'll copy the uninitialized stack bytes
from the timer_create() call to userland when the timer actually fires
and we're going to deliver the signal.

Initialize sigev_value with 0 to plug the stack info leak.

Found in the PaX patch, written by the PaX Team.

Fixes: 5a9fa7307285 ("posix-timers: kill ->it_sigev_signo and...")
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Brad Spengler <spender@grsecurity.net>
Cc: PaX Team <pageexec@freemail.hu>
Link: http://lkml.kernel.org/r/1412456799-32339-1-git-send-email-minipli@googlemail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/posix-timers.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 424c2d4265c9..77e6b83c0431 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -634,6 +634,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
 			goto out;
 		}
 	} else {
+		memset(&event.sigev_value, 0, sizeof(event.sigev_value));
 		event.sigev_notify = SIGEV_SIGNAL;
 		event.sigev_signo = SIGALRM;
 		event.sigev_value.sival_int = new_timer->it_id;

From 2d1fef447d8e19ac23b8226e4655b78f072569bd Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Wed, 15 Oct 2014 10:12:07 -0700
Subject: [PATCH 1095/1185] x86, apic: Handle a bad TSC more gracefully

commit b47dcbdc5161d3d5756f430191e2840d9b855492 upstream.

If the TSC is unusable or disabled, then this patch fixes:

 - Confusion while trying to clear old APIC interrupts.
 - Division by zero and incorrect programming of the TSC deadline
   timer.

This fixes boot if the CPU has a TSC deadline timer but a missing or
broken TSC.  The failure to boot can be observed with qemu using
-cpu qemu64,-tsc,+tsc-deadline

This also happens to me in nested KVM for unknown reasons.
With this patch, I can boot cleanly (although without a TSC).

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: Bandan Das <bsd@redhat.com>
Link: http://lkml.kernel.org/r/e2fa274e498c33988efac0ba8b7e3120f7f92d78.1413393027.git.luto@amacapital.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/apic/apic.c | 4 ++--
 arch/x86/kernel/tsc.c       | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 904611bf0e5a..033eb44dc661 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1263,7 +1263,7 @@ void __cpuinit setup_local_APIC(void)
 	unsigned int value, queued;
 	int i, j, acked = 0;
 	unsigned long long tsc = 0, ntsc;
-	long long max_loops = cpu_khz;
+	long long max_loops = cpu_khz ? cpu_khz : 1000000;
 
 	if (cpu_has_tsc)
 		rdtscll(tsc);
@@ -1360,7 +1360,7 @@ void __cpuinit setup_local_APIC(void)
 			break;
 		}
 		if (queued) {
-			if (cpu_has_tsc) {
+			if (cpu_has_tsc && cpu_khz) {
 				rdtscll(ntsc);
 				max_loops = (cpu_khz << 10) - (ntsc - tsc);
 			} else
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 098b3cfda72e..4e27ba53c40c 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -968,14 +968,17 @@ void __init tsc_init(void)
 
 	x86_init.timers.tsc_pre_init();
 
-	if (!cpu_has_tsc)
+	if (!cpu_has_tsc) {
+		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
 		return;
+	}
 
 	tsc_khz = x86_platform.calibrate_tsc();
 	cpu_khz = tsc_khz;
 
 	if (!tsc_khz) {
 		mark_tsc_unstable("could not calculate TSC khz");
+		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
 		return;
 	}
 

From b089fe5b6d7fe1ad383dd2ffcb2e4ce4ee0c574d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 30 Oct 2014 10:35:00 +1100
Subject: [PATCH 1096/1185] mm: Remove false WARN_ON from
 pagecache_isize_extended()

commit f55fefd1a5a339b1bd08c120b93312d6eb64a9fb upstream.

The WARN_ON checking whether i_mutex is held in
pagecache_isize_extended() was wrong because some filesystems (e.g.
XFS) use different locks for serialization of truncates / writes. So
just remove the check.

Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/truncate.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mm/truncate.c b/mm/truncate.c
index 2f03c3ac7ab7..2d6151fc8f08 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -603,7 +603,6 @@ void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
 	struct page *page;
 	pgoff_t index;
 
-	WARN_ON(!mutex_is_locked(&inode->i_mutex));
 	WARN_ON(to > inode->i_size);
 
 	if (from >= to || bsize == PAGE_CACHE_SIZE)

From 591189c21235f0c2884cfae24b65ecfb5e120325 Mon Sep 17 00:00:00 2001
From: Ondrej Kozina <okozina@redhat.com>
Date: Mon, 25 Aug 2014 11:49:54 +0200
Subject: [PATCH 1097/1185] crypto: algif - avoid excessive use of socket
 buffer in skcipher

commit e2cffb5f493a8b431dc87124388ea59b79f0bccb upstream.

On archs with PAGE_SIZE >= 64 KiB the function skcipher_alloc_sgl()
fails with -ENOMEM no matter what user space actually requested.
This is because the sock_kmalloc() call inside the function tries to
allocate more memory than allowed by the default kernel socket buffer
size (kernel param net.core.optmem_max).

Signed-off-by: Ondrej Kozina <okozina@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/algif_skcipher.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index a19c027b29bd..83187f497c7c 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -49,7 +49,7 @@ struct skcipher_ctx {
 	struct ablkcipher_request req;
 };
 
-#define MAX_SGL_ENTS ((PAGE_SIZE - sizeof(struct skcipher_sg_list)) / \
+#define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \
 		      sizeof(struct scatterlist) - 1)
 
 static inline int skcipher_sndbuf(struct sock *sk)

From 7d5137586f7ce78e2ac5e11e2f9bfe9a12a4c496 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Mon, 10 Nov 2014 08:55:44 -0600
Subject: [PATCH 1098/1185] usb: dwc3: gadget: fix set_halt() bug with pending
 transfers

[ Upstream commit 7a60855972f0d3c014093046cb6f013a1ee5bb19 ]

According to our Gadget Framework API documentation,
->set_halt() *must* return -EAGAIN if we have pending
transfers (on either direction) or FIFO isn't empty (on
TX endpoints).

Fix this bug so that the mass storage gadget can be used
without stall=0 parameter.

This patch should be backported to all kernels since v3.2.

Suggested-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/ep0.c    |  4 ++--
 drivers/usb/dwc3/gadget.c | 16 ++++++++++++----
 drivers/usb/dwc3/gadget.h |  2 +-
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index 3cea676ba901..6cd418f6ac07 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c
@@ -270,7 +270,7 @@ static void dwc3_ep0_stall_and_restart(struct dwc3 *dwc)
 
 	/* stall is always issued on EP0 */
 	dep = dwc->eps[0];
-	__dwc3_gadget_ep_set_halt(dep, 1);
+	__dwc3_gadget_ep_set_halt(dep, 1, false);
 	dep->flags = DWC3_EP_ENABLED;
 	dwc->delayed_status = false;
 
@@ -480,7 +480,7 @@ static int dwc3_ep0_handle_feature(struct dwc3 *dwc,
 				return -EINVAL;
 			if (set == 0 && (dep->flags & DWC3_EP_WEDGE))
 				break;
-			ret = __dwc3_gadget_ep_set_halt(dep, set);
+			ret = __dwc3_gadget_ep_set_halt(dep, set, true);
 			if (ret)
 				return -EINVAL;
 			break;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index ec4cb05291d7..8f8e75e392de 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -605,7 +605,7 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep)
 
 	/* make sure HW endpoint isn't stalled */
 	if (dep->flags & DWC3_EP_STALL)
-		__dwc3_gadget_ep_set_halt(dep, 0);
+		__dwc3_gadget_ep_set_halt(dep, 0, false);
 
 	reg = dwc3_readl(dwc->regs, DWC3_DALEPENA);
 	reg &= ~DWC3_DALEPENA_EP(dep->number);
@@ -1205,7 +1205,7 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep,
 	return ret;
 }
 
-int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value)
+int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
 {
 	struct dwc3_gadget_ep_cmd_params	params;
 	struct dwc3				*dwc = dep->dwc;
@@ -1214,6 +1214,14 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value)
 	memset(&params, 0x00, sizeof(params));
 
 	if (value) {
+		if (!protocol && ((dep->direction && dep->flags & DWC3_EP_BUSY) ||
+				(!list_empty(&dep->req_queued) ||
+				 !list_empty(&dep->request_list)))) {
+			dev_dbg(dwc->dev, "%s: pending request, cannot halt\n",
+					dep->name);
+			return -EAGAIN;
+		}
+
 		ret = dwc3_send_gadget_ep_cmd(dwc, dep->number,
 			DWC3_DEPCMD_SETSTALL, &params);
 		if (ret)
@@ -1253,7 +1261,7 @@ static int dwc3_gadget_ep_set_halt(struct usb_ep *ep, int value)
 		goto out;
 	}
 
-	ret = __dwc3_gadget_ep_set_halt(dep, value);
+	ret = __dwc3_gadget_ep_set_halt(dep, value, false);
 out:
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
@@ -1273,7 +1281,7 @@ static int dwc3_gadget_ep_set_wedge(struct usb_ep *ep)
 	if (dep->number == 0 || dep->number == 1)
 		return dwc3_gadget_ep0_set_halt(ep, 1);
 	else
-		return dwc3_gadget_ep_set_halt(ep, 1);
+		return __dwc3_gadget_ep_set_halt(dep, 1, false);
 }
 
 /* -------------------------------------------------------------------------- */
diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h
index 99e6d7248820..b3f25c302e35 100644
--- a/drivers/usb/dwc3/gadget.h
+++ b/drivers/usb/dwc3/gadget.h
@@ -114,7 +114,7 @@ void dwc3_ep0_out_start(struct dwc3 *dwc);
 int dwc3_gadget_ep0_set_halt(struct usb_ep *ep, int value);
 int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request,
 		gfp_t gfp_flags);
-int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value);
+int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol);
 int dwc3_send_gadget_ep_cmd(struct dwc3 *dwc, unsigned ep,
 		unsigned cmd, struct dwc3_gadget_ep_cmd_params *params);
 int dwc3_send_gadget_generic_command(struct dwc3 *dwc, int cmd, u32 param);

From 1786a65707397a5670998eb3c217179b2e3db8f5 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Mon, 10 Nov 2014 08:56:40 -0600
Subject: [PATCH 1099/1185] usb: gadget: function: acm: make f_acm pass USB20CV
 Chapter9

[ Upstream commit 52ec49a5e56a27c5b6f8217708783eff39f24c16 ]

During Halt Endpoint Test, our interrupt endpoint
will be disabled, which will clear out ep->desc
to NULL. Unless we call config_ep_by_speed() again,
we will not be able to enable this endpoint which
will make us fail that test.

Fixes: f9c56cd (usb: gadget: Clear usb_endpoint_descriptor
	inside the struct usb_ep on disable)
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/f_acm.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/gadget/f_acm.c b/drivers/usb/gadget/f_acm.c
index ab1065afbbd0..3384486c2884 100644
--- a/drivers/usb/gadget/f_acm.c
+++ b/drivers/usb/gadget/f_acm.c
@@ -430,11 +430,12 @@ static int acm_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
 		if (acm->notify->driver_data) {
 			VDBG(cdev, "reset acm control interface %d\n", intf);
 			usb_ep_disable(acm->notify);
-		} else {
-			VDBG(cdev, "init acm ctrl interface %d\n", intf);
+		}
+
+		if (!acm->notify->desc)
 			if (config_ep_by_speed(cdev->gadget, f, acm->notify))
 				return -EINVAL;
-		}
+
 		usb_ep_enable(acm->notify);
 		acm->notify->driver_data = acm;
 

From 42a1d0367d05b8a4cb2147948c53cc956df6fd59 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Mon, 10 Nov 2014 09:06:20 -0600
Subject: [PATCH 1100/1185] usb: gadget: udc: core: fix kernel oops with
 soft-connect

[ Upstream commit bfa6b18c680450c17512c741ed1d818695747621 ]

Currently, there's no guarantee that udc->driver
will be valid when using soft_connect sysfs
interface. In fact, we can very easily trigger
a NULL pointer dereference by trying to disconnect
when a gadget driver isn't loaded.

Fix this bug:

~# echo disconnect > soft_connect
[   33.685743] Unable to handle kernel NULL pointer dereference at virtual address 00000014
[   33.694221] pgd = ed0cc000
[   33.697174] [00000014] *pgd=ae351831, *pte=00000000, *ppte=00000000
[   33.703766] Internal error: Oops: 17 [#1] SMP ARM
[   33.708697] Modules linked in: xhci_plat_hcd xhci_hcd snd_soc_davinci_mcasp snd_soc_tlv320aic3x snd_soc_edma snd_soc_omap snd_soc_evm snd_soc_core dwc3 snd_compress snd_pcm_dmaengine snd_pcm snd_timer snd lis3lv02d_i2c matrix_keypad lis3lv02d dwc3_omap input_polldev soundcore
[   33.734372] CPU: 0 PID: 1457 Comm: bash Not tainted 3.17.0-09740-ga93416e-dirty #345
[   33.742457] task: ee71ce00 ti: ee68a000 task.ti: ee68a000
[   33.748116] PC is at usb_udc_softconn_store+0xa4/0xec
[   33.753416] LR is at mark_held_locks+0x78/0x90
[   33.758057] pc : [<c04df128>]    lr : [<c00896a4>]    psr: 20000013
[   33.758057] sp : ee68bec8  ip : c0c00008  fp : ee68bee4
[   33.770050] r10: ee6b394c  r9 : ee68bf80  r8 : ee6062c0
[   33.775508] r7 : 00000000  r6 : ee6062c0  r5 : 0000000b  r4 : ee739408
[   33.782346] r3 : 00000000  r2 : 00000000  r1 : ee71d390  r0 : ee664170
[   33.789168] Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
[   33.796636] Control: 10c5387d  Table: ad0cc059  DAC: 00000015
[   33.802638] Process bash (pid: 1457, stack limit = 0xee68a248)
[   33.808740] Stack: (0xee68bec8 to 0xee68c000)
[   33.813299] bec0:                   0000000b c0411284 ee6062c0 00000000 ee68bef4 ee68bee8
[   33.821862] bee0: c04112ac c04df090 ee68bf14 ee68bef8 c01c2868 c0411290 0000000b ee6b3940
[   33.830419] bf00: 00000000 00000000 ee68bf4c ee68bf18 c01c1a24 c01c2818 00000000 00000000
[   33.838990] bf20: ee61b940 ee2f47c0 0000000b 000ce408 ee68bf80 c000f304 ee68a000 00000000
[   33.847544] bf40: ee68bf7c ee68bf50 c0152dd8 c01c1960 ee68bf7c c0170af8 ee68bf7c ee2f47c0
[   33.856099] bf60: ee2f47c0 000ce408 0000000b c000f304 ee68bfa4 ee68bf80 c0153330 c0152d34
[   33.864653] bf80: 00000000 00000000 0000000b 000ce408 b6e7fb50 00000004 00000000 ee68bfa8
[   33.873204] bfa0: c000f080 c01532e8 0000000b 000ce408 00000001 000ce408 0000000b 00000000
[   33.881763] bfc0: 0000000b 000ce408 b6e7fb50 00000004 0000000b 00000000 000c5758 00000000
[   33.890319] bfe0: 00000000 bec2c924 b6de422d b6e1d226 40000030 00000001 75716d2f 00657565
[   33.898890] [<c04df128>] (usb_udc_softconn_store) from [<c04112ac>] (dev_attr_store+0x28/0x34)
[   33.907920] [<c04112ac>] (dev_attr_store) from [<c01c2868>] (sysfs_kf_write+0x5c/0x60)
[   33.916200] [<c01c2868>] (sysfs_kf_write) from [<c01c1a24>] (kernfs_fop_write+0xd0/0x194)
[   33.924773] [<c01c1a24>] (kernfs_fop_write) from [<c0152dd8>] (vfs_write+0xb0/0x1bc)
[   33.932874] [<c0152dd8>] (vfs_write) from [<c0153330>] (SyS_write+0x54/0xb0)
[   33.940247] [<c0153330>] (SyS_write) from [<c000f080>] (ret_fast_syscall+0x0/0x48)
[   33.948160] Code: e1a01007 e12fff33 e5140004 e5143008 (e5933014)
[   33.954625] ---[ end trace f849bead94eab7ea ]---

Fixes: 2ccea03 (usb: gadget: introduce UDC Class)
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc-core.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c
index 5514822114a5..817a26cbfab1 100644
--- a/drivers/usb/gadget/udc-core.c
+++ b/drivers/usb/gadget/udc-core.c
@@ -439,6 +439,11 @@ static ssize_t usb_udc_softconn_store(struct device *dev,
 {
 	struct usb_udc		*udc = container_of(dev, struct usb_udc, dev);
 
+	if (!udc->driver) {
+		dev_err(dev, "soft-connect without a gadget driver\n");
+		return -EOPNOTSUPP;
+	}
+
 	if (sysfs_streq(buf, "connect")) {
 		usb_gadget_udc_start(udc->gadget, udc->driver);
 		usb_gadget_connect(udc->gadget);

From 5157bf1f2b4e8796bf5dc197155f7daacc9053ee Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Sun, 26 Oct 2014 15:18:42 -0400
Subject: [PATCH 1101/1185] drm/radeon: remove invalid pci id

commit 8c3e434769b1707fd2d24de5a2eb25fedc634c4a upstream.

0x4c6e is a secondary device id so should not be used
by the driver.

Noticed-by: Mark Kettenis <mark.kettenis@xs4all.nl>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/drm/drm_pciids.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 3a1cff56ef20..d7b717090f2f 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -52,7 +52,6 @@
 	{0x1002, 0x4C64, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV250|RADEON_IS_MOBILITY}, \
 	{0x1002, 0x4C66, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV250|RADEON_IS_MOBILITY}, \
 	{0x1002, 0x4C67, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV250|RADEON_IS_MOBILITY}, \
-	{0x1002, 0x4C6E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV280|RADEON_IS_MOBILITY}, \
 	{0x1002, 0x4E44, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R300}, \
 	{0x1002, 0x4E45, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R300}, \
 	{0x1002, 0x4E46, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R300}, \

From f693fddf24d1808a835b15c72c04d30c2df1e6e7 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 22 Oct 2014 09:17:24 +0200
Subject: [PATCH 1102/1185] rbd: Fix error recovery in rbd_obj_read_sync()

commit a8d4205623ae965e36c68629db306ca0695a2771 upstream.

When we fail to allocate page vector in rbd_obj_read_sync() we just
basically ignore the problem and continue which will result in an oops
later. Fix the problem by returning proper error.

CC: Yehuda Sadeh <yehuda@inktank.com>
CC: Sage Weil <sage@inktank.com>
CC: ceph-devel@vger.kernel.org
Coverity-id: 1226882
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Ilya Dryomov <idryomov@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/rbd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 07caf44d5755..9951e66b8502 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3227,7 +3227,7 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
 	page_count = (u32) calc_pages_for(offset, length);
 	pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
 	if (IS_ERR(pages))
-		ret = PTR_ERR(pages);
+		return PTR_ERR(pages);
 
 	ret = -ENOMEM;
 	obj_request = rbd_obj_request_create(object_name, offset, length,

From 018fd7f83f65eed43a6c655686e05c95bab03637 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 22 Oct 2014 16:06:38 +0200
Subject: [PATCH 1103/1185] acer-wmi: Add acpi_backlight=video quirk for the
 Acer KAV80

commit 183fd8fcd7f8afb7ac5ec68f83194872f9fecc84 upstream.

The acpi-video backlight interface on the Acer KAV80 is broken, and worse
it causes the entire machine to slow down significantly after a suspend/resume.

Blacklist it, and use the acer-wmi backlight interface instead. Note that
the KAV80 is somewhat unique in that it is the only Acer model where we
fall back to acer-wmi after blacklisting, rather than using the native
(e.g. intel) backlight driver. This is done because there is no native
backlight interface on this model.

BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1128309
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Darren Hart <dvhart@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/platform/x86/acer-wmi.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index c9076bdaf2c1..59a8d325a697 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -572,6 +572,17 @@ static const struct dmi_system_id video_vendor_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5750"),
 		},
 	},
+	{
+		/*
+		 * Note no video_set_backlight_video_vendor, we must use the
+		 * acer interface, as there is no native backlight interface.
+		 */
+		.ident = "Acer KAV80",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "KAV80"),
+		},
+	},
 	{}
 };
 

From 7fae0f7488f7817eadc0c2dfa70a416da2cafb3b Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Mon, 3 Nov 2014 21:16:16 +0100
Subject: [PATCH 1104/1185] i2c: at91: don't account as iowait

commit 11cfbfb098b22d3e57f1f2be217cad20e2d48463 upstream.

iowait is for blkio [1]. I2C shouldn't use it.

[1] https://lkml.org/lkml/2014/11/3/317

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Ludovic Desroches <ludovic.desroches@atmel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-at91.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c
index b1240a250149..09324d0178d5 100644
--- a/drivers/i2c/busses/i2c-at91.c
+++ b/drivers/i2c/busses/i2c-at91.c
@@ -435,7 +435,7 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev)
 		}
 	}
 
-	ret = wait_for_completion_io_timeout(&dev->cmd_complete,
+	ret = wait_for_completion_timeout(&dev->cmd_complete,
 					     dev->adapter.timeout);
 	if (ret == 0) {
 		dev_err(dev->dev, "controller timed out\n");

From afb16d3e8e031b25993df65dfdb92e503f596916 Mon Sep 17 00:00:00 2001
From: Yijing Wang <wangyijing@huawei.com>
Date: Fri, 7 Nov 2014 12:05:49 +0800
Subject: [PATCH 1105/1185] sysfs: driver core: Fix glue dir race condition by
 gdp_mutex

commit e4a60d139060975eb956717e4f63ae348d4d8cc5 upstream.

There is a race condition when removing glue directory.
It can be reproduced in following test:

path 1: Add first child device
device_add()
    get_device_parent()
            /*find parent from glue_dirs.list*/
            list_for_each_entry(k, &dev->class->p->glue_dirs.list, entry)
                    if (k->parent == parent_kobj) {
                            kobj = kobject_get(k);
                            break;
                    }
            ....
            class_dir_create_and_add()

path2: Remove last child device under glue dir
device_del()
    cleanup_device_parent()
            cleanup_glue_dir()
                    kobject_put(glue_dir);

If path2 has entered cleanup_glue_dir() but has not yet
called kobject_put(glue_dir), the glue dir is still
in the parent's kset list. Meanwhile, path1 finds the glue
dir in glue_dirs.list. Path2 may then release the glue dir
before path1 calls kobject_get(), so the kernel reports
the warning and the BUG shown below.

This is a "classic" problem we have of a kref in a list
that can be found while the last instance could be removed
at the same time.

This patch reuses gdp_mutex to fix this race condition.

The following calltrace is captured in kernel 3.4, but
the latest kernel still has this bug.

-----------------------------------------------------
<4>[ 3965.441471] WARNING: at ...include/linux/kref.h:41 kobject_get+0x33/0x40()
<4>[ 3965.441474] Hardware name: Romley
<4>[ 3965.441475] Modules linked in: isd_iop(O) isd_xda(O)...
...
<4>[ 3965.441605] Call Trace:
<4>[ 3965.441611]  [<ffffffff8103717a>] warn_slowpath_common+0x7a/0xb0
<4>[ 3965.441615]  [<ffffffff810371c5>] warn_slowpath_null+0x15/0x20
<4>[ 3965.441618]  [<ffffffff81215963>] kobject_get+0x33/0x40
<4>[ 3965.441624]  [<ffffffff812d1e45>] get_device_parent.isra.11+0x135/0x1f0
<4>[ 3965.441627]  [<ffffffff812d22d4>] device_add+0xd4/0x6d0
<4>[ 3965.441631]  [<ffffffff812d0dbc>] ? dev_set_name+0x3c/0x40
....
<2>[ 3965.441912] kernel BUG at ..../fs/sysfs/group.c:65!
<4>[ 3965.441915] invalid opcode: 0000 [#1] SMP
...
<4>[ 3965.686743]  [<ffffffff811a677e>] sysfs_create_group+0xe/0x10
<4>[ 3965.686748]  [<ffffffff810cfb04>] blk_trace_init_sysfs+0x14/0x20
<4>[ 3965.686753]  [<ffffffff811fcabb>] blk_register_queue+0x3b/0x120
<4>[ 3965.686756]  [<ffffffff812030bc>] add_disk+0x1cc/0x490
....
-------------------------------------------------------

Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Weng Meiling <wengmeiling.weng@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index ca4bcb8b3938..2a19097a7cb1 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -765,12 +765,12 @@ class_dir_create_and_add(struct class *class, struct kobject *parent_kobj)
 	return &dir->kobj;
 }
 
+static DEFINE_MUTEX(gdp_mutex);
 
 static struct kobject *get_device_parent(struct device *dev,
 					 struct device *parent)
 {
 	if (dev->class) {
-		static DEFINE_MUTEX(gdp_mutex);
 		struct kobject *kobj = NULL;
 		struct kobject *parent_kobj;
 		struct kobject *k;
@@ -834,7 +834,9 @@ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir)
 	    glue_dir->kset != &dev->class->p->glue_dirs)
 		return;
 
+	mutex_lock(&gdp_mutex);
 	kobject_put(glue_dir);
+	mutex_unlock(&gdp_mutex);
 }
 
 static void cleanup_device_parent(struct device *dev)

From 96db973853b1d5a93836261b0edcc877ddc335a4 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Mon, 3 Nov 2014 15:15:35 +0000
Subject: [PATCH 1106/1185] of: Fix overflow bug in string property parsing
 functions

commit a87fa1d81a9fb5e9adca9820e16008c40ad09f33 upstream.

The string property read helpers will run off the end of the buffer if
they are handed a malformed string property. Rework the parsers to make
sure that doesn't happen. At the same time add new test cases to make
sure the functions behave themselves.

The original implementations of of_property_read_string_index() and
of_property_count_strings() both open-coded the same block of parsing
code, each with its own subtly different bugs. The fix here merges
functions into a single helper and makes the original functions static
inline wrappers around the helper.

One non-bugfix aspect of this patch is the addition of a new wrapper,
of_property_read_string_array(). The new wrapper is needed by the
device_properties feature that Rafael is working on and planning to
merge for v3.19. The implementation is identical both with and without
the new static inline wrapper, so it just got left in to reduce the
churn on the header file.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Darren Hart <darren.hart@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/of/base.c     | 88 +++++++++++--------------------------------
 drivers/of/selftest.c | 66 +++++++++++++++++++++++++++++---
 include/linux/of.h    | 84 ++++++++++++++++++++++++++++++++++-------
 3 files changed, 152 insertions(+), 86 deletions(-)

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 1d10b4ec6814..b60f9a77ab03 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -962,52 +962,6 @@ int of_property_read_string(struct device_node *np, const char *propname,
 }
 EXPORT_SYMBOL_GPL(of_property_read_string);
 
-/**
- * of_property_read_string_index - Find and read a string from a multiple
- * strings property.
- * @np:		device node from which the property value is to be read.
- * @propname:	name of the property to be searched.
- * @index:	index of the string in the list of strings
- * @out_string:	pointer to null terminated return string, modified only if
- *		return value is 0.
- *
- * Search for a property in a device tree node and retrieve a null
- * terminated string value (pointer to data, not a copy) in the list of strings
- * contained in that property.
- * Returns 0 on success, -EINVAL if the property does not exist, -ENODATA if
- * property does not have a value, and -EILSEQ if the string is not
- * null-terminated within the length of the property data.
- *
- * The out_string pointer is modified only if a valid string can be decoded.
- */
-int of_property_read_string_index(struct device_node *np, const char *propname,
-				  int index, const char **output)
-{
-	struct property *prop = of_find_property(np, propname, NULL);
-	int i = 0;
-	size_t l = 0, total = 0;
-	const char *p;
-
-	if (!prop)
-		return -EINVAL;
-	if (!prop->value)
-		return -ENODATA;
-	if (strnlen(prop->value, prop->length) >= prop->length)
-		return -EILSEQ;
-
-	p = prop->value;
-
-	for (i = 0; total < prop->length; total += l, p += l) {
-		l = strlen(p) + 1;
-		if (i++ == index) {
-			*output = p;
-			return 0;
-		}
-	}
-	return -ENODATA;
-}
-EXPORT_SYMBOL_GPL(of_property_read_string_index);
-
 /**
  * of_property_match_string() - Find string in a list and return index
  * @np: pointer to node containing string list property
@@ -1034,7 +988,7 @@ int of_property_match_string(struct device_node *np, const char *propname,
 	end = p + prop->length;
 
 	for (i = 0; p < end; i++, p += l) {
-		l = strlen(p) + 1;
+		l = strnlen(p, end - p) + 1;
 		if (p + l > end)
 			return -EILSEQ;
 		pr_debug("comparing %s with %s\n", string, p);
@@ -1046,39 +1000,41 @@ int of_property_match_string(struct device_node *np, const char *propname,
 EXPORT_SYMBOL_GPL(of_property_match_string);
 
 /**
- * of_property_count_strings - Find and return the number of strings from a
- * multiple strings property.
+ * of_property_read_string_helper() - Utility helper for parsing string properties
  * @np:		device node from which the property value is to be read.
  * @propname:	name of the property to be searched.
+ * @out_strs:	output array of string pointers.
+ * @sz:		number of array elements to read.
+ * @skip:	Number of strings to skip over at beginning of list.
  *
- * Search for a property in a device tree node and retrieve the number of null
- * terminated string contain in it. Returns the number of strings on
- * success, -EINVAL if the property does not exist, -ENODATA if property
- * does not have a value, and -EILSEQ if the string is not null-terminated
- * within the length of the property data.
+ * Don't call this function directly. It is a utility helper for the
+ * of_property_read_string*() family of functions.
  */
-int of_property_count_strings(struct device_node *np, const char *propname)
+int of_property_read_string_helper(struct device_node *np, const char *propname,
+				   const char **out_strs, size_t sz, int skip)
 {
 	struct property *prop = of_find_property(np, propname, NULL);
-	int i = 0;
-	size_t l = 0, total = 0;
-	const char *p;
+	int l = 0, i = 0;
+	const char *p, *end;
 
 	if (!prop)
 		return -EINVAL;
 	if (!prop->value)
 		return -ENODATA;
-	if (strnlen(prop->value, prop->length) >= prop->length)
-		return -EILSEQ;
-
 	p = prop->value;
+	end = p + prop->length;
 
-	for (i = 0; total < prop->length; total += l, p += l, i++)
-		l = strlen(p) + 1;
-
-	return i;
+	for (i = 0; p < end && (!out_strs || i < skip + sz); i++, p += l) {
+		l = strnlen(p, end - p) + 1;
+		if (p + l > end)
+			return -EILSEQ;
+		if (out_strs && i >= skip)
+			*out_strs++ = p;
+	}
+	i -= skip;
+	return i <= 0 ? -ENODATA : i;
 }
-EXPORT_SYMBOL_GPL(of_property_count_strings);
+EXPORT_SYMBOL_GPL(of_property_read_string_helper);
 
 /**
  * of_parse_phandle - Resolve a phandle property to a device_node pointer
diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c
index 0eb5c38b4e07..f5e8dc7a725c 100644
--- a/drivers/of/selftest.c
+++ b/drivers/of/selftest.c
@@ -126,8 +126,9 @@ static void __init of_selftest_parse_phandle_with_args(void)
 	selftest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
 }
 
-static void __init of_selftest_property_match_string(void)
+static void __init of_selftest_property_string(void)
 {
+	const char *strings[4];
 	struct device_node *np;
 	int rc;
 
@@ -145,13 +146,66 @@ static void __init of_selftest_property_match_string(void)
 	rc = of_property_match_string(np, "phandle-list-names", "third");
 	selftest(rc == 2, "third expected:0 got:%i\n", rc);
 	rc = of_property_match_string(np, "phandle-list-names", "fourth");
-	selftest(rc == -ENODATA, "unmatched string; rc=%i", rc);
+	selftest(rc == -ENODATA, "unmatched string; rc=%i\n", rc);
 	rc = of_property_match_string(np, "missing-property", "blah");
-	selftest(rc == -EINVAL, "missing property; rc=%i", rc);
+	selftest(rc == -EINVAL, "missing property; rc=%i\n", rc);
 	rc = of_property_match_string(np, "empty-property", "blah");
-	selftest(rc == -ENODATA, "empty property; rc=%i", rc);
+	selftest(rc == -ENODATA, "empty property; rc=%i\n", rc);
 	rc = of_property_match_string(np, "unterminated-string", "blah");
-	selftest(rc == -EILSEQ, "unterminated string; rc=%i", rc);
+	selftest(rc == -EILSEQ, "unterminated string; rc=%i\n", rc);
+
+	/* of_property_count_strings() tests */
+	rc = of_property_count_strings(np, "string-property");
+	selftest(rc == 1, "Incorrect string count; rc=%i\n", rc);
+	rc = of_property_count_strings(np, "phandle-list-names");
+	selftest(rc == 3, "Incorrect string count; rc=%i\n", rc);
+	rc = of_property_count_strings(np, "unterminated-string");
+	selftest(rc == -EILSEQ, "unterminated string; rc=%i\n", rc);
+	rc = of_property_count_strings(np, "unterminated-string-list");
+	selftest(rc == -EILSEQ, "unterminated string array; rc=%i\n", rc);
+
+	/* of_property_read_string_index() tests */
+	rc = of_property_read_string_index(np, "string-property", 0, strings);
+	selftest(rc == 0 && !strcmp(strings[0], "foobar"), "of_property_read_string_index() failure; rc=%i\n", rc);
+	strings[0] = NULL;
+	rc = of_property_read_string_index(np, "string-property", 1, strings);
+	selftest(rc == -ENODATA && strings[0] == NULL, "of_property_read_string_index() failure; rc=%i\n", rc);
+	rc = of_property_read_string_index(np, "phandle-list-names", 0, strings);
+	selftest(rc == 0 && !strcmp(strings[0], "first"), "of_property_read_string_index() failure; rc=%i\n", rc);
+	rc = of_property_read_string_index(np, "phandle-list-names", 1, strings);
+	selftest(rc == 0 && !strcmp(strings[0], "second"), "of_property_read_string_index() failure; rc=%i\n", rc);
+	rc = of_property_read_string_index(np, "phandle-list-names", 2, strings);
+	selftest(rc == 0 && !strcmp(strings[0], "third"), "of_property_read_string_index() failure; rc=%i\n", rc);
+	strings[0] = NULL;
+	rc = of_property_read_string_index(np, "phandle-list-names", 3, strings);
+	selftest(rc == -ENODATA && strings[0] == NULL, "of_property_read_string_index() failure; rc=%i\n", rc);
+	strings[0] = NULL;
+	rc = of_property_read_string_index(np, "unterminated-string", 0, strings);
+	selftest(rc == -EILSEQ && strings[0] == NULL, "of_property_read_string_index() failure; rc=%i\n", rc);
+	rc = of_property_read_string_index(np, "unterminated-string-list", 0, strings);
+	selftest(rc == 0 && !strcmp(strings[0], "first"), "of_property_read_string_index() failure; rc=%i\n", rc);
+	strings[0] = NULL;
+	rc = of_property_read_string_index(np, "unterminated-string-list", 2, strings); /* should fail */
+	selftest(rc == -EILSEQ && strings[0] == NULL, "of_property_read_string_index() failure; rc=%i\n", rc);
+	strings[1] = NULL;
+
+	/* of_property_read_string_array() tests */
+	rc = of_property_read_string_array(np, "string-property", strings, 4);
+	selftest(rc == 1, "Incorrect string count; rc=%i\n", rc);
+	rc = of_property_read_string_array(np, "phandle-list-names", strings, 4);
+	selftest(rc == 3, "Incorrect string count; rc=%i\n", rc);
+	rc = of_property_read_string_array(np, "unterminated-string", strings, 4);
+	selftest(rc == -EILSEQ, "unterminated string; rc=%i\n", rc);
+	/* -- An incorrectly formed string should cause a failure */
+	rc = of_property_read_string_array(np, "unterminated-string-list", strings, 4);
+	selftest(rc == -EILSEQ, "unterminated string array; rc=%i\n", rc);
+	/* -- parsing the correctly formed strings should still work: */
+	strings[2] = NULL;
+	rc = of_property_read_string_array(np, "unterminated-string-list", strings, 2);
+	selftest(rc == 2 && strings[2] == NULL, "of_property_read_string_array() failure; rc=%i\n", rc);
+	strings[1] = NULL;
+	rc = of_property_read_string_array(np, "phandle-list-names", strings, 1);
+	selftest(rc == 1 && strings[1] == NULL, "Overwrote end of string array; rc=%i, str='%s'\n", rc, strings[1]);
 }
 
 static int __init of_selftest(void)
@@ -167,7 +221,7 @@ static int __init of_selftest(void)
 
 	pr_info("start of selftest - you will see error messages\n");
 	of_selftest_parse_phandle_with_args();
-	of_selftest_property_match_string();
+	of_selftest_property_string();
 	pr_info("end of selftest - %s\n", selftest_passed ? "PASS" : "FAIL");
 	return 0;
 }
diff --git a/include/linux/of.h b/include/linux/of.h
index 1fd08ca23106..5e9d35233a65 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -252,14 +252,12 @@ extern int of_property_read_u64(const struct device_node *np,
 extern int of_property_read_string(struct device_node *np,
 				   const char *propname,
 				   const char **out_string);
-extern int of_property_read_string_index(struct device_node *np,
-					 const char *propname,
-					 int index, const char **output);
 extern int of_property_match_string(struct device_node *np,
 				    const char *propname,
 				    const char *string);
-extern int of_property_count_strings(struct device_node *np,
-				     const char *propname);
+extern int of_property_read_string_helper(struct device_node *np,
+					      const char *propname,
+					      const char **out_strs, size_t sz, int index);
 extern int of_device_is_compatible(const struct device_node *device,
 				   const char *);
 extern int of_device_is_available(const struct device_node *device);
@@ -439,15 +437,9 @@ static inline int of_property_read_string(struct device_node *np,
 	return -ENOSYS;
 }
 
-static inline int of_property_read_string_index(struct device_node *np,
-						const char *propname, int index,
-						const char **out_string)
-{
-	return -ENOSYS;
-}
-
-static inline int of_property_count_strings(struct device_node *np,
-					    const char *propname)
+static inline int of_property_read_string_helper(struct device_node *np,
+						 const char *propname,
+						 const char **out_strs, size_t sz, int index)
 {
 	return -ENOSYS;
 }
@@ -522,6 +514,70 @@ static inline int of_node_to_nid(struct device_node *np)
 #define of_node_to_nid of_node_to_nid
 #endif
 
+/**
+ * of_property_read_string_array() - Read an array of strings from a multiple
+ * strings property.
+ * @np:		device node from which the property value is to be read.
+ * @propname:	name of the property to be searched.
+ * @out_strs:	output array of string pointers.
+ * @sz:		number of array elements to read.
+ *
+ * Search for a property in a device tree node and retrieve a list of
+ * terminated string values (pointer to data, not a copy) in that property.
+ *
+ * If @out_strs is NULL, the number of strings in the property is returned.
+ */
+static inline int of_property_read_string_array(struct device_node *np,
+						const char *propname, const char **out_strs,
+						size_t sz)
+{
+	return of_property_read_string_helper(np, propname, out_strs, sz, 0);
+}
+
+/**
+ * of_property_count_strings() - Find and return the number of strings from a
+ * multiple strings property.
+ * @np:		device node from which the property value is to be read.
+ * @propname:	name of the property to be searched.
+ *
+ * Search for a property in a device tree node and retrieve the number of null
+ * terminated strings contained in it. Returns the number of strings on
+ * success, -EINVAL if the property does not exist, -ENODATA if property
+ * does not have a value, and -EILSEQ if the string is not null-terminated
+ * within the length of the property data.
+ */
+static inline int of_property_count_strings(struct device_node *np,
+					    const char *propname)
+{
+	return of_property_read_string_helper(np, propname, NULL, 0, 0);
+}
+
+/**
+ * of_property_read_string_index() - Find and read a string from a multiple
+ * strings property.
+ * @np:		device node from which the property value is to be read.
+ * @propname:	name of the property to be searched.
+ * @index:	index of the string in the list of strings
+ * @out_string:	pointer to null terminated return string, modified only if
+ *		return value is 0.
+ *
+ * Search for a property in a device tree node and retrieve a null
+ * terminated string value (pointer to data, not a copy) in the list of strings
+ * contained in that property.
+ * Returns 0 on success, -EINVAL if the property does not exist, -ENODATA if
+ * property does not have a value, and -EILSEQ if the string is not
+ * null-terminated within the length of the property data.
+ *
+ * The out_string pointer is modified only if a valid string can be decoded.
+ */
+static inline int of_property_read_string_index(struct device_node *np,
+						const char *propname,
+						int index, const char **output)
+{
+	int rc = of_property_read_string_helper(np, propname, output, 1, index);
+	return rc < 0 ? rc : 0;
+}
+
 /**
  * of_property_read_bool - Findfrom a property
  * @np:		device node from which the property value is to be read.

From 58f382ffaf2570032da636a348aa6c045a166f4e Mon Sep 17 00:00:00 2001
From: Chris Mason <clm@fb.com>
Date: Tue, 4 Nov 2014 06:59:04 -0800
Subject: [PATCH 1107/1185] Btrfs: fix kfree on list_head in
 btrfs_lookup_csums_range error cleanup

commit 6e5aafb27419f32575b27ef9d6a31e5d54661aca upstream.

If we hit any errors in btrfs_lookup_csums_range, we'll loop through all
the csums we allocate and free them.  But the code was using list_entry
incorrectly, and ended up trying to free the on-stack list_head instead.

This bug came from commit 0678b6185

btrfs: Don't BUG_ON kzalloc error in btrfs_lookup_csums_range()

Signed-off-by: Chris Mason <clm@fb.com>
Reported-by: Erik Berg <btrfs@slipsprogrammoer.no>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/file-item.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 3af77aa051d2..e4bcfec7787e 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -403,7 +403,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 	ret = 0;
 fail:
 	while (ret < 0 && !list_empty(&tmplist)) {
-		sums = list_entry(&tmplist, struct btrfs_ordered_sum, list);
+		sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list);
 		list_del(&sums->list);
 		kfree(sums);
 	}

From f05c0daaf68e424d05d271c7fb2fbfd5750a315e Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@redhat.com>
Date: Fri, 10 Oct 2014 16:39:05 +0400
Subject: [PATCH 1108/1185] libceph: ceph-msgr workqueue needs a rescue worker

commit f9865f06f7f18c6661c88d0511f05c48612319cc upstream.

Commit f363e45fd118 ("net/ceph: make ceph_msgr_wq non-reentrant")
effectively removed WQ_MEM_RECLAIM flag from ceph_msgr_wq.  This is
wrong - libceph is very much a memory reclaim path, so restore it.

Signed-off-by: Ilya Dryomov <idryomov@redhat.com>
Tested-by: Micha Krause <micha@krausam.de>
Reviewed-by: Sage Weil <sage@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ceph/messenger.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 66e77f380fce..e3bea2e0821a 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -290,7 +290,8 @@ int ceph_msgr_init(void)
 	if (ceph_msgr_slab_init())
 		return -ENOMEM;
 
-	ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0);
+	ceph_msgr_wq = alloc_workqueue("ceph-msgr",
+				       WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
 	if (ceph_msgr_wq)
 		return 0;
 

From be70188832b22a8f1a49d0e3a3eb2209f9cfdc8a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 14 Nov 2014 08:48:23 -0800
Subject: [PATCH 1109/1185] Linux 3.10.60

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 7baf27f5cf0f..9d4f30d0d201 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 10
-SUBLEVEL = 59
+SUBLEVEL = 60
 EXTRAVERSION =
 NAME = TOSSUG Baby Fish
 

From 212df5dd3212092602119d03a2f5d3a27be77ce4 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 17 Nov 2014 12:36:56 +0000
Subject: [PATCH 1110/1185] cpufreq: Don't iterate over all CPUs when
 suspending and resuming governors

The governors only expect to be stopped and started once so don't call
them repeatedly.

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 54 +++++++++++++--------------------------
 1 file changed, 18 insertions(+), 36 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index d444bfe83f13..ae9d9dc6b79b 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1293,7 +1293,6 @@ static struct subsys_interface cpufreq_interface = {
 void cpufreq_suspend(void)
 {
 	struct cpufreq_policy *policy;
-	int cpu;
 
 	if (!cpufreq_driver)
 		return;
@@ -1303,20 +1302,15 @@ void cpufreq_suspend(void)
 
 	pr_debug("%s: Suspending Governors\n", __func__);
 
-	for_each_possible_cpu(cpu) {
-		if (!cpu_online(cpu))
-			continue;
+	policy = cpufreq_cpu_get(0);
 
-		policy = cpufreq_cpu_get(cpu);
-
-		if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP))
-			pr_err("%s: Failed to stop governor for policy: %p\n",
-				__func__, policy);
-		else if (cpufreq_driver->suspend
-		    && cpufreq_driver->suspend(policy))
-			pr_err("%s: Failed to suspend driver: %p\n", __func__,
-				policy);
-	}
+	if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP))
+		pr_err("%s: Failed to stop governor for policy: %p\n",
+			__func__, policy);
+	else if (cpufreq_driver->suspend
+	    && cpufreq_driver->suspend(policy))
+		pr_err("%s: Failed to suspend driver: %p\n", __func__,
+			policy);
 
 	cpufreq_suspended = true;
 }
@@ -1330,7 +1324,6 @@ void cpufreq_suspend(void)
 void cpufreq_resume(void)
 {
 	struct cpufreq_policy *policy;
-	int cpu;
 
 	if (!cpufreq_driver)
 		return;
@@ -1342,29 +1335,18 @@ void cpufreq_resume(void)
 
 	cpufreq_suspended = false;
 
-	for_each_possible_cpu(cpu) {
-		if (!cpu_online(cpu))
-			continue;
+	policy = cpufreq_cpu_get(0);
 
-		policy = cpufreq_cpu_get(cpu);
+	if (__cpufreq_governor(policy, CPUFREQ_GOV_START)
+	    || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))
+		pr_err("%s: Failed to start governor for policy: %p\n",
+			__func__, policy);
+	else if (cpufreq_driver->resume
+	    && cpufreq_driver->resume(policy))
+		pr_err("%s: Failed to resume driver: %p\n", __func__,
+			policy);
 
-		if (__cpufreq_governor(policy, CPUFREQ_GOV_START)
-		    || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))
-			pr_err("%s: Failed to start governor for policy: %p\n",
-				__func__, policy);
-		else if (cpufreq_driver->resume
-		    && cpufreq_driver->resume(policy))
-			pr_err("%s: Failed to resume driver: %p\n", __func__,
-				policy);
-
-		/*
-		 * schedule call cpufreq_update_policy() for boot CPU, i.e. last
-		 * policy in list. It will verify that the current freq is in
-		 * sync with what we believe it to be.
-		 */
-		if (cpu == 0)
-			schedule_work(&policy->update);
-	}
+	schedule_work(&policy->update);
 }
 
 /**

From 2d2a8385287335071789c88e4df4a516091472d5 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 3 Nov 2014 09:19:27 +0100
Subject: [PATCH 1111/1185] ip6_tunnel: Use ip6_tnl_dev_init as the ndo_init
 function.

[ Upstream commit 6c6151daaf2d8dc2046d9926539feed5f66bf74e ]

ip6_tnl_dev_init() sets the dev->iflink via a call to
ip6_tnl_link_config(). After that, register_netdevice()
sets dev->iflink = -1. So we lose the iflink configuration
for ipv6 tunnels. Fix this by using ip6_tnl_dev_init() as the
ndo_init function. Then ip6_tnl_dev_init() is called after
dev->iflink is set to -1 from register_netdevice().

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/ip6_tunnel.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a0ecdf596f2f..14f46af17704 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -265,9 +265,6 @@ static int ip6_tnl_create2(struct net_device *dev)
 	int err;
 
 	t = netdev_priv(dev);
-	err = ip6_tnl_dev_init(dev);
-	if (err < 0)
-		goto out;
 
 	err = register_netdevice(dev);
 	if (err < 0)
@@ -1433,6 +1430,7 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
 
 
 static const struct net_device_ops ip6_tnl_netdev_ops = {
+	.ndo_init	= ip6_tnl_dev_init,
 	.ndo_uninit	= ip6_tnl_dev_uninit,
 	.ndo_start_xmit = ip6_tnl_xmit,
 	.ndo_do_ioctl	= ip6_tnl_ioctl,
@@ -1514,16 +1512,10 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
 	struct ip6_tnl *t = netdev_priv(dev);
 	struct net *net = dev_net(dev);
 	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
-	int err = ip6_tnl_dev_init_gen(dev);
-
-	if (err)
-		return err;
 
 	t->parms.proto = IPPROTO_IPV6;
 	dev_hold(dev);
 
-	ip6_tnl_link_config(t);
-
 	rcu_assign_pointer(ip6n->tnls_wc[0], t);
 	return 0;
 }

From 460ceaa05d35a85502f661a8754d9304c0b941b5 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 3 Nov 2014 09:19:30 +0100
Subject: [PATCH 1112/1185] gre6: Move the setting of dev->iflink into the
 ndo_init functions.

[ Upstream commit f03eb128e3f4276f46442d14f3b8f864f3775821 ]

Otherwise it gets overwritten by register_netdev().

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/ip6_gre.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 250a73e77f57..6c20f4731f1a 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -962,8 +962,6 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
 	else
 		dev->flags &= ~IFF_POINTOPOINT;
 
-	dev->iflink = p->link;
-
 	/* Precalculate GRE options length */
 	if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
 		if (t->parms.o_flags&GRE_CSUM)
@@ -1267,6 +1265,8 @@ static int ip6gre_tunnel_init(struct net_device *dev)
 	if (!dev->tstats)
 		return -ENOMEM;
 
+	dev->iflink = tunnel->parms.link;
+
 	return 0;
 }
 
@@ -1282,7 +1282,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
 	dev_hold(dev);
 }
 
-
 static struct inet6_protocol ip6gre_protocol __read_mostly = {
 	.handler     = ip6gre_rcv,
 	.err_handler = ip6gre_err,
@@ -1458,6 +1457,8 @@ static int ip6gre_tap_init(struct net_device *dev)
 	if (!dev->tstats)
 		return -ENOMEM;
 
+	dev->iflink = tunnel->parms.link;
+
 	return 0;
 }
 

From 7031dcb018db2a7776c1c31ef156cf8ac8da8a99 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 10 Nov 2014 17:54:26 +0100
Subject: [PATCH 1113/1185] net: sctp: fix NULL pointer dereference in
 af->from_addr_param on malformed packet

[ Upstream commit e40607cbe270a9e8360907cb1e62ddf0736e4864 ]

An SCTP server doing ASCONF will panic on malformed INIT ping-of-death
in the form of:

  ------------ INIT[PARAM: SET_PRIMARY_IP] ------------>

While the INIT chunk parameter verification dissects through many things
in order to detect malformed input, it misses to actually check parameters
inside of parameters. E.g. RFC5061, section 4.2.4 proposes a 'set primary
IP address' parameter in ASCONF, which has as a subparameter an address
parameter.

So an attacker may send a parameter type other than SCTP_PARAM_IPV4_ADDRESS
or SCTP_PARAM_IPV6_ADDRESS, param_type2af() will subsequently return 0
and thus sctp_get_af_specific() returns NULL, too, which we then happily
dereference unconditionally through af->from_addr_param().

The trace for the log:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000078
IP: [<ffffffffa01e9c62>] sctp_process_init+0x492/0x990 [sctp]
PGD 0
Oops: 0000 [#1] SMP
[...]
Pid: 0, comm: swapper Not tainted 2.6.32-504.el6.x86_64 #1 Bochs Bochs
RIP: 0010:[<ffffffffa01e9c62>]  [<ffffffffa01e9c62>] sctp_process_init+0x492/0x990 [sctp]
[...]
Call Trace:
 <IRQ>
 [<ffffffffa01f2add>] ? sctp_bind_addr_copy+0x5d/0xe0 [sctp]
 [<ffffffffa01e1fcb>] sctp_sf_do_5_1B_init+0x21b/0x340 [sctp]
 [<ffffffffa01e3751>] sctp_do_sm+0x71/0x1210 [sctp]
 [<ffffffffa01e5c09>] ? sctp_endpoint_lookup_assoc+0xc9/0xf0 [sctp]
 [<ffffffffa01e61f6>] sctp_endpoint_bh_rcv+0x116/0x230 [sctp]
 [<ffffffffa01ee986>] sctp_inq_push+0x56/0x80 [sctp]
 [<ffffffffa01fcc42>] sctp_rcv+0x982/0xa10 [sctp]
 [<ffffffffa01d5123>] ? ipt_local_in_hook+0x23/0x28 [iptable_filter]
 [<ffffffff8148bdc9>] ? nf_iterate+0x69/0xb0
 [<ffffffff81496d10>] ? ip_local_deliver_finish+0x0/0x2d0
 [<ffffffff8148bf86>] ? nf_hook_slow+0x76/0x120
 [<ffffffff81496d10>] ? ip_local_deliver_finish+0x0/0x2d0
[...]

A minimal way to address this is to check for NULL as we do on all
other such occasions where we know sctp_get_af_specific() could
possibly return with NULL.

Fixes: d6de3097592b ("[SCTP]: Add the handling of "Set Primary IP Address" parameter to INIT")
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Vlad Yasevich <vyasevich@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sctp/sm_make_chunk.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 87e244be899a..6ca48b16f6bf 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2596,6 +2596,9 @@ static int sctp_process_param(struct sctp_association *asoc,
 		addr_param = param.v + sizeof(sctp_addip_param_t);
 
 		af = sctp_get_af_specific(param_type2af(param.p->type));
+		if (af == NULL)
+			break;
+
 		af->from_addr_param(&addr, addr_param,
 				    htons(asoc->peer.port), 0);
 

From e79c2487e4e01ccad077252c398627fd99f55924 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 10 Nov 2014 18:00:09 +0100
Subject: [PATCH 1114/1185] net: sctp: fix memory leak in auth key management

[ Upstream commit 4184b2a79a7612a9272ce20d639934584a1f3786 ]

A very minimal and simple user space application allocating an SCTP
socket, setting SCTP_AUTH_KEY setsockopt(2) on it and then closing
the socket again will leak the memory containing the authentication
key from user space:

unreferenced object 0xffff8800837047c0 (size 16):
  comm "a.out", pid 2789, jiffies 4296954322 (age 192.258s)
  hex dump (first 16 bytes):
    01 00 00 00 04 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff816d7e8e>] kmemleak_alloc+0x4e/0xb0
    [<ffffffff811c88d8>] __kmalloc+0xe8/0x270
    [<ffffffffa0870c23>] sctp_auth_create_key+0x23/0x50 [sctp]
    [<ffffffffa08718b1>] sctp_auth_set_key+0xa1/0x140 [sctp]
    [<ffffffffa086b383>] sctp_setsockopt+0xd03/0x1180 [sctp]
    [<ffffffff815bfd94>] sock_common_setsockopt+0x14/0x20
    [<ffffffff815beb61>] SyS_setsockopt+0x71/0xd0
    [<ffffffff816e58a9>] system_call_fastpath+0x12/0x17
    [<ffffffffffffffff>] 0xffffffffffffffff

This is bad because of two things, we can bring down a machine from
user space when auth_enable=1, but also we would leave security sensitive
keying material in memory without clearing it after use. The issue is
that sctp_auth_create_key() already sets the refcount to 1, but after
allocation sctp_auth_set_key() does an additional refcount on it, and
thus leaving it around when we free the socket.

Fixes: 65b07e5d0d0 ("[SCTP]: API updates to suport SCTP-AUTH extensions.")
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Vlad Yasevich <vyasevich@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sctp/auth.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 7a19117254db..bc2fae7e67be 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -874,8 +874,6 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
 		list_add(&cur_key->key_list, sh_keys);
 
 	cur_key->key = key;
-	sctp_auth_key_hold(key);
-
 	return 0;
 nomem:
 	if (!replace)

From e4da88a6dab26f68c5f0e493178f9ef2c23f0d08 Mon Sep 17 00:00:00 2001
From: Allen Pais <allen.pais@oracle.com>
Date: Fri, 19 Sep 2014 09:42:14 -0400
Subject: [PATCH 1115/1185] sunvdc: add cdrom and v1.1 protocol support

[ Upstream commit 9bce21828d54a95143f1b74619705c2dd8e88b92 ]

Interpret the media type from v1.1 protocol to support CDROM/DVD.

For v1.0 protocol, a disk's size continues to be calculated from the
geometry returned by the vdisk server. The geometry returned by the server
can be less than the actual number of sectors available in the backing
image/device due to the rounding in the division used to compute the
geometry in the vdisk server.

In v1.1 protocol a disk's actual size in sectors is returned during the
handshake. Use this size when v1.1 protocol is negotiated. Since this size
will always be larger than the former geometry computed size, disks created
under v1.0 will be forwards compatible to v1.1, but not vice versa.

Signed-off-by: Dwight Engen <dwight.engen@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/include/asm/vio.h |  12 +++-
 drivers/block/sunvdc.c       | 109 +++++++++++++++++++++++++++++------
 2 files changed, 101 insertions(+), 20 deletions(-)

diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h
index 432afa838861..a8210c5e5932 100644
--- a/arch/sparc/include/asm/vio.h
+++ b/arch/sparc/include/asm/vio.h
@@ -118,12 +118,18 @@ struct vio_disk_attr_info {
 	u8			vdisk_type;
 #define VD_DISK_TYPE_SLICE	0x01 /* Slice in block device	*/
 #define VD_DISK_TYPE_DISK	0x02 /* Entire block device	*/
-	u16			resv1;
+	u8			vdisk_mtype;		/* v1.1 */
+#define VD_MEDIA_TYPE_FIXED	0x01 /* Fixed device */
+#define VD_MEDIA_TYPE_CD	0x02 /* CD Device    */
+#define VD_MEDIA_TYPE_DVD	0x03 /* DVD Device   */
+	u8			resv1;
 	u32			vdisk_block_size;
 	u64			operations;
-	u64			vdisk_size;
+	u64			vdisk_size;		/* v1.1 */
 	u64			max_xfer_size;
-	u64			resv2[2];
+	u32			phys_block_size;	/* v1.2 */
+	u32			resv2;
+	u64			resv3[1];
 };
 
 struct vio_disk_desc {
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 5814deb6963d..66ddf704ad7f 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -9,6 +9,7 @@
 #include <linux/blkdev.h>
 #include <linux/hdreg.h>
 #include <linux/genhd.h>
+#include <linux/cdrom.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
@@ -22,8 +23,8 @@
 
 #define DRV_MODULE_NAME		"sunvdc"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"1.0"
-#define DRV_MODULE_RELDATE	"June 25, 2007"
+#define DRV_MODULE_VERSION	"1.1"
+#define DRV_MODULE_RELDATE	"February 13, 2013"
 
 static char version[] =
 	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
@@ -65,6 +66,7 @@ struct vdc_port {
 	u64			operations;
 	u32			vdisk_size;
 	u8			vdisk_type;
+	u8			vdisk_mtype;
 
 	char			disk_name[32];
 
@@ -79,9 +81,16 @@ static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
 
 /* Ordered from largest major to lowest */
 static struct vio_version vdc_versions[] = {
+	{ .major = 1, .minor = 1 },
 	{ .major = 1, .minor = 0 },
 };
 
+static inline int vdc_version_supported(struct vdc_port *port,
+					u16 major, u16 minor)
+{
+	return port->vio.ver.major == major && port->vio.ver.minor >= minor;
+}
+
 #define VDCBLK_NAME	"vdisk"
 static int vdc_major;
 #define PARTITION_SHIFT	3
@@ -103,9 +112,41 @@ static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
+/* Add ioctl/CDROM_GET_CAPABILITY to support cdrom_id in udev
+ * when vdisk_mtype is VD_MEDIA_TYPE_CD or VD_MEDIA_TYPE_DVD.
+ * Needed to be able to install inside an ldom from an iso image.
+ */
+static int vdc_ioctl(struct block_device *bdev, fmode_t mode,
+		     unsigned command, unsigned long argument)
+{
+	int i;
+	struct gendisk *disk;
+
+	switch (command) {
+	case CDROMMULTISESSION:
+		pr_debug(PFX "Multisession CDs not supported\n");
+		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
+			if (put_user(0, (char __user *)(argument + i)))
+				return -EFAULT;
+		return 0;
+
+	case CDROM_GET_CAPABILITY:
+		disk = bdev->bd_disk;
+
+		if (bdev->bd_disk && (disk->flags & GENHD_FL_CD))
+			return 0;
+		return -EINVAL;
+
+	default:
+		pr_debug(PFX "ioctl %08x not supported\n", command);
+		return -EINVAL;
+	}
+}
+
 static const struct block_device_operations vdc_fops = {
 	.owner		= THIS_MODULE,
 	.getgeo		= vdc_getgeo,
+	.ioctl		= vdc_ioctl,
 };
 
 static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for)
@@ -165,9 +206,9 @@ static int vdc_handle_attr(struct vio_driver_state *vio, void *arg)
 	struct vio_disk_attr_info *pkt = arg;
 
 	viodbg(HS, "GOT ATTR stype[0x%x] ops[%llx] disk_size[%llu] disk_type[%x] "
-	       "xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
+	       "mtype[0x%x] xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
 	       pkt->tag.stype, pkt->operations,
-	       pkt->vdisk_size, pkt->vdisk_type,
+	       pkt->vdisk_size, pkt->vdisk_type, pkt->vdisk_mtype,
 	       pkt->xfer_mode, pkt->vdisk_block_size,
 	       pkt->max_xfer_size);
 
@@ -192,8 +233,11 @@ static int vdc_handle_attr(struct vio_driver_state *vio, void *arg)
 		}
 
 		port->operations = pkt->operations;
-		port->vdisk_size = pkt->vdisk_size;
 		port->vdisk_type = pkt->vdisk_type;
+		if (vdc_version_supported(port, 1, 1)) {
+			port->vdisk_size = pkt->vdisk_size;
+			port->vdisk_mtype = pkt->vdisk_mtype;
+		}
 		if (pkt->max_xfer_size < port->max_xfer_size)
 			port->max_xfer_size = pkt->max_xfer_size;
 		port->vdisk_block_size = pkt->vdisk_block_size;
@@ -663,18 +707,25 @@ static int probe_disk(struct vdc_port *port)
 		return err;
 	}
 
-	err = generic_request(port, VD_OP_GET_DISKGEOM,
-			      &port->geom, sizeof(port->geom));
-	if (err < 0) {
-		printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns "
-		       "error %d\n", err);
-		return err;
+	if (vdc_version_supported(port, 1, 1)) {
+		/* vdisk_size should be set during the handshake, if it wasn't
+		 * then the underlying disk is reserved by another system
+		 */
+		if (port->vdisk_size == -1)
+			return -ENODEV;
+	} else {
+		err = generic_request(port, VD_OP_GET_DISKGEOM,
+				      &port->geom, sizeof(port->geom));
+		if (err < 0) {
+			printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns "
+			       "error %d\n", err);
+			return err;
+		}
+		port->vdisk_size = ((u64)port->geom.num_cyl *
+				    (u64)port->geom.num_hd *
+				    (u64)port->geom.num_sec);
 	}
 
-	port->vdisk_size = ((u64)port->geom.num_cyl *
-			    (u64)port->geom.num_hd *
-			    (u64)port->geom.num_sec);
-
 	q = blk_init_queue(do_vdc_request, &port->vio.lock);
 	if (!q) {
 		printk(KERN_ERR PFX "%s: Could not allocate queue.\n",
@@ -704,9 +755,32 @@ static int probe_disk(struct vdc_port *port)
 
 	set_capacity(g, port->vdisk_size);
 
-	printk(KERN_INFO PFX "%s: %u sectors (%u MB)\n",
+	if (vdc_version_supported(port, 1, 1)) {
+		switch (port->vdisk_mtype) {
+		case VD_MEDIA_TYPE_CD:
+			pr_info(PFX "Virtual CDROM %s\n", port->disk_name);
+			g->flags |= GENHD_FL_CD;
+			g->flags |= GENHD_FL_REMOVABLE;
+			set_disk_ro(g, 1);
+			break;
+
+		case VD_MEDIA_TYPE_DVD:
+			pr_info(PFX "Virtual DVD %s\n", port->disk_name);
+			g->flags |= GENHD_FL_CD;
+			g->flags |= GENHD_FL_REMOVABLE;
+			set_disk_ro(g, 1);
+			break;
+
+		case VD_MEDIA_TYPE_FIXED:
+			pr_info(PFX "Virtual Hard disk %s\n", port->disk_name);
+			break;
+		}
+	}
+
+	pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n",
 	       g->disk_name,
-	       port->vdisk_size, (port->vdisk_size >> (20 - 9)));
+	       port->vdisk_size, (port->vdisk_size >> (20 - 9)),
+	       port->vio.ver.major, port->vio.ver.minor);
 
 	add_disk(g);
 
@@ -765,6 +839,7 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	else
 		snprintf(port->disk_name, sizeof(port->disk_name),
 			 VDCBLK_NAME "%c", 'a' + ((int)vdev->dev_no % 26));
+	port->vdisk_size = -1;
 
 	err = vio_driver_init(&port->vio, vdev, VDEV_DISK,
 			      vdc_versions, ARRAY_SIZE(vdc_versions),

From 9e23c21149bfdc57aa9d9a35821994dfb0253c40 Mon Sep 17 00:00:00 2001
From: Allen Pais <allen.pais@oracle.com>
Date: Fri, 19 Sep 2014 09:42:26 -0400
Subject: [PATCH 1116/1185] sunvdc: compute vdisk geometry from capacity

[ Upstream commit de5b73f08468b4fc5e2f6d1505f650262622f78b ]

The LDom diskserver doesn't return reliable geometry data. In addition,
the types for all fields in the vio_disk_geom are u16, which were being
truncated in the cast into the u8's of the Linux struct hd_geometry.

Modify vdc_getgeo() to compute the geometry from the disk's capacity in a
manner consistent with xen-blkfront::blkif_getgeo().

Signed-off-by: Dwight Engen <dwight.engen@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/sunvdc.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 66ddf704ad7f..1616ad091a5e 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -70,7 +70,6 @@ struct vdc_port {
 
 	char			disk_name[32];
 
-	struct vio_disk_geom	geom;
 	struct vio_disk_vtoc	label;
 };
 
@@ -103,11 +102,15 @@ static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr)
 static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
 	struct gendisk *disk = bdev->bd_disk;
-	struct vdc_port *port = disk->private_data;
+	sector_t nsect = get_capacity(disk);
+	sector_t cylinders = nsect;
 
-	geo->heads = (u8) port->geom.num_hd;
-	geo->sectors = (u8) port->geom.num_sec;
-	geo->cylinders = port->geom.num_cyl;
+	geo->heads = 0xff;
+	geo->sectors = 0x3f;
+	sector_div(cylinders, geo->heads * geo->sectors);
+	geo->cylinders = cylinders;
+	if ((sector_t)(geo->cylinders + 1) * geo->heads * geo->sectors < nsect)
+		geo->cylinders = 0xffff;
 
 	return 0;
 }
@@ -714,16 +717,18 @@ static int probe_disk(struct vdc_port *port)
 		if (port->vdisk_size == -1)
 			return -ENODEV;
 	} else {
+		struct vio_disk_geom geom;
+
 		err = generic_request(port, VD_OP_GET_DISKGEOM,
-				      &port->geom, sizeof(port->geom));
+				      &geom, sizeof(geom));
 		if (err < 0) {
 			printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns "
 			       "error %d\n", err);
 			return err;
 		}
-		port->vdisk_size = ((u64)port->geom.num_cyl *
-				    (u64)port->geom.num_hd *
-				    (u64)port->geom.num_sec);
+		port->vdisk_size = ((u64)geom.num_cyl *
+				    (u64)geom.num_hd *
+				    (u64)geom.num_sec);
 	}
 
 	q = blk_init_queue(do_vdc_request, &port->vio.lock);

From 5cf61378accedc6acd5923f3b00bc945be1a9d29 Mon Sep 17 00:00:00 2001
From: Dwight Engen <dwight.engen@oracle.com>
Date: Fri, 19 Sep 2014 09:42:53 -0400
Subject: [PATCH 1117/1185] sunvdc: limit each sg segment to a page

[ Upstream commit 5eed69ffd248c9f68f56c710caf07db134aef28b ]

ldc_map_sg() could fail its check that the number of pages referred to
by the sg scatterlist was <= the number of cookies.

This fixes the issue by doing a similar thing to the xen-blkfront driver,
ensuring that the scatterlist will only ever contain a segment count <=
port->ring_cookies, and each segment will be page aligned, and <= page
size. This ensures that the scatterlist is always mappable.

Orabug: 19347817
OraBZ: 15945

Signed-off-by: Dwight Engen <dwight.engen@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/sunvdc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 1616ad091a5e..1a9360da1f54 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -747,6 +747,10 @@ static int probe_disk(struct vdc_port *port)
 
 	port->disk = g;
 
+	/* Each segment in a request is up to an aligned page in size. */
+	blk_queue_segment_boundary(q, PAGE_SIZE - 1);
+	blk_queue_max_segment_size(q, PAGE_SIZE);
+
 	blk_queue_max_segments(q, port->ring_cookies);
 	blk_queue_max_hw_sectors(q, port->max_xfer_size);
 	g->major = vdc_major;

From 891b60578feb65bdc55b69948ce98fea7ca88b1f Mon Sep 17 00:00:00 2001
From: Dwight Engen <dwight.engen@oracle.com>
Date: Fri, 19 Sep 2014 09:43:02 -0400
Subject: [PATCH 1118/1185] vio: fix reuse of vio_dring slot

[ Upstream commit d0aedcd4f14a22e23b313f42b7e6e6ebfc0fbc31 ]

vio_dring_avail() will allow use of every dring entry, but when the last
entry is allocated then dr->prod == dr->cons which is indistinguishable from
the ring empty condition. This causes the next allocation to reuse an entry.
When this happens in sunvdc, the server side vds driver begins nack'ing the
messages and ends up resetting the ldc channel. This problem does not affect
sunvnet since it checks for < 2.

The fix here is to just never allocate the very last dring slot so that full
and empty are not the same condition. The request start path was changed to
check for the ring being full a bit earlier, and to stop the blk_queue if
there is no space left. The blk_queue will be restarted once the ring is
only half full again. The number of ring entries was increased to 512 which
matches the sunvnet and Solaris vdc drivers, and greatly reduces the
frequency of hitting the ring full condition and the associated blk_queue
stop/starting. The checks in sunvnet were adjusted to account for
vio_dring_avail() returning 1 less.

Orabug: 19441666
OraBZ: 14983

Signed-off-by: Dwight Engen <dwight.engen@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/include/asm/vio.h       |  2 +-
 drivers/block/sunvdc.c             | 39 ++++++++++++++++++------------
 drivers/net/ethernet/sun/sunvnet.c |  4 +--
 3 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h
index a8210c5e5932..55841c184e6d 100644
--- a/arch/sparc/include/asm/vio.h
+++ b/arch/sparc/include/asm/vio.h
@@ -265,7 +265,7 @@ static inline u32 vio_dring_avail(struct vio_dring_state *dr,
 				  unsigned int ring_size)
 {
 	return (dr->pending -
-		((dr->prod - dr->cons) & (ring_size - 1)));
+		((dr->prod - dr->cons) & (ring_size - 1)) - 1);
 }
 
 #define VIO_MAX_TYPE_LEN	32
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 1a9360da1f54..756b8ec00f16 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -33,7 +33,7 @@ MODULE_DESCRIPTION("Sun LDOM virtual disk client driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
 
-#define VDC_TX_RING_SIZE	256
+#define VDC_TX_RING_SIZE	512
 
 #define WAITING_FOR_LINK_UP	0x01
 #define WAITING_FOR_TX_SPACE	0x02
@@ -283,7 +283,9 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
 
 	__blk_end_request(req, (desc->status ? -EIO : 0), desc->size);
 
-	if (blk_queue_stopped(port->disk->queue))
+	/* restart blk queue when ring is half emptied */
+	if (blk_queue_stopped(port->disk->queue) &&
+	    vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50)
 		blk_start_queue(port->disk->queue);
 }
 
@@ -435,12 +437,6 @@ static int __send_request(struct request *req)
 	for (i = 0; i < nsg; i++)
 		len += sg[i].length;
 
-	if (unlikely(vdc_tx_dring_avail(dr) < 1)) {
-		blk_stop_queue(port->disk->queue);
-		err = -ENOMEM;
-		goto out;
-	}
-
 	desc = vio_dring_cur(dr);
 
 	err = ldc_map_sg(port->vio.lp, sg, nsg,
@@ -480,21 +476,32 @@ static int __send_request(struct request *req)
 		port->req_id++;
 		dr->prod = (dr->prod + 1) & (VDC_TX_RING_SIZE - 1);
 	}
-out:
 
 	return err;
 }
 
-static void do_vdc_request(struct request_queue *q)
+static void do_vdc_request(struct request_queue *rq)
 {
-	while (1) {
-		struct request *req = blk_fetch_request(q);
+	struct request *req;
 
-		if (!req)
+	while ((req = blk_peek_request(rq)) != NULL) {
+		struct vdc_port *port;
+		struct vio_dring_state *dr;
+
+		port = req->rq_disk->private_data;
+		dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+		if (unlikely(vdc_tx_dring_avail(dr) < 1))
+			goto wait;
+
+		blk_start_request(req);
+
+		if (__send_request(req) < 0) {
+			blk_requeue_request(rq, req);
+wait:
+			/* Avoid pointless unplugs. */
+			blk_stop_queue(rq);
 			break;
-
-		if (__send_request(req) < 0)
-			__blk_end_request_all(req, -EIO);
+		}
 	}
 }
 
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index 398faff8be7a..ade8bdfc03af 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -656,7 +656,7 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	spin_lock_irqsave(&port->vio.lock, flags);
 
 	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
-	if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
+	if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
 		if (!netif_queue_stopped(dev)) {
 			netif_stop_queue(dev);
 
@@ -704,7 +704,7 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	dev->stats.tx_bytes += skb->len;
 
 	dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
-	if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
+	if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
 		netif_stop_queue(dev);
 		if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
 			netif_wake_queue(dev);

From df6329d2eb252866542b9efaef34b2e0d777e649 Mon Sep 17 00:00:00 2001
From: Dwight Engen <dwight.engen@oracle.com>
Date: Thu, 30 Oct 2014 15:55:35 -0400
Subject: [PATCH 1119/1185] sunvdc: don't call VD_OP_GET_VTOC

[ Upstream commit 85b0c6e62c48bb9179fd5b3e954f362fb346cbd5 ]

The VD_OP_GET_VTOC operation will succeed only if the vdisk backend has a
VTOC label, otherwise it will fail. In particular, it will return error
48 (ENOTSUP) if the disk has an EFI label. VTOC disk labels are already
handled by directly reading the disk in block/partitions/sun.c (enabled by
CONFIG_SUN_PARTITION which defaults to y on SPARC). Since port->label is
unused in the driver, remove the call and the field.

Signed-off-by: Dwight Engen <dwight.engen@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/sunvdc.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 756b8ec00f16..0ebadf93b6c5 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -69,8 +69,6 @@ struct vdc_port {
 	u8			vdisk_mtype;
 
 	char			disk_name[32];
-
-	struct vio_disk_vtoc	label;
 };
 
 static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
@@ -710,13 +708,6 @@ static int probe_disk(struct vdc_port *port)
 	if (comp.err)
 		return comp.err;
 
-	err = generic_request(port, VD_OP_GET_VTOC,
-			      &port->label, sizeof(port->label));
-	if (err < 0) {
-		printk(KERN_ERR PFX "VD_OP_GET_VTOC returns error %d\n", err);
-		return err;
-	}
-
 	if (vdc_version_supported(port, 1, 1)) {
 		/* vdisk_size should be set during the handshake, if it wasn't
 		 * then the underlying disk is reserved by another system

From 865db7fbbcf2651989b0134a6babb8b093b53a61 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 1 Nov 2014 00:33:58 -0400
Subject: [PATCH 1120/1185] sparc64: Fix crashes in schizo_pcierr_intr_other().

[ Upstream commit 7da89a2a3776442a57e918ca0b8678d1b16a7072 ]

Meelis Roos reports crashes during bootup on a V480 that look like
this:

====================
[   61.300577] PCI: Scanning PBM /pci@9,600000
[   61.304867] schizo f009b070: PCI host bridge to bus 0003:00
[   61.310385] pci_bus 0003:00: root bus resource [io  0x7ffe9000000-0x7ffe9ffffff] (bus address [0x0000-0xffffff])
[   61.320515] pci_bus 0003:00: root bus resource [mem 0x7fb00000000-0x7fbffffffff] (bus address [0x00000000-0xffffffff])
[   61.331173] pci_bus 0003:00: root bus resource [bus 00]
[   61.385344] Unable to handle kernel NULL pointer dereference
[   61.390970] tsk->{mm,active_mm}->context = 0000000000000000
[   61.396515] tsk->{mm,active_mm}->pgd = fff000b000002000
[   61.401716]               \|/ ____ \|/
[   61.401716]               "@'/ .. \`@"
[   61.401716]               /_| \__/ |_\
[   61.401716]                  \__U_/
[   61.416362] swapper/0(0): Oops [#1]
[   61.419837] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.18.0-rc1-00422-g2cc9188-dirty #24
[   61.427975] task: fff000b0fd8e9c40 ti: fff000b0fd928000 task.ti: fff000b0fd928000
[   61.435426] TSTATE: 0000004480e01602 TPC: 00000000004455e4 TNPC: 00000000004455e8 Y: 00000000    Not tainted
[   61.445230] TPC: <schizo_pcierr_intr+0x104/0x560>
[   61.449897] g0: 0000000000000000 g1: 0000000000000000 g2: 0000000000a10f78 g3: 000000000000000a
[   61.458563] g4: fff000b0fd8e9c40 g5: fff000b0fdd82000 g6: fff000b0fd928000 g7: 000000000000000a
[   61.467229] o0: 000000000000003d o1: 0000000000000000 o2: 0000000000000006 o3: fff000b0ffa5fc7e
[   61.475894] o4: 0000000000060000 o5: c000000000000000 sp: fff000b0ffa5f3c1 ret_pc: 00000000004455cc
[   61.484909] RPC: <schizo_pcierr_intr+0xec/0x560>
[   61.489500] l0: fff000b0fd8e9c40 l1: 0000000000a20800 l2: 0000000000000000 l3: 000000000119a430
[   61.498164] l4: 0000000001742400 l5: 00000000011cfbe0 l6: 00000000011319c0 l7: fff000b0fd8ea348
[   61.506830] i0: 0000000000000000 i1: fff000b0fdb34000 i2: 0000000320000000 i3: 0000000000000000
[   61.515497] i4: 00060002010b003f i5: 0000040004e02000 i6: fff000b0ffa5f481 i7: 00000000004a9920
[   61.524175] I7: <handle_irq_event_percpu+0x40/0x140>
[   61.529099] Call Trace:
[   61.531531]  [00000000004a9920] handle_irq_event_percpu+0x40/0x140
[   61.537681]  [00000000004a9a58] handle_irq_event+0x38/0x80
[   61.543145]  [00000000004ac77c] handle_fasteoi_irq+0xbc/0x200
[   61.548860]  [00000000004a9084] generic_handle_irq+0x24/0x40
[   61.554500]  [000000000042be0c] handler_irq+0xac/0x100
====================

The problem is that pbm->pci_bus->self is NULL.

This code is trying to go through the standard PCI config space
interfaces to read the PCI controller's PCI_STATUS register.

This doesn't work, because we more often than not do not enumerate
the PCI controller as a bona fide PCI device during the OF device
node scan.  Therefore bus->self remains NULL.

Existing common code for PSYCHO and PSYCHO-like PCI controllers
handles this properly, by doing the config space access directly.

Do the same here, pbm->pci_ops->{read,write}().

Reported-by: Meelis Roos <mroos@linux.ee>
Tested-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/kernel/pci_schizo.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c
index 8f76f23dac38..f9c6813c132d 100644
--- a/arch/sparc/kernel/pci_schizo.c
+++ b/arch/sparc/kernel/pci_schizo.c
@@ -581,7 +581,7 @@ static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm)
 {
 	unsigned long csr_reg, csr, csr_error_bits;
 	irqreturn_t ret = IRQ_NONE;
-	u16 stat;
+	u32 stat;
 
 	csr_reg = pbm->pbm_regs + SCHIZO_PCI_CTRL;
 	csr = upa_readq(csr_reg);
@@ -617,7 +617,7 @@ static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm)
 			       pbm->name);
 		ret = IRQ_HANDLED;
 	}
-	pci_read_config_word(pbm->pci_bus->self, PCI_STATUS, &stat);
+	pbm->pci_ops->read(pbm->pci_bus, 0, PCI_STATUS, 2, &stat);
 	if (stat & (PCI_STATUS_PARITY |
 		    PCI_STATUS_SIG_TARGET_ABORT |
 		    PCI_STATUS_REC_TARGET_ABORT |
@@ -625,7 +625,7 @@ static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm)
 		    PCI_STATUS_SIG_SYSTEM_ERROR)) {
 		printk("%s: PCI bus error, PCI_STATUS[%04x]\n",
 		       pbm->name, stat);
-		pci_write_config_word(pbm->pci_bus->self, PCI_STATUS, 0xffff);
+		pbm->pci_ops->write(pbm->pci_bus, 0, PCI_STATUS, 2, 0xffff);
 		ret = IRQ_HANDLED;
 	}
 	return ret;

From 134712463afbd65c6b11193cd8000c83fc5b3a1b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 7 Nov 2014 09:50:48 -0800
Subject: [PATCH 1121/1185] sparc64: Do irq_{enter,exit}() around
 generic_smp_call_function*().

[ Upstream commit ab5c780913bca0a5763ca05dd5c2cb5cb08ccb26 ]

Otherwise rcu_irq_{enter,exit}() do not happen and we get dumps like:

====================
[  188.275021] ===============================
[  188.309351] [ INFO: suspicious RCU usage. ]
[  188.343737] 3.18.0-rc3-00068-g20f3963-dirty #54 Not tainted
[  188.394786] -------------------------------
[  188.429170] include/linux/rcupdate.h:883 rcu_read_lock() used
illegally while idle!
[  188.505235]
other info that might help us debug this:

[  188.554230]
RCU used illegally from idle CPU!
rcu_scheduler_active = 1, debug_locks = 0
[  188.637587] RCU used illegally from extended quiescent state!
[  188.690684] 3 locks held by swapper/7/0:
[  188.721932]  #0:  (&x->wait#11){......}, at: [<0000000000495de8>] complete+0x8/0x60
[  188.797994]  #1:  (&p->pi_lock){-.-.-.}, at: [<000000000048510c>] try_to_wake_up+0xc/0x400
[  188.881343]  #2:  (rcu_read_lock){......}, at: [<000000000048a910>] select_task_rq_fair+0x90/0xb40
[  188.973043]stack backtrace:
[  188.993879] CPU: 7 PID: 0 Comm: swapper/7 Not tainted 3.18.0-rc3-00068-g20f3963-dirty #54
[  189.076187] Call Trace:
[  189.089719]  [0000000000499360] lockdep_rcu_suspicious+0xe0/0x100
[  189.147035]  [000000000048a99c] select_task_rq_fair+0x11c/0xb40
[  189.202253]  [00000000004852d8] try_to_wake_up+0x1d8/0x400
[  189.252258]  [000000000048554c] default_wake_function+0xc/0x20
[  189.306435]  [0000000000495554] __wake_up_common+0x34/0x80
[  189.356448]  [00000000004955b4] __wake_up_locked+0x14/0x40
[  189.406456]  [0000000000495e08] complete+0x28/0x60
[  189.448142]  [0000000000636e28] blk_end_sync_rq+0x8/0x20
[  189.496057]  [0000000000639898] __blk_mq_end_request+0x18/0x60
[  189.550249]  [00000000006ee014] scsi_end_request+0x94/0x180
[  189.601286]  [00000000006ee334] scsi_io_completion+0x1d4/0x600
[  189.655463]  [00000000006e51c4] scsi_finish_command+0xc4/0xe0
[  189.708598]  [00000000006ed958] scsi_softirq_done+0x118/0x140
[  189.761735]  [00000000006398ec] __blk_mq_complete_request_remote+0xc/0x20
[  189.827383]  [00000000004c75d0] generic_smp_call_function_single_interrupt+0x150/0x1c0
[  189.906581]  [000000000043e514] smp_call_function_single_client+0x14/0x40
====================

Based almost entirely upon a patch by Paul E. McKenney.

Reported-by: Meelis Roos <mroos@linux.ee>
Tested-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/kernel/smp_64.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 8565ecd7d48a..173964d5e948 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -821,13 +821,17 @@ void arch_send_call_function_single_ipi(int cpu)
 void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs)
 {
 	clear_softint(1 << irq);
+	irq_enter();
 	generic_smp_call_function_interrupt();
+	irq_exit();
 }
 
 void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs)
 {
 	clear_softint(1 << irq);
+	irq_enter();
 	generic_smp_call_function_single_interrupt();
+	irq_exit();
 }
 
 static void tsb_sync(void *info)

From 96920746658f98dbdaccc90ab818443471accc89 Mon Sep 17 00:00:00 2001
From: Andreas Larsson <andreas@gaisler.com>
Date: Wed, 5 Nov 2014 15:52:08 +0100
Subject: [PATCH 1122/1185] sparc32: Implement xchg and atomic_xchg using
 ATOMIC_HASH locks

[ Upstream commit 1a17fdc4f4ed06b63fac1937470378a5441a663a ]

Atomicity between xchg and cmpxchg cannot be guaranteed when xchg is
implemented with a swap and cmpxchg is implemented with locks.
Without this, e.g. mcs_spin_lock and mcs_spin_unlock are broken.

Signed-off-by: Andreas Larsson <andreas@gaisler.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/include/asm/atomic_32.h  |  2 +-
 arch/sparc/include/asm/cmpxchg_32.h | 12 ++----------
 arch/sparc/lib/atomic32.c           | 27 +++++++++++++++++++++++++++
 3 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index 905832aa9e9e..a0ed182ae73c 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -21,7 +21,7 @@
 
 extern int __atomic_add_return(int, atomic_t *);
 extern int atomic_cmpxchg(atomic_t *, int, int);
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+extern int atomic_xchg(atomic_t *, int);
 extern int __atomic_add_unless(atomic_t *, int, int);
 extern void atomic_set(atomic_t *, int);
 
diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h
index 1fae1a02e3c2..ae0f9a7a314d 100644
--- a/arch/sparc/include/asm/cmpxchg_32.h
+++ b/arch/sparc/include/asm/cmpxchg_32.h
@@ -11,22 +11,14 @@
 #ifndef __ARCH_SPARC_CMPXCHG__
 #define __ARCH_SPARC_CMPXCHG__
 
-static inline unsigned long xchg_u32(__volatile__ unsigned long *m, unsigned long val)
-{
-	__asm__ __volatile__("swap [%2], %0"
-			     : "=&r" (val)
-			     : "0" (val), "r" (m)
-			     : "memory");
-	return val;
-}
-
+extern unsigned long __xchg_u32(volatile u32 *m, u32 new);
 extern void __xchg_called_with_bad_pointer(void);
 
 static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int size)
 {
 	switch (size) {
 	case 4:
-		return xchg_u32(ptr, x);
+		return __xchg_u32(ptr, x);
 	}
 	__xchg_called_with_bad_pointer();
 	return x;
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index 1d32b54089aa..8f2f94d53434 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -40,6 +40,19 @@ int __atomic_add_return(int i, atomic_t *v)
 }
 EXPORT_SYMBOL(__atomic_add_return);
 
+int atomic_xchg(atomic_t *v, int new)
+{
+	int ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(ATOMIC_HASH(v), flags);
+	ret = v->counter;
+	v->counter = new;
+	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
+	return ret;
+}
+EXPORT_SYMBOL(atomic_xchg);
+
 int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int ret;
@@ -132,3 +145,17 @@ unsigned long __cmpxchg_u32(volatile u32 *ptr, u32 old, u32 new)
 	return (unsigned long)prev;
 }
 EXPORT_SYMBOL(__cmpxchg_u32);
+
+unsigned long __xchg_u32(volatile u32 *ptr, u32 new)
+{
+	unsigned long flags;
+	u32 prev;
+
+	spin_lock_irqsave(ATOMIC_HASH(ptr), flags);
+	prev = *ptr;
+	*ptr = new;
+	spin_unlock_irqrestore(ATOMIC_HASH(ptr), flags);
+
+	return (unsigned long)prev;
+}
+EXPORT_SYMBOL(__xchg_u32);

From 89b27dc7ce6465ca4cec9603b7d5fcdc678f30f9 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Fri, 5 Sep 2014 15:13:52 -0700
Subject: [PATCH 1123/1185] x86, x32, audit: Fix x32's AUDIT_ARCH wrt audit

commit 81f49a8fd7088cfcb588d182eeede862c0e3303e upstream.

is_compat_task() is the wrong check for audit arch; the check should
be is_ia32_task(): x32 syscalls should be AUDIT_ARCH_X86_64, not
AUDIT_ARCH_I386.

CONFIG_AUDITSYSCALL is currently incompatible with x32, so this has
no visible effect.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/a0138ed8c709882aec06e4acc30bfa9b623b8717.1409954077.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/ptrace.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 29a8120e6fe8..baa61e7370b7 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1475,15 +1475,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
 	force_sig_info(SIGTRAP, &info, tsk);
 }
 
-
-#ifdef CONFIG_X86_32
-# define IS_IA32	1
-#elif defined CONFIG_IA32_EMULATION
-# define IS_IA32	is_compat_task()
-#else
-# define IS_IA32	0
-#endif
-
 /*
  * We must return the syscall number to actually look up in the table.
  * This can be -1L to skip running any syscall at all.
@@ -1521,7 +1512,7 @@ long syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->orig_ax);
 
-	if (IS_IA32)
+	if (is_ia32_task())
 		audit_syscall_entry(AUDIT_ARCH_I386,
 				    regs->orig_ax,
 				    regs->bx, regs->cx,

From bd501a2eb28282b657555b32acbc65b6c102af1d Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 4 Nov 2014 11:27:12 +0100
Subject: [PATCH 1124/1185] audit: keep inode pinned

commit 799b601451b21ebe7af0e6e8f6e2ccd4683c5064 upstream.

Audit rules disappear when an inode they watch is evicted from the cache.
This is likely not what we want.

The guilty commit is "fsnotify: allow marks to not pin inodes in core",
which didn't take into account that audit_tree adds watches with a zero
mask.

Adding any mask should fix this.

Fixes: 90b1e7a57880 ("fsnotify: allow marks to not pin inodes in core")
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Paul Moore <pmoore@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/audit_tree.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 43c307dc9453..00c4459f76df 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -154,6 +154,7 @@ static struct audit_chunk *alloc_chunk(int count)
 		chunk->owners[i].index = i;
 	}
 	fsnotify_init_mark(&chunk->mark, audit_tree_destroy_watch);
+	chunk->mark.mask = FS_IN_IGNORED;
 	return chunk;
 }
 

From 4886eb4a453cb51598a9e3796c095e94dd552e5b Mon Sep 17 00:00:00 2001
From: James Ralston <james.d.ralston@intel.com>
Date: Mon, 13 Oct 2014 15:16:38 -0700
Subject: [PATCH 1125/1185] ahci: Add Device IDs for Intel Sunrise Point PCH

commit 690000b930456a98663567d35dd5c54b688d1e3f upstream.

This patch adds the AHCI-mode SATA Device IDs for the Intel Sunrise Point PCH.

Signed-off-by: James Ralston <james.d.ralston@intel.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/ahci.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index c3f09505f795..1a81eb26a07f 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -312,6 +312,11 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series RAID */
 	{ PCI_VDEVICE(INTEL, 0x8c8e), board_ahci }, /* 9 Series RAID */
 	{ PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series RAID */
+	{ PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H AHCI */
+	{ PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H RAID */
+	{ PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */
+	{ PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */
+	{ PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */
 
 	/* JMicron 360/1/3/5/6, match class to avoid IDE function */
 	{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,

From 9921a2d59ae1ad736ef4e6abf2a67d961e74a95c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 27 Oct 2014 10:22:56 -0400
Subject: [PATCH 1126/1185] ahci: disable MSI instead of NCQ on Samsung pci-e
 SSDs on macbooks

commit 66a7cbc303f4d28f201529b06061944d51ab530c upstream.

Samsung pci-e SSDs on macbooks failed miserably on NCQ commands, so
67809f85d31e ("ahci: disable NCQ on Samsung pci-e SSDs on macbooks")
disabled NCQ on them.  It turns out that NCQ is fine as long as MSI is
not used, so let's turn off MSI and leave NCQ on.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=60731
Tested-by: <dorin@i51.org>
Tested-by: Imre Kaloz <kaloz@openwrt.org>
Fixes: 67809f85d31e ("ahci: disable NCQ on Samsung pci-e SSDs on macbooks")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/ahci.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 1a81eb26a07f..64150a9ffff3 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -61,6 +61,7 @@ enum board_ids {
 	/* board IDs by feature in alphabetical order */
 	board_ahci,
 	board_ahci_ign_iferr,
+	board_ahci_nomsi,
 	board_ahci_noncq,
 	board_ahci_nosntf,
 	board_ahci_yes_fbs,
@@ -120,6 +121,13 @@ static const struct ata_port_info ahci_port_info[] = {
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &ahci_ops,
 	},
+	[board_ahci_nomsi] = {
+		AHCI_HFLAGS	(AHCI_HFLAG_NO_MSI),
+		.flags		= AHCI_FLAG_COMMON,
+		.pio_mask	= ATA_PIO4,
+		.udma_mask	= ATA_UDMA6,
+		.port_ops	= &ahci_ops,
+	},
 	[board_ahci_noncq] = {
 		AHCI_HFLAGS	(AHCI_HFLAG_NO_NCQ),
 		.flags		= AHCI_FLAG_COMMON,
@@ -479,10 +487,10 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 	{ PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci },	/* ASM1062 */
 
 	/*
-	 * Samsung SSDs found on some macbooks.  NCQ times out.
-	 * https://bugzilla.kernel.org/show_bug.cgi?id=60731
+	 * Samsung SSDs found on some macbooks.  NCQ times out if MSI is
+	 * enabled.  https://bugzilla.kernel.org/show_bug.cgi?id=60731
 	 */
-	{ PCI_VDEVICE(SAMSUNG, 0x1600), board_ahci_noncq },
+	{ PCI_VDEVICE(SAMSUNG, 0x1600), board_ahci_nomsi },
 
 	/* Enmotus */
 	{ PCI_DEVICE(0x1c44, 0x8000), board_ahci },

From 9a262b4b21aaebe6323b2b8362465ca796f12768 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 11 Nov 2014 15:45:57 +0100
Subject: [PATCH 1127/1185] ALSA: usb-audio: Fix memory leak in FTU quirk

commit 1a290581ded60e87276741f8ca97b161d2b226fc upstream.

M-audio FastTrack Ultra quirk doesn't release the kzalloc'ed memory.
This patch adds the private_free callback to release it properly.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/mixer_quirks.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
index ebe91440a068..c89a5bf5c00e 100644
--- a/sound/usb/mixer_quirks.c
+++ b/sound/usb/mixer_quirks.c
@@ -799,6 +799,11 @@ static int snd_ftu_eff_switch_put(struct snd_kcontrol *kctl,
 	return changed;
 }
 
+static void kctl_private_value_free(struct snd_kcontrol *kctl)
+{
+	kfree((void *)kctl->private_value);
+}
+
 static int snd_ftu_create_effect_switch(struct usb_mixer_interface *mixer,
 	int validx, int bUnitID)
 {
@@ -833,6 +838,7 @@ static int snd_ftu_create_effect_switch(struct usb_mixer_interface *mixer,
 		return -ENOMEM;
 	}
 
+	kctl->private_free = kctl_private_value_free;
 	err = snd_ctl_add(mixer->chip->card, kctl);
 	if (err < 0)
 		return err;

From 86800fc632fd29ff30d557fcf1fbd76d2416c483 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Mon, 6 Oct 2014 21:01:17 +0400
Subject: [PATCH 1128/1185] xtensa: re-wire umount syscall to sys_oldumount

commit 2651cc6974d47fc43bef1cd8cd26966e4f5ba306 upstream.

Userspace actually passes single parameter (path name) to the umount
syscall, so new umount just fails. Fix it by requesting old umount
syscall implementation and re-wiring umount to it.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/xtensa/include/uapi/asm/unistd.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h
index 51940fec6990..513effd48060 100644
--- a/arch/xtensa/include/uapi/asm/unistd.h
+++ b/arch/xtensa/include/uapi/asm/unistd.h
@@ -384,7 +384,8 @@ __SYSCALL(174, sys_chroot, 1)
 #define __NR_pivot_root 			175
 __SYSCALL(175, sys_pivot_root, 2)
 #define __NR_umount 				176
-__SYSCALL(176, sys_umount, 2)
+__SYSCALL(176, sys_oldumount, 1)
+#define __ARCH_WANT_SYS_OLDUMOUNT
 #define __NR_swapoff 				177
 __SYSCALL(177, sys_swapoff, 1)
 #define __NR_sync 				178

From 8169b2b999c9e0e196fdf8e96668f535ae648e5f Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@redhat.com>
Date: Thu, 23 Oct 2014 00:25:22 +0400
Subject: [PATCH 1129/1185] libceph: do not crash on large auth tickets

commit aaef31703a0cf6a733e651885bfb49edc3ac6774 upstream.

Large (greater than 32k, the value of PAGE_ALLOC_COSTLY_ORDER) auth
tickets will have their buffers vmalloc'ed, which leads to the
following crash in crypto:

[   28.685082] BUG: unable to handle kernel paging request at ffffeb04000032c0
[   28.686032] IP: [<ffffffff81392b42>] scatterwalk_pagedone+0x22/0x80
[   28.686032] PGD 0
[   28.688088] Oops: 0000 [#1] PREEMPT SMP
[   28.688088] Modules linked in:
[   28.688088] CPU: 0 PID: 878 Comm: kworker/0:2 Not tainted 3.17.0-vm+ #305
[   28.688088] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
[   28.688088] Workqueue: ceph-msgr con_work
[   28.688088] task: ffff88011a7f9030 ti: ffff8800d903c000 task.ti: ffff8800d903c000
[   28.688088] RIP: 0010:[<ffffffff81392b42>]  [<ffffffff81392b42>] scatterwalk_pagedone+0x22/0x80
[   28.688088] RSP: 0018:ffff8800d903f688  EFLAGS: 00010286
[   28.688088] RAX: ffffeb04000032c0 RBX: ffff8800d903f718 RCX: ffffeb04000032c0
[   28.688088] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8800d903f750
[   28.688088] RBP: ffff8800d903f688 R08: 00000000000007de R09: ffff8800d903f880
[   28.688088] R10: 18df467c72d6257b R11: 0000000000000000 R12: 0000000000000010
[   28.688088] R13: ffff8800d903f750 R14: ffff8800d903f8a0 R15: 0000000000000000
[   28.688088] FS:  00007f50a41c7700(0000) GS:ffff88011fc00000(0000) knlGS:0000000000000000
[   28.688088] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[   28.688088] CR2: ffffeb04000032c0 CR3: 00000000da3f3000 CR4: 00000000000006b0
[   28.688088] Stack:
[   28.688088]  ffff8800d903f698 ffffffff81392ca8 ffff8800d903f6e8 ffffffff81395d32
[   28.688088]  ffff8800dac96000 ffff880000000000 ffff8800d903f980 ffff880119b7e020
[   28.688088]  ffff880119b7e010 0000000000000000 0000000000000010 0000000000000010
[   28.688088] Call Trace:
[   28.688088]  [<ffffffff81392ca8>] scatterwalk_done+0x38/0x40
[   28.688088]  [<ffffffff81392ca8>] scatterwalk_done+0x38/0x40
[   28.688088]  [<ffffffff81395d32>] blkcipher_walk_done+0x182/0x220
[   28.688088]  [<ffffffff813990bf>] crypto_cbc_encrypt+0x15f/0x180
[   28.688088]  [<ffffffff81399780>] ? crypto_aes_set_key+0x30/0x30
[   28.688088]  [<ffffffff8156c40c>] ceph_aes_encrypt2+0x29c/0x2e0
[   28.688088]  [<ffffffff8156d2a3>] ceph_encrypt2+0x93/0xb0
[   28.688088]  [<ffffffff8156d7da>] ceph_x_encrypt+0x4a/0x60
[   28.688088]  [<ffffffff8155b39d>] ? ceph_buffer_new+0x5d/0xf0
[   28.688088]  [<ffffffff8156e837>] ceph_x_build_authorizer.isra.6+0x297/0x360
[   28.688088]  [<ffffffff8112089b>] ? kmem_cache_alloc_trace+0x11b/0x1c0
[   28.688088]  [<ffffffff8156b496>] ? ceph_auth_create_authorizer+0x36/0x80
[   28.688088]  [<ffffffff8156ed83>] ceph_x_create_authorizer+0x63/0xd0
[   28.688088]  [<ffffffff8156b4b4>] ceph_auth_create_authorizer+0x54/0x80
[   28.688088]  [<ffffffff8155f7c0>] get_authorizer+0x80/0xd0
[   28.688088]  [<ffffffff81555a8b>] prepare_write_connect+0x18b/0x2b0
[   28.688088]  [<ffffffff81559289>] try_read+0x1e59/0x1f10

This is because we set up crypto scatterlists as if all buffers were
kmalloc'ed.  Fix it.

Signed-off-by: Ilya Dryomov <idryomov@redhat.com>
Reviewed-by: Sage Weil <sage@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ceph/crypto.c | 169 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 132 insertions(+), 37 deletions(-)

diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 6e7a236525b6..06f19b9e159a 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -89,11 +89,82 @@ static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void)
 
 static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
 
+/*
+ * Should be used for buffers allocated with ceph_kvmalloc().
+ * Currently these are encrypt out-buffer (ceph_buffer) and decrypt
+ * in-buffer (msg front).
+ *
+ * Dispose of @sgt with teardown_sgtable().
+ *
+ * @prealloc_sg is to avoid memory allocation inside sg_alloc_table()
+ * in cases where a single sg is sufficient.  No attempt to reduce the
+ * number of sgs by squeezing physically contiguous pages together is
+ * made though, for simplicity.
+ */
+static int setup_sgtable(struct sg_table *sgt, struct scatterlist *prealloc_sg,
+			 const void *buf, unsigned int buf_len)
+{
+	struct scatterlist *sg;
+	const bool is_vmalloc = is_vmalloc_addr(buf);
+	unsigned int off = offset_in_page(buf);
+	unsigned int chunk_cnt = 1;
+	unsigned int chunk_len = PAGE_ALIGN(off + buf_len);
+	int i;
+	int ret;
+
+	if (buf_len == 0) {
+		memset(sgt, 0, sizeof(*sgt));
+		return -EINVAL;
+	}
+
+	if (is_vmalloc) {
+		chunk_cnt = chunk_len >> PAGE_SHIFT;
+		chunk_len = PAGE_SIZE;
+	}
+
+	if (chunk_cnt > 1) {
+		ret = sg_alloc_table(sgt, chunk_cnt, GFP_NOFS);
+		if (ret)
+			return ret;
+	} else {
+		WARN_ON(chunk_cnt != 1);
+		sg_init_table(prealloc_sg, 1);
+		sgt->sgl = prealloc_sg;
+		sgt->nents = sgt->orig_nents = 1;
+	}
+
+	for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) {
+		struct page *page;
+		unsigned int len = min(chunk_len - off, buf_len);
+
+		if (is_vmalloc)
+			page = vmalloc_to_page(buf);
+		else
+			page = virt_to_page(buf);
+
+		sg_set_page(sg, page, len, off);
+
+		off = 0;
+		buf += len;
+		buf_len -= len;
+	}
+	WARN_ON(buf_len != 0);
+
+	return 0;
+}
+
+static void teardown_sgtable(struct sg_table *sgt)
+{
+	if (sgt->orig_nents > 1)
+		sg_free_table(sgt);
+}
+
 static int ceph_aes_encrypt(const void *key, int key_len,
 			    void *dst, size_t *dst_len,
 			    const void *src, size_t src_len)
 {
-	struct scatterlist sg_in[2], sg_out[1];
+	struct scatterlist sg_in[2], prealloc_sg;
+	struct sg_table sg_out;
 	struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
 	struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 };
 	int ret;
@@ -109,16 +180,18 @@ static int ceph_aes_encrypt(const void *key, int key_len,
 
 	*dst_len = src_len + zero_padding;
 
-	crypto_blkcipher_setkey((void *)tfm, key, key_len);
 	sg_init_table(sg_in, 2);
 	sg_set_buf(&sg_in[0], src, src_len);
 	sg_set_buf(&sg_in[1], pad, zero_padding);
-	sg_init_table(sg_out, 1);
-	sg_set_buf(sg_out, dst, *dst_len);
+	ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len);
+	if (ret)
+		goto out_tfm;
+
+	crypto_blkcipher_setkey((void *)tfm, key, key_len);
 	iv = crypto_blkcipher_crt(tfm)->iv;
 	ivsize = crypto_blkcipher_ivsize(tfm);
-
 	memcpy(iv, aes_iv, ivsize);
+
 	/*
 	print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
 		       key, key_len, 1);
@@ -127,16 +200,22 @@ static int ceph_aes_encrypt(const void *key, int key_len,
 	print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
 			pad, zero_padding, 1);
 	*/
-	ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in,
+	ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in,
 				     src_len + zero_padding);
-	crypto_free_blkcipher(tfm);
-	if (ret < 0)
+	if (ret < 0) {
 		pr_err("ceph_aes_crypt failed %d\n", ret);
+		goto out_sg;
+	}
 	/*
 	print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1,
 		       dst, *dst_len, 1);
 	*/
-	return 0;
+
+out_sg:
+	teardown_sgtable(&sg_out);
+out_tfm:
+	crypto_free_blkcipher(tfm);
+	return ret;
 }
 
 static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
@@ -144,7 +223,8 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
 			     const void *src1, size_t src1_len,
 			     const void *src2, size_t src2_len)
 {
-	struct scatterlist sg_in[3], sg_out[1];
+	struct scatterlist sg_in[3], prealloc_sg;
+	struct sg_table sg_out;
 	struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
 	struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 };
 	int ret;
@@ -160,17 +240,19 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
 
 	*dst_len = src1_len + src2_len + zero_padding;
 
-	crypto_blkcipher_setkey((void *)tfm, key, key_len);
 	sg_init_table(sg_in, 3);
 	sg_set_buf(&sg_in[0], src1, src1_len);
 	sg_set_buf(&sg_in[1], src2, src2_len);
 	sg_set_buf(&sg_in[2], pad, zero_padding);
-	sg_init_table(sg_out, 1);
-	sg_set_buf(sg_out, dst, *dst_len);
+	ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len);
+	if (ret)
+		goto out_tfm;
+
+	crypto_blkcipher_setkey((void *)tfm, key, key_len);
 	iv = crypto_blkcipher_crt(tfm)->iv;
 	ivsize = crypto_blkcipher_ivsize(tfm);
-
 	memcpy(iv, aes_iv, ivsize);
+
 	/*
 	print_hex_dump(KERN_ERR, "enc  key: ", DUMP_PREFIX_NONE, 16, 1,
 		       key, key_len, 1);
@@ -181,23 +263,30 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
 	print_hex_dump(KERN_ERR, "enc  pad: ", DUMP_PREFIX_NONE, 16, 1,
 			pad, zero_padding, 1);
 	*/
-	ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in,
+	ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in,
 				     src1_len + src2_len + zero_padding);
-	crypto_free_blkcipher(tfm);
-	if (ret < 0)
+	if (ret < 0) {
 		pr_err("ceph_aes_crypt2 failed %d\n", ret);
+		goto out_sg;
+	}
 	/*
 	print_hex_dump(KERN_ERR, "enc  out: ", DUMP_PREFIX_NONE, 16, 1,
 		       dst, *dst_len, 1);
 	*/
-	return 0;
+
+out_sg:
+	teardown_sgtable(&sg_out);
+out_tfm:
+	crypto_free_blkcipher(tfm);
+	return ret;
 }
 
 static int ceph_aes_decrypt(const void *key, int key_len,
 			    void *dst, size_t *dst_len,
 			    const void *src, size_t src_len)
 {
-	struct scatterlist sg_in[1], sg_out[2];
+	struct sg_table sg_in;
+	struct scatterlist sg_out[2], prealloc_sg;
 	struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
 	struct blkcipher_desc desc = { .tfm = tfm };
 	char pad[16];
@@ -209,16 +298,16 @@ static int ceph_aes_decrypt(const void *key, int key_len,
 	if (IS_ERR(tfm))
 		return PTR_ERR(tfm);
 
-	crypto_blkcipher_setkey((void *)tfm, key, key_len);
-	sg_init_table(sg_in, 1);
 	sg_init_table(sg_out, 2);
-	sg_set_buf(sg_in, src, src_len);
 	sg_set_buf(&sg_out[0], dst, *dst_len);
 	sg_set_buf(&sg_out[1], pad, sizeof(pad));
+	ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len);
+	if (ret)
+		goto out_tfm;
 
+	crypto_blkcipher_setkey((void *)tfm, key, key_len);
 	iv = crypto_blkcipher_crt(tfm)->iv;
 	ivsize = crypto_blkcipher_ivsize(tfm);
-
 	memcpy(iv, aes_iv, ivsize);
 
 	/*
@@ -227,12 +316,10 @@ static int ceph_aes_decrypt(const void *key, int key_len,
 	print_hex_dump(KERN_ERR, "dec  in: ", DUMP_PREFIX_NONE, 16, 1,
 		       src, src_len, 1);
 	*/
-
-	ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len);
-	crypto_free_blkcipher(tfm);
+	ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len);
 	if (ret < 0) {
 		pr_err("ceph_aes_decrypt failed %d\n", ret);
-		return ret;
+		goto out_sg;
 	}
 
 	if (src_len <= *dst_len)
@@ -250,7 +337,12 @@ static int ceph_aes_decrypt(const void *key, int key_len,
 	print_hex_dump(KERN_ERR, "dec out: ", DUMP_PREFIX_NONE, 16, 1,
 		       dst, *dst_len, 1);
 	*/
-	return 0;
+
+out_sg:
+	teardown_sgtable(&sg_in);
+out_tfm:
+	crypto_free_blkcipher(tfm);
+	return ret;
 }
 
 static int ceph_aes_decrypt2(const void *key, int key_len,
@@ -258,7 +350,8 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
 			     void *dst2, size_t *dst2_len,
 			     const void *src, size_t src_len)
 {
-	struct scatterlist sg_in[1], sg_out[3];
+	struct sg_table sg_in;
+	struct scatterlist sg_out[3], prealloc_sg;
 	struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
 	struct blkcipher_desc desc = { .tfm = tfm };
 	char pad[16];
@@ -270,17 +363,17 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
 	if (IS_ERR(tfm))
 		return PTR_ERR(tfm);
 
-	sg_init_table(sg_in, 1);
-	sg_set_buf(sg_in, src, src_len);
 	sg_init_table(sg_out, 3);
 	sg_set_buf(&sg_out[0], dst1, *dst1_len);
 	sg_set_buf(&sg_out[1], dst2, *dst2_len);
 	sg_set_buf(&sg_out[2], pad, sizeof(pad));
+	ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len);
+	if (ret)
+		goto out_tfm;
 
 	crypto_blkcipher_setkey((void *)tfm, key, key_len);
 	iv = crypto_blkcipher_crt(tfm)->iv;
 	ivsize = crypto_blkcipher_ivsize(tfm);
-
 	memcpy(iv, aes_iv, ivsize);
 
 	/*
@@ -289,12 +382,10 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
 	print_hex_dump(KERN_ERR, "dec   in: ", DUMP_PREFIX_NONE, 16, 1,
 		       src, src_len, 1);
 	*/
-
-	ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len);
-	crypto_free_blkcipher(tfm);
+	ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len);
 	if (ret < 0) {
 		pr_err("ceph_aes_decrypt failed %d\n", ret);
-		return ret;
+		goto out_sg;
 	}
 
 	if (src_len <= *dst1_len)
@@ -324,7 +415,11 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
 		       dst2, *dst2_len, 1);
 	*/
 
-	return 0;
+out_sg:
+	teardown_sgtable(&sg_in);
+out_tfm:
+	crypto_free_blkcipher(tfm);
+	return ret;
 }
 
 
From 597b38963392a700288552da78db8f4f3d088d21 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Tue, 23 Sep 2014 23:02:41 +0300
Subject: [PATCH 1130/1185] iwlwifi: configure the LTR

commit 9180ac50716a097a407c6d7e7e4589754a922260 upstream.

The LTR is the handshake between the device and the root
complex about the latency allowed when the bus exits power
save. This configuration was missing and this led to high
latency in the link power up. The end user could experience
high latency in the network because of this.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/iwlwifi/iwl-trans.h      |  2 ++
 .../net/wireless/iwlwifi/mvm/fw-api-power.h   | 35 ++++++++++++++++++-
 drivers/net/wireless/iwlwifi/mvm/fw-api.h     |  1 +
 drivers/net/wireless/iwlwifi/mvm/fw.c         |  9 +++++
 drivers/net/wireless/iwlwifi/mvm/ops.c        |  1 +
 drivers/net/wireless/iwlwifi/pcie/trans.c     | 17 +++++----
 6 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/iwlwifi/iwl-trans.h b/drivers/net/wireless/iwlwifi/iwl-trans.h
index 72d2ecce0b8d..d8df1d9b0de3 100644
--- a/drivers/net/wireless/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/iwlwifi/iwl-trans.h
@@ -489,6 +489,7 @@ enum iwl_trans_state {
  *	Set during transport allocation.
  * @hw_id_str: a string with info about HW ID. Set during transport allocation.
  * @pm_support: set to true in start_hw if link pm is supported
+ * @ltr_enabled: set to true if the LTR is enabled
  * @dev_cmd_pool: pool for Tx cmd allocation - for internal use only.
  *	The user should use iwl_trans_{alloc,free}_tx_cmd.
  * @dev_cmd_headroom: room needed for the transport's private use before the
@@ -513,6 +514,7 @@ struct iwl_trans {
 	u8 rx_mpdu_cmd, rx_mpdu_cmd_hdr_size;
 
 	bool pm_support;
+	bool ltr_enabled;
 
 	/* The following fields are internal only */
 	struct kmem_cache *dev_cmd_pool;
diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h
index 81fe45f46be7..ac38ecf13c18 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h
+++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h
@@ -67,7 +67,40 @@
 /* Power Management Commands, Responses, Notifications */
 
 /**
- * enum iwl_scan_flags - masks for power table command flags
+ * enum iwl_ltr_config_flags - masks for LTR config command flags
+ * @LTR_CFG_FLAG_FEATURE_ENABLE: Feature operational status
+ * @LTR_CFG_FLAG_HW_DIS_ON_SHADOW_REG_ACCESS: allow LTR change on shadow
+ *	memory access
+ * @LTR_CFG_FLAG_HW_EN_SHRT_WR_THROUGH: allow LTR msg send on ANY LTR
+ *	reg change
+ * @LTR_CFG_FLAG_HW_DIS_ON_D0_2_D3: allow LTR msg send on transition from
+ *	D0 to D3
+ * @LTR_CFG_FLAG_SW_SET_SHORT: fixed static short LTR register
+ * @LTR_CFG_FLAG_SW_SET_LONG: fixed static long LTR register
+ * @LTR_CFG_FLAG_DENIE_C10_ON_PD: allow going into C10 on PD
+ */
+enum iwl_ltr_config_flags {
+	LTR_CFG_FLAG_FEATURE_ENABLE = BIT(0),
+	LTR_CFG_FLAG_HW_DIS_ON_SHADOW_REG_ACCESS = BIT(1),
+	LTR_CFG_FLAG_HW_EN_SHRT_WR_THROUGH = BIT(2),
+	LTR_CFG_FLAG_HW_DIS_ON_D0_2_D3 = BIT(3),
+	LTR_CFG_FLAG_SW_SET_SHORT = BIT(4),
+	LTR_CFG_FLAG_SW_SET_LONG = BIT(5),
+	LTR_CFG_FLAG_DENIE_C10_ON_PD = BIT(6),
+};
+
+/**
+ * struct iwl_ltr_config_cmd - configures the LTR
+ * @flags: See %enum iwl_ltr_config_flags
+ */
+struct iwl_ltr_config_cmd {
+	__le32 flags;
+	__le32 static_long;
+	__le32 static_short;
+} __packed;
+
+/**
+ * enum iwl_power_flags - masks for power table command flags
  * @POWER_FLAGS_POWER_SAVE_ENA_MSK: '1' Allow to save power by turning off
  *		receiver and transmitter. '0' - does not allow.
  * @POWER_FLAGS_POWER_MANAGEMENT_ENA_MSK: '0' Driver disables power management,
diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/iwlwifi/mvm/fw-api.h
index c6384555aab4..4b6730db42a5 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw-api.h
+++ b/drivers/net/wireless/iwlwifi/mvm/fw-api.h
@@ -138,6 +138,7 @@ enum {
 
 	/* Power */
 	POWER_TABLE_CMD = 0x77,
+	LTR_CONFIG = 0xee,
 
 	/* Scanning */
 	SCAN_REQUEST_CMD = 0x80,
diff --git a/drivers/net/wireless/iwlwifi/mvm/fw.c b/drivers/net/wireless/iwlwifi/mvm/fw.c
index e18c92dd60ec..d250d451fd01 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/iwlwifi/mvm/fw.c
@@ -443,6 +443,15 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
 	if (ret)
 		goto error;
 
+	if (mvm->trans->ltr_enabled) {
+		struct iwl_ltr_config_cmd cmd = {
+			.flags = cpu_to_le32(LTR_CFG_FLAG_FEATURE_ENABLE),
+		};
+
+		WARN_ON(iwl_mvm_send_cmd_pdu(mvm, LTR_CONFIG, 0,
+					     sizeof(cmd), &cmd));
+	}
+
 	IWL_DEBUG_INFO(mvm, "RT uCode started.\n");
 
 	return 0;
diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c
index 388c8a914960..649d301cfa2a 100644
--- a/drivers/net/wireless/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/iwlwifi/mvm/ops.c
@@ -293,6 +293,7 @@ static const char *iwl_mvm_cmd_strings[REPLY_MAX] = {
 	CMD(BT_PROFILE_NOTIFICATION),
 	CMD(BT_CONFIG),
 	CMD(MCAST_FILTER_CMD),
+	CMD(LTR_CONFIG),
 };
 #undef CMD
 
diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c
index ff04135d37af..6a5eb2b29418 100644
--- a/drivers/net/wireless/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/iwlwifi/pcie/trans.c
@@ -116,11 +116,13 @@ static void iwl_pcie_set_pwr(struct iwl_trans *trans, bool vaux)
 
 /* PCI registers */
 #define PCI_CFG_RETRY_TIMEOUT	0x041
+#define PCI_EXP_DEVCTL2_LTR_EN	0x0400
 
 static void iwl_pcie_apm_config(struct iwl_trans *trans)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	u16 lctl;
+	u16 cap;
 
 	/*
 	 * HW bug W/A for instability in PCIe bus L0S->L1 transition.
@@ -131,16 +133,17 @@ static void iwl_pcie_apm_config(struct iwl_trans *trans)
 	 *    power savings, even without L1.
 	 */
 	pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_LNKCTL, &lctl);
-	if (lctl & PCI_EXP_LNKCTL_ASPM_L1) {
-		/* L1-ASPM enabled; disable(!) L0S */
+	if (lctl & PCI_EXP_LNKCTL_ASPM_L1)
 		iwl_set_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_ENABLED);
-		dev_info(trans->dev, "L1 Enabled; Disabling L0S\n");
-	} else {
-		/* L1-ASPM disabled; enable(!) L0S */
+	else
 		iwl_clear_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_ENABLED);
-		dev_info(trans->dev, "L1 Disabled; Enabling L0S\n");
-	}
 	trans->pm_support = !(lctl & PCI_EXP_LNKCTL_ASPM_L0S);
+
+	pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_DEVCTL2, &cap);
+	trans->ltr_enabled = cap & PCI_EXP_DEVCTL2_LTR_EN;
+	dev_info(trans->dev, "L1 %sabled - LTR %sabled\n",
+		 (lctl & PCI_EXP_LNKCTL_ASPM_L1) ? "En" : "Dis",
+		 trans->ltr_enabled ? "En" : "Dis");
 }
 
 /*

From b34fafa32078fec415d2d5ec2361fc3b072351cf Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 3 Nov 2014 14:01:25 +0800
Subject: [PATCH 1131/1185] macvtap: Fix csum_start when VLAN tags are present

commit 3ce9b20f1971690b8b3b620e735ec99431573b39 upstream.

When VLAN is in use in macvtap_put_user, we end up setting
csum_start to the wrong place.  The result is that whoever
ends up doing the checksum setting will corrupt the packet instead
of writing the checksum to the expected location, usually this
means writing the checksum with an offset of -4.

This patch fixes this by adjusting csum_start when VLAN tags are
detected.

Fixes: f09e2249c4f5 ("macvtap: restore vlan header on user read")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvtap.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 9e56eb479a4f..2d255ba911d5 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -625,6 +625,8 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
 		vnet_hdr->csum_start = skb_checksum_start_offset(skb);
+		if (vlan_tx_tag_present(skb))
+			vnet_hdr->csum_start += VLAN_HLEN;
 		vnet_hdr->csum_offset = skb->csum_offset;
 	} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
 		vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;

From 2e613ff8d8221da89904473a8136ee29efeca6f0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 3 Nov 2014 13:57:46 +0100
Subject: [PATCH 1132/1185] mac80211: fix use-after-free in defragmentation

commit b8fff407a180286aa683d543d878d98d9fc57b13 upstream.

Upon receiving the last fragment, all but the first fragment
are freed, but the multicast check for statistics at the end
of the function refers to the current skb (the last fragment)
causing a use-after-free bug.

Since multicast frames cannot be fragmented and we check for
this early in the function, just modify that check to also
do the accounting to fix the issue.

Reported-by: Yosef Khyal <yosefx.khyal@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/rx.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index fae73b0ef14b..85bc6d498b46 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1585,11 +1585,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 	sc = le16_to_cpu(hdr->seq_ctrl);
 	frag = sc & IEEE80211_SCTL_FRAG;
 
-	if (likely((!ieee80211_has_morefrags(fc) && frag == 0) ||
-		   is_multicast_ether_addr(hdr->addr1))) {
-		/* not fragmented */
+	if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
+		goto out;
+
+	if (is_multicast_ether_addr(hdr->addr1)) {
+		rx->local->dot11MulticastReceivedFrameCount++;
 		goto out;
 	}
+
 	I802_DEBUG_INC(rx->local->rx_handlers_fragments);
 
 	if (skb_linearize(rx->skb))
@@ -1682,10 +1685,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
  out:
 	if (rx->sta)
 		rx->sta->rx_packets++;
-	if (is_multicast_ether_addr(hdr->addr1))
-		rx->local->dot11MulticastReceivedFrameCount++;
-	else
-		ieee80211_led_rx(rx->local);
+	ieee80211_led_rx(rx->local);
 	return RX_CONTINUE;
 }
 

From 9458c73ccd64b1ed8c53793e9afd1daa6b826081 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 5 Nov 2014 17:14:32 -0500
Subject: [PATCH 1133/1185] drm/radeon: add missing crtc unlock when setting up
 the MC

commit f0d7bfb9407fccb6499ec01c33afe43512a439a2 upstream.

Need to unlock the crtc after updating the blanking state.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/evergreen.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index e62a9ce3e4dc..ead08a49bec0 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2379,6 +2379,7 @@ void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *sav
 					WREG32(EVERGREEN_CRTC_UPDATE_LOCK + crtc_offsets[i], 1);
 					tmp |= EVERGREEN_CRTC_BLANK_DATA_EN;
 					WREG32(EVERGREEN_CRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
+					WREG32(EVERGREEN_CRTC_UPDATE_LOCK + crtc_offsets[i], 0);
 				}
 			} else {
 				tmp = RREG32(EVERGREEN_CRTC_CONTROL + crtc_offsets[i]);

From 3e1f6a23ed6ae2a031fb3cc539744c3d8691be5b Mon Sep 17 00:00:00 2001
From: Nathan Lynch <nathan_lynch@mentor.com>
Date: Mon, 10 Nov 2014 23:46:27 +0100
Subject: [PATCH 1134/1185] ARM: 8198/1: make kuser helpers depend on MMU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 08b964ff3c51b10aaf2e6ba639f40054c09f0f7a upstream.

The kuser helpers page is not set up on non-MMU systems, so it does
not make sense to allow CONFIG_KUSER_HELPERS to be enabled when
CONFIG_MMU=n.  Allowing it to be set on !MMU results in an oops in
set_tls (used in execve and the arm_syscall trap handler):

Unhandled exception: IPSR = 00000005 LR = fffffff1
CPU: 0 PID: 1 Comm: swapper Not tainted 3.18.0-rc1-00041-ga30465a #216
task: 8b838000 ti: 8b82a000 task.ti: 8b82a000
PC is at flush_thread+0x32/0x40
LR is at flush_thread+0x21/0x40
pc : [<8f00157a>]    lr : [<8f001569>]    psr: 4100000b
sp : 8b82be20  ip : 00000000  fp : 8b83c000
r10: 00000001  r9 : 88018c84  r8 : 8bb85000
r7 : 8b838000  r6 : 00000000  r5 : 8bb77400  r4 : 8b82a000
r3 : ffff0ff0  r2 : 8b82a000  r1 : 00000000  r0 : 88020354
xPSR: 4100000b
CPU: 0 PID: 1 Comm: swapper Not tainted 3.18.0-rc1-00041-ga30465a #216
[<8f002bc1>] (unwind_backtrace) from [<8f002033>] (show_stack+0xb/0xc)
[<8f002033>] (show_stack) from [<8f00265b>] (__invalid_entry+0x4b/0x4c)

As best I can tell this issue existed for the set_tls ARM syscall
before commit fbfb872f5f41 "ARM: 8148/1: flush TLS and thumbee
register state during exec" consolidated the TLS manipulation code
into the set_tls helper function, but now that we're using it to flush
register state during execve, !MMU users encounter the oops at the
first exec.

Prevent CONFIG_MMU=n configurations from enabling
CONFIG_KUSER_HELPERS.

Fixes: fbfb872f5f41 (ARM: 8148/1: flush TLS and thumbee register state during exec)

Signed-off-by: Nathan Lynch <nathan_lynch@mentor.com>
Reported-by: Stefan Agner <stefan@agner.ch>
Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index c21082d664ed..c6926eae4fe0 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -778,6 +778,7 @@ config NEED_KUSER_HELPERS
 
 config KUSER_HELPERS
 	bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS
+	depends on MMU
 	default y
 	help
 	  Warning: disabling this option may break user programs.

From 16640ca660f4980fb5c1f4e4febce19875f4c1b8 Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@redhat.com>
Date: Wed, 12 Nov 2014 21:07:44 +0000
Subject: [PATCH 1135/1185] arm64: __clear_user: handle exceptions on strb
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 97fc15436b36ee3956efad83e22a557991f7d19d upstream.

ARM64 currently doesn't fix up faults on the single-byte (strb) case of
__clear_user... which means that we can cause a nasty kernel panic as an
ordinary user with any (multiple of PAGE_SIZE)+1 byte read from /dev/zero.
i.e.: dd if=/dev/zero of=foo ibs=1 count=1 (or ibs=65537, etc.)

This is a pretty obscure bug in the general case since we'll only
__do_kernel_fault (since there's no extable entry for pc) if the
mmap_sem is contended. However, with CONFIG_DEBUG_VM enabled, we'll
always fault.

if (!down_read_trylock(&mm->mmap_sem)) {
	if (!user_mode(regs) && !search_exception_tables(regs->pc))
		goto no_context;
retry:
	down_read(&mm->mmap_sem);
} else {
	/*
	 * The above down_read_trylock() might have succeeded in
	 * which
	 * case, we'll have missed the might_sleep() from
	 * down_read().
	 */
	might_sleep();
	if (!user_mode(regs) && !search_exception_tables(regs->pc))
		goto no_context;
}

Fix that by adding an extable entry for the strb instruction, since it
touches user memory, similar to the other stores in __clear_user.

Signed-off-by: Kyle McMartin <kyle@redhat.com>
Reported-by: Miloš Prchlík <mprchlik@redhat.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/lib/clear_user.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index 6e0ed93d51fe..c17967fdf5f6 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -46,7 +46,7 @@ USER(9f, strh	wzr, [x0], #2	)
 	sub	x1, x1, #2
 4:	adds	x1, x1, #1
 	b.mi	5f
-	strb	wzr, [x0]
+USER(9f, strb	wzr, [x0]	)
 5:	mov	x0, #0
 	ret
 ENDPROC(__clear_user)

From 562e494829ef4d54cf9c6c0676038dac3e2917af Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Tue, 11 Nov 2014 17:16:44 +0100
Subject: [PATCH 1136/1185] firewire: cdev: prevent kernel stack leaking into
 ioctl arguments

commit eaca2d8e75e90a70a63a6695c9f61932609db212 upstream.

Found by the UC-KLEE tool:  A user could supply less input to
firewire-cdev ioctls than write- or write/read-type ioctl handlers
expect.  The handlers used data from uninitialized kernel stack then.

This could partially leak back to the user if the kernel subsequently
generated fw_cdev_event_'s (to be read from the firewire-cdev fd)
which notably would contain the _u64 closure field which many of the
ioctl argument structures contain.

The fact that the handlers would act on random garbage input is a
lesser issue since all handlers must check their input anyway.

The fix simply always null-initializes the entire ioctl argument buffer
regardless of the actual length of expected user input.  That is, a
runtime overhead of memset(..., 40) is added to each firewire-cdev
ioctl() call.  [Comment from Clemens Ladisch:  This part of the stack is
most likely to be already in the cache.]

Remarks:
  - There was never any leak from kernel stack to the ioctl output
    buffer itself.  IOW, it was not possible to read kernel stack by a
    read-type or write/read-type ioctl alone; the leak could at most
    happen in combination with read()ing subsequent event data.
  - The actual expected minimum user input of each ioctl from
    include/uapi/linux/firewire-cdev.h is, in bytes:
    [0x00] = 32, [0x05] =  4, [0x0a] = 16, [0x0f] = 20, [0x14] = 16,
    [0x01] = 36, [0x06] = 20, [0x0b] =  4, [0x10] = 20, [0x15] = 20,
    [0x02] = 20, [0x07] =  4, [0x0c] =  0, [0x11] =  0, [0x16] =  8,
    [0x03] =  4, [0x08] = 24, [0x0d] = 20, [0x12] = 36, [0x17] = 12,
    [0x04] = 20, [0x09] = 24, [0x0e] =  4, [0x13] = 40, [0x18] =  4.

Reported-by: David Ramos <daramos@stanford.edu>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/firewire/core-cdev.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index ac1b43a04285..4f73c727a97a 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -1637,8 +1637,7 @@ static int dispatch_ioctl(struct client *client,
 	    _IOC_SIZE(cmd) > sizeof(buffer))
 		return -ENOTTY;
 
-	if (_IOC_DIR(cmd) == _IOC_READ)
-		memset(&buffer, 0, _IOC_SIZE(cmd));
+	memset(&buffer, 0, sizeof(buffer));
 
 	if (_IOC_DIR(cmd) & _IOC_WRITE)
 		if (copy_from_user(&buffer, arg, _IOC_SIZE(cmd)))

From 32049712c4d803139fecf7a59cfa3e24c8456d03 Mon Sep 17 00:00:00 2001
From: Peng Tao <tao.peng@primarydata.com>
Date: Wed, 5 Nov 2014 22:36:50 +0800
Subject: [PATCH 1137/1185] nfs: fix pnfs direct write memory leak

commit 8c393f9a721c30a030049a680e1bf896669bb279 upstream.

For pNFS direct writes, layout driver may dynamically allocate ds_cinfo.buckets.
So we need to take care to free them when freeing dreq.

Ideally this needs to be done inside layout driver where ds_cinfo.buckets
are allocated. But buckets are attached to dreq and reused across LD IO iterations.
So I feel it's OK to free them in the generic layer.

Signed-off-by: Peng Tao <tao.peng@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/direct.c         |  1 +
 include/linux/nfs_xdr.h | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0bd7a55a5f07..725e87538c98 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -180,6 +180,7 @@ static void nfs_direct_req_free(struct kref *kref)
 {
 	struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
 
+	nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo);
 	if (dreq->l_ctx != NULL)
 		nfs_put_lock_context(dreq->l_ctx);
 	if (dreq->ctx != NULL)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 104b62f23ee0..54e351aa4d2e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1184,11 +1184,22 @@ struct nfs41_free_stateid_res {
 	unsigned int			status;
 };
 
+static inline void
+nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo)
+{
+	kfree(cinfo->buckets);
+}
+
 #else
 
 struct pnfs_ds_commit_info {
 };
 
+static inline void
+nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo)
+{
+}
+
 #endif /* CONFIG_NFS_V4_1 */
 
 struct nfs_page;

From 945f341afb991b94fce08f633353efa0c623f719 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 3 Nov 2014 19:36:40 +0100
Subject: [PATCH 1138/1185] scsi: only re-lock door after EH on devices that
 were reset

commit 48379270fe6808cf4612ee094adc8da2b7a83baa upstream.

Setups that use the blk-mq I/O path can lock up if a host with a single
device that has its door locked enters EH.  Make sure to only send the
command to re-lock the door to devices that actually were reset and thus
might have lost their state.  Otherwise the EH code might get blocked
on blk_get_request as all requests for non-reset devices might be in use.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Meelis Roos <meelis.roos@ut.ee>
Tested-by: Meelis Roos <meelis.roos@ut.ee>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/scsi_error.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index f43de1e56420..3668b1b23b5a 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1689,8 +1689,10 @@ static void scsi_restart_operations(struct Scsi_Host *shost)
 	 * is no point trying to lock the door of an off-line device.
 	 */
 	shost_for_each_device(sdev, shost) {
-		if (scsi_device_online(sdev) && sdev->locked)
+		if (scsi_device_online(sdev) && sdev->was_reset && sdev->locked) {
 			scsi_eh_lock_door(sdev);
+			sdev->was_reset = 0;
+		}
 	}
 
 	/*

From aca0ab61812decb0bd0335fc9c4b065991884b66 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Mon, 10 Nov 2014 21:46:18 +0100
Subject: [PATCH 1139/1185] parisc: Use compat layer for msgctl, shmat, shmctl
 and semtimedop syscalls

commit 2fe749f50b0bec07650ef135b29b1f55bf543869 upstream.

Switch over the msgctl, shmat, shmctl and semtimedop syscalls to use the compat
layer. The problem was found with the debian procenv package, which called
	shmctl(0, SHM_INFO, &info);
in which the shmctl syscall then overwrote parts of the surrounding areas on
the stack on which the info variable was stored and thus led to a segfault
later on.

Additionally fix the definition of struct shminfo64 to use unsigned longs like
the other architectures. This has no impact on userspace since we only have a
32bit userspace up to now.

Signed-off-by: Helge Deller <deller@gmx.de>
Cc: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/parisc/include/uapi/asm/shmbuf.h | 25 +++++++++----------------
 arch/parisc/kernel/syscall_table.S    |  8 ++++----
 2 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/arch/parisc/include/uapi/asm/shmbuf.h b/arch/parisc/include/uapi/asm/shmbuf.h
index 0a3eada1863b..f395cde7b593 100644
--- a/arch/parisc/include/uapi/asm/shmbuf.h
+++ b/arch/parisc/include/uapi/asm/shmbuf.h
@@ -36,23 +36,16 @@ struct shmid64_ds {
 	unsigned int		__unused2;
 };
 
-#ifdef CONFIG_64BIT
-/* The 'unsigned int' (formerly 'unsigned long') data types below will
- * ensure that a 32-bit app calling shmctl(*,IPC_INFO,*) will work on
- * a wide kernel, but if some of these values are meant to contain pointers
- * they may need to be 'long long' instead. -PB XXX FIXME
- */
-#endif
 struct shminfo64 {
-	unsigned int	shmmax;
-	unsigned int	shmmin;
-	unsigned int	shmmni;
-	unsigned int	shmseg;
-	unsigned int	shmall;
-	unsigned int	__unused1;
-	unsigned int	__unused2;
-	unsigned int	__unused3;
-	unsigned int	__unused4;
+	unsigned long	shmmax;
+	unsigned long	shmmin;
+	unsigned long	shmmni;
+	unsigned long	shmseg;
+	unsigned long	shmall;
+	unsigned long	__unused1;
+	unsigned long	__unused2;
+	unsigned long	__unused3;
+	unsigned long	__unused4;
 };
 
 #endif /* _PARISC_SHMBUF_H */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 10a0c2aad8cf..b24732d1bdbf 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -286,11 +286,11 @@
 	ENTRY_COMP(msgsnd)
 	ENTRY_COMP(msgrcv)
 	ENTRY_SAME(msgget)		/* 190 */
-	ENTRY_SAME(msgctl)
-	ENTRY_SAME(shmat)
+	ENTRY_COMP(msgctl)
+	ENTRY_COMP(shmat)
 	ENTRY_SAME(shmdt)
 	ENTRY_SAME(shmget)
-	ENTRY_SAME(shmctl)		/* 195 */
+	ENTRY_COMP(shmctl)		/* 195 */
 	ENTRY_SAME(ni_syscall)		/* streams1 */
 	ENTRY_SAME(ni_syscall)		/* streams2 */
 	ENTRY_SAME(lstat64)
@@ -323,7 +323,7 @@
 	ENTRY_SAME(epoll_ctl)		/* 225 */
 	ENTRY_SAME(epoll_wait)
  	ENTRY_SAME(remap_file_pages)
-	ENTRY_SAME(semtimedop)
+	ENTRY_COMP(semtimedop)
 	ENTRY_COMP(mq_open)
 	ENTRY_SAME(mq_unlink)		/* 230 */
 	ENTRY_COMP(mq_timedsend)

From 9d0c27027cba7de2e42695d7bd29fa42666dac63 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 30 Oct 2014 20:43:38 +0100
Subject: [PATCH 1140/1185] block: Fix computation of merged request priority

commit ece9c72accdc45c3a9484dacb1125ce572647288 upstream.

Priority of a merged request is computed by ioprio_best(). If one of the
requests has undefined priority (IOPRIO_CLASS_NONE) and another request
has priority from IOPRIO_CLASS_BE, the function will return the
undefined priority which is wrong. Fix the function to properly return
priority of a request with the defined priority.

Fixes: d58cdfb89ce0c6bd5f81ae931a984ef298dbda20
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ioprio.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/fs/ioprio.c b/fs/ioprio.c
index e50170ca7c33..31666c92b46a 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -157,14 +157,16 @@ static int get_task_ioprio(struct task_struct *p)
 
 int ioprio_best(unsigned short aprio, unsigned short bprio)
 {
-	unsigned short aclass = IOPRIO_PRIO_CLASS(aprio);
-	unsigned short bclass = IOPRIO_PRIO_CLASS(bprio);
+	unsigned short aclass;
+	unsigned short bclass;
 
-	if (aclass == IOPRIO_CLASS_NONE)
-		aclass = IOPRIO_CLASS_BE;
-	if (bclass == IOPRIO_CLASS_NONE)
-		bclass = IOPRIO_CLASS_BE;
+	if (!ioprio_valid(aprio))
+		aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
+	if (!ioprio_valid(bprio))
+		bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
 
+	aclass = IOPRIO_PRIO_CLASS(aprio);
+	bclass = IOPRIO_PRIO_CLASS(bprio);
 	if (aclass == bclass)
 		return min(aprio, bprio);
 	if (aclass > bclass)

From fe30b804a20bbc3218193f0d528e9749332fb06a Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Mon, 10 Nov 2014 15:03:24 +0000
Subject: [PATCH 1141/1185] dm btree: fix a recursion depth bug in btree
 walking code

commit 9b460d3699324d570a4d4161c3741431887f102f upstream.

The walk code was using a 'ro_spine' to hold its locked btree nodes.
But this data structure is designed for the rolling lock scheme, and
as such automatically unlocks blocks that are two steps up the call
chain.  This is not suitable for the simple recursive walk algorithm,
which retraces its steps.

This code is only used by the persistent array code, which in turn is
only used by dm-cache.  In order to trigger it you need to have a
mapping tree that is more than 2 levels deep; which equates to 8-16
million cache blocks.  For instance a 4T ssd with a very small block
size of 32k only just triggers this bug.

The fix just places the locked blocks on the stack, and stops using
the ro_spine altogether.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../md/persistent-data/dm-btree-internal.h    |  6 +++++
 drivers/md/persistent-data/dm-btree-spine.c   |  2 +-
 drivers/md/persistent-data/dm-btree.c         | 24 ++++++++-----------
 3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h
index 37d367bb9aa8..bf2b80d5c470 100644
--- a/drivers/md/persistent-data/dm-btree-internal.h
+++ b/drivers/md/persistent-data/dm-btree-internal.h
@@ -42,6 +42,12 @@ struct btree_node {
 } __packed;
 
 
+/*
+ * Locks a block using the btree node validator.
+ */
+int bn_read_lock(struct dm_btree_info *info, dm_block_t b,
+		 struct dm_block **result);
+
 void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
 		  struct dm_btree_value_type *vt);
 
diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c
index cf9fd676ae44..1b5e13ec7f96 100644
--- a/drivers/md/persistent-data/dm-btree-spine.c
+++ b/drivers/md/persistent-data/dm-btree-spine.c
@@ -92,7 +92,7 @@ struct dm_block_validator btree_node_validator = {
 
 /*----------------------------------------------------------------*/
 
-static int bn_read_lock(struct dm_btree_info *info, dm_block_t b,
+int bn_read_lock(struct dm_btree_info *info, dm_block_t b,
 		 struct dm_block **result)
 {
 	return dm_tm_read_lock(info->tm, b, &btree_node_validator, result);
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 35865425e4b4..0a7592e88811 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -812,22 +812,26 @@ EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);
  * FIXME: We shouldn't use a recursive algorithm when we have limited stack
  * space.  Also this only works for single level trees.
  */
-static int walk_node(struct ro_spine *s, dm_block_t block,
+static int walk_node(struct dm_btree_info *info, dm_block_t block,
 		     int (*fn)(void *context, uint64_t *keys, void *leaf),
 		     void *context)
 {
 	int r;
 	unsigned i, nr;
+	struct dm_block *node;
 	struct btree_node *n;
 	uint64_t keys;
 
-	r = ro_step(s, block);
-	n = ro_node(s);
+	r = bn_read_lock(info, block, &node);
+	if (r)
+		return r;
+
+	n = dm_block_data(node);
 
 	nr = le32_to_cpu(n->header.nr_entries);
 	for (i = 0; i < nr; i++) {
 		if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) {
-			r = walk_node(s, value64(n, i), fn, context);
+			r = walk_node(info, value64(n, i), fn, context);
 			if (r)
 				goto out;
 		} else {
@@ -839,7 +843,7 @@ static int walk_node(struct ro_spine *s, dm_block_t block,
 	}
 
 out:
-	ro_pop(s);
+	dm_tm_unlock(info->tm, node);
 	return r;
 }
 
@@ -847,15 +851,7 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
 		  int (*fn)(void *context, uint64_t *keys, void *leaf),
 		  void *context)
 {
-	int r;
-	struct ro_spine spine;
-
 	BUG_ON(info->levels > 1);
-
-	init_ro_spine(&spine, info);
-	r = walk_node(&spine, root, fn, context);
-	exit_ro_spine(&spine);
-
-	return r;
+	return walk_node(info, root, fn, context);
 }
 EXPORT_SYMBOL_GPL(dm_btree_walk);

From 513f8da83b3ef6cf1475da6ef3d851286e8466fa Mon Sep 17 00:00:00 2001
From: Heinz Mauelshagen <heinzm@redhat.com>
Date: Fri, 17 Oct 2014 13:38:50 +0200
Subject: [PATCH 1142/1185] dm raid: ensure superblock's size matches device's
 logical block size

commit 40d43c4b4cac4c2647bf07110d7b07d35f399a84 upstream.

The dm-raid superblock (struct dm_raid_superblock) is padded to 512
bytes and that size is being used to read it in from the metadata
device into one preallocated page.

Reading or writing this on a 512-byte sector device works fine but on
a 4096-byte sector device this fails.

Set the dm-raid superblock's size to the logical block size of the
metadata device, because IO at that size is guaranteed to work.  Also
add a size check to avoid silent partial metadata loss in case the
superblock should ever grow past the logical block size or PAGE_SIZE.

[includes pointer math fix from Dan Carpenter]
Reported-by: "Liuhua Wang" <lwang@suse.com>
Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-raid.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 2dea49c4279e..84cddccc0249 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -785,8 +785,7 @@ struct dm_raid_superblock {
 	__le32 layout;
 	__le32 stripe_sectors;
 
-	__u8 pad[452];		/* Round struct to 512 bytes. */
-				/* Always set to 0 when writing. */
+	/* Remainder of a logical block is zero-filled when writing (see super_sync()). */
 } __packed;
 
 static int read_disk_sb(struct md_rdev *rdev, int size)
@@ -823,7 +822,7 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
 		    test_bit(Faulty, &(rs->dev[i].rdev.flags)))
 			failed_devices |= (1ULL << i);
 
-	memset(sb, 0, sizeof(*sb));
+	memset(sb + 1, 0, rdev->sb_size - sizeof(*sb));
 
 	sb->magic = cpu_to_le32(DM_RAID_MAGIC);
 	sb->features = cpu_to_le32(0);	/* No features yet */
@@ -858,7 +857,11 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
 	uint64_t events_sb, events_refsb;
 
 	rdev->sb_start = 0;
-	rdev->sb_size = sizeof(*sb);
+	rdev->sb_size = bdev_logical_block_size(rdev->meta_bdev);
+	if (rdev->sb_size < sizeof(*sb) || rdev->sb_size > PAGE_SIZE) {
+		DMERR("superblock size of a logical block is no longer valid");
+		return -EINVAL;
+	}
 
 	ret = read_disk_sb(rdev, rdev->sb_size);
 	if (ret)

From bff25f7d4005a0dceba5042698d6a66f7c1821fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali.rohar@gmail.com>
Date: Sat, 8 Nov 2014 12:45:23 -0800
Subject: [PATCH 1143/1185] Input: alps - ignore potential bare packets when
 device is out of sync
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 4ab8f7f320f91f279c3f06a9795cfea5c972888a upstream.

The 5th and 6th bytes of the ALPS trackstick V3 protocol match the
condition for the first byte of a 3-byte PS/2 packet. When the driver
enters the out-of-sync state while the ALPS trackstick is sending data,
the driver matches the 5th, 6th and following 1st bytes as a PS/2 packet.

This basically means that if the user is using the trackstick while the
driver is out of sync, the driver will never resync. Processing these
bytes as 3-byte PS/2 data causes a total mess (random cursor movements,
random clicks) and makes the trackstick unusable until the psmouse driver
decides to do a full device reset.

Lot of users reported problems with ALPS devices on Dell Latitude E6440,
E6540 and E7440 laptops. ALPS device or Dell EC for unknown reason send
some invalid ALPS PS/2 bytes which cause driver out of sync. It looks like
that i8042 and psmouse/alps driver always receive group of 6 bytes packets
so there are no missing bytes and no bytes were inserted between valid
ones.

This patch does not fix root of problem with ALPS devices found in Dell
Latitude laptops but it does not allow to process some (invalid)
subsequence of 6 bytes ALPS packets as 3 bytes PS/2 when driver is out of
sync.

So with this patch trackstick input device does not report bogus data when
also driver is out of sync, so trackstick should be usable on those
machines.

Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Tested-by: Pali Rohár <pali.rohar@gmail.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/alps.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 7c5d72a6a26a..071bd835594c 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -873,7 +873,13 @@ static psmouse_ret_t alps_process_byte(struct psmouse *psmouse)
 {
 	struct alps_data *priv = psmouse->private;
 
-	if ((psmouse->packet[0] & 0xc8) == 0x08) { /* PS/2 packet */
+	/*
+	 * Check if we are dealing with a bare PS/2 packet, presumably from
+	 * a device connected to the external PS/2 port. Because bare PS/2
+	 * protocol does not have enough constant bits to self-synchronize
+	 * properly we only do this if the device is fully synchronized.
+	 */
+	if (!psmouse->out_of_sync_cnt && (psmouse->packet[0] & 0xc8) == 0x08) {
 		if (psmouse->pktcnt == 3) {
 			alps_report_bare_ps2_packet(psmouse, psmouse->packet,
 						    true);

From abf9765d3b73f3bf7ebea763b9b86aa38e29bd24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali.rohar@gmail.com>
Date: Sat, 8 Nov 2014 12:58:57 -0800
Subject: [PATCH 1144/1185] Input: alps - allow up to 2 invalid packets without
 resetting device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 9d720b34c0a432639252f63012e18b0507f5b432 upstream.

On some Dell Latitude laptops the ALPS device or the Dell EC sends one
invalid byte in a 6-byte ALPS packet. In this case the psmouse driver
enters the out-of-sync state. It looks like all other bytes in the packets
are valid and the device is working properly, so there is no need to do a
full device reset; the driver just needs to wait for a byte which matches
the condition for the first byte (start of packet). Because ALPS packets
are bigger (6 or 8 bytes), the default limit is too small.

This patch increases the number of invalid bytes that the psmouse driver
can drop before doing a full reset to the size of 2 ALPS packets.

Resetting ALPS devices take some time and when doing reset on some Dell
laptops touchpad, trackstick and also keyboard do not respond. So it is
better to do it only if really necessary.

Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Tested-by: Pali Rohár <pali.rohar@gmail.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/alps.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 071bd835594c..19e070f16e6b 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -1822,6 +1822,9 @@ int alps_init(struct psmouse *psmouse)
 	/* We are having trouble resyncing ALPS touchpads so disable it for now */
 	psmouse->resync_time = 0;
 
+	/* Allow 2 invalid packets without resetting device */
+	psmouse->resetafter = psmouse->pktsize * 2;
+
 	return 0;
 
 init_fail:

From 361eeee70ea9c57a2ce018ea0ce720f44a3fc07d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 17 Oct 2014 15:10:25 +0300
Subject: [PATCH 1145/1185] NFSv4: Ensure that we remove NFSv4.0 delegations
 when state has expired

commit 4dfd4f7af0afd201706ad186352ca423b0f17d4b upstream.

NFSv4.0 does not have TEST_STATEID/FREE_STATEID functionality, so
unlike NFSv4.1, the recovery procedure when stateids have expired or
have been revoked requires us to just forget the delegation.

http://lkml.kernel.org/r/CAN-5tyHwG=Cn2Q9KsHWadewjpTTy_K26ee+UnSvHvG4192p-Xw@mail.gmail.com
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/nfs4proc.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 69fc437be661..2407d21db0f0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1845,6 +1845,28 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
 	return ret;
 }
 
+static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state)
+{
+	nfs_remove_bad_delegation(state->inode);
+	write_seqlock(&state->seqlock);
+	nfs4_stateid_copy(&state->stateid, &state->open_stateid);
+	write_sequnlock(&state->seqlock);
+	clear_bit(NFS_DELEGATED_STATE, &state->flags);
+}
+
+static void nfs40_clear_delegation_stateid(struct nfs4_state *state)
+{
+	if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL)
+		nfs_finish_clear_delegation_stateid(state);
+}
+
+static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+	/* NFSv4.0 doesn't allow for delegation recovery on open expire */
+	nfs40_clear_delegation_stateid(state);
+	return nfs4_open_expired(sp, state);
+}
+
 #if defined(CONFIG_NFS_V4_1)
 static void nfs41_clear_delegation_stateid(struct nfs4_state *state)
 {
@@ -6974,7 +6996,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
 static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
 	.owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
 	.state_flag_bit	= NFS_STATE_RECLAIM_NOGRACE,
-	.recover_open	= nfs4_open_expired,
+	.recover_open	= nfs40_open_expired,
 	.recover_lock	= nfs4_lock_expired,
 	.establish_clid = nfs4_init_clientid,
 	.get_clid_cred	= nfs4_get_setclientid_cred,

From bc2075d558b1395a80c1354665679c8970ba5829 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 17 Oct 2014 23:02:52 +0300
Subject: [PATCH 1146/1185] NFS: Don't try to reclaim delegation open state if
 recovery failed

commit f8ebf7a8ca35dde321f0cd385fee6f1950609367 upstream.

If state recovery failed, then we should not attempt to reclaim delegated
state.

http://lkml.kernel.org/r/CAN-5tyHwG=Cn2Q9KsHWadewjpTTy_K26ee+UnSvHvG4192p-Xw@mail.gmail.com
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/delegation.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 4b49a8c6ccad..3fa904c9e7ef 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -108,6 +108,8 @@ static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *s
 			continue;
 		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
 			continue;
+		if (!nfs4_valid_open_stateid(state))
+			continue;
 		if (!nfs4_stateid_match(&state->stateid, stateid))
 			continue;
 		get_nfs_open_context(ctx);

From 29d312e94b2ede1cd3270ed1237a2817032f287d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Oct 2014 14:02:47 +0200
Subject: [PATCH 1147/1185] nfs: Fix use of uninitialized variable in
 nfs_getattr()

commit 16caf5b6101d03335b386e77e9e14136f989be87 upstream.

Variable 'err' may be left uninitialized when nfs_getattr() uses it to
check whether it should call generic_fillattr() or not. That can result
in spurious error returns. Initialize 'err' properly.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index cd4b9073dd20..e9be01b2cc5a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -519,7 +519,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
 	struct inode *inode = dentry->d_inode;
 	int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
-	int err;
+	int err = 0;
 
 	/* Flush out writes to the server in order to update c/mtime.  */
 	if (S_ISREG(inode->i_mode)) {

From 270e234c60d21681ac2afc04329cc0b5ab4ff035 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Mon, 10 Nov 2014 18:43:56 -0500
Subject: [PATCH 1148/1185] NFSv4: Fix races between
 nfs_remove_bad_delegation() and delegation return

commit 869f9dfa4d6d57b79e0afc3af14772c2a023eeb1 upstream.

Any attempt to call nfs_remove_bad_delegation() while a delegation is being
returned is currently a no-op. This means that we can end up looping
forever in nfs_end_delegation_return() if something causes the delegation
to be revoked.
This patch adds a mechanism whereby the state recovery code can communicate
to the delegation return code that the delegation is no longer valid and
that it should not be used when reclaiming state.
It also changes the return value for nfs4_handle_delegation_recall_error()
to ensure that nfs_end_delegation_return() does not reattempt the lock
reclaim before state recovery is done.

http://lkml.kernel.org/r/CAN-5tyHwG=Cn2Q9KsHWadewjpTTy_K26ee+UnSvHvG4192p-Xw@mail.gmail.com
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/delegation.c | 23 +++++++++++++++++++++--
 fs/nfs/delegation.h |  1 +
 fs/nfs/nfs4proc.c   |  2 +-
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 3fa904c9e7ef..ef0c394b7bf5 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -177,7 +177,11 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
 {
 	int res = 0;
 
-	res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync);
+	if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+		res = nfs4_proc_delegreturn(inode,
+				delegation->cred,
+				&delegation->stateid,
+				issync);
 	nfs_free_delegation(delegation);
 	return res;
 }
@@ -363,11 +367,13 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation
 {
 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
-	int err;
+	int err = 0;
 
 	if (delegation == NULL)
 		return 0;
 	do {
+		if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+			break;
 		err = nfs_delegation_claim_opens(inode, &delegation->stateid);
 		if (!issync || err != -EAGAIN)
 			break;
@@ -588,10 +594,23 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl
 	rcu_read_unlock();
 }
 
+static void nfs_revoke_delegation(struct inode *inode)
+{
+	struct nfs_delegation *delegation;
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(inode)->delegation);
+	if (delegation != NULL) {
+		set_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
+		nfs_mark_return_delegation(NFS_SERVER(inode), delegation);
+	}
+	rcu_read_unlock();
+}
+
 void nfs_remove_bad_delegation(struct inode *inode)
 {
 	struct nfs_delegation *delegation;
 
+	nfs_revoke_delegation(inode);
 	delegation = nfs_inode_detach_delegation(inode);
 	if (delegation) {
 		nfs_inode_find_state_and_recover(inode, &delegation->stateid);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 9a79c7a99d6d..e02b090ab9da 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -31,6 +31,7 @@ enum {
 	NFS_DELEGATION_RETURN_IF_CLOSED,
 	NFS_DELEGATION_REFERENCED,
 	NFS_DELEGATION_RETURNING,
+	NFS_DELEGATION_REVOKED,
 };
 
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 2407d21db0f0..78787948f69d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1416,7 +1416,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
 			nfs_inode_find_state_and_recover(state->inode,
 					stateid);
 			nfs4_schedule_stateid_recovery(server, state);
-			return 0;
+			return -EAGAIN;
 		case -NFS4ERR_DELAY:
 		case -NFS4ERR_GRACE:
 			set_bit(NFS_DELEGATED_STATE, &state->flags);

From 0ec4fc584c3ee470f5150450acf49dd2dab5d1e7 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 5 Sep 2014 09:09:28 -0300
Subject: [PATCH 1149/1185] media: ttusb-dec: buffer overflow in ioctl

commit f2e323ec96077642d397bb1c355def536d489d16 upstream.

We need to add a limit check here so we don't overflow the buffer.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/ttusb-dec/ttusbdecfe.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/media/usb/ttusb-dec/ttusbdecfe.c b/drivers/media/usb/ttusb-dec/ttusbdecfe.c
index 5c45c9d0712d..9c29552aedec 100644
--- a/drivers/media/usb/ttusb-dec/ttusbdecfe.c
+++ b/drivers/media/usb/ttusb-dec/ttusbdecfe.c
@@ -156,6 +156,9 @@ static int ttusbdecfe_dvbs_diseqc_send_master_cmd(struct dvb_frontend* fe, struc
 		   0x00, 0x00, 0x00, 0x00,
 		   0x00, 0x00 };
 
+	if (cmd->msg_len > sizeof(b) - 4)
+		return -EINVAL;
+
 	memcpy(&b[4], cmd->msg, cmd->msg_len);
 
 	state->config->send_command(fe, 0x72,

From 6078e7a5ce5e52245481faf71590744aae2c17a1 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 13 Oct 2014 19:00:25 -0600
Subject: [PATCH 1150/1185] kgdb: Remove "weak" from kgdb_arch_pc() declaration

commit 107bcc6d566cb40184068d888637f9aefe6252dd upstream.

kernel/debug/debug_core.c provides a default kgdb_arch_pc() definition
explicitly marked "weak".  Several architectures provide their own
definitions intended to override the default, but the "weak" attribute on
the declaration applied to the arch definitions as well, so the linker
chose one based on link order (see 10629d711ed7 ("PCI: Remove __weak
annotation from pcibios_get_phb_of_node decl")).

Remove the "weak" attribute from the declaration so we always prefer a
non-weak definition over the weak one, independent of link order.

Fixes: 688b744d8bc8 ("kgdb: fix signedness mixmatches, add statics, add declaration to header")
Tested-by: Vineet Gupta <vgupta@synopsys.com>	# for ARC build
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kgdb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index c6e091bf39a5..bdfc95bddde9 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -283,7 +283,7 @@ struct kgdb_io {
 
 extern struct kgdb_arch		arch_kgdb_ops;
 
-extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs);
+extern unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs);
 
 #ifdef CONFIG_SERIAL_KGDB_NMI
 extern int kgdb_register_nmi_console(void);

From 88d96d8e9ece6591b19a9989f010db1b623c0d9a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 13 Oct 2014 18:59:09 -0600
Subject: [PATCH 1151/1185] clocksource: Remove "weak" from
 clocksource_default_clock() declaration

commit 96a2adbc6f501996418da9f7afe39bf0e4d006a9 upstream.

kernel/time/jiffies.c provides a default clocksource_default_clock()
definition explicitly marked "weak".  arch/s390 provides its own definition
intended to override the default, but the "weak" attribute on the
declaration applied to the s390 definition as well, so the linker chose one
based on link order (see 10629d711ed7 ("PCI: Remove __weak annotation from
pcibios_get_phb_of_node decl")).

Remove the "weak" attribute from the clocksource_default_clock()
declaration so we always prefer a non-weak definition over the weak one,
independent of link order.

Fixes: f1b82746c1e9 ("clocksource: Cleanup clocksource selection")
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: John Stultz <john.stultz@linaro.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
CC: Daniel Lezcano <daniel.lezcano@linaro.org>
CC: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/clocksource.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 7279b94c01da..91aa89e1aaa0 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -285,7 +285,7 @@ extern struct clocksource* clocksource_get_next(void);
 extern void clocksource_change_rating(struct clocksource *cs, int rating);
 extern void clocksource_suspend(void);
 extern void clocksource_resume(void);
-extern struct clocksource * __init __weak clocksource_default_clock(void);
+extern struct clocksource * __init clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
 
 extern void

From f2f25589e727f0257ee95d9e8521b7f30d3616a4 Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Mon, 13 Oct 2014 15:54:10 -0700
Subject: [PATCH 1152/1185] ipc: always handle a new value of auto_msgmni

commit 1195d94e006b23c6292e78857e154872e33b6d7e upstream.

proc_dointvec_minmax() returns zero if a new value has been set.  So we
don't need to check that all characters have been handled.

Below you can find two examples.  In the first one, the new value has not
been handled properly.

$ strace ./a.out
open("/proc/sys/kernel/auto_msgmni", O_WRONLY) = 3
write(3, "0\n\0", 3)                    = 2
close(3)                                = 0
exit_group(0)
$ cat /sys/kernel/debug/tracing/trace

$strace ./a.out
open("/proc/sys/kernel/auto_msgmni", O_WRONLY) = 3
write(3, "0\n", 2)                      = 2
close(3)                                = 0

$ cat /sys/kernel/debug/tracing/trace
a.out-697   [000] ....  3280.998235: unregister_ipcns_notifier <-proc_ipcauto_dointvec_minmax

Fixes: 9eefe520c814 ("ipc: do not use a negative value to re-enable msgmni automatic recomputing")
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Cc: Mathias Krause <minipli@googlemail.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Joe Perches <joe@perches.com>
Cc: Davidlohr Bueso <davidlohr@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 ipc/ipc_sysctl.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index b0e99deb6d05..a0f0ab2ac2a8 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -123,7 +123,6 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
 	void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table ipc_table;
-	size_t lenp_bef = *lenp;
 	int oldval;
 	int rc;
 
@@ -133,7 +132,7 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
 
 	rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
 
-	if (write && !rc && lenp_bef == *lenp) {
+	if (write && !rc) {
 		int newval = *((int *)(ipc_table.data));
 		/*
 		 * The file "auto_msgmni" has correctly been set.

From 3a758a2b78da2f49f7165678faf999e946a0c4b5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 23 Oct 2014 10:36:06 +0200
Subject: [PATCH 1153/1185] netfilter: nf_log: account for size of NLMSG_DONE
 attribute

commit 9dfa1dfe4d5e5e66a991321ab08afe69759d797a upstream.

We currently neither account for the nlattr size, nor do we consider
the size of the trailing NLMSG_DONE when allocating nlmsg skb.

This can result in nflog to stop working, as __nfulnl_send() re-tries
sending forever if it failed to append NLMSG_DONE (which will never
work if buffer is not large enough).

Reported-by: Houcheng Lin <houcheng@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nfnetlink_log.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 962e9792e317..478b66950007 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -647,7 +647,8 @@ nfulnl_log_packet(struct net *net,
 		+ nla_total_size(sizeof(u_int32_t))	/* gid */
 		+ nla_total_size(plen)			/* prefix */
 		+ nla_total_size(sizeof(struct nfulnl_msg_packet_hw))
-		+ nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp));
+		+ nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp))
+		+ nla_total_size(sizeof(struct nfgenmsg));	/* NLMSG_DONE */
 
 	if (in && skb_mac_header_was_set(skb)) {
 		size +=   nla_total_size(skb->dev->hard_header_len)
@@ -690,8 +691,7 @@ nfulnl_log_packet(struct net *net,
 		goto unlock_and_release;
 	}
 
-	if (inst->skb &&
-	    size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) {
+	if (inst->skb && size > skb_tailroom(inst->skb)) {
 		/* either the queue len is too high or we don't have
 		 * enough room in the skb left. flush to userspace. */
 		__nfulnl_flush(inst);

From 07b170693adf94237d767b5545be013c1cab18c1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 23 Oct 2014 10:36:07 +0200
Subject: [PATCH 1154/1185] netfilter: nfnetlink_log: fix maximum packet length
 logged to userspace

commit c1e7dc91eed0ed1a51c9b814d648db18bf8fc6e9 upstream.

Don't try to queue payloads > 0xffff - NLA_HDRLEN; it does not work.
The nla length includes the size of the nla struct, so anything larger
results in u16 integer overflow.

This patch is similar to
9cefbbc9c8f9abe (netfilter: nfnetlink_queue: cleanup copy_range usage).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nfnetlink_log.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 478b66950007..3d66b83a5c88 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -45,7 +45,8 @@
 #define NFULNL_NLBUFSIZ_DEFAULT	NLMSG_GOODSIZE
 #define NFULNL_TIMEOUT_DEFAULT 	100	/* every second */
 #define NFULNL_QTHRESH_DEFAULT 	100	/* 100 packets */
-#define NFULNL_COPY_RANGE_MAX	0xFFFF	/* max packet size is limited by 16-bit struct nfattr nfa_len field */
+/* max packet size is limited by 16-bit struct nfattr nfa_len field */
+#define NFULNL_COPY_RANGE_MAX	(0xFFFF - NLA_HDRLEN)
 
 #define PRINTR(x, args...)	do { if (net_ratelimit()) \
 				     printk(x, ## args); } while (0);
@@ -255,6 +256,8 @@ nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode,
 
 	case NFULNL_COPY_PACKET:
 		inst->copy_mode = mode;
+		if (range == 0)
+			range = NFULNL_COPY_RANGE_MAX;
 		inst->copy_range = min_t(unsigned int,
 					 range, NFULNL_COPY_RANGE_MAX);
 		break;
@@ -677,8 +680,7 @@ nfulnl_log_packet(struct net *net,
 		break;
 
 	case NFULNL_COPY_PACKET:
-		if (inst->copy_range == 0
-		    || inst->copy_range > skb->len)
+		if (inst->copy_range > skb->len)
 			data_len = skb->len;
 		else
 			data_len = inst->copy_range;

From 0bf7a5e16a5356b9dadc503aac66f4f587823e8b Mon Sep 17 00:00:00 2001
From: Houcheng Lin <houcheng@gmail.com>
Date: Thu, 23 Oct 2014 10:36:08 +0200
Subject: [PATCH 1155/1185] netfilter: nf_log: release skbuff on nlmsg put
 failure

commit b51d3fa364885a2c1e1668f88776c67c95291820 upstream.

The kernel should reserve enough room in the skb so that the DONE
message can always be appended.  However, in case of e.g. new attribute
erroneously not being size-accounted for, __nfulnl_send() will still
try to put next nlmsg into this full skbuf, causing the skb to be stuck
forever and blocking delivery of further messages.

Fix issue by releasing skb immediately after nlmsg_put error and
WARN() so we can track down the cause of such size mismatch.

[ fw@strlen.de: add tailroom/len info to WARN ]

Signed-off-by: Houcheng Lin <houcheng@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nfnetlink_log.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 3d66b83a5c88..216261dd32ae 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -348,26 +348,25 @@ nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size)
 	return skb;
 }
 
-static int
+static void
 __nfulnl_send(struct nfulnl_instance *inst)
 {
-	int status = -1;
-
 	if (inst->qlen > 1) {
 		struct nlmsghdr *nlh = nlmsg_put(inst->skb, 0, 0,
 						 NLMSG_DONE,
 						 sizeof(struct nfgenmsg),
 						 0);
-		if (!nlh)
+		if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n",
+			      inst->skb->len, skb_tailroom(inst->skb))) {
+			kfree_skb(inst->skb);
 			goto out;
+		}
 	}
-	status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid,
-				   MSG_DONTWAIT);
-
+	nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid,
+			  MSG_DONTWAIT);
+out:
 	inst->qlen = 0;
 	inst->skb = NULL;
-out:
-	return status;
 }
 
 static void

From 7c059c04ffe3de25d5aa66ad3541e53b94233ea3 Mon Sep 17 00:00:00 2001
From: Pablo Neira <pablo@netfilter.org>
Date: Tue, 29 Jul 2014 18:12:15 +0200
Subject: [PATCH 1156/1185] netfilter: xt_bpf: add mising opaque struct
 sk_filter definition

commit e10038a8ec06ac819b7552bb67aaa6d2d6f850c1 upstream.

This structure is not exposed to userspace, so fix this by defining
struct sk_filter; so we skip the casting in kernelspace. This is safe
since userspace has no way to lurk with that internal pointer.

Fixes: e6f30c7 ("netfilter: x_tables: add xt_bpf match")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/netfilter/xt_bpf.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h
index 5dda450eb55b..2ec9fbcd06f9 100644
--- a/include/uapi/linux/netfilter/xt_bpf.h
+++ b/include/uapi/linux/netfilter/xt_bpf.h
@@ -6,6 +6,8 @@
 
 #define XT_BPF_MAX_NUM_INSTR	64
 
+struct sk_filter;
+
 struct xt_bpf_info {
 	__u16 bpf_program_num_elem;
 	struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR];

From 5eb4491e33b498f05bf51c75ed4abc46a5fccaba Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 7 Jun 2014 21:17:04 +0200
Subject: [PATCH 1157/1185] netfilter: nf_nat: fix oops on netns removal

commit 945b2b2d259d1a4364a2799e80e8ff32f8c6ee6f upstream.

Quoting Samu Kallio:

 Basically what's happening is, during netns cleanup,
 nf_nat_net_exit gets called before ipv4_net_exit. As I understand
 it, nf_nat_net_exit is supposed to kill any conntrack entries which
 have NAT context (through nf_ct_iterate_cleanup), but for some
 reason this doesn't happen (perhaps something else is still holding
 refs to those entries?).

 When ipv4_net_exit is called, conntrack entries (including those
 with NAT context) are cleaned up, but the
 nat_bysource hashtable is long gone - freed in nf_nat_net_exit. The
 bug happens when attempting to free a conntrack entry whose NAT hash
 'prev' field points to a slot in the freed hash table (head for that
 bin).

We ignore conntracks with null nat bindings.  But this is wrong,
as these are in bysource hash table as well.

Restore nat-cleaning for the netns-is-being-removed case.

bug:
https://bugzilla.kernel.org/show_bug.cgi?id=65191

Fixes: c2d421e1718 ('netfilter: nf_nat: fix race when unloading protocol modules')
Reported-by: Samu Kallio <samu.kallio@aberdeencloud.com>
Debugged-by: Samu Kallio <samu.kallio@aberdeencloud.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Tested-by: Samu Kallio <samu.kallio@aberdeencloud.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
[samu.kallio@aberdeencloud.com: backport to 3.10-stable]
Signed-off-by: Samu Kallio <samu.kallio@aberdeencloud.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nf_nat_core.c | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 038eee5c8f85..2bb801e3ee8c 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -487,6 +487,39 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data)
 	return i->status & IPS_NAT_MASK ? 1 : 0;
 }
 
+static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
+{
+	struct nf_conn_nat *nat = nfct_nat(ct);
+
+	if (nf_nat_proto_remove(ct, data))
+		return 1;
+
+	if (!nat || !nat->ct)
+		return 0;
+
+	/* This netns is being destroyed, and conntrack has nat null binding.
+	 * Remove it from bysource hash, as the table will be freed soon.
+	 *
+	 * Else, when the conntrack is destroyed, nf_nat_cleanup_conntrack()
+	 * will delete entry from already-freed table.
+	 */
+	if (!del_timer(&ct->timeout))
+		return 1;
+
+	spin_lock_bh(&nf_nat_lock);
+	hlist_del_rcu(&nat->bysource);
+	ct->status &= ~IPS_NAT_DONE_MASK;
+	nat->ct = NULL;
+	spin_unlock_bh(&nf_nat_lock);
+
+	add_timer(&ct->timeout);
+
+	/* don't delete conntrack.  Although that would make things a lot
+	 * simpler, we'd end up flushing all conntracks on nat rmmod.
+	 */
+	return 0;
+}
+
 static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
 {
 	struct nf_nat_proto_clean clean = {
@@ -749,7 +782,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
 {
 	struct nf_nat_proto_clean clean = {};
 
-	nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean);
+	nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean);
 	synchronize_rcu();
 	nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
 }

From a4ad890a7e7fac8177d93a4345c7a239339840ed Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 5 Dec 2013 16:27:37 +0100
Subject: [PATCH 1158/1185] br: fix use of ->rx_handler_data in code executed
 on non-rx_handler path

commit 859828c0ea476b42f3a93d69d117aaba90994b6f upstream.

br_stp_rcv() is reached by non-rx_handler path. That means there is no
guarantee that dev is bridge port and therefore simple NULL check of
->rx_handler_data is not enough. There is need to check if dev is really
bridge port and since only rcu read lock is held here, do it by checking
->rx_handler pointer.

Note that synchronize_net() in netdev_rx_handler_unregister() ensures
this approach as valid.

Introduced originally by:
commit f350a0a87374418635689471606454abc7beaa3a
  "bridge: use rx_handler_data pointer to store net_bridge_port pointer"

Fixed but not in the best way by:
commit b5ed54e94d324f17c97852296d61a143f01b227a
  "bridge: fix RCU races with bridge port"

Reintroduced by:
commit 716ec052d2280d511e10e90ad54a86f5b5d4dcc2
  "bridge: fix NULL pointer deref of br_port_get_rcu"

Please apply to stable trees as well. Thanks.

RH bugzilla reference: https://bugzilla.redhat.com/show_bug.cgi?id=1025770

Reported-by: Laine Stump <laine@redhat.com>
Debugged-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Andrew Collins <bsderandrew@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bridge/br_private.h  | 10 ++++++++++
 net/bridge/br_stp_bpdu.c |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index e696833a31b5..11ab6628027a 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -429,6 +429,16 @@ extern netdev_features_t br_features_recompute(struct net_bridge *br,
 extern int br_handle_frame_finish(struct sk_buff *skb);
 extern rx_handler_result_t br_handle_frame(struct sk_buff **pskb);
 
+static inline bool br_rx_handler_check_rcu(const struct net_device *dev)
+{
+	return rcu_dereference(dev->rx_handler) == br_handle_frame;
+}
+
+static inline struct net_bridge_port *br_port_get_check_rcu(const struct net_device *dev)
+{
+	return br_rx_handler_check_rcu(dev) ? br_port_get_rcu(dev) : NULL;
+}
+
 /* br_ioctl.c */
 extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *arg);
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 8660ea3be705..bdb459d21ad8 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -153,7 +153,7 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
 	if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0)
 		goto err;
 
-	p = br_port_get_rcu(dev);
+	p = br_port_get_check_rcu(dev);
 	if (!p)
 		goto err;
 

From a2ad9bef40181939a8b3469a98f33775e4b0a23a Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben.dooks@codethink.co.uk>
Date: Fri, 8 Nov 2013 18:29:25 +0000
Subject: [PATCH 1159/1185] ARM: probes: fix instruction fetch order with
 <asm/opcodes.h>

commit 888be25402021a425da3e85e2d5a954d7509286e upstream.

If we are running BE8, the data and instruction endianness do not
match, so use <asm/opcodes.h> to correctly translate memory accesses
into ARM instructions.

Acked-by: Jon Medhurst <tixy@linaro.org>
Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
[taras.kondratiuk@linaro.org: fixed Thumb instruction fetch order]
Signed-off-by: Taras Kondratiuk <taras.kondratiuk@linaro.org>
[wangnan: backport to 3.10 and 3.14:
 - adjust context
 - backport all changes on arch/arm/kernel/probes.c to
   arch/arm/kernel/kprobes-common.c since we don't have
   commit c18377c303787ded44b7decd7dee694db0f205e9.
 - After the above adjustments, becomes same to Taras Kondratiuk's
   original patch:
     http://lists.linaro.org/pipermail/linaro-kernel/2014-January/010346.html
]
Signed-off-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/kernel/kprobes-common.c | 19 +++++++++++--------
 arch/arm/kernel/kprobes-thumb.c  | 20 ++++++++++++--------
 arch/arm/kernel/kprobes.c        |  9 +++++----
 3 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/arch/arm/kernel/kprobes-common.c b/arch/arm/kernel/kprobes-common.c
index 18a76282970e..380c20fb9c85 100644
--- a/arch/arm/kernel/kprobes-common.c
+++ b/arch/arm/kernel/kprobes-common.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
 #include <asm/system_info.h>
+#include <asm/opcodes.h>
 
 #include "kprobes.h"
 
@@ -305,7 +306,8 @@ kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 	if (handler) {
 		/* We can emulate the instruction in (possibly) modified form */
-		asi->insn[0] = (insn & 0xfff00000) | (rn << 16) | reglist;
+		asi->insn[0] = __opcode_to_mem_arm((insn & 0xfff00000) |
+						   (rn << 16) | reglist);
 		asi->insn_handler = handler;
 		return INSN_GOOD;
 	}
@@ -334,13 +336,14 @@ prepare_emulated_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
 #ifdef CONFIG_THUMB2_KERNEL
 	if (thumb) {
 		u16 *thumb_insn = (u16 *)asi->insn;
-		thumb_insn[1] = 0x4770; /* Thumb bx lr */
-		thumb_insn[2] = 0x4770; /* Thumb bx lr */
+		/* Thumb bx lr */
+		thumb_insn[1] = __opcode_to_mem_thumb16(0x4770);
+		thumb_insn[2] = __opcode_to_mem_thumb16(0x4770);
 		return insn;
 	}
-	asi->insn[1] = 0xe12fff1e; /* ARM bx lr */
+	asi->insn[1] = __opcode_to_mem_arm(0xe12fff1e); /* ARM bx lr */
 #else
-	asi->insn[1] = 0xe1a0f00e; /* mov pc, lr */
+	asi->insn[1] = __opcode_to_mem_arm(0xe1a0f00e); /* mov pc, lr */
 #endif
 	/* Make an ARM instruction unconditional */
 	if (insn < 0xe0000000)
@@ -360,12 +363,12 @@ set_emulated_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
 	if (thumb) {
 		u16 *ip = (u16 *)asi->insn;
 		if (is_wide_instruction(insn))
-			*ip++ = insn >> 16;
-		*ip++ = insn;
+			*ip++ = __opcode_to_mem_thumb16(insn >> 16);
+		*ip++ = __opcode_to_mem_thumb16(insn);
 		return;
 	}
 #endif
-	asi->insn[0] = insn;
+	asi->insn[0] = __opcode_to_mem_arm(insn);
 }
 
 /*
diff --git a/arch/arm/kernel/kprobes-thumb.c b/arch/arm/kernel/kprobes-thumb.c
index 6123daf397a7..b82e798983c4 100644
--- a/arch/arm/kernel/kprobes-thumb.c
+++ b/arch/arm/kernel/kprobes-thumb.c
@@ -163,9 +163,9 @@ t32_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	enum kprobe_insn ret = kprobe_decode_ldmstm(insn, asi);
 
 	/* Fixup modified instruction to have halfwords in correct order...*/
-	insn = asi->insn[0];
-	((u16 *)asi->insn)[0] = insn >> 16;
-	((u16 *)asi->insn)[1] = insn & 0xffff;
+	insn = __mem_to_opcode_arm(asi->insn[0]);
+	((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(insn >> 16);
+	((u16 *)asi->insn)[1] = __opcode_to_mem_thumb16(insn & 0xffff);
 
 	return ret;
 }
@@ -1153,7 +1153,7 @@ t16_decode_hiregs(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
 	insn &= ~0x00ff;
 	insn |= 0x001; /* Set Rdn = R1 and Rm = R0 */
-	((u16 *)asi->insn)[0] = insn;
+	((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(insn);
 	asi->insn_handler = t16_emulate_hiregs;
 	return INSN_GOOD;
 }
@@ -1182,8 +1182,10 @@ t16_decode_push(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	 * and call it with R9=SP and LR in the register list represented
 	 * by R8.
 	 */
-	((u16 *)asi->insn)[0] = 0xe929;		/* 1st half STMDB R9!,{} */
-	((u16 *)asi->insn)[1] = insn & 0x1ff;	/* 2nd half (register list) */
+	/* 1st half STMDB R9!,{} */
+	((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(0xe929);
+	/* 2nd half (register list) */
+	((u16 *)asi->insn)[1] = __opcode_to_mem_thumb16(insn & 0x1ff);
 	asi->insn_handler = t16_emulate_push;
 	return INSN_GOOD;
 }
@@ -1232,8 +1234,10 @@ t16_decode_pop(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	 * and call it with R9=SP and PC in the register list represented
 	 * by R8.
 	 */
-	((u16 *)asi->insn)[0] = 0xe8b9;		/* 1st half LDMIA R9!,{} */
-	((u16 *)asi->insn)[1] = insn & 0x1ff;	/* 2nd half (register list) */
+	/* 1st half LDMIA R9!,{} */
+	((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(0xe8b9);
+	/* 2nd half (register list) */
+	((u16 *)asi->insn)[1] = __opcode_to_mem_thumb16(insn & 0x1ff);
 	asi->insn_handler = insn & 0x100 ? t16_emulate_pop_pc
 					 : t16_emulate_pop_nopc;
 	return INSN_GOOD;
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 170e9f34003f..1c6ece51781c 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -26,6 +26,7 @@
 #include <linux/stop_machine.h>
 #include <linux/stringify.h>
 #include <asm/traps.h>
+#include <asm/opcodes.h>
 #include <asm/cacheflush.h>
 
 #include "kprobes.h"
@@ -62,10 +63,10 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 #ifdef CONFIG_THUMB2_KERNEL
 	thumb = true;
 	addr &= ~1; /* Bit 0 would normally be set to indicate Thumb code */
-	insn = ((u16 *)addr)[0];
+	insn = __mem_to_opcode_thumb16(((u16 *)addr)[0]);
 	if (is_wide_instruction(insn)) {
-		insn <<= 16;
-		insn |= ((u16 *)addr)[1];
+		u16 inst2 = __mem_to_opcode_thumb16(((u16 *)addr)[1]);
+		insn = __opcode_thumb32_compose(insn, inst2);
 		decode_insn = thumb32_kprobe_decode_insn;
 	} else
 		decode_insn = thumb16_kprobe_decode_insn;
@@ -73,7 +74,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 	thumb = false;
 	if (addr & 0x3)
 		return -EINVAL;
-	insn = *p->addr;
+	insn = __mem_to_opcode_arm(*p->addr);
 	decode_insn = arm_kprobe_decode_insn;
 #endif
 

From 2a7978ef959430e21d93a6ab59284a11bc2c9bb6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali.rohar@gmail.com>
Date: Mon, 29 Sep 2014 15:10:51 +0200
Subject: [PATCH 1160/1185] dell-wmi: Fix access out of memory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit a666b6ffbc9b6705a3ced704f52c3fe9ea8bf959 upstream.

Without this patch, dell-wmi is trying to access elements of dynamically
allocated array without checking the array size. This can lead to memory
corruption or a kernel panic. This patch adds the missing checks for
array size.

Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Signed-off-by: Darren Hart <dvhart@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/platform/x86/dell-wmi.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
index fa9a2171cc13..b264d8fe1908 100644
--- a/drivers/platform/x86/dell-wmi.c
+++ b/drivers/platform/x86/dell-wmi.c
@@ -163,18 +163,24 @@ static void dell_wmi_notify(u32 value, void *context)
 		const struct key_entry *key;
 		int reported_key;
 		u16 *buffer_entry = (u16 *)obj->buffer.pointer;
+		int buffer_size = obj->buffer.length/2;
 
-		if (dell_new_hk_type && (buffer_entry[1] != 0x10)) {
+		if (buffer_size >= 2 && dell_new_hk_type && buffer_entry[1] != 0x10) {
 			pr_info("Received unknown WMI event (0x%x)\n",
 				buffer_entry[1]);
 			kfree(obj);
 			return;
 		}
 
-		if (dell_new_hk_type || buffer_entry[1] == 0x0)
+		if (buffer_size >= 3 && (dell_new_hk_type || buffer_entry[1] == 0x0))
 			reported_key = (int)buffer_entry[2];
-		else
+		else if (buffer_size >= 2)
 			reported_key = (int)buffer_entry[1] & 0xffff;
+		else {
+			pr_info("Received unknown WMI event\n");
+			kfree(obj);
+			return;
+		}
 
 		key = sparse_keymap_entry_from_scancode(dell_wmi_input_dev,
 							reported_key);

From 3b851c17c479cfe176c98dd1519af46d5b8e571b Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yuasa@linux-mips.org>
Date: Wed, 2 Oct 2013 15:03:03 +0900
Subject: [PATCH 1161/1185] MIPS: Fix forgotten preempt_enable() when CPU has
 inclusive pcaches

commit 5596b0b245fb9d2cefb5023b11061050351c1398 upstream.

[    1.904000] BUG: scheduling while atomic: swapper/1/0x00000002
[    1.908000] Modules linked in:
[    1.916000] CPU: 0 PID: 1 Comm: swapper Not tainted 3.12.0-rc2-lemote-los.git-5318619-dirty #1
[    1.920000] Stack : 0000000031aac000 ffffffff810d0000 0000000000000052 ffffffff802730a4
          0000000000000000 0000000000000001 ffffffff810cdf90 ffffffff810d0000
          ffffffff8068b968 ffffffff806f5537 ffffffff810cdf90 980000009f0782e8
          0000000000000001 ffffffff80720000 ffffffff806b0000 980000009f078000
          980000009f290000 ffffffff805f312c 980000009f05b5d8 ffffffff80233518
          980000009f05b5e8 ffffffff80274b7c 980000009f078000 ffffffff8068b968
          0000000000000000 0000000000000000 0000000000000000 0000000000000000
          0000000000000000 980000009f05b520 0000000000000000 ffffffff805f2f6c
          0000000000000000 ffffffff80700000 ffffffff80700000 ffffffff806fc758
          ffffffff80700000 ffffffff8020be98 ffffffff806fceb0 ffffffff805f2f6c
          ...
[    2.028000] Call Trace:
[    2.032000] [<ffffffff8020be98>] show_stack+0x80/0x98
[    2.036000] [<ffffffff805f2f6c>] __schedule_bug+0x44/0x6c
[    2.040000] [<ffffffff805fac58>] __schedule+0x518/0x5b0
[    2.044000] [<ffffffff805f8a58>] schedule_timeout+0x128/0x1f0
[    2.048000] [<ffffffff80240314>] msleep+0x3c/0x60
[    2.052000] [<ffffffff80495400>] do_probe+0x238/0x3a8
[    2.056000] [<ffffffff804958b0>] ide_probe_port+0x340/0x7e8
[    2.060000] [<ffffffff80496028>] ide_host_register+0x2d0/0x7a8
[    2.064000] [<ffffffff8049c65c>] ide_pci_init_two+0x4e4/0x790
[    2.068000] [<ffffffff8049f9b8>] amd74xx_probe+0x148/0x2c8
[    2.072000] [<ffffffff803f571c>] pci_device_probe+0xc4/0x130
[    2.076000] [<ffffffff80478f60>] driver_probe_device+0x98/0x270
[    2.080000] [<ffffffff80479298>] __driver_attach+0xe0/0xe8
[    2.084000] [<ffffffff80476ab0>] bus_for_each_dev+0x78/0xe0
[    2.088000] [<ffffffff80478468>] bus_add_driver+0x230/0x310
[    2.092000] [<ffffffff80479b44>] driver_register+0x84/0x158
[    2.096000] [<ffffffff80200504>] do_one_initcall+0x104/0x160

Signed-off-by: Yoichi Yuasa <yuasa@linux-mips.org>
Reported-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Tested-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Cc: linux-mips@linux-mips.org
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Patchwork: https://patchwork.linux-mips.org/patch/5941/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Alexandre Oliva <lxoliva@fsfla.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/mm/c-r4k.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 5495101d32c8..c2ec87e5d1cc 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -608,6 +608,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 			r4k_blast_scache();
 		else
 			blast_scache_range(addr, addr + size);
+		preempt_enable();
 		__sync();
 		return;
 	}
@@ -649,6 +650,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
 			 */
 			blast_inv_scache_range(addr, addr + size);
 		}
+		preempt_enable();
 		__sync();
 		return;
 	}

From 858879737bb5b7b7dca1d84df9018e7eb46d7294 Mon Sep 17 00:00:00 2001
From: Pawel Moll <pawel.moll@arm.com>
Date: Fri, 13 Jun 2014 16:03:32 +0100
Subject: [PATCH 1162/1185] perf: Handle compat ioctl

commit b3f207855f57b9c8f43a547a801340bb5cbc59e5 upstream.

When running a 32-bit userspace on a 64-bit kernel (eg. i386
application on x86_64 kernel or 32-bit arm userspace on arm64
kernel) some of the perf ioctls must be treated with special
care, as they have a pointer size encoded in the command.

For example, PERF_EVENT_IOC_ID in 32-bit world will be encoded
as 0x80042407, but 64-bit kernel will expect 0x80082407. In
result the ioctl will fail returning -ENOTTY.

This patch solves the problem by adding code fixing up the
size as compat_ioctl file operation.

Reported-by: Drew Richardson <drew.richardson@arm.com>
Signed-off-by: Pawel Moll <pawel.moll@arm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1402671812-9078-1-git-send-email-pawel.moll@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: David Ahern <daahern@cisco.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/events/core.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0b4733447151..3f63ea6464ca 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -39,6 +39,7 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/mm_types.h>
 #include <linux/cgroup.h>
+#include <linux/compat.h>
 
 #include "internal.h"
 
@@ -3490,6 +3491,25 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	return 0;
 }
 
+#ifdef CONFIG_COMPAT
+static long perf_compat_ioctl(struct file *file, unsigned int cmd,
+				unsigned long arg)
+{
+	switch (_IOC_NR(cmd)) {
+	case _IOC_NR(PERF_EVENT_IOC_SET_FILTER):
+		/* Fix up pointer size (usually 4 -> 8 in 32-on-64-bit case) */
+		if (_IOC_SIZE(cmd) == sizeof(compat_uptr_t)) {
+			cmd &= ~IOCSIZE_MASK;
+			cmd |= sizeof(void *) << IOCSIZE_SHIFT;
+		}
+		break;
+	}
+	return perf_ioctl(file, cmd, arg);
+}
+#else
+# define perf_compat_ioctl NULL
+#endif
+
 int perf_event_task_enable(void)
 {
 	struct perf_event *event;
@@ -3961,7 +3981,7 @@ static const struct file_operations perf_fops = {
 	.read			= perf_read,
 	.poll			= perf_poll,
 	.unlocked_ioctl		= perf_ioctl,
-	.compat_ioctl		= perf_ioctl,
+	.compat_ioctl		= perf_compat_ioctl,
 	.mmap			= perf_mmap,
 	.fasync			= perf_fasync,
 };

From ba8beb4ca84e22996b0f553248ea52c760afb930 Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Mon, 25 Aug 2014 16:46:53 +0300
Subject: [PATCH 1163/1185] mei: bus: fix possible boundaries violation

commit cfda2794b5afe7ce64ee9605c64bef0e56a48125 upstream.

function 'strncpy' will fill whole buffer 'id.name' of fixed size (32)
with string value and will not leave place for NULL-terminator.
Possible buffer boundaries violation in following string operations.
Replace strncpy with strlcpy.

Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/bus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 99cc0b07a713..0513ea0906dd 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -71,7 +71,7 @@ static int mei_cl_device_probe(struct device *dev)
 
 	dev_dbg(dev, "Device probe\n");
 
-	strncpy(id.name, dev_name(dev), MEI_CL_NAME_SIZE);
+	strlcpy(id.name, dev_name(dev), sizeof(id.name));
 
 	return driver->probe(device, &id);
 }

From f3c34e7e7a12401b080643beafbbbf249e017f24 Mon Sep 17 00:00:00 2001
From: Vince Weaver <vincent.weaver@maine.edu>
Date: Mon, 14 Jul 2014 15:33:25 -0400
Subject: [PATCH 1164/1185] perf/x86/intel: Use proper dTLB-load-misses event
 on IvyBridge

commit 1996388e9f4e3444db8273bc08d25164d2967c21 upstream.

This was discussed back in February:

	https://lkml.org/lkml/2014/2/18/956

But I never saw a patch come out of it.

On IvyBridge we share the SandyBridge cache event tables, but the
dTLB-load-miss event is not compatible.  Patch it up after
the fact to the proper DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK

Signed-off-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1407141528200.17214@vincent-weaver-1.umelst.maine.edu
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Hou Pengyang <houpengyang@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index b45ac6affa9c..6d6bb6f4fd43 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2172,6 +2172,9 @@ __init int intel_pmu_init(void)
 	case 62: /* IvyBridge EP */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		/* dTLB-load-misses on IVB is different than SNB */
+		hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
+
 		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
 		       sizeof(hw_cache_extra_regs));
 

From 9f6bb0c21dbe0f0604a5fd3f8717677ffccd7aed Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben.dooks@codethink.co.uk>
Date: Thu, 25 Jul 2013 14:38:03 +0100
Subject: [PATCH 1165/1185] ARM: Correct BUG() assembly to ensure it is
 endian-agnostic

commit 63328070eff2f4fd730c86966a0dbc976147c39f upstream.

Currently BUG() uses .word or .hword to create the necessary illegal
instructions. However if we are building BE8 then these get swapped
by the linker into different illegal instructions in the text. This
means that the BUG() macro does not get trapped properly.

Change to using <asm/opcodes.h> to provide the necessary ARM instruction
building as we cannot rely on gcc/gas having the `.inst` instructions
which were added to try and resolve this issue (reported by Dave Martin
<Dave.Martin@arm.com>).

Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Cc: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/include/asm/bug.h | 10 ++++++----
 arch/arm/kernel/traps.c    |  8 +++++---
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h
index 7af5c6c3653a..b274bde24905 100644
--- a/arch/arm/include/asm/bug.h
+++ b/arch/arm/include/asm/bug.h
@@ -2,6 +2,8 @@
 #define _ASMARM_BUG_H
 
 #include <linux/linkage.h>
+#include <linux/types.h>
+#include <asm/opcodes.h>
 
 #ifdef CONFIG_BUG
 
@@ -12,10 +14,10 @@
  */
 #ifdef CONFIG_THUMB2_KERNEL
 #define BUG_INSTR_VALUE 0xde02
-#define BUG_INSTR_TYPE ".hword "
+#define BUG_INSTR(__value) __inst_thumb16(__value)
 #else
 #define BUG_INSTR_VALUE 0xe7f001f2
-#define BUG_INSTR_TYPE ".word "
+#define BUG_INSTR(__value) __inst_arm(__value)
 #endif
 
 
@@ -33,7 +35,7 @@
 
 #define __BUG(__file, __line, __value)				\
 do {								\
-	asm volatile("1:\t" BUG_INSTR_TYPE #__value "\n"	\
+	asm volatile("1:\t" BUG_INSTR(__value) "\n"  \
 		".pushsection .rodata.str, \"aMS\", %progbits, 1\n" \
 		"2:\t.asciz " #__file "\n" 			\
 		".popsection\n" 				\
@@ -48,7 +50,7 @@ do {								\
 
 #define __BUG(__file, __line, __value)				\
 do {								\
-	asm volatile(BUG_INSTR_TYPE #__value);			\
+	asm volatile(BUG_INSTR(__value) "\n");			\
 	unreachable();						\
 } while (0)
 #endif  /* CONFIG_DEBUG_BUGVERBOSE */
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index d6a0fdb6c2ee..a2a2804b1bc2 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -347,15 +347,17 @@ void arm_notify_die(const char *str, struct pt_regs *regs,
 int is_valid_bugaddr(unsigned long pc)
 {
 #ifdef CONFIG_THUMB2_KERNEL
-	unsigned short bkpt;
+	u16 bkpt;
+	u16 insn = __opcode_to_mem_thumb16(BUG_INSTR_VALUE);
 #else
-	unsigned long bkpt;
+	u32 bkpt;
+	u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE);
 #endif
 
 	if (probe_kernel_address((unsigned *)pc, bkpt))
 		return 0;
 
-	return bkpt == BUG_INSTR_VALUE;
+	return bkpt == insn;
 }
 
 #endif

From 50e0289d813aceddedf962ea92299b68ac264671 Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Thu, 25 Jul 2013 19:21:23 +0300
Subject: [PATCH 1166/1185] net/mlx4_en: Fix BlueFlame race

commit 2d4b646613d6b12175b017aca18113945af1faf3 upstream.

Fix a race between BlueFlame flow and stamping in post send flow.
Example:
	SW: Build WQE 0 on the TX buffer, except the ownership bit
	SW: Set ownership for WQE 0 on the TX buffer
	SW: Ring doorbell for WQE 0
	SW: Build WQE 1 on the TX buffer, except the ownership bit
	SW: Set ownership for WQE 1 on the TX buffer
	HW: Read WQE 0 and then WQE 1, before doorbell was rung/BF was done for WQE 1
	HW: Produce CQEs for WQE 0 and WQE 1
	SW: Process the CQEs, and stamp WQE 0 and WQE 1 accordingly (on the TX buffer)
	SW: Copy WQE 1 from the TX buffer to the BF register - ALREADY STAMPED!
	HW: CQE error with index 0xFFFF  - the BF WQE's control segment is STAMPED,
		so the BF index is 0xFFFF. Error: Invalid Opcode.
As a result QP enters the error state and no traffic can be sent.

Solution:
When stamping - do not stamp last completed wqe.

Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Vinson Lee <vlee@twopensource.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx4/en_tx.c | 61 +++++++++++++++-------
 1 file changed, 42 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 4e6877a032a8..bd8800c85525 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -191,6 +191,39 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
 		       MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp);
 }
 
+static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
+			      struct mlx4_en_tx_ring *ring, int index,
+			      u8 owner)
+{
+	__be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));
+	struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
+	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
+	void *end = ring->buf + ring->buf_size;
+	__be32 *ptr = (__be32 *)tx_desc;
+	int i;
+
+	/* Optimize the common case when there are no wraparounds */
+	if (likely((void *)tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
+		/* Stamp the freed descriptor */
+		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE;
+		     i += STAMP_STRIDE) {
+			*ptr = stamp;
+			ptr += STAMP_DWORDS;
+		}
+	} else {
+		/* Stamp the freed descriptor */
+		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE;
+		     i += STAMP_STRIDE) {
+			*ptr = stamp;
+			ptr += STAMP_DWORDS;
+			if ((void *)ptr >= end) {
+				ptr = ring->buf;
+				stamp ^= cpu_to_be32(0x80000000);
+			}
+		}
+	}
+}
+
 
 static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 				struct mlx4_en_tx_ring *ring,
@@ -205,8 +238,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 	void *end = ring->buf + ring->buf_size;
 	int frags = skb_shinfo(skb)->nr_frags;
 	int i;
-	__be32 *ptr = (__be32 *)tx_desc;
-	__be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));
 	struct skb_shared_hwtstamps hwts;
 
 	if (timestamp) {
@@ -232,12 +263,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 					skb_frag_size(frag), PCI_DMA_TODEVICE);
 			}
 		}
-		/* Stamp the freed descriptor */
-		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
-			*ptr = stamp;
-			ptr += STAMP_DWORDS;
-		}
-
 	} else {
 		if (!tx_info->inl) {
 			if ((void *) data >= end) {
@@ -263,16 +288,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 				++data;
 			}
 		}
-		/* Stamp the freed descriptor */
-		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
-			*ptr = stamp;
-			ptr += STAMP_DWORDS;
-			if ((void *) ptr >= end) {
-				ptr = ring->buf;
-				stamp ^= cpu_to_be32(0x80000000);
-			}
-		}
-
 	}
 	dev_kfree_skb_any(skb);
 	return tx_info->nr_txbb;
@@ -318,8 +333,9 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 	struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
 	struct mlx4_cqe *cqe;
 	u16 index;
-	u16 new_index, ring_index;
+	u16 new_index, ring_index, stamp_index;
 	u32 txbbs_skipped = 0;
+	u32 txbbs_stamp = 0;
 	u32 cons_index = mcq->cons_index;
 	int size = cq->size;
 	u32 size_mask = ring->size_mask;
@@ -335,6 +351,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 	index = cons_index & size_mask;
 	cqe = &buf[(index << factor) + factor];
 	ring_index = ring->cons & size_mask;
+	stamp_index = ring_index;
 
 	/* Process all completed CQEs */
 	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
@@ -359,6 +376,12 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 					priv, ring, ring_index,
 					!!((ring->cons + txbbs_skipped) &
 					ring->size), timestamp);
+
+			mlx4_en_stamp_wqe(priv, ring, stamp_index,
+					  !!((ring->cons + txbbs_stamp) &
+						ring->size));
+			stamp_index = ring_index;
+			txbbs_stamp = txbbs_skipped;
 			packets++;
 			bytes += ring->tx_info[ring_index].nr_bytes;
 		} while (ring_index != new_index);

From 2e4ce498811f1aa2e9f2e600d442ad8da0ab6534 Mon Sep 17 00:00:00 2001
From: Tomas Henzl <thenzl@redhat.com>
Date: Thu, 1 Aug 2013 15:14:00 +0200
Subject: [PATCH 1167/1185] SCSI: hpsa: fix a race in cmd_free/scsi_done

commit 2cc5bfaf854463d9d1aa52091f60110fbf102a96 upstream.

When the driver calls scsi_done and after that frees its internal
preallocated memory it can happen that a new job is enqueued before
the memory is freed. The allocation fails and the message
"cmd_alloc returned NULL" is shown.
Patch below fixes it by moving cmd->scsi_done after cmd_free.

Signed-off-by: Tomas Henzl <thenzl@redhat.com>
Acked-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
Cc: Masoud Sharbiani <msharbiani@twitter.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/hpsa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 287667c20c6a..62ed744bbe06 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -1206,8 +1206,8 @@ static void complete_scsi_command(struct CommandList *cp)
 	scsi_set_resid(cmd, ei->ResidualCnt);
 
 	if (ei->CommandStatus == 0) {
-		cmd->scsi_done(cmd);
 		cmd_free(h, cp);
+		cmd->scsi_done(cmd);
 		return;
 	}
 
@@ -1380,8 +1380,8 @@ static void complete_scsi_command(struct CommandList *cp)
 		dev_warn(&h->pdev->dev, "cp %p returned unknown status %x\n",
 				cp, ei->CommandStatus);
 	}
-	cmd->scsi_done(cmd);
 	cmd_free(h, cp);
+	cmd->scsi_done(cmd);
 }
 
 static void hpsa_pci_unmap(struct pci_dev *pdev,

From c75f394964bc21d0b890bd62ead90ff51b3e1e72 Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@cs.technion.ac.il>
Date: Wed, 17 Sep 2014 02:50:50 +0300
Subject: [PATCH 1168/1185] KVM: x86: Don't report guest userspace emulation
 error to userspace

commit a2b9e6c1a35afcc0973acb72e591c714e78885ff upstream.

Commit fc3a9157d314 ("KVM: X86: Don't report L2 emulation failures to
user-space") disabled the reporting of L2 (nested guest) emulation failures to
userspace due to race-condition between a vmexit and the instruction emulator.
The same rationale also applies to userspace applications that are permitted by
the guest OS to access MMIO area or perform PIO.

This patch extends the current behavior - of injecting a #UD instead of
reporting it to userspace - also for guest userspace code.

Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 684f46dc87de..adfc30d9f9f4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4834,7 +4834,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 
 	++vcpu->stat.insn_emulation_fail;
 	trace_kvm_emulate_insn_failed(vcpu);
-	if (!is_guest_mode(vcpu)) {
+	if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
 		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
 		vcpu->run->internal.ndata = 0;

From bf53932bce5c58cf006ca2e1f81bd1d66d14ba45 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Thu, 9 Oct 2014 22:55:33 +0200
Subject: [PATCH 1169/1185] net: sctp: fix remote memory pressure from
 excessive queueing

commit 26b87c7881006311828bb0ab271a551a62dcceb4 upstream.

This scenario is not limited to ASCONF, just taken as one
example triggering the issue. When receiving ASCONF probes
in the form of ...

  -------------- INIT[ASCONF; ASCONF_ACK] ------------->
  <----------- INIT-ACK[ASCONF; ASCONF_ACK] ------------
  -------------------- COOKIE-ECHO -------------------->
  <-------------------- COOKIE-ACK ---------------------
  ---- ASCONF_a; [ASCONF_b; ...; ASCONF_n;] JUNK ------>
  [...]
  ---- ASCONF_m; [ASCONF_o; ...; ASCONF_z;] JUNK ------>

... where ASCONF_a, ASCONF_b, ..., ASCONF_z are well-formed
ASCONFs and have increasing serial numbers, we process such
ASCONF chunk(s) marked with !end_of_packet and !singleton,
since we have not yet reached the SCTP packet end. SCTP does
only do verification on a chunk by chunk basis, as an SCTP
packet is nothing more than just a container of a stream of
chunks which it eats up one by one.

We could run into the case that we receive a packet with a
malformed tail, above marked as trailing JUNK. All previous
chunks are here well-formed, so the stack will eat up all
previous chunks up to this point. In case JUNK does not fit
into a chunk header and there are no more other chunks in
the input queue, or in case JUNK contains a garbage chunk
header, but the encoded chunk length would exceed the skb
tail, or we came here from an entirely different scenario
and the chunk has pdiscard=1 mark (without having had a flush
point), it will happen, that we will excessively queue up
the association's output queue (a correct final chunk may
then turn it into a response flood when flushing the
queue ;)): I ran a simple script with incremental ASCONF
serial numbers and could see the server side consuming
excessive amount of RAM [before/after: up to 2GB and more].

The issue at heart is that the chunk train basically ends
with !end_of_packet and !singleton markers and since commit
2e3216cd54b1 ("sctp: Follow security requirement of responding
with 1 packet") therefore preventing an output queue flush
point in sctp_do_sm() -> sctp_cmd_interpreter() on the input
chunk (chunk = event_arg) even though local_cork is set,
but its precedence has changed since then. In the normal
case, the last chunk with end_of_packet=1 would trigger the
queue flush to accommodate possible outgoing bundling.

In the input queue, sctp_inq_pop() seems to do the right thing
in terms of discarding invalid chunks. So, above JUNK will
not enter the state machine and instead be released and exit
the sctp_assoc_bh_rcv() chunk processing loop. It's simply
the flush point being missing at loop exit. Adding a try-flush
approach on the output queue might not work as the underlying
infrastructure might be long gone at this point due to the
side-effect interpreter run.

One possibility, albeit a bit of a kludge, would be to defer
invalid chunk freeing into the state machine in order to
possibly trigger packet discards and thus indirectly a queue
flush on error. It would surely be better to discard chunks
as in the current, perhaps better controlled environment, but
going back and forth, it's simply architecturally not possible.
I tried various trailing JUNK attack cases and it seems to
look good now.

Joint work with Vlad Yasevich.

Fixes: 2e3216cd54b1 ("sctp: Follow security requirement of responding with 1 packet")
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Josh Boyer <jwboyer@fedoraproject.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sctp/inqueue.c      | 33 +++++++--------------------------
 net/sctp/sm_statefuns.c |  3 +++
 2 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 3221d073448c..49c58eadbfa2 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -147,18 +147,9 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
 		} else {
 			/* Nothing to do. Next chunk in the packet, please. */
 			ch = (sctp_chunkhdr_t *) chunk->chunk_end;
-
 			/* Force chunk->skb->data to chunk->chunk_end.  */
-			skb_pull(chunk->skb,
-				 chunk->chunk_end - chunk->skb->data);
-
-			/* Verify that we have at least chunk headers
-			 * worth of buffer left.
-			 */
-			if (skb_headlen(chunk->skb) < sizeof(sctp_chunkhdr_t)) {
-				sctp_chunk_free(chunk);
-				chunk = queue->in_progress = NULL;
-			}
+			skb_pull(chunk->skb, chunk->chunk_end - chunk->skb->data);
+			/* We are guaranteed to pull a SCTP header. */
 		}
 	}
 
@@ -194,24 +185,14 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
 	skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
 	chunk->subh.v = NULL; /* Subheader is no longer valid.  */
 
-	if (chunk->chunk_end < skb_tail_pointer(chunk->skb)) {
+	if (chunk->chunk_end + sizeof(sctp_chunkhdr_t) <
+	    skb_tail_pointer(chunk->skb)) {
 		/* This is not a singleton */
 		chunk->singleton = 0;
 	} else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) {
-		/* RFC 2960, Section 6.10  Bundling
-		 *
-		 * Partial chunks MUST NOT be placed in an SCTP packet.
-		 * If the receiver detects a partial chunk, it MUST drop
-		 * the chunk.
-		 *
-		 * Since the end of the chunk is past the end of our buffer
-		 * (which contains the whole packet, we can freely discard
-		 * the whole packet.
-		 */
-		sctp_chunk_free(chunk);
-		chunk = queue->in_progress = NULL;
-
-		return NULL;
+		/* Discard inside state machine. */
+		chunk->pdiscard = 1;
+		chunk->chunk_end = skb_tail_pointer(chunk->skb);
 	} else {
 		/* We are at the end of the packet, so mark the chunk
 		 * in case we need to send a SACK.
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index edc204b05c82..5e32dd580270 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -177,6 +177,9 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk,
 {
 	__u16 chunk_length = ntohs(chunk->chunk_hdr->length);
 
+	/* Previously already marked? */
+	if (unlikely(chunk->pdiscard))
+		return 0;
 	if (unlikely(chunk_length < required_length))
 		return 0;
 

From 3329125539de90e5fa6ab83009f5f82ef73a3259 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Thu, 9 Oct 2014 22:55:32 +0200
Subject: [PATCH 1170/1185] net: sctp: fix panic on duplicate ASCONF chunks

commit b69040d8e39f20d5215a03502a8e8b4c6ab78395 upstream.

When receiving e.g. a semi-well-formed connection scan in the
form of ...

  -------------- INIT[ASCONF; ASCONF_ACK] ------------->
  <----------- INIT-ACK[ASCONF; ASCONF_ACK] ------------
  -------------------- COOKIE-ECHO -------------------->
  <-------------------- COOKIE-ACK ---------------------
  ---------------- ASCONF_a; ASCONF_b ----------------->

... where ASCONF_a equals ASCONF_b chunk (at least both serials
need to be equal), we panic an SCTP server!

The problem is that the ASCONF_ACK chunks we send in reply to
well-formed ASCONF chunks are cached per serial. Thus, when we receive
the same ASCONF chunk twice (e.g. through a lost ASCONF_ACK), we do
not need to process them again on the server side (that was the
idea, also proposed in the RFC). Instead, we know it was cached
and we just resend the cached chunk instead. So far, so good.

Where things get nasty is in SCTP's side effect interpreter, that
is, sctp_cmd_interpreter():

While incoming ASCONF_a (chunk = event_arg) is being marked
!end_of_packet and !singleton, and we have an association context,
we do not flush the outqueue the first time after processing the
ASCONF_ACK singleton chunk via SCTP_CMD_REPLY. Instead, we keep it
queued up, although we set local_cork to 1. Commit 2e3216cd54b1
changed the precedence, so that as long as we get bundled incoming
chunks, we try possible bundling on the outgoing queue as well. Before
this commit, we would just flush the output queue.

Now, while ASCONF_a's ASCONF_ACK sits in the corked outq, we
continue to process the same ASCONF_b chunk from the packet. As
we have cached the previous ASCONF_ACK, we find it, grab it and
do another SCTP_CMD_REPLY command on it. So, effectively, we rip
the chunk->list pointers and requeue the same ASCONF_ACK chunk
another time. Since we process ASCONF_b, it's correctly marked
with end_of_packet and we enforce an uncork, and thus flush, thus
crashing the kernel.

Fix it by testing if the ASCONF_ACK is currently pending and if
that is the case, do not requeue it. When flushing the output
queue we may relink the chunk for preparing an outgoing packet,
but eventually unlink it when it's copied into the skb right
before transmission.

Joint work with Vlad Yasevich.

Fixes: 2e3216cd54b1 ("sctp: Follow security requirement of responding with 1 packet")
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Josh Boyer <jwboyer@fedoraproject.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/sctp/sctp.h | 5 +++++
 net/sctp/associola.c    | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index cd89510eab2a..845ab6decc45 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -540,6 +540,11 @@ static inline void sctp_assoc_pending_pmtu(struct sock *sk, struct sctp_associat
 	asoc->pmtu_pending = 0;
 }
 
+static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk)
+{
+	return !list_empty(&chunk->list);
+}
+
 /* Walk through a list of TLV parameters.  Don't trust the
  * individual parameter lengths and instead depend on
  * the chunk length to indicate when to stop.  Make sure
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 62e86d98bc36..ca4a1a1b8e69 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1659,6 +1659,8 @@ struct sctp_chunk *sctp_assoc_lookup_asconf_ack(
 	 * ack chunk whose serial number matches that of the request.
 	 */
 	list_for_each_entry(ack, &asoc->asconf_ack_list, transmitted_list) {
+		if (sctp_chunk_pending(ack))
+			continue;
 		if (ack->subh.addip_hdr->serial == serial) {
 			sctp_chunk_hold(ack);
 			return ack;

From cda702df4736ab981f81ea4b529d14a2858fdc36 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Thu, 9 Oct 2014 22:55:31 +0200
Subject: [PATCH 1171/1185] net: sctp: fix skb_over_panic when receiving
 malformed ASCONF chunks

commit 9de7922bc709eee2f609cd01d98aaedc4cf5ea74 upstream.

Commit 6f4c618ddb0 ("SCTP : Add paramters validity check for
ASCONF chunk") added basic verification of ASCONF chunks, however,
it is still possible to remotely crash a server by sending a
special crafted ASCONF chunk, even up to pre 2.6.12 kernels:

skb_over_panic: text:ffffffffa01ea1c3 len:31056 put:30768
 head:ffff88011bd81800 data:ffff88011bd81800 tail:0x7950
 end:0x440 dev:<NULL>
 ------------[ cut here ]------------
kernel BUG at net/core/skbuff.c:129!
[...]
Call Trace:
 <IRQ>
 [<ffffffff8144fb1c>] skb_put+0x5c/0x70
 [<ffffffffa01ea1c3>] sctp_addto_chunk+0x63/0xd0 [sctp]
 [<ffffffffa01eadaf>] sctp_process_asconf+0x1af/0x540 [sctp]
 [<ffffffff8152d025>] ? _read_unlock_bh+0x15/0x20
 [<ffffffffa01e0038>] sctp_sf_do_asconf+0x168/0x240 [sctp]
 [<ffffffffa01e3751>] sctp_do_sm+0x71/0x1210 [sctp]
 [<ffffffff8147645d>] ? fib_rules_lookup+0xad/0xf0
 [<ffffffffa01e6b22>] ? sctp_cmp_addr_exact+0x32/0x40 [sctp]
 [<ffffffffa01e8393>] sctp_assoc_bh_rcv+0xd3/0x180 [sctp]
 [<ffffffffa01ee986>] sctp_inq_push+0x56/0x80 [sctp]
 [<ffffffffa01fcc42>] sctp_rcv+0x982/0xa10 [sctp]
 [<ffffffffa01d5123>] ? ipt_local_in_hook+0x23/0x28 [iptable_filter]
 [<ffffffff8148bdc9>] ? nf_iterate+0x69/0xb0
 [<ffffffff81496d10>] ? ip_local_deliver_finish+0x0/0x2d0
 [<ffffffff8148bf86>] ? nf_hook_slow+0x76/0x120
 [<ffffffff81496d10>] ? ip_local_deliver_finish+0x0/0x2d0
 [<ffffffff81496ded>] ip_local_deliver_finish+0xdd/0x2d0
 [<ffffffff81497078>] ip_local_deliver+0x98/0xa0
 [<ffffffff8149653d>] ip_rcv_finish+0x12d/0x440
 [<ffffffff81496ac5>] ip_rcv+0x275/0x350
 [<ffffffff8145c88b>] __netif_receive_skb+0x4ab/0x750
 [<ffffffff81460588>] netif_receive_skb+0x58/0x60

This can be triggered e.g., through a simple scripted nmap
connection scan injecting the chunk after the handshake, for
example, ...

  -------------- INIT[ASCONF; ASCONF_ACK] ------------->
  <----------- INIT-ACK[ASCONF; ASCONF_ACK] ------------
  -------------------- COOKIE-ECHO -------------------->
  <-------------------- COOKIE-ACK ---------------------
  ------------------ ASCONF; UNKNOWN ------------------>

... where ASCONF chunk of length 280 contains 2 parameters ...

  1) Add IP address parameter (param length: 16)
  2) Add/del IP address parameter (param length: 255)

... followed by an UNKNOWN chunk of e.g. 4 bytes. Here, the
Address Parameter in the ASCONF chunk is even missing, too.
This is just an example and similarly-crafted ASCONF chunks
could be used just as well.

The ASCONF chunk passes through sctp_verify_asconf() as all
parameters passed sanity checks, and after walking, we ended
up successfully at the chunk end boundary, and thus may invoke
sctp_process_asconf(). Parameter walking is done with
WORD_ROUND() to take padding into account.

In sctp_process_asconf()'s TLV processing, we may fail in
sctp_process_asconf_param() e.g., due to removal of the IP
address that is also the source address of the packet containing
the ASCONF chunk, and thus we need to add all TLVs after the
failure to our ASCONF response to remote via helper function
sctp_add_asconf_response(), which basically invokes a
sctp_addto_chunk() adding the error parameters to the given
skb.

When walking to the next parameter this time, we proceed
with ...

  length = ntohs(asconf_param->param_hdr.length);
  asconf_param = (void *)asconf_param + length;

... instead of the WORD_ROUND()'ed length, thus resulting here
in an off-by-one that leads to reading the follow-up garbage
parameter length of 12336, and thus throwing an skb_over_panic
for the reply when trying to sctp_addto_chunk() next time,
which implicitly calls the skb_put() with that length.

Fix it by using sctp_walk_params() [ which is also used in
INIT parameter processing ] macro in the verification *and*
in ASCONF processing: it will make sure we don't spill over,
that we walk parameters WORD_ROUND()'ed. Moreover, we're being
more defensive and guard against unknown parameter types and
missized addresses.

Joint work with Vlad Yasevich.

Fixes: b896b82be4ae ("[SCTP] ADDIP: Support for processing incoming ASCONF_ACK chunks.")
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Vlad Yasevich <vyasevich@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Josh Boyer <jwboyer@fedoraproject.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/sctp/sm.h    |  6 +--
 net/sctp/sm_make_chunk.c | 99 ++++++++++++++++++++++------------------
 net/sctp/sm_statefuns.c  | 18 +-------
 3 files changed, 60 insertions(+), 63 deletions(-)

diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 2a82d1384706..c4c9458f37cd 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -255,9 +255,9 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *,
 					      int, __be16);
 struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc,
 					     union sctp_addr *addr);
-int sctp_verify_asconf(const struct sctp_association *asoc,
-		       struct sctp_paramhdr *param_hdr, void *chunk_end,
-		       struct sctp_paramhdr **errp);
+bool sctp_verify_asconf(const struct sctp_association *asoc,
+			struct sctp_chunk *chunk, bool addr_param_needed,
+			struct sctp_paramhdr **errp);
 struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
 				       struct sctp_chunk *asconf);
 int sctp_process_asconf_ack(struct sctp_association *asoc,
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 6ca48b16f6bf..29fc16f3633f 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3097,50 +3097,63 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
 	return SCTP_ERROR_NO_ERROR;
 }
 
-/* Verify the ASCONF packet before we process it.  */
-int sctp_verify_asconf(const struct sctp_association *asoc,
-		       struct sctp_paramhdr *param_hdr, void *chunk_end,
-		       struct sctp_paramhdr **errp) {
-	sctp_addip_param_t *asconf_param;
+/* Verify the ASCONF packet before we process it. */
+bool sctp_verify_asconf(const struct sctp_association *asoc,
+			struct sctp_chunk *chunk, bool addr_param_needed,
+			struct sctp_paramhdr **errp)
+{
+	sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) chunk->chunk_hdr;
 	union sctp_params param;
-	int length, plen;
+	bool addr_param_seen = false;
+
+	sctp_walk_params(param, addip, addip_hdr.params) {
+		size_t length = ntohs(param.p->length);
 
-	param.v = (sctp_paramhdr_t *) param_hdr;
-	while (param.v <= chunk_end - sizeof(sctp_paramhdr_t)) {
-		length = ntohs(param.p->length);
 		*errp = param.p;
-
-		if (param.v > chunk_end - length ||
-		    length < sizeof(sctp_paramhdr_t))
-			return 0;
-
 		switch (param.p->type) {
+		case SCTP_PARAM_ERR_CAUSE:
+			break;
+		case SCTP_PARAM_IPV4_ADDRESS:
+			if (length != sizeof(sctp_ipv4addr_param_t))
+				return false;
+			addr_param_seen = true;
+			break;
+		case SCTP_PARAM_IPV6_ADDRESS:
+			if (length != sizeof(sctp_ipv6addr_param_t))
+				return false;
+			addr_param_seen = true;
+			break;
 		case SCTP_PARAM_ADD_IP:
 		case SCTP_PARAM_DEL_IP:
 		case SCTP_PARAM_SET_PRIMARY:
-			asconf_param = (sctp_addip_param_t *)param.v;
-			plen = ntohs(asconf_param->param_hdr.length);
-			if (plen < sizeof(sctp_addip_param_t) +
-			    sizeof(sctp_paramhdr_t))
-				return 0;
+			/* In ASCONF chunks, these need to be first. */
+			if (addr_param_needed && !addr_param_seen)
+				return false;
+			length = ntohs(param.addip->param_hdr.length);
+			if (length < sizeof(sctp_addip_param_t) +
+				     sizeof(sctp_paramhdr_t))
+				return false;
 			break;
 		case SCTP_PARAM_SUCCESS_REPORT:
 		case SCTP_PARAM_ADAPTATION_LAYER_IND:
 			if (length != sizeof(sctp_addip_param_t))
-				return 0;
-
+				return false;
 			break;
 		default:
-			break;
+			/* This is unkown to us, reject! */
+			return false;
 		}
-
-		param.v += WORD_ROUND(length);
 	}
 
-	if (param.v != chunk_end)
-		return 0;
+	/* Remaining sanity checks. */
+	if (addr_param_needed && !addr_param_seen)
+		return false;
+	if (!addr_param_needed && addr_param_seen)
+		return false;
+	if (param.v != chunk->chunk_end)
+		return false;
 
-	return 1;
+	return true;
 }
 
 /* Process an incoming ASCONF chunk with the next expected serial no. and
@@ -3149,16 +3162,17 @@ int sctp_verify_asconf(const struct sctp_association *asoc,
 struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
 				       struct sctp_chunk *asconf)
 {
+	sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) asconf->chunk_hdr;
+	bool all_param_pass = true;
+	union sctp_params param;
 	sctp_addiphdr_t		*hdr;
 	union sctp_addr_param	*addr_param;
 	sctp_addip_param_t	*asconf_param;
 	struct sctp_chunk	*asconf_ack;
-
 	__be16	err_code;
 	int	length = 0;
 	int	chunk_len;
 	__u32	serial;
-	int	all_param_pass = 1;
 
 	chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(sctp_chunkhdr_t);
 	hdr = (sctp_addiphdr_t *)asconf->skb->data;
@@ -3186,9 +3200,14 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
 		goto done;
 
 	/* Process the TLVs contained within the ASCONF chunk. */
-	while (chunk_len > 0) {
+	sctp_walk_params(param, addip, addip_hdr.params) {
+		/* Skip preceeding address parameters. */
+		if (param.p->type == SCTP_PARAM_IPV4_ADDRESS ||
+		    param.p->type == SCTP_PARAM_IPV6_ADDRESS)
+			continue;
+
 		err_code = sctp_process_asconf_param(asoc, asconf,
-						     asconf_param);
+						     param.addip);
 		/* ADDIP 4.1 A7)
 		 * If an error response is received for a TLV parameter,
 		 * all TLVs with no response before the failed TLV are
@@ -3196,28 +3215,20 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
 		 * the failed response are considered unsuccessful unless
 		 * a specific success indication is present for the parameter.
 		 */
-		if (SCTP_ERROR_NO_ERROR != err_code)
-			all_param_pass = 0;
-
+		if (err_code != SCTP_ERROR_NO_ERROR)
+			all_param_pass = false;
 		if (!all_param_pass)
-			sctp_add_asconf_response(asconf_ack,
-						 asconf_param->crr_id, err_code,
-						 asconf_param);
+			sctp_add_asconf_response(asconf_ack, param.addip->crr_id,
+						 err_code, param.addip);
 
 		/* ADDIP 4.3 D11) When an endpoint receiving an ASCONF to add
 		 * an IP address sends an 'Out of Resource' in its response, it
 		 * MUST also fail any subsequent add or delete requests bundled
 		 * in the ASCONF.
 		 */
-		if (SCTP_ERROR_RSRC_LOW == err_code)
+		if (err_code == SCTP_ERROR_RSRC_LOW)
 			goto done;
-
-		/* Move to the next ASCONF param. */
-		length = ntohs(asconf_param->param_hdr.length);
-		asconf_param = (void *)asconf_param + length;
-		chunk_len -= length;
 	}
-
 done:
 	asoc->peer.addip_serial++;
 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 5e32dd580270..c52763a26297 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3596,9 +3596,7 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
 	struct sctp_chunk	*asconf_ack = NULL;
 	struct sctp_paramhdr	*err_param = NULL;
 	sctp_addiphdr_t		*hdr;
-	union sctp_addr_param	*addr_param;
 	__u32			serial;
-	int			length;
 
 	if (!sctp_vtag_verify(chunk, asoc)) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
@@ -3623,17 +3621,8 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
 	hdr = (sctp_addiphdr_t *)chunk->skb->data;
 	serial = ntohl(hdr->serial);
 
-	addr_param = (union sctp_addr_param *)hdr->params;
-	length = ntohs(addr_param->p.length);
-	if (length < sizeof(sctp_paramhdr_t))
-		return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
-			   (void *)addr_param, commands);
-
 	/* Verify the ASCONF chunk before processing it. */
-	if (!sctp_verify_asconf(asoc,
-			    (sctp_paramhdr_t *)((void *)addr_param + length),
-			    (void *)chunk->chunk_end,
-			    &err_param))
+	if (!sctp_verify_asconf(asoc, chunk, true, &err_param))
 		return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
 						  (void *)err_param, commands);
 
@@ -3751,10 +3740,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 	rcvd_serial = ntohl(addip_hdr->serial);
 
 	/* Verify the ASCONF-ACK chunk before processing it. */
-	if (!sctp_verify_asconf(asoc,
-	    (sctp_paramhdr_t *)addip_hdr->params,
-	    (void *)asconf_ack->chunk_end,
-	    &err_param))
+	if (!sctp_verify_asconf(asoc, asconf_ack, false, &err_param))
 		return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
 			   (void *)err_param, commands);
 

From b13a714fb4e374d9e23185d6f47e86109909cfe8 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 8 Jul 2013 15:59:50 -0700
Subject: [PATCH 1172/1185] mm: invoke oom-killer from remaining unconverted
 page fault handlers

commit 609838cfed972d49a65aac7923a9ff5cbe482e30 upstream.

A few remaining architectures directly kill the page faulting task in an
out of memory situation.  This is usually not a good idea since that
task might not even use a significant amount of memory and so may not be
the optimal victim to resolve the situation.

Since 2.6.29's 1c0fe6e ("mm: invoke oom-killer from page fault") there
is a hook that architecture page fault handlers are supposed to call to
invoke the OOM killer and let it pick the right task to kill.  Convert
the remaining architectures over to this hook.

To have the previous behavior of simply taking out the faulting task the
vm.oom_kill_allocating_task sysctl can be set to 1.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Vineet Gupta <vgupta@synopsys.com>   [arch/arc bits]
Cc: James Hogan <james.hogan@imgtec.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Chen Liqin <liqin.chen@sunplusct.com>
Cc: Lennox Wu <lennox.wu@gmail.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/mm/fault.c      | 6 ++++--
 arch/metag/mm/fault.c    | 6 ++++--
 arch/mn10300/mm/fault.c  | 7 ++++---
 arch/openrisc/mm/fault.c | 8 ++++----
 arch/score/mm/fault.c    | 8 ++++----
 arch/tile/mm/fault.c     | 8 ++++----
 6 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 331a0846628e..0902d399b6c9 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -206,8 +206,10 @@ void do_page_fault(struct pt_regs *regs, int write, unsigned long address,
 	}
 	up_read(&mm->mmap_sem);
 
-	if (user_mode(regs))
-		do_group_exit(SIGKILL);	/* This will never return */
+	if (user_mode(regs)) {
+		pagefault_out_of_memory();
+		return;
+	}
 
 	goto no_context;
 
diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c
index 2c75bf7357c5..8fddf46e6c62 100644
--- a/arch/metag/mm/fault.c
+++ b/arch/metag/mm/fault.c
@@ -224,8 +224,10 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
 	 */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (user_mode(regs))
-		do_group_exit(SIGKILL);
+	if (user_mode(regs)) {
+		pagefault_out_of_memory();
+		return 1;
+	}
 
 no_context:
 	/* Are we prepared to handle this kernel fault?  */
diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c
index d48a84fd7fae..8a2e6ded9a44 100644
--- a/arch/mn10300/mm/fault.c
+++ b/arch/mn10300/mm/fault.c
@@ -345,9 +345,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code,
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	printk(KERN_ALERT "VM: killing process %s\n", tsk->comm);
-	if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
-		do_exit(SIGKILL);
+	if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) {
+		pagefault_out_of_memory();
+		return;
+	}
 	goto no_context;
 
 do_sigbus:
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index e2bfafce66c5..4a41f8493ab0 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -267,10 +267,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
 	__asm__ __volatile__("l.nop 1");
 
 	up_read(&mm->mmap_sem);
-	printk("VM: killing process %s\n", tsk->comm);
-	if (user_mode(regs))
-		do_exit(SIGKILL);
-	goto no_context;
+	if (!user_mode(regs))
+		goto no_context;
+	pagefault_out_of_memory();
+	return;
 
 do_sigbus:
 	up_read(&mm->mmap_sem);
diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c
index 47b600e4b2c5..6b18fb0189ae 100644
--- a/arch/score/mm/fault.c
+++ b/arch/score/mm/fault.c
@@ -172,10 +172,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
 		down_read(&mm->mmap_sem);
 		goto survive;
 	}
-	printk("VM: killing process %s\n", tsk->comm);
-	if (user_mode(regs))
-		do_group_exit(SIGKILL);
-	goto no_context;
+	if (!user_mode(regs))
+		goto no_context;
+	pagefault_out_of_memory();
+	return;
 
 do_sigbus:
 	up_read(&mm->mmap_sem);
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 3d2b81c163a6..f7f99f90cbe0 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -573,10 +573,10 @@ static int handle_page_fault(struct pt_regs *regs,
 		down_read(&mm->mmap_sem);
 		goto survive;
 	}
-	pr_alert("VM: killing process %s\n", tsk->comm);
-	if (!is_kernel_mode)
-		do_group_exit(SIGKILL);
-	goto no_context;
+	if (is_kernel_mode)
+		goto no_context;
+	pagefault_out_of_memory();
+	return 0;
 
 do_sigbus:
 	up_read(&mm->mmap_sem);

From 20c92c01bfe0d5046f16853c07e2a24ea463cdf2 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 12 Sep 2013 15:13:36 -0700
Subject: [PATCH 1173/1185] arch: mm: remove obsolete init OOM protection

commit 94bce453c78996cc4373d5da6cfabe07fcc6d9f9 upstream.

The memcg code can trap tasks in the context of the failing allocation
until an OOM situation is resolved.  They can hold all kinds of locks
(fs, mm) at this point, which makes it prone to deadlocking.

This series converts memcg OOM handling into a two step process that is
started in the charge context, but any waiting is done after the fault
stack is fully unwound.

Patches 1-4 prepare architecture handlers to support the new memcg
requirements, but in doing so they also remove old cruft and unify
out-of-memory behavior across architectures.

Patch 5 disables the memcg OOM handling for syscalls, readahead, kernel
faults, because they can gracefully unwind the stack with -ENOMEM.  OOM
handling is restricted to user triggered faults that have no other
option.

Patch 6 reworks memcg's hierarchical OOM locking to make it a little
more obvious what is going on in there: reduce locked regions, rename
locking functions, reorder and document.

Patch 7 implements the two-part OOM handling such that tasks are never
trapped with the full charge stack in an OOM situation.

This patch:

Back before smart OOM killing, when faulting tasks were killed directly on
allocation failures, the arch-specific fault handlers needed special
protection for the init process.

Now that all fault handlers call into the generic OOM killer (see commit
609838cfed97: "mm: invoke oom-killer from remaining unconverted page
fault handlers"), which already provides init protection, the
arch-specific leftovers can be removed.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: azurIt <azurit@pobox.sk>
Acked-by: Vineet Gupta <vgupta@synopsys.com>	[arch/arc bits]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/mm/fault.c   | 5 -----
 arch/score/mm/fault.c | 6 ------
 arch/tile/mm/fault.c  | 6 ------
 3 files changed, 17 deletions(-)

diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 0902d399b6c9..87f4281a4b64 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -120,7 +120,6 @@ void do_page_fault(struct pt_regs *regs, int write, unsigned long address,
 			goto bad_area;
 	}
 
-survive:
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
@@ -200,10 +199,6 @@ void do_page_fault(struct pt_regs *regs, int write, unsigned long address,
 	die("Oops", regs, address, cause_code);
 
 out_of_memory:
-	if (is_global_init(tsk)) {
-		yield();
-		goto survive;
-	}
 	up_read(&mm->mmap_sem);
 
 	if (user_mode(regs)) {
diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c
index 6b18fb0189ae..4b71a626d41e 100644
--- a/arch/score/mm/fault.c
+++ b/arch/score/mm/fault.c
@@ -100,7 +100,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
 			goto bad_area;
 	}
 
-survive:
 	/*
 	* If for any reason at all we couldn't handle the fault,
 	* make sure we exit gracefully rather than endlessly redo
@@ -167,11 +166,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
 	*/
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_global_init(tsk)) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
 	if (!user_mode(regs))
 		goto no_context;
 	pagefault_out_of_memory();
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index f7f99f90cbe0..ac553eed6390 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -430,7 +430,6 @@ static int handle_page_fault(struct pt_regs *regs,
 			goto bad_area;
 	}
 
- survive:
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
@@ -568,11 +567,6 @@ static int handle_page_fault(struct pt_regs *regs,
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_global_init(tsk)) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
 	if (is_kernel_mode)
 		goto no_context;
 	pagefault_out_of_memory();

From 086c6cc5377d0908667e8f7082633aebf45cf95f Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 12 Sep 2013 15:13:38 -0700
Subject: [PATCH 1174/1185] arch: mm: do not invoke OOM killer on kernel fault
 OOM

commit 871341023c771ad233620b7a1fb3d9c7031c4e5c upstream.

Kernel faults are expected to handle OOM conditions gracefully (gup,
uaccess etc.), so they should never invoke the OOM killer.  Reserve this
for faults triggered in user context when it is the only option.

Most architectures already do this, fix up the remaining few.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: azurIt <azurit@pobox.sk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mm/fault.c       | 14 +++++++-------
 arch/arm64/mm/fault.c     | 14 +++++++-------
 arch/avr32/mm/fault.c     |  2 +-
 arch/mips/mm/fault.c      |  2 ++
 arch/um/kernel/trap.c     |  2 ++
 arch/unicore32/mm/fault.c | 14 +++++++-------
 6 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 5dbf13f954f6..2ebf4f6dc026 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -349,6 +349,13 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
 		return 0;
 
+	/*
+	 * If we are in kernel mode at this point, we
+	 * have no context to handle this fault with.
+	 */
+	if (!user_mode(regs))
+		goto no_context;
+
 	if (fault & VM_FAULT_OOM) {
 		/*
 		 * We ran out of memory, call the OOM killer, and return to
@@ -359,13 +366,6 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 		return 0;
 	}
 
-	/*
-	 * If we are in kernel mode at this point, we
-	 * have no context to handle this fault with.
-	 */
-	if (!user_mode(regs))
-		goto no_context;
-
 	if (fault & VM_FAULT_SIGBUS) {
 		/*
 		 * We had some memory, but were unable to
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index f51d669c8ebd..f5f0c70da7a1 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -288,6 +288,13 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 			      VM_FAULT_BADACCESS))))
 		return 0;
 
+	/*
+	 * If we are in kernel mode at this point, we have no context to
+	 * handle this fault with.
+	 */
+	if (!user_mode(regs))
+		goto no_context;
+
 	if (fault & VM_FAULT_OOM) {
 		/*
 		 * We ran out of memory, call the OOM killer, and return to
@@ -298,13 +305,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 		return 0;
 	}
 
-	/*
-	 * If we are in kernel mode at this point, we have no context to
-	 * handle this fault with.
-	 */
-	if (!user_mode(regs))
-		goto no_context;
-
 	if (fault & VM_FAULT_SIGBUS) {
 		/*
 		 * We had some memory, but were unable to successfully fix up
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index b2f2d2d66849..2ca27b055825 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -228,9 +228,9 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
 	 */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	pagefault_out_of_memory();
 	if (!user_mode(regs))
 		goto no_context;
+	pagefault_out_of_memory();
 	return;
 
 do_sigbus:
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 0fead53d1c26..da47e9b7f425 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -240,6 +240,8 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long writ
 	 * (which will retry the fault, or kill us if we got oom-killed).
 	 */
 	up_read(&mm->mmap_sem);
+	if (!user_mode(regs))
+		goto no_context;
 	pagefault_out_of_memory();
 	return;
 
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 089f3987e273..b2f5adf838dd 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -124,6 +124,8 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 	 * (which will retry the fault, or kill us if we got oom-killed).
 	 */
 	up_read(&mm->mmap_sem);
+	if (!is_user)
+		goto out_nosemaphore;
 	pagefault_out_of_memory();
 	return 0;
 }
diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c
index f9b5c10bccee..8ed3c4509d84 100644
--- a/arch/unicore32/mm/fault.c
+++ b/arch/unicore32/mm/fault.c
@@ -278,6 +278,13 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	       (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
 		return 0;
 
+	/*
+	 * If we are in kernel mode at this point, we
+	 * have no context to handle this fault with.
+	 */
+	if (!user_mode(regs))
+		goto no_context;
+
 	if (fault & VM_FAULT_OOM) {
 		/*
 		 * We ran out of memory, call the OOM killer, and return to
@@ -288,13 +295,6 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 		return 0;
 	}
 
-	/*
-	 * If we are in kernel mode at this point, we
-	 * have no context to handle this fault with.
-	 */
-	if (!user_mode(regs))
-		goto no_context;
-
 	if (fault & VM_FAULT_SIGBUS) {
 		/*
 		 * We had some memory, but were unable to

From e2ec2c2b96808afa2f57ec7d7949691146fca341 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 12 Sep 2013 15:13:39 -0700
Subject: [PATCH 1175/1185] arch: mm: pass userspace fault flag to generic
 fault handler

commit 759496ba6407c6994d6a5ce3a5e74937d7816208 upstream.

Unlike global OOM handling, memory cgroup code will invoke the OOM killer
in any OOM situation because it has no way of telling faults occurring in
kernel context - which could be handled more gracefully - from
user-triggered faults.

Pass a flag that identifies faults originating in user space from the
architecture-specific fault handlers to generic code so that memcg OOM
handling can be improved.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: azurIt <azurit@pobox.sk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/alpha/mm/fault.c      |  7 ++++---
 arch/arc/mm/fault.c        |  6 ++++--
 arch/arm/mm/fault.c        |  9 ++++++---
 arch/arm64/mm/fault.c      | 17 ++++++++++-------
 arch/avr32/mm/fault.c      |  2 ++
 arch/cris/mm/fault.c       |  6 ++++--
 arch/frv/mm/fault.c        | 10 ++++++----
 arch/hexagon/mm/vm_fault.c |  6 ++++--
 arch/ia64/mm/fault.c       |  6 ++++--
 arch/m32r/mm/fault.c       | 10 ++++++----
 arch/m68k/mm/fault.c       |  2 ++
 arch/metag/mm/fault.c      |  6 ++++--
 arch/microblaze/mm/fault.c |  7 +++++--
 arch/mips/mm/fault.c       |  6 ++++--
 arch/mn10300/mm/fault.c    |  2 ++
 arch/openrisc/mm/fault.c   |  1 +
 arch/parisc/mm/fault.c     |  7 +++++--
 arch/powerpc/mm/fault.c    |  7 ++++---
 arch/s390/mm/fault.c       |  2 ++
 arch/score/mm/fault.c      |  7 ++++++-
 arch/sh/mm/fault.c         |  9 ++++++---
 arch/sparc/mm/fault_32.c   | 12 +++++++++---
 arch/sparc/mm/fault_64.c   |  6 ++++--
 arch/tile/mm/fault.c       |  7 +++++--
 arch/um/kernel/trap.c      | 20 ++++++++++++--------
 arch/unicore32/mm/fault.c  |  8 ++++++--
 arch/x86/mm/fault.c        |  8 +++++---
 arch/xtensa/mm/fault.c     |  2 ++
 include/linux/mm.h         |  1 +
 29 files changed, 135 insertions(+), 64 deletions(-)

diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index 0c4132dd3507..98838a05ba6d 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -89,8 +89,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 	const struct exception_table_entry *fixup;
 	int fault, si_code = SEGV_MAPERR;
 	siginfo_t info;
-	unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-			      (cause > 0 ? FAULT_FLAG_WRITE : 0));
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	/* As of EV6, a load into $31/$f31 is a prefetch, and never faults
 	   (or is suppressed by the PALcode).  Support that for older CPUs
@@ -115,7 +114,8 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 	if (address >= TASK_SIZE)
 		goto vmalloc_fault;
 #endif
-
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
@@ -142,6 +142,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 	} else {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	}
 
 	/* If for any reason at all we couldn't handle the fault,
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 87f4281a4b64..50533b750a99 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -59,8 +59,7 @@ void do_page_fault(struct pt_regs *regs, int write, unsigned long address,
 	struct mm_struct *mm = tsk->mm;
 	siginfo_t info;
 	int fault, ret;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-				(write ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	/*
 	 * We fault-in kernel-space virtual memory on-demand. The
@@ -88,6 +87,8 @@ void do_page_fault(struct pt_regs *regs, int write, unsigned long address,
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
@@ -115,6 +116,7 @@ void do_page_fault(struct pt_regs *regs, int write, unsigned long address,
 	if (write) {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 			goto bad_area;
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 2ebf4f6dc026..160da6d65546 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -261,9 +261,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	int fault, sig, code;
-	int write = fsr & FSR_WRITE;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-				(write ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	if (notify_page_fault(regs, fsr))
 		return 0;
@@ -282,6 +280,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+	if (fsr & FSR_WRITE)
+		flags |= FAULT_FLAG_WRITE;
+
 	/*
 	 * As per x86, we may deadlock here.  However, since the kernel only
 	 * validly references user space from well defined areas of the code,
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index f5f0c70da7a1..b5d458769b65 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -199,13 +199,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
 	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
-	if (esr & ESR_LNX_EXEC) {
-		vm_flags = VM_EXEC;
-	} else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
-		vm_flags = VM_WRITE;
-		mm_flags |= FAULT_FLAG_WRITE;
-	}
-
 	tsk = current;
 	mm  = tsk->mm;
 
@@ -220,6 +213,16 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(regs))
+		mm_flags |= FAULT_FLAG_USER;
+
+	if (esr & ESR_LNX_EXEC) {
+		vm_flags = VM_EXEC;
+	} else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
+		vm_flags = VM_WRITE;
+		mm_flags |= FAULT_FLAG_WRITE;
+	}
+
 	/*
 	 * As per x86, we may deadlock here. However, since the kernel only
 	 * validly references user space from well defined areas of the code,
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index 2ca27b055825..0eca93327195 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -86,6 +86,8 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
 
 	local_irq_enable();
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 
diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c
index 73312ab6c696..1790f22e71a2 100644
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -58,8 +58,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
 	struct vm_area_struct * vma;
 	siginfo_t info;
 	int fault;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-				((writeaccess & 1) ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	D(printk(KERN_DEBUG
 		 "Page fault for %lX on %X at %lX, prot %d write %d\n",
@@ -117,6 +116,8 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
@@ -155,6 +156,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
 	} else if (writeaccess == 1) {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 			goto bad_area;
diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c
index 331c1e2cfb67..9a66372fc7c7 100644
--- a/arch/frv/mm/fault.c
+++ b/arch/frv/mm/fault.c
@@ -34,11 +34,11 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
 	struct vm_area_struct *vma;
 	struct mm_struct *mm;
 	unsigned long _pme, lrai, lrad, fixup;
+	unsigned long flags = 0;
 	siginfo_t info;
 	pgd_t *pge;
 	pud_t *pue;
 	pte_t *pte;
-	int write;
 	int fault;
 
 #if 0
@@ -81,6 +81,9 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(__frame))
+		flags |= FAULT_FLAG_USER;
+
 	down_read(&mm->mmap_sem);
 
 	vma = find_vma(mm, ear0);
@@ -129,7 +132,6 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
  */
  good_area:
 	info.si_code = SEGV_ACCERR;
-	write = 0;
 	switch (esr0 & ESR0_ATXC) {
 	default:
 		/* handle write to write protected page */
@@ -140,7 +142,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
 #endif
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
-		write = 1;
+		flags |= FAULT_FLAG_WRITE;
 		break;
 
 		 /* handle read from protected page */
@@ -162,7 +164,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(mm, vma, ear0, write ? FAULT_FLAG_WRITE : 0);
+	fault = handle_mm_fault(mm, vma, ear0, flags);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
index 1bd276dbec7d..8704c9320032 100644
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -53,8 +53,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
 	int si_code = SEGV_MAPERR;
 	int fault;
 	const struct exception_table_entry *fixup;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-				 (cause > 0 ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	/*
 	 * If we're in an interrupt or have no user context,
@@ -65,6 +64,8 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
 
 	local_irq_enable();
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
@@ -96,6 +97,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
 	case FLT_STORE:
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 		break;
 	}
 
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 6cf0341f978e..7225dad87094 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -90,8 +90,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 	mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
 		| (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT));
 
-	flags |= ((mask & VM_WRITE) ? FAULT_FLAG_WRITE : 0);
-
 	/* mmap_sem is performance critical.... */
 	prefetchw(&mm->mmap_sem);
 
@@ -119,6 +117,10 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 	if (notify_page_fault(regs, TRAP_BRKPT))
 		return;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+	if (mask & VM_WRITE)
+		flags |= FAULT_FLAG_WRITE;
 retry:
 	down_read(&mm->mmap_sem);
 
diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c
index 3cdfa9c1d091..e9c6a8014bd6 100644
--- a/arch/m32r/mm/fault.c
+++ b/arch/m32r/mm/fault.c
@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	struct mm_struct *mm;
 	struct vm_area_struct * vma;
 	unsigned long page, addr;
-	int write;
+	unsigned long flags = 0;
 	int fault;
 	siginfo_t info;
 
@@ -117,6 +117,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	if (in_atomic() || !mm)
 		goto bad_area_nosemaphore;
 
+	if (error_code & ACE_USERMODE)
+		flags |= FAULT_FLAG_USER;
+
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
@@ -166,14 +169,13 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
  */
 good_area:
 	info.si_code = SEGV_ACCERR;
-	write = 0;
 	switch (error_code & (ACE_WRITE|ACE_PROTECTION)) {
 		default:	/* 3: write, present */
 			/* fall through */
 		case ACE_WRITE:	/* write, not present */
 			if (!(vma->vm_flags & VM_WRITE))
 				goto bad_area;
-			write++;
+			flags |= FAULT_FLAG_WRITE;
 			break;
 		case ACE_PROTECTION:	/* read, present */
 		case 0:		/* read, not present */
@@ -194,7 +196,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	 */
 	addr = (address & PAGE_MASK);
 	set_thread_fault_code(error_code);
-	fault = handle_mm_fault(mm, vma, addr, write ? FAULT_FLAG_WRITE : 0);
+	fault = handle_mm_fault(mm, vma, addr, flags);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index a563727806bf..eb1d61f68725 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -88,6 +88,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 
diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c
index 8fddf46e6c62..332680e5ebf2 100644
--- a/arch/metag/mm/fault.c
+++ b/arch/metag/mm/fault.c
@@ -53,8 +53,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
 	struct vm_area_struct *vma, *prev_vma;
 	siginfo_t info;
 	int fault;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-				(write_access ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	tsk = current;
 
@@ -109,6 +108,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 
@@ -121,6 +122,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
 	if (write_access) {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
 			goto bad_area;
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index 731f739d17a1..fa4cf52aa7a6 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -92,8 +92,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
 	int code = SEGV_MAPERR;
 	int is_write = error_code & ESR_S;
 	int fault;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-					 (is_write ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	regs->ear = address;
 	regs->esr = error_code;
@@ -121,6 +120,9 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
 		die("Weird page fault", regs, SIGSEGV);
 	}
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
@@ -199,6 +201,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
 	if (unlikely(is_write)) {
 		if (unlikely(!(vma->vm_flags & VM_WRITE)))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	/* a read */
 	} else {
 		/* protection fault */
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index da47e9b7f425..0214a43b9911 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -41,8 +41,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long writ
 	const int field = sizeof(unsigned long) * 2;
 	siginfo_t info;
 	int fault;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-						 (write ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 #if 0
 	printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(),
@@ -92,6 +91,8 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long writ
 	if (in_atomic() || !mm)
 		goto bad_area_nosemaphore;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
@@ -113,6 +114,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long writ
 	if (write) {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		if (cpu_has_rixi) {
 			if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) {
diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c
index 8a2e6ded9a44..3516cbdf1ee9 100644
--- a/arch/mn10300/mm/fault.c
+++ b/arch/mn10300/mm/fault.c
@@ -171,6 +171,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code,
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index 4a41f8493ab0..0703acf7d327 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -86,6 +86,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
 	if (user_mode(regs)) {
 		/* Exception was in userspace: reenable interrupts */
 		local_irq_enable();
+		flags |= FAULT_FLAG_USER;
 	} else {
 		/* If exception was in a syscall, then IRQ's may have
 		 * been enabled or disabled.  If they were enabled,
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index f247a3480e8e..d10d27a720c0 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -180,6 +180,10 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+	if (acc_type & VM_WRITE)
+		flags |= FAULT_FLAG_WRITE;
 retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma_prev(mm, address, &prev_vma);
@@ -203,8 +207,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
 	 * fault.
 	 */
 
-	fault = handle_mm_fault(mm, vma, address,
-			flags | ((acc_type & VM_WRITE) ? FAULT_FLAG_WRITE : 0));
+	fault = handle_mm_fault(mm, vma, address, flags);
 
 	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
 		return;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 8726779e1409..d9196c9f93d9 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -223,9 +223,6 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 	is_write = error_code & ESR_DST;
 #endif /* CONFIG_4xx || CONFIG_BOOKE */
 
-	if (is_write)
-		flags |= FAULT_FLAG_WRITE;
-
 #ifdef CONFIG_PPC_ICSWX
 	/*
 	 * we need to do this early because this "data storage
@@ -280,6 +277,9 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
@@ -408,6 +408,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 	} else if (is_write) {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	/* a read */
 	} else {
 		/* protection fault */
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 047c3e4c59a2..416facec4a33 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -302,6 +302,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
 	address = trans_exc_code & __FAIL_ADDR_MASK;
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 	flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 	if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
 		flags |= FAULT_FLAG_WRITE;
 	down_read(&mm->mmap_sem);
diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c
index 4b71a626d41e..52238983527d 100644
--- a/arch/score/mm/fault.c
+++ b/arch/score/mm/fault.c
@@ -47,6 +47,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
 	const int field = sizeof(unsigned long) * 2;
+	unsigned long flags = 0;
 	siginfo_t info;
 	int fault;
 
@@ -75,6 +76,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
 	if (in_atomic() || !mm)
 		goto bad_area_nosemaphore;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
 	if (!vma)
@@ -95,6 +99,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
 	if (write) {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
 			goto bad_area;
@@ -105,7 +110,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
 	* make sure we exit gracefully rather than endlessly redo
 	* the fault.
 	*/
-	fault = handle_mm_fault(mm, vma, address, write);
+	fault = handle_mm_fault(mm, vma, address, flags);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 1f49c28affa9..541dc6101508 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -400,9 +400,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	struct mm_struct *mm;
 	struct vm_area_struct * vma;
 	int fault;
-	int write = error_code & FAULT_CODE_WRITE;
-	unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-			      (write ? FAULT_FLAG_WRITE : 0));
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	tsk = current;
 	mm = tsk->mm;
@@ -476,6 +474,11 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 
 	set_thread_fault_code(error_code);
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+	if (error_code & FAULT_CODE_WRITE)
+		flags |= FAULT_FLAG_WRITE;
+
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index e98bfda205a2..59dbd4645725 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -177,8 +177,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
 	unsigned long g2;
 	int from_user = !(regs->psr & PSR_PS);
 	int fault, code;
-	unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-			      (write ? FAULT_FLAG_WRITE : 0));
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	if (text_fault)
 		address = regs->pc;
@@ -235,6 +234,11 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
 			goto bad_area;
 	}
 
+	if (from_user)
+		flags |= FAULT_FLAG_USER;
+	if (write)
+		flags |= FAULT_FLAG_WRITE;
+
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
@@ -383,6 +387,7 @@ static void force_user_fault(unsigned long address, int write)
 	struct vm_area_struct *vma;
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
+	unsigned int flags = FAULT_FLAG_USER;
 	int code;
 
 	code = SEGV_MAPERR;
@@ -402,11 +407,12 @@ static void force_user_fault(unsigned long address, int write)
 	if (write) {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 			goto bad_area;
 	}
-	switch (handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0)) {
+	switch (handle_mm_fault(mm, vma, address, flags)) {
 	case VM_FAULT_SIGBUS:
 	case VM_FAULT_OOM:
 		goto do_sigbus;
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index ea83f82464da..3841a081beb3 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -323,7 +323,8 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
 			bad_kernel_pc(regs, address);
 			return;
 		}
-	}
+	} else
+		flags |= FAULT_FLAG_USER;
 
 	/*
 	 * If we're in an interrupt or have no user
@@ -426,13 +427,14 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
 		    vma->vm_file != NULL)
 			set_thread_fault_code(fault_code |
 					      FAULT_CODE_BLKCOMMIT);
+
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		/* Allow reads even for write-only mappings */
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 			goto bad_area;
 	}
 
-	flags |= ((fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0);
 	fault = handle_mm_fault(mm, vma, address, flags);
 
 	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index ac553eed6390..3ff289f422e6 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -280,8 +280,7 @@ static int handle_page_fault(struct pt_regs *regs,
 	if (!is_page_fault)
 		write = 1;
 
-	flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-		 (write ? FAULT_FLAG_WRITE : 0));
+	flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL);
 
@@ -365,6 +364,9 @@ static int handle_page_fault(struct pt_regs *regs,
 		goto bad_area_nosemaphore;
 	}
 
+	if (!is_kernel_mode)
+		flags |= FAULT_FLAG_USER;
+
 	/*
 	 * When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
@@ -425,6 +427,7 @@ static int handle_page_fault(struct pt_regs *regs,
 #endif
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		if (!is_page_fault || !(vma->vm_flags & VM_READ))
 			goto bad_area;
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index b2f5adf838dd..5c3aef74237f 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -30,8 +30,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 	pmd_t *pmd;
 	pte_t *pte;
 	int err = -EFAULT;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-				 (is_write ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	*code_out = SEGV_MAPERR;
 
@@ -42,6 +41,8 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 	if (in_atomic())
 		goto out_nosemaphore;
 
+	if (is_user)
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
@@ -58,12 +59,15 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 
 good_area:
 	*code_out = SEGV_ACCERR;
-	if (is_write && !(vma->vm_flags & VM_WRITE))
-		goto out;
-
-	/* Don't require VM_READ|VM_EXEC for write faults! */
-	if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
-		goto out;
+	if (is_write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto out;
+		flags |= FAULT_FLAG_WRITE;
+	} else {
+		/* Don't require VM_READ|VM_EXEC for write faults! */
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto out;
+	}
 
 	do {
 		int fault;
diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c
index 8ed3c4509d84..0dc922dba915 100644
--- a/arch/unicore32/mm/fault.c
+++ b/arch/unicore32/mm/fault.c
@@ -209,8 +209,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	int fault, sig, code;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-				 ((!(fsr ^ 0x12)) ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	tsk = current;
 	mm = tsk->mm;
@@ -222,6 +221,11 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+	if (!(fsr ^ 0x12))
+		flags |= FAULT_FLAG_WRITE;
+
 	/*
 	 * As per x86, we may deadlock here.  However, since the kernel only
 	 * validly references user space from well defined areas of the code,
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index c1e9e4cbbd76..78dee3ef04d1 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1017,9 +1017,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	unsigned long address;
 	struct mm_struct *mm;
 	int fault;
-	int write = error_code & PF_WRITE;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-					(write ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	tsk = current;
 	mm = tsk->mm;
@@ -1089,6 +1087,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (user_mode_vm(regs)) {
 		local_irq_enable();
 		error_code |= PF_USER;
+		flags |= FAULT_FLAG_USER;
 	} else {
 		if (regs->flags & X86_EFLAGS_IF)
 			local_irq_enable();
@@ -1113,6 +1112,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		return;
 	}
 
+	if (error_code & PF_WRITE)
+		flags |= FAULT_FLAG_WRITE;
+
 	/*
 	 * When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 4b7bc8db170f..70fa7bc42b4a 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -72,6 +72,8 @@ void do_page_fault(struct pt_regs *regs)
 	       address, exccause, regs->pc, is_write? "w":"", is_exec? "x":"");
 #endif
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7da14357aa76..d4cdac903468 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -167,6 +167,7 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_RETRY_NOWAIT	0x10	/* Don't drop mmap_sem and wait when retrying */
 #define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
 #define FAULT_FLAG_TRIED	0x40	/* second try */
+#define FAULT_FLAG_USER		0x80	/* The fault originated in userspace */
 
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's

From ed368ae78e6eafc10368e97246610d676bec3060 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 12 Sep 2013 15:13:40 -0700
Subject: [PATCH 1176/1185] x86: finish user fault error path with fatal signal

commit 3a13c4d761b4b979ba8767f42345fed3274991b0 upstream.

The x86 fault handler bails in the middle of error handling when the
task has a fatal signal pending.  For a subsequent patch this is a
problem in OOM situations because it relies on pagefault_out_of_memory()
being called even when the task has been killed, to perform proper
per-task OOM state unwinding.

Shortcutting the fault like this is a rather minor optimization that
saves a few instructions in rare cases.  Just remove it for
user-triggered faults.

Use the opportunity to split the fault retry handling from actual fault
errors and add locking documentation that reads surprisingly similar to
ARM's.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: azurIt <azurit@pobox.sk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/fault.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 78dee3ef04d1..d8b1ff68dbb9 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -842,23 +842,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 	force_sig_info_fault(SIGBUS, code, address, tsk, fault);
 }
 
-static noinline int
+static noinline void
 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	       unsigned long address, unsigned int fault)
 {
-	/*
-	 * Pagefault was interrupted by SIGKILL. We have no reason to
-	 * continue pagefault.
-	 */
-	if (fatal_signal_pending(current)) {
-		if (!(fault & VM_FAULT_RETRY))
-			up_read(&current->mm->mmap_sem);
-		if (!(error_code & PF_USER))
-			no_context(regs, error_code, address, 0, 0);
-		return 1;
+	if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
+		up_read(&current->mm->mmap_sem);
+		no_context(regs, error_code, address, 0, 0);
+		return;
 	}
-	if (!(fault & VM_FAULT_ERROR))
-		return 0;
 
 	if (fault & VM_FAULT_OOM) {
 		/* Kernel mode? Handle exceptions or die: */
@@ -866,7 +858,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 			up_read(&current->mm->mmap_sem);
 			no_context(regs, error_code, address,
 				   SIGSEGV, SEGV_MAPERR);
-			return 1;
+			return;
 		}
 
 		up_read(&current->mm->mmap_sem);
@@ -884,7 +876,6 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 		else
 			BUG();
 	}
-	return 1;
 }
 
 static int spurious_fault_check(unsigned long error_code, pte_t *pte)
@@ -1193,9 +1184,17 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	 */
 	fault = handle_mm_fault(mm, vma, address, flags);
 
-	if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
-		if (mm_fault_error(regs, error_code, address, fault))
-			return;
+	/*
+	 * If we need to retry but a fatal signal is pending, handle the
+	 * signal first. We do not need to release the mmap_sem because it
+	 * would already be released in __lock_page_or_retry in mm/filemap.c.
+	 */
+	if (unlikely((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)))
+		return;
+
+	if (unlikely(fault & VM_FAULT_ERROR)) {
+		mm_fault_error(regs, error_code, address, fault);
+		return;
 	}
 
 	/*

From 11f34787b50ce71f66b85ad8790beaa5eee3f897 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 12 Sep 2013 15:13:42 -0700
Subject: [PATCH 1177/1185] mm: memcg: enable memcg OOM killer only for user
 faults

commit 519e52473ebe9db5cdef44670d5a97f1fd53d721 upstream.

System calls and kernel faults (uaccess, gup) can handle an out of memory
situation gracefully and just return -ENOMEM.

Enable the memcg OOM killer only for user faults, where it's really the
only option available.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: azurIt <azurit@pobox.sk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/memcontrol.h | 44 ++++++++++++++++++++++++++++++++++++++
 include/linux/sched.h      |  3 +++
 mm/filemap.c               | 11 +++++++++-
 mm/memcontrol.c            |  2 +-
 mm/memory.c                | 40 +++++++++++++++++++++++++---------
 5 files changed, 88 insertions(+), 12 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d6183f06d8c1..2c911c95b1ac 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -124,6 +124,37 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 extern void mem_cgroup_replace_page_cache(struct page *oldpage,
 					struct page *newpage);
 
+/**
+ * mem_cgroup_toggle_oom - toggle the memcg OOM killer for the current task
+ * @new: true to enable, false to disable
+ *
+ * Toggle whether a failed memcg charge should invoke the OOM killer
+ * or just return -ENOMEM.  Returns the previous toggle state.
+ */
+static inline bool mem_cgroup_toggle_oom(bool new)
+{
+	bool old;
+
+	old = current->memcg_oom.may_oom;
+	current->memcg_oom.may_oom = new;
+
+	return old;
+}
+
+static inline void mem_cgroup_enable_oom(void)
+{
+	bool old = mem_cgroup_toggle_oom(true);
+
+	WARN_ON(old == true);
+}
+
+static inline void mem_cgroup_disable_oom(void)
+{
+	bool old = mem_cgroup_toggle_oom(false);
+
+	WARN_ON(old == false);
+}
+
 #ifdef CONFIG_MEMCG_SWAP
 extern int do_swap_account;
 #endif
@@ -347,6 +378,19 @@ static inline void mem_cgroup_end_update_page_stat(struct page *page,
 {
 }
 
+static inline bool mem_cgroup_toggle_oom(bool new)
+{
+	return false;
+}
+
+static inline void mem_cgroup_enable_oom(void)
+{
+}
+
+static inline void mem_cgroup_disable_oom(void)
+{
+}
+
 static inline void mem_cgroup_inc_page_stat(struct page *page,
 					    enum mem_cgroup_page_stat_item idx)
 {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f87e9a8d364f..42a58ce480bc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1411,6 +1411,9 @@ struct task_struct {
 		unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
 	} memcg_batch;
 	unsigned int memcg_kmem_skip_account;
+	struct memcg_oom_info {
+		unsigned int may_oom:1;
+	} memcg_oom;
 #endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	atomic_t ptrace_bp_refcnt;
diff --git a/mm/filemap.c b/mm/filemap.c
index 7905fe721aa8..c466f4c449e9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1614,6 +1614,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct inode *inode = mapping->host;
 	pgoff_t offset = vmf->pgoff;
 	struct page *page;
+	bool memcg_oom;
 	pgoff_t size;
 	int ret = 0;
 
@@ -1622,7 +1623,11 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 
 	/*
-	 * Do we have something in the page cache already?
+	 * Do we have something in the page cache already?  Either
+	 * way, try readahead, but disable the memcg OOM killer for it
+	 * as readahead is optional and no errors are propagated up
+	 * the fault stack.  The OOM killer is enabled while trying to
+	 * instantiate the faulting page individually below.
 	 */
 	page = find_get_page(mapping, offset);
 	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
@@ -1630,10 +1635,14 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		 * We found the page, so try async readahead before
 		 * waiting for the lock.
 		 */
+		memcg_oom = mem_cgroup_toggle_oom(false);
 		do_async_mmap_readahead(vma, ra, file, page, offset);
+		mem_cgroup_toggle_oom(memcg_oom);
 	} else if (!page) {
 		/* No page in the page cache at all */
+		memcg_oom = mem_cgroup_toggle_oom(false);
 		do_sync_mmap_readahead(vma, ra, file, offset);
+		mem_cgroup_toggle_oom(memcg_oom);
 		count_vm_event(PGMAJFAULT);
 		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
 		ret = VM_FAULT_MAJOR;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f45e21ab9cea..c0607d27ce87 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2613,7 +2613,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		return CHARGE_RETRY;
 
 	/* If we don't need to call oom-killer at el, return immediately */
-	if (!oom_check)
+	if (!oom_check || !current->memcg_oom.may_oom)
 		return CHARGE_NOMEM;
 	/* check OOM */
 	if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask, get_order(csize)))
diff --git a/mm/memory.c b/mm/memory.c
index ebe0f285c0e7..59f450c5c0a3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3754,22 +3754,14 @@ int handle_pte_fault(struct mm_struct *mm,
 /*
  * By the time we get here, we already hold the mm semaphore
  */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address, unsigned int flags)
+static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+			     unsigned long address, unsigned int flags)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 
-	__set_current_state(TASK_RUNNING);
-
-	count_vm_event(PGFAULT);
-	mem_cgroup_count_vm_event(mm, PGFAULT);
-
-	/* do counter updates before entering really critical section. */
-	check_sync_rss_stat(current);
-
 	if (unlikely(is_vm_hugetlb_page(vma)))
 		return hugetlb_fault(mm, vma, address, flags);
 
@@ -3850,6 +3842,34 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return handle_pte_fault(mm, vma, address, pte, pmd, flags);
 }
 
+int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+		    unsigned long address, unsigned int flags)
+{
+	int ret;
+
+	__set_current_state(TASK_RUNNING);
+
+	count_vm_event(PGFAULT);
+	mem_cgroup_count_vm_event(mm, PGFAULT);
+
+	/* do counter updates before entering really critical section. */
+	check_sync_rss_stat(current);
+
+	/*
+	 * Enable the memcg OOM handling for faults triggered in user
+	 * space.  Kernel faults are handled more gracefully.
+	 */
+	if (flags & FAULT_FLAG_USER)
+		mem_cgroup_enable_oom();
+
+	ret = __handle_mm_fault(mm, vma, address, flags);
+
+	if (flags & FAULT_FLAG_USER)
+		mem_cgroup_disable_oom();
+
+	return ret;
+}
+
 #ifndef __PAGETABLE_PUD_FOLDED
 /*
  * Allocate page upper directory.

From 7a147e0c45a8fa198ade4128bdcbf8592f48843e Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 12 Sep 2013 15:13:43 -0700
Subject: [PATCH 1178/1185] mm: memcg: rework and document OOM waiting and
 wakeup

commit fb2a6fc56be66c169f8b80e07ed999ba453a2db2 upstream.

The memcg OOM handler open-codes a sleeping lock for OOM serialization
(trylock, wait, repeat) because the required locking is so specific to
memcg hierarchies.  However, it would be nice if this construct would be
clearly recognizable and not be as obfuscated as it is right now.  Clean
up as follows:

1. Remove the return value of mem_cgroup_oom_unlock()

2. Rename mem_cgroup_oom_lock() to mem_cgroup_oom_trylock().

3. Pull the prepare_to_wait() out of the memcg_oom_lock scope.  This
   makes it more obvious that the task has to be on the waitqueue
   before attempting to OOM-trylock the hierarchy, to not miss any
   wakeups before going to sleep.  It just didn't matter until now
   because it was all lumped together into the global memcg_oom_lock
   spinlock section.

4. Pull the mem_cgroup_oom_notify() out of the memcg_oom_lock scope.
   It is protected by the hierarchical OOM-lock.

5. The memcg_oom_lock spinlock is only required to propagate the OOM
   lock in any given hierarchy atomically.  Restrict its scope to
   mem_cgroup_oom_(trylock|unlock).

6. Do not wake up the waitqueue unconditionally at the end of the
   function.  Only the lockholder has to wake up the next in line
   after releasing the lock.

   Note that the lockholder kicks off the OOM-killer, which in turn
   leads to wakeups from the uncharges of the exiting task.  But a
   contender is not guaranteed to see them if it enters the OOM path
   after the OOM kills but before the lockholder releases the lock.
   Thus there has to be an explicit wakeup after releasing the lock.

7. Put the OOM task on the waitqueue before marking the hierarchy as
   under OOM as that is the point where we start to receive wakeups.
   No point in listening before being on the waitqueue.

8. Likewise, unmark the hierarchy before finishing the sleep, for
   symmetry.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: azurIt <azurit@pobox.sk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/memcontrol.c | 83 +++++++++++++++++++++++++++----------------------
 1 file changed, 46 insertions(+), 37 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c0607d27ce87..333bb91ee3f2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2075,15 +2075,18 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
 	return total;
 }
 
+static DEFINE_SPINLOCK(memcg_oom_lock);
+
 /*
  * Check OOM-Killer is already running under our hierarchy.
  * If someone is running, return false.
- * Has to be called with memcg_oom_lock
  */
-static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg)
+static bool mem_cgroup_oom_trylock(struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *iter, *failed = NULL;
 
+	spin_lock(&memcg_oom_lock);
+
 	for_each_mem_cgroup_tree(iter, memcg) {
 		if (iter->oom_lock) {
 			/*
@@ -2097,33 +2100,33 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg)
 			iter->oom_lock = true;
 	}
 
-	if (!failed)
-		return true;
-
-	/*
-	 * OK, we failed to lock the whole subtree so we have to clean up
-	 * what we set up to the failing subtree
-	 */
-	for_each_mem_cgroup_tree(iter, memcg) {
-		if (iter == failed) {
-			mem_cgroup_iter_break(memcg, iter);
-			break;
+	if (failed) {
+		/*
+		 * OK, we failed to lock the whole subtree so we have
+		 * to clean up what we set up to the failing subtree
+		 */
+		for_each_mem_cgroup_tree(iter, memcg) {
+			if (iter == failed) {
+				mem_cgroup_iter_break(memcg, iter);
+				break;
+			}
+			iter->oom_lock = false;
 		}
-		iter->oom_lock = false;
 	}
-	return false;
+
+	spin_unlock(&memcg_oom_lock);
+
+	return !failed;
 }
 
-/*
- * Has to be called with memcg_oom_lock
- */
-static int mem_cgroup_oom_unlock(struct mem_cgroup *memcg)
+static void mem_cgroup_oom_unlock(struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *iter;
 
+	spin_lock(&memcg_oom_lock);
 	for_each_mem_cgroup_tree(iter, memcg)
 		iter->oom_lock = false;
-	return 0;
+	spin_unlock(&memcg_oom_lock);
 }
 
 static void mem_cgroup_mark_under_oom(struct mem_cgroup *memcg)
@@ -2147,7 +2150,6 @@ static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg)
 		atomic_add_unless(&iter->under_oom, -1, 0);
 }
 
-static DEFINE_SPINLOCK(memcg_oom_lock);
 static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
 
 struct oom_wait_info {
@@ -2194,45 +2196,52 @@ static bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask,
 				  int order)
 {
 	struct oom_wait_info owait;
-	bool locked, need_to_kill;
+	bool locked;
 
 	owait.memcg = memcg;
 	owait.wait.flags = 0;
 	owait.wait.func = memcg_oom_wake_function;
 	owait.wait.private = current;
 	INIT_LIST_HEAD(&owait.wait.task_list);
-	need_to_kill = true;
-	mem_cgroup_mark_under_oom(memcg);
 
-	/* At first, try to OOM lock hierarchy under memcg.*/
-	spin_lock(&memcg_oom_lock);
-	locked = mem_cgroup_oom_lock(memcg);
 	/*
+	 * As with any blocking lock, a contender needs to start
+	 * listening for wakeups before attempting the trylock,
+	 * otherwise it can miss the wakeup from the unlock and sleep
+	 * indefinitely.  This is just open-coded because our locking
+	 * is so particular to memcg hierarchies.
+	 *
 	 * Even if signal_pending(), we can't quit charge() loop without
 	 * accounting. So, UNINTERRUPTIBLE is appropriate. But SIGKILL
 	 * under OOM is always welcomed, use TASK_KILLABLE here.
 	 */
 	prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
-	if (!locked || memcg->oom_kill_disable)
-		need_to_kill = false;
+	mem_cgroup_mark_under_oom(memcg);
+
+	locked = mem_cgroup_oom_trylock(memcg);
+
 	if (locked)
 		mem_cgroup_oom_notify(memcg);
-	spin_unlock(&memcg_oom_lock);
 
-	if (need_to_kill) {
+	if (locked && !memcg->oom_kill_disable) {
+		mem_cgroup_unmark_under_oom(memcg);
 		finish_wait(&memcg_oom_waitq, &owait.wait);
 		mem_cgroup_out_of_memory(memcg, mask, order);
 	} else {
 		schedule();
+		mem_cgroup_unmark_under_oom(memcg);
 		finish_wait(&memcg_oom_waitq, &owait.wait);
 	}
-	spin_lock(&memcg_oom_lock);
-	if (locked)
-		mem_cgroup_oom_unlock(memcg);
-	memcg_wakeup_oom(memcg);
-	spin_unlock(&memcg_oom_lock);
 
-	mem_cgroup_unmark_under_oom(memcg);
+	if (locked) {
+		mem_cgroup_oom_unlock(memcg);
+		/*
+		 * There is no guarantee that an OOM-lock contender
+		 * sees the wakeups triggered by the OOM kill
+		 * uncharges.  Wake any sleepers explicitely.
+		 */
+		memcg_oom_recover(memcg);
+	}
 
 	if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
 		return false;

From f79d6a468980516cbfb9e01313c846b82b9d2e7e Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 12 Sep 2013 15:13:44 -0700
Subject: [PATCH 1179/1185] mm: memcg: do not trap chargers with full callstack
 on OOM

commit 3812c8c8f3953921ef18544110dafc3505c1ac62 upstream.

The memcg OOM handling is incredibly fragile and can deadlock.  When a
task fails to charge memory, it invokes the OOM killer and loops right
there in the charge code until it succeeds.  Comparably, any other task
that enters the charge path at this point will go to a waitqueue right
then and there and sleep until the OOM situation is resolved.  The problem
is that these tasks may hold filesystem locks and the mmap_sem; locks that
the selected OOM victim may need to exit.

For example, in one reported case, the task invoking the OOM killer was
about to charge a page cache page during a write(), which holds the
i_mutex.  The OOM killer selected a task that was just entering truncate()
and trying to acquire the i_mutex:

OOM invoking task:
  mem_cgroup_handle_oom+0x241/0x3b0
  mem_cgroup_cache_charge+0xbe/0xe0
  add_to_page_cache_locked+0x4c/0x140
  add_to_page_cache_lru+0x22/0x50
  grab_cache_page_write_begin+0x8b/0xe0
  ext3_write_begin+0x88/0x270
  generic_file_buffered_write+0x116/0x290
  __generic_file_aio_write+0x27c/0x480
  generic_file_aio_write+0x76/0xf0           # takes ->i_mutex
  do_sync_write+0xea/0x130
  vfs_write+0xf3/0x1f0
  sys_write+0x51/0x90
  system_call_fastpath+0x18/0x1d

OOM kill victim:
  do_truncate+0x58/0xa0              # takes i_mutex
  do_last+0x250/0xa30
  path_openat+0xd7/0x440
  do_filp_open+0x49/0xa0
  do_sys_open+0x106/0x240
  sys_open+0x20/0x30
  system_call_fastpath+0x18/0x1d

The OOM handling task will retry the charge indefinitely while the OOM
killed task is not releasing any resources.

A similar scenario can happen when the kernel OOM killer for a memcg is
disabled and a userspace task is in charge of resolving OOM situations.
In this case, ALL tasks that enter the OOM path will be made to sleep on
the OOM waitqueue and wait for userspace to free resources or increase
the group's limit.  But a userspace OOM handler is prone to deadlock
itself on the locks held by the waiting tasks.  For example one of the
sleeping tasks may be stuck in a brk() call with the mmap_sem held for
writing but the userspace handler, in order to pick an optimal victim,
may need to read files from /proc/<pid>, which tries to acquire the same
mmap_sem for reading and deadlocks.

This patch changes the way tasks behave after detecting a memcg OOM and
makes sure nobody loops or sleeps with locks held:

1. When OOMing in a user fault, invoke the OOM killer and restart the
   fault instead of looping on the charge attempt.  This way, the OOM
   victim can not get stuck on locks the looping task may hold.

2. When OOMing in a user fault but somebody else is handling it
   (either the kernel OOM killer or a userspace handler), don't go to
   sleep in the charge context.  Instead, remember the OOMing memcg in
   the task struct and then fully unwind the page fault stack with
   -ENOMEM.  pagefault_out_of_memory() will then call back into the
   memcg code to check if the -ENOMEM came from the memcg, and then
   either put the task to sleep on the memcg's OOM waitqueue or just
   restart the fault.  The OOM victim can no longer get stuck on any
   lock a sleeping task may hold.

Debugged by Michal Hocko.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: azurIt <azurit@pobox.sk>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/memcontrol.h |  21 +++++
 include/linux/sched.h      |   4 +
 mm/memcontrol.c            | 154 ++++++++++++++++++++++++++-----------
 mm/memory.c                |   3 +
 mm/oom_kill.c              |   7 +-
 5 files changed, 140 insertions(+), 49 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 2c911c95b1ac..64591ffc2e2c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -130,6 +130,10 @@ extern void mem_cgroup_replace_page_cache(struct page *oldpage,
  *
  * Toggle whether a failed memcg charge should invoke the OOM killer
  * or just return -ENOMEM.  Returns the previous toggle state.
+ *
+ * NOTE: Any path that enables the OOM killer before charging must
+ *       call mem_cgroup_oom_synchronize() afterward to finalize the
+ *       OOM handling and clean up.
  */
 static inline bool mem_cgroup_toggle_oom(bool new)
 {
@@ -155,6 +159,13 @@ static inline void mem_cgroup_disable_oom(void)
 	WARN_ON(old == false);
 }
 
+static inline bool task_in_memcg_oom(struct task_struct *p)
+{
+	return p->memcg_oom.in_memcg_oom;
+}
+
+bool mem_cgroup_oom_synchronize(void);
+
 #ifdef CONFIG_MEMCG_SWAP
 extern int do_swap_account;
 #endif
@@ -391,6 +402,16 @@ static inline void mem_cgroup_disable_oom(void)
 {
 }
 
+static inline bool task_in_memcg_oom(struct task_struct *p)
+{
+	return false;
+}
+
+static inline bool mem_cgroup_oom_synchronize(void)
+{
+	return false;
+}
+
 static inline void mem_cgroup_inc_page_stat(struct page *page,
 					    enum mem_cgroup_page_stat_item idx)
 {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 42a58ce480bc..a1b7e6ee453b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1413,6 +1413,10 @@ struct task_struct {
 	unsigned int memcg_kmem_skip_account;
 	struct memcg_oom_info {
 		unsigned int may_oom:1;
+		unsigned int in_memcg_oom:1;
+		unsigned int oom_locked:1;
+		int wakeups;
+		struct mem_cgroup *wait_on_memcg;
 	} memcg_oom;
 #endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 333bb91ee3f2..7849660665d7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -302,6 +302,7 @@ struct mem_cgroup {
 
 	bool		oom_lock;
 	atomic_t	under_oom;
+	atomic_t	oom_wakeups;
 
 	atomic_t	refcnt;
 
@@ -2179,6 +2180,7 @@ static int memcg_oom_wake_function(wait_queue_t *wait,
 
 static void memcg_wakeup_oom(struct mem_cgroup *memcg)
 {
+	atomic_inc(&memcg->oom_wakeups);
 	/* for filtering, pass "memcg" as argument. */
 	__wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg);
 }
@@ -2190,19 +2192,17 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
 }
 
 /*
- * try to call OOM killer. returns false if we should exit memory-reclaim loop.
+ * try to call OOM killer
  */
-static bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask,
-				  int order)
+static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 {
-	struct oom_wait_info owait;
 	bool locked;
+	int wakeups;
 
-	owait.memcg = memcg;
-	owait.wait.flags = 0;
-	owait.wait.func = memcg_oom_wake_function;
-	owait.wait.private = current;
-	INIT_LIST_HEAD(&owait.wait.task_list);
+	if (!current->memcg_oom.may_oom)
+		return;
+
+	current->memcg_oom.in_memcg_oom = 1;
 
 	/*
 	 * As with any blocking lock, a contender needs to start
@@ -2210,12 +2210,8 @@ static bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask,
 	 * otherwise it can miss the wakeup from the unlock and sleep
 	 * indefinitely.  This is just open-coded because our locking
 	 * is so particular to memcg hierarchies.
-	 *
-	 * Even if signal_pending(), we can't quit charge() loop without
-	 * accounting. So, UNINTERRUPTIBLE is appropriate. But SIGKILL
-	 * under OOM is always welcomed, use TASK_KILLABLE here.
 	 */
-	prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
+	wakeups = atomic_read(&memcg->oom_wakeups);
 	mem_cgroup_mark_under_oom(memcg);
 
 	locked = mem_cgroup_oom_trylock(memcg);
@@ -2225,15 +2221,95 @@ static bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask,
 
 	if (locked && !memcg->oom_kill_disable) {
 		mem_cgroup_unmark_under_oom(memcg);
-		finish_wait(&memcg_oom_waitq, &owait.wait);
 		mem_cgroup_out_of_memory(memcg, mask, order);
+		mem_cgroup_oom_unlock(memcg);
+		/*
+		 * There is no guarantee that an OOM-lock contender
+		 * sees the wakeups triggered by the OOM kill
+		 * uncharges.  Wake any sleepers explicitely.
+		 */
+		memcg_oom_recover(memcg);
 	} else {
-		schedule();
-		mem_cgroup_unmark_under_oom(memcg);
-		finish_wait(&memcg_oom_waitq, &owait.wait);
+		/*
+		 * A system call can just return -ENOMEM, but if this
+		 * is a page fault and somebody else is handling the
+		 * OOM already, we need to sleep on the OOM waitqueue
+		 * for this memcg until the situation is resolved.
+		 * Which can take some time because it might be
+		 * handled by a userspace task.
+		 *
+		 * However, this is the charge context, which means
+		 * that we may sit on a large call stack and hold
+		 * various filesystem locks, the mmap_sem etc. and we
+		 * don't want the OOM handler to deadlock on them
+		 * while we sit here and wait.  Store the current OOM
+		 * context in the task_struct, then return -ENOMEM.
+		 * At the end of the page fault handler, with the
+		 * stack unwound, pagefault_out_of_memory() will check
+		 * back with us by calling
+		 * mem_cgroup_oom_synchronize(), possibly putting the
+		 * task to sleep.
+		 */
+		current->memcg_oom.oom_locked = locked;
+		current->memcg_oom.wakeups = wakeups;
+		css_get(&memcg->css);
+		current->memcg_oom.wait_on_memcg = memcg;
 	}
+}
 
-	if (locked) {
+/**
+ * mem_cgroup_oom_synchronize - complete memcg OOM handling
+ *
+ * This has to be called at the end of a page fault if the the memcg
+ * OOM handler was enabled and the fault is returning %VM_FAULT_OOM.
+ *
+ * Memcg supports userspace OOM handling, so failed allocations must
+ * sleep on a waitqueue until the userspace task resolves the
+ * situation.  Sleeping directly in the charge context with all kinds
+ * of locks held is not a good idea, instead we remember an OOM state
+ * in the task and mem_cgroup_oom_synchronize() has to be called at
+ * the end of the page fault to put the task to sleep and clean up the
+ * OOM state.
+ *
+ * Returns %true if an ongoing memcg OOM situation was detected and
+ * finalized, %false otherwise.
+ */
+bool mem_cgroup_oom_synchronize(void)
+{
+	struct oom_wait_info owait;
+	struct mem_cgroup *memcg;
+
+	/* OOM is global, do not handle */
+	if (!current->memcg_oom.in_memcg_oom)
+		return false;
+
+	/*
+	 * We invoked the OOM killer but there is a chance that a kill
+	 * did not free up any charges.  Everybody else might already
+	 * be sleeping, so restart the fault and keep the rampage
+	 * going until some charges are released.
+	 */
+	memcg = current->memcg_oom.wait_on_memcg;
+	if (!memcg)
+		goto out;
+
+	if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
+		goto out_memcg;
+
+	owait.memcg = memcg;
+	owait.wait.flags = 0;
+	owait.wait.func = memcg_oom_wake_function;
+	owait.wait.private = current;
+	INIT_LIST_HEAD(&owait.wait.task_list);
+
+	prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
+	/* Only sleep if we didn't miss any wakeups since OOM */
+	if (atomic_read(&memcg->oom_wakeups) == current->memcg_oom.wakeups)
+		schedule();
+	finish_wait(&memcg_oom_waitq, &owait.wait);
+out_memcg:
+	mem_cgroup_unmark_under_oom(memcg);
+	if (current->memcg_oom.oom_locked) {
 		mem_cgroup_oom_unlock(memcg);
 		/*
 		 * There is no guarantee that an OOM-lock contender
@@ -2242,11 +2318,10 @@ static bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask,
 		 */
 		memcg_oom_recover(memcg);
 	}
-
-	if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
-		return false;
-	/* Give chance to dying process */
-	schedule_timeout_uninterruptible(1);
+	css_put(&memcg->css);
+	current->memcg_oom.wait_on_memcg = NULL;
+out:
+	current->memcg_oom.in_memcg_oom = 0;
 	return true;
 }
 
@@ -2559,12 +2634,11 @@ enum {
 	CHARGE_RETRY,		/* need to retry but retry is not bad */
 	CHARGE_NOMEM,		/* we can't do more. return -ENOMEM */
 	CHARGE_WOULDBLOCK,	/* GFP_WAIT wasn't set and no enough res. */
-	CHARGE_OOM_DIE,		/* the current is killed because of OOM */
 };
 
 static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 				unsigned int nr_pages, unsigned int min_pages,
-				bool oom_check)
+				bool invoke_oom)
 {
 	unsigned long csize = nr_pages * PAGE_SIZE;
 	struct mem_cgroup *mem_over_limit;
@@ -2621,14 +2695,10 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	if (mem_cgroup_wait_acct_move(mem_over_limit))
 		return CHARGE_RETRY;
 
-	/* If we don't need to call oom-killer at el, return immediately */
-	if (!oom_check || !current->memcg_oom.may_oom)
-		return CHARGE_NOMEM;
-	/* check OOM */
-	if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask, get_order(csize)))
-		return CHARGE_OOM_DIE;
+	if (invoke_oom)
+		mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(csize));
 
-	return CHARGE_RETRY;
+	return CHARGE_NOMEM;
 }
 
 /*
@@ -2731,7 +2801,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	}
 
 	do {
-		bool oom_check;
+		bool invoke_oom = oom && !nr_oom_retries;
 
 		/* If killed, bypass charge */
 		if (fatal_signal_pending(current)) {
@@ -2739,14 +2809,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 			goto bypass;
 		}
 
-		oom_check = false;
-		if (oom && !nr_oom_retries) {
-			oom_check = true;
-			nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
-		}
-
-		ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, nr_pages,
-		    oom_check);
+		ret = mem_cgroup_do_charge(memcg, gfp_mask, batch,
+					   nr_pages, invoke_oom);
 		switch (ret) {
 		case CHARGE_OK:
 			break;
@@ -2759,16 +2823,12 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 			css_put(&memcg->css);
 			goto nomem;
 		case CHARGE_NOMEM: /* OOM routine works */
-			if (!oom) {
+			if (!oom || invoke_oom) {
 				css_put(&memcg->css);
 				goto nomem;
 			}
-			/* If oom, we never return -ENOMEM */
 			nr_oom_retries--;
 			break;
-		case CHARGE_OOM_DIE: /* Killed by OOM Killer */
-			css_put(&memcg->css);
-			goto bypass;
 		}
 	} while (ret != CHARGE_OK);
 
diff --git a/mm/memory.c b/mm/memory.c
index 59f450c5c0a3..c4ce987745e4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3867,6 +3867,9 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (flags & FAULT_FLAG_USER)
 		mem_cgroup_disable_oom();
 
+	if (WARN_ON(task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)))
+		mem_cgroup_oom_synchronize();
+
 	return ret;
 }
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f104c7e9f61e..1a582e3aee3e 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -702,9 +702,12 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
  */
 void pagefault_out_of_memory(void)
 {
-	struct zonelist *zonelist = node_zonelist(first_online_node,
-						  GFP_KERNEL);
+	struct zonelist *zonelist;
 
+	if (mem_cgroup_oom_synchronize())
+		return;
+
+	zonelist = node_zonelist(first_online_node, GFP_KERNEL);
 	if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) {
 		out_of_memory(NULL, 0, 0, NULL, false);
 		clear_zonelist_oom(zonelist, GFP_KERNEL);

From f8a5117916dd2871c056963bf5ee0d1101c10099 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Wed, 16 Oct 2013 13:46:59 -0700
Subject: [PATCH 1180/1185] mm: memcg: handle non-error OOM situations more
 gracefully

commit 4942642080ea82d99ab5b653abb9a12b7ba31f4a upstream.

Commit 3812c8c8f395 ("mm: memcg: do not trap chargers with full
callstack on OOM") assumed that only a few places that can trigger a
memcg OOM situation do not return VM_FAULT_OOM, like optional page cache
readahead.  But there are many more and it's impractical to annotate
them all.

First of all, we don't want to invoke the OOM killer when the failed
allocation is gracefully handled, so defer the actual kill to the end of
the fault handling as well.  As an added bonus, this simplifies the code
quite a bit.

Second, since a failed allocation might not be the abrupt end of the
fault, the memcg OOM handler needs to be re-entrant until the fault
finishes for subsequent allocation attempts.  If an allocation is
attempted after the task already OOMed, allow it to bypass the limit so
that it can quickly finish the fault and invoke the OOM killer.

Reported-by: azurIt <azurit@pobox.sk>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/memcontrol.h |  50 +++--------
 include/linux/sched.h      |   7 +-
 mm/filemap.c               |  11 +--
 mm/memcontrol.c            | 173 +++++++++++++++----------------------
 mm/memory.c                |  18 ++--
 mm/oom_kill.c              |   2 +-
 6 files changed, 96 insertions(+), 165 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 64591ffc2e2c..a3b4812f494f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -124,47 +124,24 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 extern void mem_cgroup_replace_page_cache(struct page *oldpage,
 					struct page *newpage);
 
-/**
- * mem_cgroup_toggle_oom - toggle the memcg OOM killer for the current task
- * @new: true to enable, false to disable
- *
- * Toggle whether a failed memcg charge should invoke the OOM killer
- * or just return -ENOMEM.  Returns the previous toggle state.
- *
- * NOTE: Any path that enables the OOM killer before charging must
- *       call mem_cgroup_oom_synchronize() afterward to finalize the
- *       OOM handling and clean up.
- */
-static inline bool mem_cgroup_toggle_oom(bool new)
+static inline void mem_cgroup_oom_enable(void)
 {
-	bool old;
-
-	old = current->memcg_oom.may_oom;
-	current->memcg_oom.may_oom = new;
-
-	return old;
+	WARN_ON(current->memcg_oom.may_oom);
+	current->memcg_oom.may_oom = 1;
 }
 
-static inline void mem_cgroup_enable_oom(void)
+static inline void mem_cgroup_oom_disable(void)
 {
-	bool old = mem_cgroup_toggle_oom(true);
-
-	WARN_ON(old == true);
-}
-
-static inline void mem_cgroup_disable_oom(void)
-{
-	bool old = mem_cgroup_toggle_oom(false);
-
-	WARN_ON(old == false);
+	WARN_ON(!current->memcg_oom.may_oom);
+	current->memcg_oom.may_oom = 0;
 }
 
 static inline bool task_in_memcg_oom(struct task_struct *p)
 {
-	return p->memcg_oom.in_memcg_oom;
+	return p->memcg_oom.memcg;
 }
 
-bool mem_cgroup_oom_synchronize(void);
+bool mem_cgroup_oom_synchronize(bool wait);
 
 #ifdef CONFIG_MEMCG_SWAP
 extern int do_swap_account;
@@ -389,16 +366,11 @@ static inline void mem_cgroup_end_update_page_stat(struct page *page,
 {
 }
 
-static inline bool mem_cgroup_toggle_oom(bool new)
-{
-	return false;
-}
-
-static inline void mem_cgroup_enable_oom(void)
+static inline void mem_cgroup_oom_enable(void)
 {
 }
 
-static inline void mem_cgroup_disable_oom(void)
+static inline void mem_cgroup_oom_disable(void)
 {
 }
 
@@ -407,7 +379,7 @@ static inline bool task_in_memcg_oom(struct task_struct *p)
 	return false;
 }
 
-static inline bool mem_cgroup_oom_synchronize(void)
+static inline bool mem_cgroup_oom_synchronize(bool wait)
 {
 	return false;
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a1b7e6ee453b..00c1d4f45072 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1412,11 +1412,10 @@ struct task_struct {
 	} memcg_batch;
 	unsigned int memcg_kmem_skip_account;
 	struct memcg_oom_info {
+		struct mem_cgroup *memcg;
+		gfp_t gfp_mask;
+		int order;
 		unsigned int may_oom:1;
-		unsigned int in_memcg_oom:1;
-		unsigned int oom_locked:1;
-		int wakeups;
-		struct mem_cgroup *wait_on_memcg;
 	} memcg_oom;
 #endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/mm/filemap.c b/mm/filemap.c
index c466f4c449e9..7905fe721aa8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1614,7 +1614,6 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct inode *inode = mapping->host;
 	pgoff_t offset = vmf->pgoff;
 	struct page *page;
-	bool memcg_oom;
 	pgoff_t size;
 	int ret = 0;
 
@@ -1623,11 +1622,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 
 	/*
-	 * Do we have something in the page cache already?  Either
-	 * way, try readahead, but disable the memcg OOM killer for it
-	 * as readahead is optional and no errors are propagated up
-	 * the fault stack.  The OOM killer is enabled while trying to
-	 * instantiate the faulting page individually below.
+	 * Do we have something in the page cache already?
 	 */
 	page = find_get_page(mapping, offset);
 	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
@@ -1635,14 +1630,10 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		 * We found the page, so try async readahead before
 		 * waiting for the lock.
 		 */
-		memcg_oom = mem_cgroup_toggle_oom(false);
 		do_async_mmap_readahead(vma, ra, file, page, offset);
-		mem_cgroup_toggle_oom(memcg_oom);
 	} else if (!page) {
 		/* No page in the page cache at all */
-		memcg_oom = mem_cgroup_toggle_oom(false);
 		do_sync_mmap_readahead(vma, ra, file, offset);
-		mem_cgroup_toggle_oom(memcg_oom);
 		count_vm_event(PGMAJFAULT);
 		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
 		ret = VM_FAULT_MAJOR;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7849660665d7..eaa3accb01e7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2191,27 +2191,67 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
 		memcg_wakeup_oom(memcg);
 }
 
-/*
- * try to call OOM killer
- */
 static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 {
-	bool locked;
-	int wakeups;
-
 	if (!current->memcg_oom.may_oom)
 		return;
-
-	current->memcg_oom.in_memcg_oom = 1;
-
 	/*
-	 * As with any blocking lock, a contender needs to start
-	 * listening for wakeups before attempting the trylock,
-	 * otherwise it can miss the wakeup from the unlock and sleep
-	 * indefinitely.  This is just open-coded because our locking
-	 * is so particular to memcg hierarchies.
+	 * We are in the middle of the charge context here, so we
+	 * don't want to block when potentially sitting on a callstack
+	 * that holds all kinds of filesystem and mm locks.
+	 *
+	 * Also, the caller may handle a failed allocation gracefully
+	 * (like optional page cache readahead) and so an OOM killer
+	 * invocation might not even be necessary.
+	 *
+	 * That's why we don't do anything here except remember the
+	 * OOM context and then deal with it at the end of the page
+	 * fault when the stack is unwound, the locks are released,
+	 * and when we know whether the fault was overall successful.
 	 */
-	wakeups = atomic_read(&memcg->oom_wakeups);
+	css_get(&memcg->css);
+	current->memcg_oom.memcg = memcg;
+	current->memcg_oom.gfp_mask = mask;
+	current->memcg_oom.order = order;
+}
+
+/**
+ * mem_cgroup_oom_synchronize - complete memcg OOM handling
+ * @handle: actually kill/wait or just clean up the OOM state
+ *
+ * This has to be called at the end of a page fault if the memcg OOM
+ * handler was enabled.
+ *
+ * Memcg supports userspace OOM handling where failed allocations must
+ * sleep on a waitqueue until the userspace task resolves the
+ * situation.  Sleeping directly in the charge context with all kinds
+ * of locks held is not a good idea, instead we remember an OOM state
+ * in the task and mem_cgroup_oom_synchronize() has to be called at
+ * the end of the page fault to complete the OOM handling.
+ *
+ * Returns %true if an ongoing memcg OOM situation was detected and
+ * completed, %false otherwise.
+ */
+bool mem_cgroup_oom_synchronize(bool handle)
+{
+	struct mem_cgroup *memcg = current->memcg_oom.memcg;
+	struct oom_wait_info owait;
+	bool locked;
+
+	/* OOM is global, do not handle */
+	if (!memcg)
+		return false;
+
+	if (!handle)
+		goto cleanup;
+
+	owait.memcg = memcg;
+	owait.wait.flags = 0;
+	owait.wait.func = memcg_oom_wake_function;
+	owait.wait.private = current;
+	INIT_LIST_HEAD(&owait.wait.task_list);
+
+	prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
 	mem_cgroup_mark_under_oom(memcg);
 
 	locked = mem_cgroup_oom_trylock(memcg);
@@ -2221,95 +2261,16 @@ static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 
 	if (locked && !memcg->oom_kill_disable) {
 		mem_cgroup_unmark_under_oom(memcg);
-		mem_cgroup_out_of_memory(memcg, mask, order);
-		mem_cgroup_oom_unlock(memcg);
-		/*
-		 * There is no guarantee that an OOM-lock contender
-		 * sees the wakeups triggered by the OOM kill
-		 * uncharges.  Wake any sleepers explicitely.
-		 */
-		memcg_oom_recover(memcg);
+		finish_wait(&memcg_oom_waitq, &owait.wait);
+		mem_cgroup_out_of_memory(memcg, current->memcg_oom.gfp_mask,
+					 current->memcg_oom.order);
 	} else {
-		/*
-		 * A system call can just return -ENOMEM, but if this
-		 * is a page fault and somebody else is handling the
-		 * OOM already, we need to sleep on the OOM waitqueue
-		 * for this memcg until the situation is resolved.
-		 * Which can take some time because it might be
-		 * handled by a userspace task.
-		 *
-		 * However, this is the charge context, which means
-		 * that we may sit on a large call stack and hold
-		 * various filesystem locks, the mmap_sem etc. and we
-		 * don't want the OOM handler to deadlock on them
-		 * while we sit here and wait.  Store the current OOM
-		 * context in the task_struct, then return -ENOMEM.
-		 * At the end of the page fault handler, with the
-		 * stack unwound, pagefault_out_of_memory() will check
-		 * back with us by calling
-		 * mem_cgroup_oom_synchronize(), possibly putting the
-		 * task to sleep.
-		 */
-		current->memcg_oom.oom_locked = locked;
-		current->memcg_oom.wakeups = wakeups;
-		css_get(&memcg->css);
-		current->memcg_oom.wait_on_memcg = memcg;
-	}
-}
-
-/**
- * mem_cgroup_oom_synchronize - complete memcg OOM handling
- *
- * This has to be called at the end of a page fault if the the memcg
- * OOM handler was enabled and the fault is returning %VM_FAULT_OOM.
- *
- * Memcg supports userspace OOM handling, so failed allocations must
- * sleep on a waitqueue until the userspace task resolves the
- * situation.  Sleeping directly in the charge context with all kinds
- * of locks held is not a good idea, instead we remember an OOM state
- * in the task and mem_cgroup_oom_synchronize() has to be called at
- * the end of the page fault to put the task to sleep and clean up the
- * OOM state.
- *
- * Returns %true if an ongoing memcg OOM situation was detected and
- * finalized, %false otherwise.
- */
-bool mem_cgroup_oom_synchronize(void)
-{
-	struct oom_wait_info owait;
-	struct mem_cgroup *memcg;
-
-	/* OOM is global, do not handle */
-	if (!current->memcg_oom.in_memcg_oom)
-		return false;
-
-	/*
-	 * We invoked the OOM killer but there is a chance that a kill
-	 * did not free up any charges.  Everybody else might already
-	 * be sleeping, so restart the fault and keep the rampage
-	 * going until some charges are released.
-	 */
-	memcg = current->memcg_oom.wait_on_memcg;
-	if (!memcg)
-		goto out;
-
-	if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
-		goto out_memcg;
-
-	owait.memcg = memcg;
-	owait.wait.flags = 0;
-	owait.wait.func = memcg_oom_wake_function;
-	owait.wait.private = current;
-	INIT_LIST_HEAD(&owait.wait.task_list);
-
-	prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
-	/* Only sleep if we didn't miss any wakeups since OOM */
-	if (atomic_read(&memcg->oom_wakeups) == current->memcg_oom.wakeups)
 		schedule();
-	finish_wait(&memcg_oom_waitq, &owait.wait);
-out_memcg:
-	mem_cgroup_unmark_under_oom(memcg);
-	if (current->memcg_oom.oom_locked) {
+		mem_cgroup_unmark_under_oom(memcg);
+		finish_wait(&memcg_oom_waitq, &owait.wait);
+	}
+
+	if (locked) {
 		mem_cgroup_oom_unlock(memcg);
 		/*
 		 * There is no guarantee that an OOM-lock contender
@@ -2318,10 +2279,9 @@ bool mem_cgroup_oom_synchronize(void)
 		 */
 		memcg_oom_recover(memcg);
 	}
+cleanup:
+	current->memcg_oom.memcg = NULL;
 	css_put(&memcg->css);
-	current->memcg_oom.wait_on_memcg = NULL;
-out:
-	current->memcg_oom.in_memcg_oom = 0;
 	return true;
 }
 
@@ -2742,6 +2702,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 		     || fatal_signal_pending(current)))
 		goto bypass;
 
+	if (unlikely(task_in_memcg_oom(current)))
+		goto bypass;
+
 	/*
 	 * We always charge the cgroup the mm_struct belongs to.
 	 * The mm_struct's mem_cgroup changes on task migration if the
diff --git a/mm/memory.c b/mm/memory.c
index c4ce987745e4..0984f398d746 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3860,15 +3860,21 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * space.  Kernel faults are handled more gracefully.
 	 */
 	if (flags & FAULT_FLAG_USER)
-		mem_cgroup_enable_oom();
+		mem_cgroup_oom_enable();
 
 	ret = __handle_mm_fault(mm, vma, address, flags);
 
-	if (flags & FAULT_FLAG_USER)
-		mem_cgroup_disable_oom();
-
-	if (WARN_ON(task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)))
-		mem_cgroup_oom_synchronize();
+	if (flags & FAULT_FLAG_USER) {
+		mem_cgroup_oom_disable();
+                /*
+                 * The task may have entered a memcg OOM situation but
+                 * if the allocation error was handled gracefully (no
+                 * VM_FAULT_OOM), there is no need to kill anything.
+                 * Just clean up the OOM state peacefully.
+                 */
+                if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))
+                        mem_cgroup_oom_synchronize(false);
+	}
 
 	return ret;
 }
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 1a582e3aee3e..4d87d7c4ed2e 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -704,7 +704,7 @@ void pagefault_out_of_memory(void)
 {
 	struct zonelist *zonelist;
 
-	if (mem_cgroup_oom_synchronize())
+	if (mem_cgroup_oom_synchronize(true))
 		return;
 
 	zonelist = node_zonelist(first_online_node, GFP_KERNEL);

From 252f23ea5987a4730e3399ef1ad5d78efcc786c9 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 21 Nov 2014 09:23:22 -0800
Subject: [PATCH 1181/1185] Linux 3.10.61

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 9d4f30d0d201..0d5ba80786b8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 10
-SUBLEVEL = 60
+SUBLEVEL = 61
 EXTRAVERSION =
 NAME = TOSSUG Baby Fish
 

From da1660d32ffdc084879b55da0580e5bef9c23907 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Thu, 12 Jun 2014 22:30:34 +0530
Subject: [PATCH 1182/1185] mailbox: rename pl320-ipc specific mailbox.h

The patch 30058677 "ARM / highbank: add support for pl320 IPC"
added a pl320 IPC specific header file as a generic mailbox.h.
This file has been renamed appropriately to allow the
introduction of the generic mailbox API framework.

Acked-by: Mark Langsdorf <mark.langsdorf@calxeda.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Suman Anna <s-anna@ti.com>
Reviewed-by: Mark Brown <broonie@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
(cherry picked from commit f2fc42b6ac31f4d808da7a9da460dd433a71e976)
Signed-off-by: Mark Brown <broonie@kernel.org>

Conflicts:
	arch/arm/mach-highbank/highbank.c
---
 arch/arm/mach-highbank/highbank.c        | 1 +
 drivers/cpufreq/highbank-cpufreq.c       | 2 +-
 drivers/mailbox/pl320-ipc.c              | 2 +-
 include/linux/{mailbox.h => pl320-ipc.h} | 0
 4 files changed, 3 insertions(+), 2 deletions(-)
 rename include/linux/{mailbox.h => pl320-ipc.h} (100%)

diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c
index e7df2dd43a40..eec13a1fbb25 100644
--- a/arch/arm/mach-highbank/highbank.c
+++ b/arch/arm/mach-highbank/highbank.c
@@ -21,6 +21,7 @@
 #include <linux/irq.h>
 #include <linux/irqchip.h>
 #include <linux/irqdomain.h>
+#include <linux/pl320-ipc.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c
index b61b5a3fad64..3118b87a37bc 100644
--- a/drivers/cpufreq/highbank-cpufreq.c
+++ b/drivers/cpufreq/highbank-cpufreq.c
@@ -19,7 +19,7 @@
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/of.h>
-#include <linux/mailbox.h>
+#include <linux/pl320-ipc.h>
 #include <linux/platform_device.h>
 
 #define HB_CPUFREQ_CHANGE_NOTE	0x80000001
diff --git a/drivers/mailbox/pl320-ipc.c b/drivers/mailbox/pl320-ipc.c
index d873cbae2fbb..f3755e0aa935 100644
--- a/drivers/mailbox/pl320-ipc.c
+++ b/drivers/mailbox/pl320-ipc.c
@@ -26,7 +26,7 @@
 #include <linux/device.h>
 #include <linux/amba/bus.h>
 
-#include <linux/mailbox.h>
+#include <linux/pl320-ipc.h>
 
 #define IPCMxSOURCE(m)		((m) * 0x40)
 #define IPCMxDSET(m)		(((m) * 0x40) + 0x004)
diff --git a/include/linux/mailbox.h b/include/linux/pl320-ipc.h
similarity index 100%
rename from include/linux/mailbox.h
rename to include/linux/pl320-ipc.h

From e28a642ae1696c5c8936a18bb82ff15b1ff5d61a Mon Sep 17 00:00:00 2001
From: Jassi Brar <jaswinder.singh@linaro.org>
Date: Thu, 12 Jun 2014 22:31:19 +0530
Subject: [PATCH 1183/1185] mailbox: Introduce framework for mailbox

Introduce common framework for client/protocol drivers and
controller drivers of Inter-Processor-Communication (IPC).

Client driver developers should have a look at
include/linux/mailbox_client.h to understand the part of
the API exposed to client drivers.
Similarly, controller driver developers should have a look
at include/linux/mailbox_controller.h.

Reviewed-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
(cherry picked from commit 2b6d83e2b8b7de82331a6a1dcd64b51020a6031c)
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 MAINTAINERS                        |   8 +
 drivers/mailbox/Makefile           |   4 +
 drivers/mailbox/mailbox.c          | 465 +++++++++++++++++++++++++++++
 include/linux/mailbox_client.h     |  46 +++
 include/linux/mailbox_controller.h | 133 +++++++++
 5 files changed, 656 insertions(+)
 create mode 100644 drivers/mailbox/mailbox.c
 create mode 100644 include/linux/mailbox_client.h
 create mode 100644 include/linux/mailbox_controller.h

diff --git a/MAINTAINERS b/MAINTAINERS
index ad7e322ad17b..d5a14e676330 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5152,6 +5152,14 @@ S:	Maintained
 F:	drivers/net/macvlan.c
 F:	include/linux/if_macvlan.h
 
+MAILBOX API
+M:	Jassi Brar <jassisinghbrar@gmail.com>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/mailbox/
+F:	include/linux/mailbox_client.h
+F:	include/linux/mailbox_controller.h
+
 MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
 M:	Michael Kerrisk <mtk.manpages@gmail.com>
 W:	http://www.kernel.org/doc/man-pages
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index 543ad6a79505..fefef7ebcbec 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -1 +1,5 @@
+# Generic MAILBOX API
+
+obj-$(CONFIG_MAILBOX)		+= mailbox.o
+
 obj-$(CONFIG_PL320_MBOX)	+= pl320-ipc.o
diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
new file mode 100644
index 000000000000..afcb430508ec
--- /dev/null
+++ b/drivers/mailbox/mailbox.c
@@ -0,0 +1,465 @@
+/*
+ * Mailbox: Common code for Mailbox controllers and users
+ *
+ * Copyright (C) 2013-2014 Linaro Ltd.
+ * Author: Jassi Brar <jassisinghbrar@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/bitops.h>
+#include <linux/mailbox_client.h>
+#include <linux/mailbox_controller.h>
+
+#define TXDONE_BY_IRQ	BIT(0) /* controller has remote RTR irq */
+#define TXDONE_BY_POLL	BIT(1) /* controller can read status of last TX */
+#define TXDONE_BY_ACK	BIT(2) /* S/W ACK received by Client ticks the TX */
+
+static LIST_HEAD(mbox_cons);
+static DEFINE_MUTEX(con_mutex);
+
+static int add_to_rbuf(struct mbox_chan *chan, void *mssg)
+{
+	int idx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	/* See if there is any space left */
+	if (chan->msg_count == MBOX_TX_QUEUE_LEN) {
+		spin_unlock_irqrestore(&chan->lock, flags);
+		return -ENOBUFS;
+	}
+
+	idx = chan->msg_free;
+	chan->msg_data[idx] = mssg;
+	chan->msg_count++;
+
+	if (idx == MBOX_TX_QUEUE_LEN - 1)
+		chan->msg_free = 0;
+	else
+		chan->msg_free++;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return idx;
+}
+
+static void msg_submit(struct mbox_chan *chan)
+{
+	unsigned count, idx;
+	unsigned long flags;
+	void *data;
+	int err;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	if (!chan->msg_count || chan->active_req)
+		goto exit;
+
+	count = chan->msg_count;
+	idx = chan->msg_free;
+	if (idx >= count)
+		idx -= count;
+	else
+		idx += MBOX_TX_QUEUE_LEN - count;
+
+	data = chan->msg_data[idx];
+
+	/* Try to submit a message to the MBOX controller */
+	err = chan->mbox->ops->send_data(chan, data);
+	if (!err) {
+		chan->active_req = data;
+		chan->msg_count--;
+	}
+exit:
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+static void tx_tick(struct mbox_chan *chan, int r)
+{
+	unsigned long flags;
+	void *mssg;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	mssg = chan->active_req;
+	chan->active_req = NULL;
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	/* Submit next message */
+	msg_submit(chan);
+
+	/* Notify the client */
+	if (mssg && chan->cl->tx_done)
+		chan->cl->tx_done(chan->cl, mssg, r);
+
+	if (chan->cl->tx_block)
+		complete(&chan->tx_complete);
+}
+
+static void poll_txdone(unsigned long data)
+{
+	struct mbox_controller *mbox = (struct mbox_controller *)data;
+	bool txdone, resched = false;
+	int i;
+
+	for (i = 0; i < mbox->num_chans; i++) {
+		struct mbox_chan *chan = &mbox->chans[i];
+
+		if (chan->active_req && chan->cl) {
+			resched = true;
+			txdone = chan->mbox->ops->last_tx_done(chan);
+			if (txdone)
+				tx_tick(chan, 0);
+		}
+	}
+
+	if (resched)
+		mod_timer(&mbox->poll, jiffies +
+				msecs_to_jiffies(mbox->txpoll_period));
+}
+
+/**
+ * mbox_chan_received_data - A way for controller driver to push data
+ *				received from remote to the upper layer.
+ * @chan: Pointer to the mailbox channel on which RX happened.
+ * @mssg: Client specific message typecasted as void *
+ *
+ * After startup and before shutdown any data received on the chan
+ * is passed on to the API via atomic mbox_chan_received_data().
+ * The controller should ACK the RX only after this call returns.
+ */
+void mbox_chan_received_data(struct mbox_chan *chan, void *mssg)
+{
+	/* No buffering the received data */
+	if (chan->cl->rx_callback)
+		chan->cl->rx_callback(chan->cl, mssg);
+}
+EXPORT_SYMBOL_GPL(mbox_chan_received_data);
+
+/**
+ * mbox_chan_txdone - A way for controller driver to notify the
+ *			framework that the last TX has completed.
+ * @chan: Pointer to the mailbox chan on which TX happened.
+ * @r: Status of last TX - OK or ERROR
+ *
+ * The controller that has IRQ for TX ACK calls this atomic API
+ * to tick the TX state machine. It works only if txdone_irq
+ * is set by the controller.
+ */
+void mbox_chan_txdone(struct mbox_chan *chan, int r)
+{
+	if (unlikely(!(chan->txdone_method & TXDONE_BY_IRQ))) {
+		dev_err(chan->mbox->dev,
+		       "Controller can't run the TX ticker\n");
+		return;
+	}
+
+	tx_tick(chan, r);
+}
+EXPORT_SYMBOL_GPL(mbox_chan_txdone);
+
+/**
+ * mbox_client_txdone - The way for a client to run the TX state machine.
+ * @chan: Mailbox channel assigned to this client.
+ * @r: Success status of last transmission.
+ *
+ * The client/protocol had received some 'ACK' packet and it notifies
+ * the API that the last packet was sent successfully. This only works
+ * if the controller can't sense TX-Done.
+ */
+void mbox_client_txdone(struct mbox_chan *chan, int r)
+{
+	if (unlikely(!(chan->txdone_method & TXDONE_BY_ACK))) {
+		dev_err(chan->mbox->dev, "Client can't run the TX ticker\n");
+		return;
+	}
+
+	tx_tick(chan, r);
+}
+EXPORT_SYMBOL_GPL(mbox_client_txdone);
+
+/**
+ * mbox_client_peek_data - A way for client driver to pull data
+ *			received from remote by the controller.
+ * @chan: Mailbox channel assigned to this client.
+ *
+ * A poke to controller driver for any received data.
+ * The data is actually passed onto client via the
+ * mbox_chan_received_data()
+ * The call can be made from atomic context, so the controller's
+ * implementation of peek_data() must not sleep.
+ *
+ * Return: True, if controller has, and is going to push after this,
+ *          some data.
+ *         False, if controller doesn't have any data to be read.
+ */
+bool mbox_client_peek_data(struct mbox_chan *chan)
+{
+	if (chan->mbox->ops->peek_data)
+		return chan->mbox->ops->peek_data(chan);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(mbox_client_peek_data);
+
+/**
+ * mbox_send_message -	For client to submit a message to be
+ *				sent to the remote.
+ * @chan: Mailbox channel assigned to this client.
+ * @mssg: Client specific message typecasted.
+ *
+ * For client to submit data to the controller destined for a remote
+ * processor. If the client had set 'tx_block', the call will return
+ * either when the remote receives the data or when 'tx_tout' millisecs
+ * run out.
+ *  In non-blocking mode, the requests are buffered by the API and a
+ * non-negative token is returned for each queued request. If the request
+ * is not queued, a negative token is returned. Upon failure or successful
+ * TX, the API calls 'tx_done' from atomic context, from which the client
+ * could submit yet another request.
+ * The pointer to message should be preserved until it is sent
+ * over the chan, i.e, tx_done() is made.
+ * This function could be called from atomic context as it simply
+ * queues the data and returns a token against the request.
+ *
+ * Return: Non-negative integer for successful submission (non-blocking mode)
+ *	or transmission over chan (blocking mode).
+ *	Negative value denotes failure.
+ */
+int mbox_send_message(struct mbox_chan *chan, void *mssg)
+{
+	int t;
+
+	if (!chan || !chan->cl)
+		return -EINVAL;
+
+	t = add_to_rbuf(chan, mssg);
+	if (t < 0) {
+		dev_err(chan->mbox->dev, "Try increasing MBOX_TX_QUEUE_LEN\n");
+		return t;
+	}
+
+	msg_submit(chan);
+
+	if (chan->txdone_method == TXDONE_BY_POLL)
+		poll_txdone((unsigned long)chan->mbox);
+
+	if (chan->cl->tx_block && chan->active_req) {
+		unsigned long wait;
+
+		if (!chan->cl->tx_tout) /* wait forever */
+			wait = msecs_to_jiffies(3600000);
+		else
+			wait = msecs_to_jiffies(chan->cl->tx_tout);
+
+		if (!wait_for_completion_timeout(&chan->tx_complete, wait)) {
+			t = -EIO;
+			tx_tick(chan, -EIO);
+			/* Eat the complete() posted by our own abort tick */
+			try_wait_for_completion(&chan->tx_complete);
+		}
+	}
+
+	return t;
+}
+EXPORT_SYMBOL_GPL(mbox_send_message);
+
+/**
+ * mbox_request_channel - Request a mailbox channel.
+ * @cl: Identity of the client requesting the channel.
+ * @index: Index of mailbox specifier in 'mboxes' property.
+ *
+ * The Client specifies its requirements and capabilities while asking for
+ * a mailbox channel. It can't be called from atomic context.
+ * The channel is exclusively allocated and can't be used by another
+ * client before the owner calls mbox_free_channel.
+ * After assignment, any packet received on this channel will be
+ * handed over to the client via the 'rx_callback'.
+ * The framework holds reference to the client, so the mbox_client
+ * structure shouldn't be modified until the mbox_free_channel returns.
+ *
+ * Return: Pointer to the channel assigned to the client if successful.
+ *		ERR_PTR for request failure.
+ */
+struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index)
+{
+	struct device *dev = cl->dev;
+	struct mbox_controller *mbox;
+	struct of_phandle_args spec;
+	struct mbox_chan *chan;
+	unsigned long flags;
+	int ret;
+
+	if (!dev || !dev->of_node) {
+		pr_debug("%s: No owner device node\n", __func__);
+		return ERR_PTR(-ENODEV);
+	}
+
+	mutex_lock(&con_mutex);
+
+	if (of_parse_phandle_with_args(dev->of_node, "mboxes",
+				       "#mbox-cells", index, &spec)) {
+		dev_dbg(dev, "%s: can't parse \"mboxes\" property\n", __func__);
+		mutex_unlock(&con_mutex);
+		return ERR_PTR(-ENODEV);
+	}
+
+	chan = NULL;
+	list_for_each_entry(mbox, &mbox_cons, node)
+		if (mbox->dev->of_node == spec.np) {
+			chan = mbox->of_xlate(mbox, &spec);
+			break;
+		}
+
+	of_node_put(spec.np);
+
+	if (!chan || chan->cl || !try_module_get(mbox->dev->driver->owner)) {
+		dev_dbg(dev, "%s: mailbox not free\n", __func__);
+		mutex_unlock(&con_mutex);
+		return ERR_PTR(-EBUSY);
+	}
+
+	spin_lock_irqsave(&chan->lock, flags);
+	chan->msg_free = 0;
+	chan->msg_count = 0;
+	chan->active_req = NULL;
+	chan->cl = cl;
+	init_completion(&chan->tx_complete);
+
+	if (chan->txdone_method	== TXDONE_BY_POLL && cl->knows_txdone)
+		chan->txdone_method |= TXDONE_BY_ACK;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	ret = chan->mbox->ops->startup(chan);
+	if (ret) {
+		dev_err(dev, "Unable to startup the chan (%d)\n", ret);
+		mbox_free_channel(chan);
+		chan = ERR_PTR(ret);
+	}
+
+	mutex_unlock(&con_mutex);
+	return chan;
+}
+EXPORT_SYMBOL_GPL(mbox_request_channel);
+
+/**
+ * mbox_free_channel - The client relinquishes control of a mailbox
+ *			channel by this call.
+ * @chan: The mailbox channel to be freed.
+ */
+void mbox_free_channel(struct mbox_chan *chan)
+{
+	unsigned long flags;
+
+	if (!chan || !chan->cl)
+		return;
+
+	chan->mbox->ops->shutdown(chan);
+
+	/* The queued TX requests are simply aborted, no callbacks are made */
+	spin_lock_irqsave(&chan->lock, flags);
+	chan->cl = NULL;
+	chan->active_req = NULL;
+	if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))
+		chan->txdone_method = TXDONE_BY_POLL;
+
+	module_put(chan->mbox->dev->driver->owner);
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+EXPORT_SYMBOL_GPL(mbox_free_channel);
+
+static struct mbox_chan *
+of_mbox_index_xlate(struct mbox_controller *mbox,
+		    const struct of_phandle_args *sp)
+{
+	int ind = sp->args[0];
+
+	/* args[] is u32: also reject cells that wrap to a negative index */
+	if (ind < 0 || ind >= mbox->num_chans)
+		return NULL;
+	return &mbox->chans[ind];
+}
+
+/**
+ * mbox_controller_register - Register the mailbox controller
+ * @mbox:	Pointer to the mailbox controller.
+ *
+ * The controller driver registers its communication channels
+ */
+int mbox_controller_register(struct mbox_controller *mbox)
+{
+	int i, txdone;
+
+	/* Sanity check */
+	if (!mbox || !mbox->dev || !mbox->ops || !mbox->num_chans)
+		return -EINVAL;
+
+	if (mbox->txdone_irq)
+		txdone = TXDONE_BY_IRQ;
+	else if (mbox->txdone_poll)
+		txdone = TXDONE_BY_POLL;
+	else /* It has to be ACK then */
+		txdone = TXDONE_BY_ACK;
+
+	if (txdone == TXDONE_BY_POLL) {
+		mbox->poll.function = &poll_txdone;
+		mbox->poll.data = (unsigned long)mbox;
+		init_timer(&mbox->poll);
+	}
+
+	for (i = 0; i < mbox->num_chans; i++) {
+		struct mbox_chan *chan = &mbox->chans[i];
+
+		chan->cl = NULL;
+		chan->mbox = mbox;
+		chan->txdone_method = txdone;
+		spin_lock_init(&chan->lock);
+	}
+
+	if (!mbox->of_xlate)
+		mbox->of_xlate = of_mbox_index_xlate;
+
+	mutex_lock(&con_mutex);
+	list_add_tail(&mbox->node, &mbox_cons);
+	mutex_unlock(&con_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mbox_controller_register);
+
+/**
+ * mbox_controller_unregister - Unregister the mailbox controller
+ * @mbox:	Pointer to the mailbox controller.
+ */
+void mbox_controller_unregister(struct mbox_controller *mbox)
+{
+	int i;
+
+	if (!mbox)
+		return;
+
+	mutex_lock(&con_mutex);
+
+	list_del(&mbox->node);
+
+	for (i = 0; i < mbox->num_chans; i++)
+		mbox_free_channel(&mbox->chans[i]);
+
+	if (mbox->txdone_poll)
+		del_timer_sync(&mbox->poll);
+
+	mutex_unlock(&con_mutex);
+}
+EXPORT_SYMBOL_GPL(mbox_controller_unregister);
diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h
new file mode 100644
index 000000000000..307d9cab2026
--- /dev/null
+++ b/include/linux/mailbox_client.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2013-2014 Linaro Ltd.
+ * Author: Jassi Brar <jassisinghbrar@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MAILBOX_CLIENT_H
+#define __MAILBOX_CLIENT_H
+
+#include <linux/of.h>
+#include <linux/device.h>
+
+struct mbox_chan;
+
+/**
+ * struct mbox_client - User of a mailbox
+ * @dev:		The client device
+ * @tx_block:		If the mbox_send_message should block until data is
+ *			transmitted.
+ * @tx_tout:		Max block period in ms before TX is assumed failure
+ * @knows_txdone:	If the client could run the TX state machine. Usually
+ *			if the client receives some ACK packet for transmission.
+ *			Unused if the controller already has TX_Done/RTR IRQ.
+ * @rx_callback:	Atomic callback to provide client the data received
+ * @tx_done:		Atomic callback to tell client of data transmission
+ */
+struct mbox_client {
+	struct device *dev;
+	bool tx_block;
+	unsigned long tx_tout;
+	bool knows_txdone;
+
+	void (*rx_callback)(struct mbox_client *cl, void *mssg);
+	void (*tx_done)(struct mbox_client *cl, void *mssg, int r);
+};
+
+struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index);
+int mbox_send_message(struct mbox_chan *chan, void *mssg);
+void mbox_client_txdone(struct mbox_chan *chan, int r); /* atomic */
+bool mbox_client_peek_data(struct mbox_chan *chan); /* atomic */
+void mbox_free_channel(struct mbox_chan *chan); /* may sleep */
+
+#endif /* __MAILBOX_CLIENT_H */
diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h
new file mode 100644
index 000000000000..d4cf96f07cfc
--- /dev/null
+++ b/include/linux/mailbox_controller.h
@@ -0,0 +1,133 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MAILBOX_CONTROLLER_H
+#define __MAILBOX_CONTROLLER_H
+
+#include <linux/of.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/device.h>
+#include <linux/completion.h>
+
+struct mbox_chan;
+
+/**
+ * struct mbox_chan_ops - methods to control mailbox channels
+ * @send_data:	The API asks the MBOX controller driver, in atomic
+ *		context try to transmit a message on the bus. Returns 0 if
+ *		data is accepted for transmission, -EBUSY while rejecting
+ *		if the remote hasn't yet read the last data sent. Actual
+ *		transmission of data is reported by the controller via
+ *		mbox_chan_txdone (if it has some TX ACK irq). It must not
+ *		sleep.
+ * @startup:	Called when a client requests the chan. The controller
+ *		could ask clients for additional parameters of communication
+ *		to be provided via client's chan_data. This call may
+ *		block. After this call the Controller must forward any
+ *		data received on the chan by calling mbox_chan_received_data.
+ *		The controller may do stuff that need to sleep.
+ * @shutdown:	Called when a client relinquishes control of a chan.
+ *		This call may block too. The controller must not forward
+ *		any received data anymore.
+ *		The controller may do stuff that need to sleep.
+ * @last_tx_done: If the controller sets 'txdone_poll', the API calls
+ *		  this to poll status of last TX. The controller must
+ *		  give priority to IRQ method over polling and never
+ *		  set both txdone_poll and txdone_irq. Only in polling
+ *		  mode 'send_data' is expected to return -EBUSY.
+ *		  Called from timer (atomic) context, so it must not sleep.
+ *		  Used only if txdone_poll:=true && txdone_irq:=false
+ * @peek_data: Atomic check for any received data. Return true if controller
+ *		  has some data to push to the client. False otherwise.
+ */
+struct mbox_chan_ops {
+	int (*send_data)(struct mbox_chan *chan, void *data);
+	int (*startup)(struct mbox_chan *chan);
+	void (*shutdown)(struct mbox_chan *chan);
+	bool (*last_tx_done)(struct mbox_chan *chan);
+	bool (*peek_data)(struct mbox_chan *chan);
+};
+
+/**
+ * struct mbox_controller - Controller of a class of communication channels
+ * @dev:		Device backing this controller
+ * @ops:		Operators that work on each communication chan
+ * @chans:		Array of channels
+ * @num_chans:		Number of channels in the 'chans' array.
+ * @txdone_irq:		Indicates if the controller can report to API when
+ *			the last transmitted data was read by the remote.
+ *			Eg, if it has some TX ACK irq.
+ * @txdone_poll:	If the controller can read but not report the TX
+ *			done. Ex, some register shows the TX status but
+ *			no interrupt rises. Ignored if 'txdone_irq' is set.
+ * @txpoll_period:	If 'txdone_poll' is in effect, the API polls for
+ *			last TX's status after these many millisecs
+ * @of_xlate:		Controller driver specific mapping of channel via DT
+ * @poll:		API private. Used to poll for TXDONE on all channels.
+ * @node:		API private. To hook into list of controllers.
+ */
+struct mbox_controller {
+	struct device *dev;
+	struct mbox_chan_ops *ops;
+	struct mbox_chan *chans;
+	int num_chans;
+	bool txdone_irq;
+	bool txdone_poll;
+	unsigned txpoll_period;
+	struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox,
+				      const struct of_phandle_args *sp);
+	/* Internal to API */
+	struct timer_list poll;
+	struct list_head node;
+};
+
+/*
+ * The length of circular buffer for queuing messages from a client.
+ * 'msg_count' tracks the number of buffered messages while 'msg_free'
+ * is the index where the next message would be buffered.
+ * We shouldn't need it too big because every transfer is interrupt
+ * triggered and if we have lots of data to transfer, the interrupt
+ * latencies are going to be the bottleneck, not the buffer length.
+ * Besides, mbox_send_message could be called from atomic context and
+ * the client could also queue another message from the notifier 'tx_done'
+ * of the last transfer done.
+ * REVISIT: If too many platforms see the "Try increasing MBOX_TX_QUEUE_LEN"
+ * print, it needs to be taken from config option or somesuch.
+ */
+#define MBOX_TX_QUEUE_LEN	20
+
+/**
+ * struct mbox_chan - s/w representation of a communication chan
+ * @mbox:		Pointer to the parent/provider of this channel
+ * @txdone_method:	Way to detect TXDone chosen by the API
+ * @cl:			Pointer to the current owner of this channel
+ * @tx_complete:	Transmission completion
+ * @active_req:		Currently active request hook
+ * @msg_count:		No. of mssg currently queued
+ * @msg_free:		Index of next available mssg slot
+ * @msg_data:		Hook for data packet
+ * @lock:		Serialise access to the channel
+ * @con_priv:		Hook for controller driver to attach private data
+ */
+struct mbox_chan {
+	struct mbox_controller *mbox;
+	unsigned txdone_method;
+	struct mbox_client *cl;
+	struct completion tx_complete;
+	void *active_req;
+	unsigned msg_count, msg_free;
+	void *msg_data[MBOX_TX_QUEUE_LEN];
+	spinlock_t lock; /* Serialise access to the channel */
+	void *con_priv;
+};
+
+int mbox_controller_register(struct mbox_controller *mbox); /* can sleep */
+void mbox_controller_unregister(struct mbox_controller *mbox); /* can sleep */
+void mbox_chan_received_data(struct mbox_chan *chan, void *data); /* atomic */
+void mbox_chan_txdone(struct mbox_chan *chan, int r); /* atomic */
+
+#endif /* __MAILBOX_CONTROLLER_H */

From f5868ee1cc8b4afed64269d658e8bfe3931ee023 Mon Sep 17 00:00:00 2001
From: Jassi Brar <jaswinder.singh@linaro.org>
Date: Tue, 22 Jul 2014 20:05:58 +0530
Subject: [PATCH 1184/1185] doc: add documentation for mailbox framework

 Some explanations with examples of how to write to implement users
and providers of the mailbox framework.

Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
(cherry picked from commit 15320fbcec69dc3a4f217044ed848e4225397e25)
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/mailbox.txt | 122 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 122 insertions(+)
 create mode 100644 Documentation/mailbox.txt

diff --git a/Documentation/mailbox.txt b/Documentation/mailbox.txt
new file mode 100644
index 000000000000..60f43ff629aa
--- /dev/null
+++ b/Documentation/mailbox.txt
@@ -0,0 +1,122 @@
+		The Common Mailbox Framework
+		Jassi Brar <jaswinder.singh@linaro.org>
+
+ This document aims to help developers write client and controller
+drivers for the API. But before we start, let us note that the
+client (especially) and controller drivers are likely going to be
+very platform specific because the remote firmware is likely to be
+proprietary and implement non-standard protocol. So even if two
+platforms employ, say, PL320 controller, the client drivers can't
+be shared across them. Even the PL320 driver might need to accommodate
+some platform specific quirks. So the API is meant mainly to avoid
+similar copies of code written for each platform. Having said that,
+nothing prevents the remote f/w to also be Linux based and use the
+same api there. However none of that helps us locally because we only
+ever deal at client's protocol level.
+ Some of the choices made during implementation are the result of this
+peculiarity of this "common" framework.
+
+
+
+	Part 1 - Controller Driver (See include/linux/mailbox_controller.h)
+
+ Allocate mbox_controller and the array of mbox_chan.
+Populate mbox_chan_ops; all callbacks except peek_data() are mandatory.
+The controller driver might know a message has been consumed
+by the remote by getting an IRQ or polling some hardware flag
+or it can never know (the client knows by way of the protocol).
+The method in order of preference is IRQ -> Poll -> None, which
+the controller driver should set via 'txdone_irq' or 'txdone_poll'
+or neither.
+
+
+	Part 2 - Client Driver (See include/linux/mailbox_client.h)
+
+ The client might want to operate in blocking mode (synchronously
+send a message through before returning) or non-blocking/async mode (submit
+a message and a callback function to the API and return immediately).
+
+
+struct demo_client {
+	struct mbox_client cl;
+	struct mbox_chan *mbox;
+	struct completion c;
+	bool async;
+	/* ... */
+};
+
+/*
+ * This is the handler for data received from remote. The behaviour is purely
+ * dependent upon the protocol. This is just an example.
+ */
+static void message_from_remote(struct mbox_client *cl, void *mssg)
+{
+	struct demo_client *dc = container_of(cl,
+						struct demo_client, cl);
+	if (dc->async) {
+		if (is_an_ack(mssg)) {
+			/* An ACK to our last sample sent */
+			return; /* Or do something else here */
+		} else { /* A new message from remote */
+			queue_req(mssg);
+		}
+	} else {
+		/* Remote f/w sends only ACK packets on this channel */
+		return;
+	}
+}
+
+static void sample_sent(struct mbox_client *cl, void *mssg, int r)
+{
+	struct demo_client *dc = container_of(cl,
+						struct demo_client, cl);
+	complete(&dc->c);
+}
+
+static void client_demo(struct platform_device *pdev)
+{
+	struct demo_client *dc_sync, *dc_async;
+	/* The controller already knows async_pkt and sync_pkt */
+	struct async_pkt ap;
+	struct sync_pkt sp;
+
+	dc_sync = kzalloc(sizeof(*dc_sync), GFP_KERNEL);
+	dc_async = kzalloc(sizeof(*dc_async), GFP_KERNEL);
+
+	/* Populate non-blocking mode client */
+	dc_async->cl.dev = &pdev->dev;
+	dc_async->cl.rx_callback = message_from_remote;
+	dc_async->cl.tx_done = sample_sent;
+	dc_async->cl.tx_block = false;
+	dc_async->cl.tx_tout = 0; /* doesn't matter here */
+	dc_async->cl.knows_txdone = false; /* depending upon protocol */
+	dc_async->async = true;
+	init_completion(&dc_async->c);
+
+	/* Populate blocking mode client */
+	dc_sync->cl.dev = &pdev->dev;
+	dc_sync->cl.rx_callback = message_from_remote;
+	dc_sync->cl.tx_done = NULL; /* operate in blocking mode */
+	dc_sync->cl.tx_block = true;
+	dc_sync->cl.tx_tout = 500; /* by half a second */
+	dc_sync->cl.knows_txdone = false; /* depending upon protocol */
+	dc_sync->async = false;
+
+	/* ASync mailbox is listed second in 'mboxes' property */
+	dc_async->mbox = mbox_request_channel(&dc_async->cl, 1);
+	/* Populate data packet */
+	/* ap.xxx = 123; etc */
+	/* Send async message to remote */
+	mbox_send_message(dc_async->mbox, &ap);
+
+	/* Sync mailbox is listed first in 'mboxes' property */
+	dc_sync->mbox = mbox_request_channel(&dc_sync->cl, 0);
+	/* Populate data packet */
+	/* sp.abc = 123; etc */
+	/* Send message to remote in blocking mode */
+	mbox_send_message(dc_sync->mbox, &sp);
+	/* At this point 'sp' has been sent */
+
+	/* Now wait for async chan to be done */
+	wait_for_completion(&dc_async->c);
+}

From c583b1fec06304d6128f02b1bffff27202c66abf Mon Sep 17 00:00:00 2001
From: Jassi Brar <jaswinder.singh@linaro.org>
Date: Tue, 22 Jul 2014 20:40:04 +0530
Subject: [PATCH 1185/1185] dt: mailbox: add generic bindings

Define generic bindings for the framework clients to
request mailbox channels.

Reviewed-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
(cherry picked from commit 9f3e3cacb2ffdefe28c7cf490bf543e4dcb2770a)
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/mailbox/mailbox.txt   | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mailbox/mailbox.txt

diff --git a/Documentation/devicetree/bindings/mailbox/mailbox.txt b/Documentation/devicetree/bindings/mailbox/mailbox.txt
new file mode 100644
index 000000000000..1a2cd3d266db
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/mailbox.txt
@@ -0,0 +1,38 @@
+* Generic Mailbox Controller and client driver bindings
+
+Generic binding to provide a way for Mailbox controller drivers to
+assign appropriate mailbox channel to client drivers.
+
+* Mailbox Controller
+
+Required property:
+- #mbox-cells: Must be at least 1. Number of cells in a mailbox
+		specifier.
+
+Example:
+	mailbox: mailbox {
+		...
+		#mbox-cells = <1>;
+	};
+
+
+* Mailbox Client
+
+Required property:
+- mboxes: List of phandle and mailbox channel specifiers.
+
+Optional property:
+- mbox-names: List of identifier strings for each mailbox channel
+		required by the client. The use of this property
+		is discouraged in favor of using index in list of
+		'mboxes' while requesting a mailbox. Instead the
+		platforms may define channel indices, in DT headers,
+		to something legible.
+
+Example:
+	pwr_cntrl: power {
+		...
+		mbox-names = "pwr-ctrl", "rpc";
+		mboxes = <&mailbox 0
+			&mailbox 1>;
+	};