From 772de7be5b75e33a02d4ab79f0876fb868cf4242 Mon Sep 17 00:00:00 2001 From: Tim Murray Date: Tue, 19 Jan 2016 16:36:40 -0800 Subject: [PATCH 001/797] ANDROID: mmc: move to a SCHED_FIFO thread (cherry picked from commit 011e507b413393eab8279dac8b778ad9b6e9971b) Running mmcqd as a prio 120 thread forces it to compete with standard user processes for IO performance, especially when the system is under severe CPU load. Move it to a SCHED_FIFO thread to reduce the impact of load on IO performance. Signed-off-by: Tim Murray Bug: 25392275 Change-Id: I1edfe73baa25e181367c30c1f40fee886e92b60d --- drivers/mmc/card/queue.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index 6f4323c6d653..6a4cd2bb4629 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -19,6 +19,7 @@ #include #include +#include #include "queue.h" #define MMC_QUEUE_BOUNCESZ 65536 @@ -50,6 +51,11 @@ static int mmc_queue_thread(void *d) { struct mmc_queue *mq = d; struct request_queue *q = mq->queue; + struct sched_param scheduler_params = {0}; + + scheduler_params.sched_priority = 1; + + sched_setscheduler(current, SCHED_FIFO, &scheduler_params); current->flags |= PF_MEMALLOC; From c84db235f0c9db4574760979382285fdd50ef88b Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Wed, 24 Feb 2016 11:17:02 -0800 Subject: [PATCH 002/797] Revert "mmc: core: Hold a wake lock accross delayed work + mmc rescan" Patch mmc: core: Signal wakeup event at card insert/removal provides wake lock for mmc_detect_change() This reverts commit bec7bcbb707d10b80d450f6f02384efeff294799. --- drivers/mmc/core/core.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 95fba49d2d42..96666984a103 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -29,7 +29,6 @@ #include #include #include -#include #include @@ -59,7 +58,6 @@ #define MMC_BKOPS_MAX_TIMEOUT (4 * 60 * 1000) /* max time to wait in ms */ static struct workqueue_struct *workqueue; -static struct wake_lock mmc_delayed_work_wake_lock; static const unsigned freqs[] = { 400000, 300000, 200000, 100000 }; /* @@ -76,7 +74,6 @@ module_param(use_spi_crc, bool, 0); static int mmc_schedule_delayed_work(struct delayed_work *work, unsigned long delay) { - wake_lock(&mmc_delayed_work_wake_lock); return queue_delayed_work(workqueue, work, delay); } @@ -2582,7 +2579,6 @@ void mmc_rescan(struct work_struct *work) struct mmc_host *host = container_of(work, struct mmc_host, detect.work); int i; - bool extend_wakelock = false; if (host->trigger_card_event && host->ops->card_event) { host->ops->card_event(host); @@ -2644,20 +2640,14 @@ void mmc_rescan(struct work_struct *work) mmc_claim_host(host); for (i = 0; i < ARRAY_SIZE(freqs); i++) { - if (!mmc_rescan_try_freq(host, max(freqs[i], host->f_min))) { - extend_wakelock = true; + if (!mmc_rescan_try_freq(host, max(freqs[i], host->f_min))) break; - } if (freqs[i] <= host->f_min) break; } mmc_release_host(host); out: - if (extend_wakelock) - wake_lock_timeout(&mmc_delayed_work_wake_lock, HZ / 2); - else - wake_unlock(&mmc_delayed_work_wake_lock); if (host->caps & MMC_CAP_NEEDS_POLL) mmc_schedule_delayed_work(&host->detect, HZ); } @@ -2885,9 +2875,6 @@ static int __init mmc_init(void) if (!workqueue) return -ENOMEM; - wake_lock_init(&mmc_delayed_work_wake_lock, WAKE_LOCK_SUSPEND, - "mmc_delayed_work"); - ret = mmc_register_bus(); if (ret) goto destroy_workqueue; @@ -2908,7 +2895,6 @@ static int __init mmc_init(void) mmc_unregister_bus(); destroy_workqueue: destroy_workqueue(workqueue); - wake_lock_destroy(&mmc_delayed_work_wake_lock); return ret; } @@ -2919,7 +2905,6 @@ static void __exit mmc_exit(void) mmc_unregister_host_class(); mmc_unregister_bus(); destroy_workqueue(workqueue); - wake_lock_destroy(&mmc_delayed_work_wake_lock); } subsys_initcall(mmc_init); From 7187712e8d53b99c64b774011945d601345ddf36 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Wed, 24 Feb 2016 12:45:14 -0800 Subject: [PATCH 003/797] Revert "mmc: Extend wakelock if bus is dead" This reverts commit dde72f9e313fc52d467ef0aad41cecd2c9f9f212. --- drivers/mmc/core/core.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 96666984a103..3e54185bc985 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2605,12 +2605,6 @@ void mmc_rescan(struct work_struct *work) host->detect_change = 0; - /* If the card was removed the bus will be marked - * as dead - extend the wakelock so userspace - * can respond */ - if (host->bus_dead) - extend_wakelock = 1; - /* * Let mmc_bus_put() free the bus/bus_ops if we've found that * the card is no longer present. From c3b67e21677e5967e04e84861f7b67b456d7a7c4 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:02 +0900 Subject: [PATCH 004/797] net: diag: split inet_diag_dump_one_icsk into two Currently, inet_diag_dump_one_icsk finds a socket and then dumps its information to userspace. Split it into a part that finds the socket and a part that dumps the information. [cherry-pick of net-next b613f56ec9baf30edf5d9d607b822532a273dad7] Change-Id: I144765afb6ff1cd66eb4757c9418112fb0b08a6f Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 5 +++++ net/ipv4/inet_diag.c | 42 +++++++++++++++++++++++++-------------- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 0e707f0c1a3e..e7032f041982 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -3,6 +3,7 @@ #include +struct net; struct sock; struct inet_hashinfo; struct nlattr; @@ -41,6 +42,10 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, const struct nlmsghdr *nlh, const struct inet_diag_req_v2 *req); +struct sock *inet_diag_find_one_icsk(struct net *net, + struct inet_hashinfo *hashinfo, + const struct inet_diag_req_v2 *req); + int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); extern int inet_diag_register(const struct inet_diag_handler *handler); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index ab9f8a66615d..cfabb8f8f0a0 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -350,17 +350,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, nlmsg_flags, unlh); } -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct sk_buff *in_skb, - const struct nlmsghdr *nlh, - const struct inet_diag_req_v2 *req) +struct sock *inet_diag_find_one_icsk(struct net *net, + struct inet_hashinfo *hashinfo, + const struct inet_diag_req_v2 *req) { - struct net *net = sock_net(in_skb->sk); - struct sk_buff *rep; struct sock *sk; - int err; - err = -EINVAL; if (req->sdiag_family == AF_INET) sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], @@ -375,15 +370,33 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, req->id.idiag_if); #endif else - goto out_nosk; + return ERR_PTR(-EINVAL); - err = -ENOENT; if (!sk) - goto out_nosk; + return ERR_PTR(-ENOENT); - err = sock_diag_check_cookie(sk, req->id.idiag_cookie); - if (err) - goto out; + if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { + sock_gen_put(sk); + return ERR_PTR(-ENOENT); + } + + return sk; +} +EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk); + +int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, + struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sk_buff *rep; + struct sock *sk; + int err; + + sk = inet_diag_find_one_icsk(net, hashinfo, req); + if (IS_ERR(sk)) + return PTR_ERR(sk); rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL); if (!rep) { @@ -409,7 +422,6 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, if (sk) sock_gen_put(sk); -out_nosk: return err; } EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); From 7ae15a607bb106627859da621bd7543d44a6b61d Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:03 +0900 Subject: [PATCH 005/797] net: diag: Add the ability to destroy a socket. This patch adds a SOCK_DESTROY operation, a destroy function pointer to sock_diag_handler, and a diag_destroy function pointer. It does not include any implementation code. [backport of net-next 64be0aed59ad519d6f2160868734f7e278290ac1] Change-Id: Ic5327ff14b39dd268083ee4c1dc2c934b2820df5 Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 2 ++ include/net/sock.h | 1 + include/uapi/linux/sock_diag.h | 1 + net/core/sock_diag.c | 23 ++++++++++++++++++++--- 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index fddebc617469..4018b48f2b3b 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -15,6 +15,7 @@ struct sock_diag_handler { __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); int (*get_info)(struct sk_buff *skb, struct sock *sk); + int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh); }; int sock_diag_register(const struct sock_diag_handler *h); @@ -68,4 +69,5 @@ bool sock_diag_has_destroy_listeners(const struct sock *sk) } void sock_diag_broadcast_destroy(struct sock *sk); +int sock_diag_destroy(struct sock *sk, int err); #endif diff --git a/include/net/sock.h b/include/net/sock.h index 14d3c0734007..2d663ee8494d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1067,6 +1067,7 @@ struct proto { void (*destroy_cgroup)(struct mem_cgroup *memcg); struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg); #endif + int (*diag_destroy)(struct sock *sk, int err); }; int proto_register(struct proto *prot, int alloc_slab); diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h index 49230d36f9ce..84e66ed670be 100644 --- a/include/uapi/linux/sock_diag.h +++ b/include/uapi/linux/sock_diag.h @@ -4,6 +4,7 @@ #include #define SOCK_DIAG_BY_FAMILY 20 +#define SOCK_DESTROY_BACKPORT 21 struct sock_diag_req { __u8 sdiag_family; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 0c1d58d43f67..3963c3872c69 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -214,7 +214,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld) } EXPORT_SYMBOL_GPL(sock_diag_unregister); -static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) { int err; struct sock_diag_req *req = nlmsg_data(nlh); @@ -234,8 +234,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) hndl = sock_diag_handlers[req->sdiag_family]; if (hndl == NULL) err = -ENOENT; - else + else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY) err = hndl->dump(skb, nlh); + else if (nlh->nlmsg_type == SOCK_DESTROY_BACKPORT && hndl->destroy) + err = hndl->destroy(skb, nlh); + else + err = -EOPNOTSUPP; mutex_unlock(&sock_diag_table_mutex); return err; @@ -261,7 +265,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return ret; case SOCK_DIAG_BY_FAMILY: - return __sock_diag_rcv_msg(skb, nlh); + case SOCK_DESTROY_BACKPORT: + return __sock_diag_cmd(skb, nlh); default: return -EINVAL; } @@ -295,6 +300,18 @@ static int sock_diag_bind(struct net *net, int group) return 0; } +int sock_diag_destroy(struct sock *sk, int err) +{ + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (!sk->sk_prot->diag_destroy) + return -EOPNOTSUPP; + + return sk->sk_prot->diag_destroy(sk, err); +} +EXPORT_SYMBOL_GPL(sock_diag_destroy); + static int __net_init diag_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { From 1046108bbba31dc9f080c176647fa59f50118622 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:04 +0900 Subject: [PATCH 006/797] net: diag: Support SOCK_DESTROY for inet sockets. This passes the SOCK_DESTROY operation to the underlying protocol diag handler, or returns -EOPNOTSUPP if that handler does not define a destroy operation. Most of this patch is just renaming functions. This is not strictly necessary, but it would be fairly counterintuitive to have the code to destroy inet sockets be in a function whose name starts with inet_diag_get. [backport of net-next 6eb5d2e08f071c05ecbe135369c9ad418826cab2] Change-Id: Idc13a7def20f492a5323ad2f8de105426293bd37 Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 4 ++++ net/ipv4/inet_diag.c | 23 +++++++++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index e7032f041982..7c27fa1030e8 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -24,6 +24,10 @@ struct inet_diag_handler { void (*idiag_get_info)(struct sock *sk, struct inet_diag_msg *r, void *info); + + int (*destroy)(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req); + __u16 idiag_type; __u16 idiag_info_size; }; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index cfabb8f8f0a0..27c5fd5747d8 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -426,7 +426,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, } EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); -static int inet_diag_get_exact(struct sk_buff *in_skb, +static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, const struct nlmsghdr *nlh, const struct inet_diag_req_v2 *req) { @@ -436,8 +436,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, handler = inet_diag_lock_handler(req->sdiag_protocol); if (IS_ERR(handler)) err = PTR_ERR(handler); - else + else if (cmd == SOCK_DIAG_BY_FAMILY) err = handler->dump_one(in_skb, nlh, req); + else if (cmd == SOCK_DESTROY_BACKPORT && handler->destroy) + err = handler->destroy(in_skb, req); + else + err = -EOPNOTSUPP; inet_diag_unlock_handler(handler); return err; @@ -950,7 +954,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, req.idiag_states = rc->idiag_states; req.id = rc->id; - return inet_diag_get_exact(in_skb, nlh, &req); + return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req); } static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -984,7 +988,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) return inet_diag_get_exact_compat(skb, nlh); } -static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct inet_diag_req_v2); struct net *net = sock_net(skb->sk); @@ -992,7 +996,8 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) if (nlmsg_len(h) < hdrlen) return -EINVAL; - if (h->nlmsg_flags & NLM_F_DUMP) { + if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && + h->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(h, hdrlen)) { struct nlattr *attr; @@ -1011,7 +1016,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) } } - return inet_diag_get_exact(skb, h, nlmsg_data(h)); + return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h)); } static @@ -1062,14 +1067,16 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) static const struct sock_diag_handler inet_diag_handler = { .family = AF_INET, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, .get_info = inet_diag_handler_get_info, + .destroy = inet_diag_handler_cmd, }; static const struct sock_diag_handler inet6_diag_handler = { .family = AF_INET6, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, .get_info = inet_diag_handler_get_info, + .destroy = inet_diag_handler_cmd, }; int inet_diag_register(const struct inet_diag_handler *h) From 69f0e89f55b7667889a638d19e3a101ce7ef4f7f Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:05 +0900 Subject: [PATCH 007/797] net: diag: Support destroying TCP sockets. This implements SOCK_DESTROY for TCP sockets. It causes all blocking calls on the socket to fail fast with ECONNABORTED and causes a protocol close of the socket. It informs the other end of the connection by sending a RST, i.e., initiating a TCP ABORT as per RFC 793. ECONNABORTED was chosen for consistency with FreeBSD. [cherry-pick of net-next c1e64e298b8cad309091b95d8436a0255c84f54a] Change-Id: I728a01ef03f2ccfb9016a3f3051ef00975980e49 Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/Kconfig | 13 +++++++++++++ net/ipv4/tcp.c | 32 ++++++++++++++++++++++++++++++++ net/ipv4/tcp_diag.c | 19 +++++++++++++++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 6 files changed, 68 insertions(+) diff --git a/include/net/tcp.h b/include/net/tcp.h index c585ab6d7c76..5f4d135a00cc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1171,6 +1171,8 @@ void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); +int tcp_abort(struct sock *sk, int err); + static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) { rx_opt->dsack = 0; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 416dfa004cfb..c22920525e5d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -436,6 +436,19 @@ config INET_UDP_DIAG Support for UDP socket monitoring interface used by the ss tool. If unsure, say Y. +config INET_DIAG_DESTROY + bool "INET: allow privileged process to administratively close sockets" + depends on INET_DIAG + default n + ---help--- + Provides a SOCK_DESTROY operation that allows privileged processes + (e.g., a connection manager or a network administration tool such as + ss) to close sockets opened by other processes. Closing a socket in + this way interrupts any blocking read/write/connect operations on + the socket and causes future socket calls to behave as if the socket + had been disconnected. + If unsure, say N. + menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c2b1f02ea155..6033a270843a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3101,6 +3101,38 @@ void tcp_done(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_done); +int tcp_abort(struct sock *sk, int err) +{ + if (!sk_fullsock(sk)) { + sock_gen_put(sk); + return -EOPNOTSUPP; + } + + /* Don't race with userspace socket closes such as tcp_close. */ + lock_sock(sk); + + /* Don't race with BH socket closes such as inet_csk_listen_stop. */ + local_bh_disable(); + bh_lock_sock(sk); + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_err = err; + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + smp_wmb(); + sk->sk_error_report(sk); + if (tcp_need_reset(sk->sk_state)) + tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_done(sk); + } + + bh_unlock_sock(sk); + local_bh_enable(); + release_sock(sk); + sock_put(sk); + return 0; +} +EXPORT_SYMBOL_GPL(tcp_abort); + extern struct tcp_congestion_ops tcp_reno; static __initdata unsigned long thash_entries; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index b31604086edd..4d610934fb39 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -10,6 +10,8 @@ */ #include +#include +#include #include #include @@ -46,12 +48,29 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); } +#ifdef CONFIG_INET_DIAG_DESTROY +static int tcp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req); + + if (IS_ERR(sk)) + return PTR_ERR(sk); + + return sock_diag_destroy(sk, ECONNABORTED); +} +#endif + static const struct inet_diag_handler tcp_diag_handler = { .dump = tcp_diag_dump, .dump_one = tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, .idiag_type = IPPROTO_TCP, .idiag_info_size = sizeof(struct tcp_info), +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = tcp_diag_destroy, +#endif }; static int __init tcp_diag_init(void) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8c7e63163e92..c478092172f2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2351,6 +2351,7 @@ struct proto tcp_prot = { .destroy_cgroup = tcp_destroy_cgroup, .proto_cgroup = tcp_proto_cgroup, #endif + .diag_destroy = tcp_abort, }; EXPORT_SYMBOL(tcp_prot); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 365af3c8145d..f85b4c44c00d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1893,6 +1893,7 @@ struct proto tcpv6_prot = { .proto_cgroup = tcp_proto_cgroup, #endif .clear_sk = tcp_v6_clear_sk, + .diag_destroy = tcp_abort, }; static const struct inet6_protocol tcpv6_protocol = { From 2a5cf317a11d19c2363502e3ec311cd601009221 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Dec 2015 16:14:11 -0800 Subject: [PATCH 008/797] tcp: diag: add support for request sockets to tcp_abort() Adding support for SYN_RECV request sockets to tcp_abort() is quite easy after our tcp listener rewrite. Note that we also need to better handle listeners, or we might leak not yet accepted children, because of a missing inet_csk_listen_stop() call. [cherry-pick of net-next 07f6f4a31e5a8dee67960fc07bb0b37c5f879d4d] Change-Id: I8ec6b2e6ec24f330a69595abf1d5469ace79b3fd Signed-off-by: Eric Dumazet Cc: Lorenzo Colitti Tested-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6033a270843a..6d06fbf6654f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3104,6 +3104,15 @@ EXPORT_SYMBOL_GPL(tcp_done); int tcp_abort(struct sock *sk, int err) { if (!sk_fullsock(sk)) { + if (sk->sk_state == TCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); + + local_bh_disable(); + inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, + req); + local_bh_enable(); + return 0; + } sock_gen_put(sk); return -EOPNOTSUPP; } From 58281e63ffb4c31d120709a090086c2c151897f5 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Tue, 22 Dec 2015 00:03:44 +0900 Subject: [PATCH 009/797] net: tcp: deal with listen sockets properly in tcp_abort. When closing a listen socket, tcp_abort currently calls tcp_done without clearing the request queue. If the socket has a child socket that is established but not yet accepted, the child socket is then left without a parent, causing a leak. Fix this by setting the socket state to TCP_CLOSE and calling inet_csk_listen_stop with the socket lock held, like tcp_close does. Tested using net_test. With this patch, calling SOCK_DESTROY on a listen socket that has an established but not yet accepted child socket results in the parent and the child being closed, such that they no longer appear in sock_diag dumps. [cherry-pick of net-next 2010b93e9317cc12acd20c4aed385af7f9d1681e] Change-Id: I0555a142f11d8b36362ffd7c8ef4a5ecae8987c9 Reported-by: Eric Dumazet Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6d06fbf6654f..7c0465202cc5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3120,6 +3120,11 @@ int tcp_abort(struct sock *sk, int err) /* Don't race with userspace socket closes such as tcp_close. */ lock_sock(sk); + if (sk->sk_state == TCP_LISTEN) { + tcp_set_state(sk, TCP_CLOSE); + inet_csk_listen_stop(sk); + } + /* Don't race with BH socket closes such as inet_csk_listen_stop. */ local_bh_disable(); bh_lock_sock(sk); From 8538ccf40b8c5c480ee232fa948164a0b4dc166b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Jan 2016 16:25:01 -0800 Subject: [PATCH 010/797] net: diag: support v4mapped sockets in inet_diag_find_one_icsk() Lorenzo reported that we could not properly find v4mapped sockets in inet_diag_find_one_icsk(). This patch fixes the issue. [cherry-pick of fc439d9489479411fbf9bbbec2c768df89e85503] Change-Id: I13515e83fb76d4729f00047f9eb142c929390fb2 Reported-by: Lorenzo Colitti Signed-off-by: Eric Dumazet --- net/ipv4/inet_diag.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 27c5fd5747d8..a403a676d452 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -361,13 +361,20 @@ struct sock *inet_diag_find_one_icsk(struct net *net, req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_if); #if IS_ENABLED(CONFIG_IPV6) - else if (req->sdiag_family == AF_INET6) - sk = inet6_lookup(net, hashinfo, - (struct in6_addr *)req->id.idiag_dst, - req->id.idiag_dport, - (struct in6_addr *)req->id.idiag_src, - req->id.idiag_sport, - req->id.idiag_if); + else if (req->sdiag_family == AF_INET6) { + if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && + ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) + sk = inet_lookup(net, hashinfo, req->id.idiag_dst[3], + req->id.idiag_dport, req->id.idiag_src[3], + req->id.idiag_sport, req->id.idiag_if); + else + sk = inet6_lookup(net, hashinfo, + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + req->id.idiag_if); + } #endif else return ERR_PTR(-EINVAL); From 4f27b251f23643c115fce7e36f31268deed6e71b Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 29 Feb 2016 17:38:34 -0800 Subject: [PATCH 011/797] ANDROID: net: fix 'const' warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See the following build log splats. The sock_i_uid() helper doesn't quite treat the parameter as 'const' (it acquires a member lock), but this cast is the same approach taken by other callers in this file, so I don't feel too bad about the fix. CC net/ipv4/inet_connection_sock.o CC net/ipv6/inet6_connection_sock.o net/ipv6/inet6_connection_sock.c: In function ‘inet6_csk_route_req’: net/ipv6/inet6_connection_sock.c:89:2: warning: passing argument 1 of ‘sock_i_uid’ discards ‘const’ qualifier from pointer target type [enabled by default] In file included from include/linux/tcp.h:22:0, from include/linux/ipv6.h:73, from net/ipv6/inet6_connection_sock.c:18: include/net/sock.h:1689:8: note: expected ‘struct sock *’ but argument is of type ‘const struct sock *’ net/ipv4/inet_connection_sock.c: In function ‘inet_csk_route_req’: net/ipv4/inet_connection_sock.c:423:7: warning: passing argument 1 of ‘sock_i_uid’ discards ‘const’ qualifier from pointer target type [enabled by default] In file included from include/net/inet_sock.h:27:0, from include/net/inet_connection_sock.h:23, from net/ipv4/inet_connection_sock.c:19: include/net/sock.h:1689:8: note: expected ‘struct sock *’ but argument is of type ‘const struct sock *’ net/ipv4/inet_connection_sock.c: In function ‘inet_csk_route_child_sock’: net/ipv4/inet_connection_sock.c:460:7: warning: passing argument 1 of ‘sock_i_uid’ discards ‘const’ qualifier from pointer target type [enabled by default] In file included from include/net/inet_sock.h:27:0, from include/net/inet_connection_sock.h:23, from net/ipv4/inet_connection_sock.c:19: include/net/sock.h:1689:8: note: expected ‘struct sock *’ but argument is of type ‘const struct sock *’ Change-Id: I5c156fc1a81f90323717bffd93c31d205b85620c Signed-off-by: Brian Norris --- net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv6/inet6_connection_sock.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 759f90e1e499..030cd09dd2a2 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -420,7 +420,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num), sock_i_uid(sk)); + htons(ireq->ir_num), sock_i_uid((struct sock *)sk)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -457,7 +457,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num), sock_i_uid(sk)); + htons(ireq->ir_num), sock_i_uid((struct sock *)sk)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 7b214652768e..897bb6eb5751 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -86,7 +86,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); - fl6->flowi6_uid = sock_i_uid(sk); + fl6->flowi6_uid = sock_i_uid((struct sock *)sk); security_req_classify_flow(req, flowi6_to_flowi(fl6)); dst = ip6_dst_lookup_flow(sk, fl6, final_p); From 2e9117dd33110315f302d4bf3afb11561c29af21 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 29 Feb 2016 17:40:05 -0800 Subject: [PATCH 012/797] ANDROID: lowmemorykiller: fix declaration order warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/staging/android/lowmemorykiller.c: In function ‘lowmem_scan’: drivers/staging/android/lowmemorykiller.c:174:3: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement] Change-Id: I9de6cf2c374bc43131725a7ed666a033a4449ea9 Signed-off-by: Brian Norris --- drivers/staging/android/lowmemorykiller.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 4cc3d02eacfb..af49af0cca01 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -161,6 +161,10 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) p->comm, p->pid, oom_score_adj, tasksize); } if (selected) { + long cache_size = other_file * (long)(PAGE_SIZE / 1024); + long cache_limit = minfree * (long)(PAGE_SIZE / 1024); + long free = other_free * (long)(PAGE_SIZE / 1024); + task_lock(selected); send_sig(SIGKILL, selected, 0); /* @@ -171,9 +175,6 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) if (selected->mm) mark_oom_victim(selected); task_unlock(selected); - long cache_size = other_file * (long)(PAGE_SIZE / 1024); - long cache_limit = minfree * (long)(PAGE_SIZE / 1024); - long free = other_free * (long)(PAGE_SIZE / 1024); trace_lowmemory_kill(selected, cache_size, cache_limit, free); lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" \ " to free %ldkB on behalf of '%s' (%d) because\n" \ From 4ebd433b3d2c1d26425429d6192da63965fbbc7e Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 29 Feb 2016 17:44:51 -0800 Subject: [PATCH 013/797] ANDROID: usb: gadget: f_mtp: don't use le16 for u8 field The 'bCount' field is u8. Noticed by this warning: drivers/usb/gadget/function/f_mtp.c:264:3: warning: large integer implicitly truncated to unsigned type [-Woverflow] Change-Id: Ie82dfd1a8986ecd3acf143e41c46822f0d1aca4f Signed-off-by: Brian Norris --- drivers/usb/gadget/function/f_mtp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c index aec7b8d61fe7..8f80a7e91314 100644 --- a/drivers/usb/gadget/function/f_mtp.c +++ b/drivers/usb/gadget/function/f_mtp.c @@ -261,7 +261,7 @@ struct { .dwLength = __constant_cpu_to_le32(sizeof(mtp_ext_config_desc)), .bcdVersion = __constant_cpu_to_le16(0x0100), .wIndex = __constant_cpu_to_le16(4), - .bCount = __constant_cpu_to_le16(1), + .bCount = 1, }, .function = { .bFirstInterfaceNumber = 0, From 49f63e15598e09f24e0528e7cad18d6dec2ba804 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 29 Feb 2016 17:42:29 -0800 Subject: [PATCH 014/797] ANDROID: kernel/watchdog: fix unused variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kernel/watchdog.c:122:22: warning: ‘hardlockup_allcpu_dumped’ defined but not used [-Wunused-variable] Change-Id: I99e97e7cc31b589cd674fd4495832c9ef036d0b9 Signed-off-by: Brian Norris --- kernel/watchdog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 25955235ecdd..e864906af3fc 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -119,7 +119,7 @@ static unsigned long soft_lockup_nmi_warn; #ifdef CONFIG_HARDLOCKUP_DETECTOR unsigned int __read_mostly hardlockup_panic = CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; -static unsigned long hardlockup_allcpu_dumped; +static unsigned long __maybe_unused hardlockup_allcpu_dumped; /* * We may not want to enable hard lockup detection by default in all cases, * for example when running the kernel as a guest on a hypervisor. In these From 13712cceef9b7f1a585ab83564e86b115dd41134 Mon Sep 17 00:00:00 2001 From: Dylan Reid Date: Mon, 2 Mar 2015 17:09:07 -0800 Subject: [PATCH 015/797] ANDROID: mmc: Move tracepoint creation and export symbols Move the tracepoint creation to core from card, as core shouldn't depend on card. Also add EXPORT_SYMBOL_GPL calls to enable module build. Change-Id: Ie39fcdadc0516df99600d0963efe09b6cd7a9bf8 Signed-off-by: Dylan Reid (cherry picked from commit da5fbd1e7e50fee3a8271f50d25c848d0ede64b3, from android-3.14) Signed-off-by: Brian Norris --- drivers/mmc/card/block.c | 1 - drivers/mmc/core/core.c | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 21aa3244029b..90e9738a129a 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -36,7 +36,6 @@ #include #include -#define CREATE_TRACE_POINTS #include #include diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 3e54185bc985..9fab52559a8c 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -30,6 +30,7 @@ #include #include +#define CREATE_TRACE_POINTS #include #include @@ -48,6 +49,11 @@ #include "sd_ops.h" #include "sdio_ops.h" +EXPORT_TRACEPOINT_SYMBOL_GPL(mmc_blk_erase_start); +EXPORT_TRACEPOINT_SYMBOL_GPL(mmc_blk_erase_end); +EXPORT_TRACEPOINT_SYMBOL_GPL(mmc_blk_rw_start); +EXPORT_TRACEPOINT_SYMBOL_GPL(mmc_blk_rw_end); + /* If the device is not responding */ #define MMC_CORE_TIMEOUT_MS (10 * 60 * 1000) /* 10 minute timeout */ From 467971a1a728971f22d8642266bf6a8cb0ec6cb4 Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Tue, 10 Nov 2015 14:11:46 -0800 Subject: [PATCH 016/797] ANDROID: mmc: sdio: Disable retuning in sdio_reset_comm() Since sdio_reset_comm() re-initializes the SDIO card, disable retuning before idling and shutting down the card. Tuning will be re-enabled (if necessary) in mmc_sdio_init_card(). BUG=chrome-os-partner:46444 TEST=With CL:311815, toggle WiFi on/off on Smaug and observe that the WiFi card comes back up and is able to tune successfully. Change-Id: Ib4a5cfd4d75fc9e3ed7bb3f1e2ffd30de16c5d28 Signed-off-by: Andrew Bresticker Reviewed-on: https://chromium-review.googlesource.com/311797 Reviewed-by: Derek Basehore [briannorris: brought from Chromium kernel in 3.18 -> 4.4 rebase] Signed-off-by: Brian Norris --- drivers/mmc/core/sdio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 09e100ba22b0..b47957122fd7 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -22,6 +22,7 @@ #include "core.h" #include "bus.h" +#include "host.h" #include "sd.h" #include "sdio_bus.h" #include "mmc_ops.h" @@ -1228,6 +1229,8 @@ int sdio_reset_comm(struct mmc_card *card) printk("%s():\n", __func__); mmc_claim_host(host); + mmc_retune_disable(host); + mmc_go_idle(host); mmc_set_clock(host, host->f_min); From a9c7e0955a71e12167d763ef286c80acb5e5fb45 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 1 Mar 2016 09:44:17 -0800 Subject: [PATCH 017/797] net: pppolac/pppopns: Replace msg.msg_iov with iov_iter_kvec() Commit 1af89c1ef3b6 ("Hack: net: PPPoPNS and PPPoLAC build fixes for 4.1") fixed the build for PPPoPNS and PPPoLAC by re-introducing a field in struct msghdr which was removed upstream. Re-introducing the field doesn't get it used, so it is quite likely that the code never worked. Fix it up for good. Fixes: 1af89c1ef3b6 ("Hack: net: PPPoPNS and PPPoLAC build fixes for 4.1") Signed-off-by: Guenter Roeck --- drivers/net/ppp/pppolac.c | 9 ++++----- drivers/net/ppp/pppopns.c | 9 ++++----- include/linux/socket.h | 4 ---- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/net/ppp/pppolac.c b/drivers/net/ppp/pppolac.c index 1b8180cc1d4d..0184c96579e9 100644 --- a/drivers/net/ppp/pppolac.c +++ b/drivers/net/ppp/pppolac.c @@ -206,11 +206,10 @@ static void pppolac_xmit_core(struct work_struct *delivery_work) while ((skb = skb_dequeue(&delivery_queue))) { struct sock *sk_udp = skb->sk; struct kvec iov = {.iov_base = skb->data, .iov_len = skb->len}; - struct msghdr msg = { - .msg_iov = (struct iovec *)&iov, - .msg_iovlen = 1, - .msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT, - }; + struct msghdr msg = { 0 }; + + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, + skb->len); sk_udp->sk_prot->sendmsg(sk_udp, &msg, skb->len); kfree_skb(skb); } diff --git a/drivers/net/ppp/pppopns.c b/drivers/net/ppp/pppopns.c index 568bb45cfeac..d9e06039794e 100644 --- a/drivers/net/ppp/pppopns.c +++ b/drivers/net/ppp/pppopns.c @@ -189,11 +189,10 @@ static void pppopns_xmit_core(struct work_struct *delivery_work) while ((skb = skb_dequeue(&delivery_queue))) { struct sock *sk_raw = skb->sk; struct kvec iov = {.iov_base = skb->data, .iov_len = skb->len}; - struct msghdr msg = { - .msg_iov = (struct iovec *)&iov, - .msg_iovlen = 1, - .msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT, - }; + struct msghdr msg = { 0 }; + + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, + skb->len); sk_raw->sk_prot->sendmsg(sk_raw, &msg, skb->len); kfree_skb(skb); } diff --git a/include/linux/socket.h b/include/linux/socket.h index 18a8337c8959..5bf59c8493b7 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -47,10 +47,6 @@ struct linger { struct msghdr { void *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ -#if defined(CONFIG_PPPOLAC) || defined(CONFIG_PPPOPNS) - struct iovec *msg_iov; /* scatter/gather array */ - __kernel_size_t msg_iovlen; /* # elements in msg_iov */ -#endif struct iov_iter msg_iter; /* data */ void *msg_control; /* ancillary data */ __kernel_size_t msg_controllen; /* ancillary data buffer length */ From 9752f90bc23d8ee78ee9d8613b89436e4fc505af Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 1 Mar 2016 09:47:32 -0800 Subject: [PATCH 018/797] net: ppp: Fix modular build for PPPOLAC and PPPOPNS Unlike other configurations in net/ppp, PPPOLAC and PPPOPNS are defined as boolean configuration options. In allmodconfig builds (or, specifically, if PPP and some of the other PPP protocols were built as modules), this resulted in build errors such as the following, since pppox was built both as module and into the kernel. ERROR: "pppox_ioctl" [net/l2tp/l2tp_ppp.ko] undefined! ERROR: "unregister_pppox_proto" [net/l2tp/l2tp_ppp.ko] undefined! ERROR: "register_pppox_proto" [net/l2tp/l2tp_ppp.ko] undefined! ERROR: "pppox_unbind_sock" [net/l2tp/l2tp_ppp.ko] undefined! Fix the problem by defining PPPOLAC and PPPOPNS tristate. Signed-off-by: Guenter Roeck --- drivers/net/ppp/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ppp/Kconfig b/drivers/net/ppp/Kconfig index e4cf44b1e815..282aec4860eb 100644 --- a/drivers/net/ppp/Kconfig +++ b/drivers/net/ppp/Kconfig @@ -150,7 +150,7 @@ config PPPOL2TP if TTY config PPPOLAC - bool "PPP on L2TP Access Concentrator" + tristate "PPP on L2TP Access Concentrator" depends on PPP && INET help L2TP (RFC 2661) is a tunneling protocol widely used in virtual private @@ -159,7 +159,7 @@ config PPPOLAC fairly simple and suited for clients. config PPPOPNS - bool "PPP on PPTP Network Server" + tristate "PPP on PPTP Network Server" depends on PPP && INET help PPTP (RFC 2637) is a tunneling protocol widely used in virtual private From 74260562e2fed255ca80f31f74140f9d493ce290 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 1 Mar 2016 09:52:27 -0800 Subject: [PATCH 019/797] video: adf: Fix modular build Builds with ADF configured as module fail the following errors. ERROR: "adf_fops" [drivers/video/adf/adf_sysfs.ko] undefined! ERROR: "adf_obj_sysfs_find" [drivers/video/adf/adf_fops.ko] undefined! ERROR: "adf_buffer_cleanup" [drivers/video/adf/adf_fops.ko] undefined! ERROR: "adf_attachment_validate" [drivers/video/adf/adf_client.ko] undefined! ERROR: "adf_attachment_find" [drivers/video/adf/adf_client.ko] undefined! ERROR: "adf_buffer_mapping_cleanup" [drivers/video/adf/adf_client.ko] undefined! ERROR: "adf_attachment_free" [drivers/video/adf/adf_client.ko] undefined! ERROR: "adf_obj_find_event_refcount" [drivers/video/adf/adf_client.ko] undefined! ERROR: "adf_file_queue_event" [drivers/video/adf/adf.ko] undefined! ERROR: "adf_interface_sysfs_init" [drivers/video/adf/adf.ko] undefined! ERROR: "adf_interface_sysfs_destroy" [drivers/video/adf/adf.ko] undefined! ERROR: "adf_device_sysfs_init" [drivers/video/adf/adf.ko] undefined! ERROR: "adf_device_sysfs_destroy" [drivers/video/adf/adf.ko] undefined! ERROR: "adf_sysfs_destroy" [drivers/video/adf/adf.ko] undefined! ERROR: "adf_overlay_engine_sysfs_init" [drivers/video/adf/adf.ko] undefined! ERROR: "adf_overlay_engine_sysfs_destroy" [drivers/video/adf/adf.ko] undefined! ERROR: "adf_sysfs_init" [drivers/video/adf/adf.ko] undefined! If ADF is configured as module, each of the object files ends up being a separate module. Since the functions are used across the various files but not exported, this results in the observed build errors. Modify the Makefile to create a single module instead. Fixes: 066a50cee536 ("video: add atomic display framework") Signed-off-by: Guenter Roeck --- drivers/video/adf/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/video/adf/Makefile b/drivers/video/adf/Makefile index 78d0915122f4..cdf34a666dc7 100644 --- a/drivers/video/adf/Makefile +++ b/drivers/video/adf/Makefile @@ -2,13 +2,15 @@ ccflags-y := -Idrivers/staging/android CFLAGS_adf.o := -I$(src) -obj-$(CONFIG_ADF) += adf.o \ +obj-$(CONFIG_ADF) += adf_core.o + +adf_core-y := adf.o \ adf_client.o \ adf_fops.o \ adf_format.o \ adf_sysfs.o -obj-$(CONFIG_COMPAT) += adf_fops32.o +adf_core-$(CONFIG_COMPAT) += adf_fops32.o obj-$(CONFIG_ADF_FBDEV) += adf_fbdev.o From c37e85aae233519567c82a34a6201cf95f1280a6 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 3 Mar 2016 09:30:33 -0800 Subject: [PATCH 020/797] video: adf: Set ADF_MEMBLOCK to boolean Attempts to build with CONFIG_ADF_MEMBLOCK=m result in the following build error. ERROR: "memblock_free" [drivers/video/adf/adf_memblock.ko] undefined! memblock_free() is marked as __init_memblock, so exporting it seems to be a bad idea. All other callers are only configurable into the kernel, so do the same with ADF_MEMBLOCK. Signed-off-by: Guenter Roeck --- drivers/video/adf/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/adf/Kconfig b/drivers/video/adf/Kconfig index 33858b73d8bb..2777db48fae0 100644 --- a/drivers/video/adf/Kconfig +++ b/drivers/video/adf/Kconfig @@ -11,4 +11,4 @@ menuconfig ADF_FBDEV menuconfig ADF_MEMBLOCK depends on ADF depends on HAVE_MEMBLOCK - tristate "Helper for using memblocks as buffers in ADF drivers" + bool "Helper for using memblocks as buffers in ADF drivers" From 267687882304f817e75e7f9a439ecb5b8f579a34 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 3 Mar 2016 09:44:44 -0800 Subject: [PATCH 021/797] drivers: power: use 'current' instead of 'get_current()' get_current() to get the current thread pointer is not defined for all architectures. This results in the following build error for several architectures (s390, powerpc, and possibly others). drivers/base/power/main.c: In function '__device_suspend': drivers/base/power/main.c:1415:2: error: implicit declaration of function 'get_current' Use 'current' instead. Also include asm/current.h instead of depending on an implicit include. Fixes: ad86cc8ad632 ("drivers: power: Add watchdog timer to catch drivers which lockup during suspend." Signed-off-by: Guenter Roeck --- drivers/base/power/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index a54d810f2966..6ed8b9326629 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -35,6 +35,8 @@ #include #include +#include + #include "../base.h" #include "power.h" @@ -1412,7 +1414,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) goto Complete; data.dev = dev; - data.tsk = get_current(); + data.tsk = current; init_timer_on_stack(&timer); timer.expires = jiffies + HZ * 12; timer.function = dpm_drv_timeout; From 860df91e2aa0da046c727ea87ad7f1176a20665d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 3 Mar 2016 10:33:40 -0800 Subject: [PATCH 022/797] PM / suspend: Add dependency on RTC_LIB Commit 1eff8f99f9f9 ("PM / Suspend: Print wall time at suspend entry and exit") calls rtc_time_to_tm(), which in turn calls rtc_time64_to_tm(). Since RTC_LIB is not mandatory for all architetures, this can result in the following build error. suspend.c:(.text+0x2f36c): undefined reference to `rtc_time64_to_tm' rtc_time64_to_tm() is implemented in rtc-lib, so SUSPEND now needs to select RTC_LIB. Fixes: 1eff8f99f9f9 ("PM / Suspend: Print wall time at suspend entry and exit") Signed-off-by: Guenter Roeck --- kernel/power/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 84c480946fb2..6d6f63be1f9b 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -1,6 +1,7 @@ config SUSPEND bool "Suspend to RAM and standby" depends on ARCH_SUSPEND_POSSIBLE + select RTC_LIB default y ---help--- Allow the system to enter sleep states in which main memory is From 486057e23399589d5d2b904bfbfcd48ea9c72066 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 4 Mar 2016 07:22:27 -0800 Subject: [PATCH 023/797] power: Provide dummy log_suspend_abort_reason() if SUSPEND is disabled The API to log the suspend reason was introduced with commit 57caa2ad5ce3 ("power: Adds functionality to log the last suspend abort reason."). It is called from functions enabled with PM_SLEEP and from functions enabled with SUSPEND, but only available if SUSPEND is enabled. This can result in build failures such as the following if PM_SLEEP is enabled, but SUSPEND is not. kernel/built-in.o: In function `try_to_freeze_tasks': process.c:(.text+0x30928): undefined reference to `log_suspend_abort_reason' drivers/built-in.o: In function `syscore_suspend': (.text+0x6e250): undefined reference to `log_suspend_abort_reason' drivers/built-in.o: In function `__device_suspend': main.c:(.text+0x7a528): undefined reference to `log_suspend_abort_reason' Fixes: 57caa2ad5ce3 ("power: Adds functionality to log the last suspend abort reason.") Signed-off-by: Guenter Roeck --- include/linux/wakeup_reason.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h index ad8b76936c7f..d84d8c301546 100644 --- a/include/linux/wakeup_reason.h +++ b/include/linux/wakeup_reason.h @@ -21,7 +21,12 @@ #define MAX_SUSPEND_ABORT_LEN 256 void log_wakeup_reason(int irq); -void log_suspend_abort_reason(const char *fmt, ...); int check_wakeup_reason(int irq); +#ifdef CONFIG_SUSPEND +void log_suspend_abort_reason(const char *fmt, ...); +#else +static inline void log_suspend_abort_reason(const char *fmt, ...) { } +#endif + #endif /* _LINUX_WAKEUP_REASON_H */ From 47ccdf2dba62f9931047bd4bc3bfbddfcbef1b7e Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 3 Nov 2015 17:01:46 -0500 Subject: [PATCH 024/797] hid-sensor-hub.c: fix wrong do_div() usage do_div() must only be used with a u64 dividend. Signed-off-by: Nicolas Pitre (cherry picked from commit 8d43b49e7e0070f96ac46d30659a336c0224fa0b) Signed-off-by: Guenter Roeck --- drivers/hid/hid-sensor-hub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index 92870cdb52d9..8efaa88329aa 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -218,7 +218,8 @@ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, goto done_proc; } - remaining_bytes = do_div(buffer_size, sizeof(__s32)); + remaining_bytes = buffer_size % sizeof(__s32); + buffer_size = buffer_size / sizeof(__s32); if (buffer_size) { for (i = 0; i < buffer_size; ++i) { hid_set_field(report->field[field_index], i, From 11b55d507888b8f81679aa09636d339d42b09ac4 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 4 Mar 2016 12:44:33 -0800 Subject: [PATCH 025/797] misc: uid_stat: Include linux/atomic.h instead of asm/atomic.h Building the uid_stat driver on sparc32 fails with the following errors. include/linux/atomic.h: In function 'atomic_add_unless': include/linux/atomic.h:437:2: error: implicit declaration of function '__atomic_add_unless' include/linux/atomic.h: In function 'atomic_andnot': include/linux/atomic.h:454:2: error: implicit declaration of function 'atomic_and' include/linux/atomic.h: In function 'atomic_set_mask': include/linux/atomic.h:465:2: error: implicit declaration of function 'atomic_or' include/linux/atomic.h: In function 'atomic_inc_not_zero_hint': include/linux/atomic.h:490:3: error: implicit declaration of function 'atomic_cmpxchg' include/linux/atomic.h: In function 'atomic_dec_if_positive': include/linux/atomic.h:537:2: error: implicit declaration of function 'atomic_read' Fixes: 6b6d5fbf9ae5 ("misc: uidstat: Adding uid stat driver to collect network statistics.") Signed-off-by: Guenter Roeck --- drivers/misc/uid_stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/uid_stat.c b/drivers/misc/uid_stat.c index 8b8c9a22360b..185c69c9738a 100644 --- a/drivers/misc/uid_stat.c +++ b/drivers/misc/uid_stat.c @@ -13,7 +13,7 @@ * */ -#include +#include #include #include From e9eb108900c60b1696426cb6d155cea9905b403f Mon Sep 17 00:00:00 2001 From: Tim Murray Date: Tue, 19 Jan 2016 16:33:27 -0800 Subject: [PATCH 026/797] ANDROID: dm-crypt: run in a WQ_HIGHPRI workqueue (cherry pick from commit ad3ac5180979e5dd1f84e4a807f76fb9fb19f814) Running dm-crypt in a standard workqueue results in IO competing for CPU time with standard user apps, which can lead to pipeline bubbles and seriously degraded performance. Move to a WQ_HIGHPRI workqueue to protect against that. Signed-off-by: Tim Murray Bug: 25392275 Change-Id: I2828587c754a7c2cafdd78b3323b9896cb8cd4e7 --- drivers/md/dm-crypt.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 3147c8d09ea8..e85bcae50f65 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1864,16 +1864,24 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ret = -ENOMEM; - cc->io_queue = alloc_workqueue("kcryptd_io", WQ_MEM_RECLAIM, 1); + cc->io_queue = alloc_workqueue("kcryptd_io", + WQ_HIGHPRI | + WQ_MEM_RECLAIM, + 1); if (!cc->io_queue) { ti->error = "Couldn't create kcryptd io queue"; goto bad; } if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) - cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1); + cc->crypt_queue = alloc_workqueue("kcryptd", + WQ_HIGHPRI | + WQ_MEM_RECLAIM, 1); else - cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, + cc->crypt_queue = alloc_workqueue("kcryptd", + WQ_HIGHPRI | + WQ_MEM_RECLAIM | + WQ_UNBOUND, num_online_cpus()); if (!cc->crypt_queue) { ti->error = "Couldn't create kcryptd queue"; From 3e04d6dfbce9bd89f0c5c79123670397d8ca9370 Mon Sep 17 00:00:00 2001 From: Matthew Moeller Date: Wed, 9 Mar 2016 20:19:25 -0600 Subject: [PATCH 027/797] usb: u_ether: Add missing rx_work init commit 398a708ed5f3ef771d96dfb9b95b5d5170d17eb7 usb: u_ether: Add workqueue as bottom half handler for rx data path set up a worker for the rx data path but missed a case where the work_struct needed to be initialized. This patch adds the missing 'INIT_WORK' Change-Id: I2daabd39d35b3e17a3054837282d649d9c78a0aa Signed-off-by: Matthew Moeller --- drivers/usb/gadget/function/u_ether.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 76b25445c6ad..dd73dfe5dcab 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -1014,6 +1014,7 @@ struct net_device *gether_setup_name_default(const char *netname) spin_lock_init(&dev->lock); spin_lock_init(&dev->req_lock); INIT_WORK(&dev->work, eth_work); + INIT_WORK(&dev->rx_work, process_rx_w); INIT_LIST_HEAD(&dev->tx_reqs); INIT_LIST_HEAD(&dev->rx_reqs); From 772d83a92971662b11316a2a266730d18c1bdeda Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 14 Mar 2016 13:34:44 -0700 Subject: [PATCH 028/797] FROMLIST: pstore-ram: fix NULL reference when used with pdata When using platform-data (not DT), we get an OOPS, because drvdata is only initialized after we try to use it. This addresses my comments made on the upstream submission here: https://patchwork.kernel.org/patch/7980651/ Fixes boot on Chrome OS systems, including the Pixel 2. Change-Id: I97360edf2ce61c83dc543cb6c169f3287e2dae4b Fixes: b1d1b7187c11 ("FROMLIST: pstore-ram: add Device Tree bindings") Signed-off-by: Brian Norris --- fs/pstore/ram.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 2429c804cf78..414041342a99 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -554,7 +554,7 @@ static int ramoops_parse_dt(struct platform_device *pdev, static int ramoops_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct ramoops_platform_data *pdata = platform_get_drvdata(pdev); + struct ramoops_platform_data *pdata = pdev->dev.platform_data; struct ramoops_context *cxt = &oops_cxt; size_t dump_mem_sz; phys_addr_t paddr; @@ -666,7 +666,6 @@ static int ramoops_probe(struct platform_device *pdev) cxt->size, (unsigned long long)cxt->phys_addr, cxt->ecc_info.ecc_size, cxt->ecc_info.block_size); - platform_set_drvdata(pdev, pdata); return 0; fail_buf: From 06bfe14bdda83b9544c6b7bd4cdf738013e51ae8 Mon Sep 17 00:00:00 2001 From: dcashman Date: Wed, 24 Feb 2016 13:27:06 -0800 Subject: [PATCH 029/797] FROMLIST: drivers: char: random: add get_random_long() (cherry picked from commit https://lkml.org/lkml/2016/2/4/831) d07e22597d1d355 ("mm: mmap: add new /proc tunable for mmap_base ASLR") added the ability to choose from a range of values to use for entropy count in generating the random offset to the mmap_base address. The maximum value on this range was set to 32 bits for 64-bit x86 systems, but this value could be increased further, requiring more than the 32 bits of randomness provided by get_random_int(), as is already possible for arm64. Add a new function: get_random_long() which more naturally fits with the mmap usage of get_random_int() but operates exactly the same as get_random_int(). Also, fix the shifting constant in mmap_rnd() to be an unsigned long so that values greater than 31 bits generate an appropriate mask without overflow. This is especially important on x86, as its shift instruction uses a 5-bit mask for the shift operand, which meant that any value for mmap_rnd_bits over 31 acts as a no-op and effectively disables mmap_base randomization. Finally, replace calls to get_random_int() with get_random_long() where appropriate. Bug: 26963541 Signed-off-by: Daniel Cashman Signed-off-by: Daniel Cashman Change-Id: I5b45621088666d5d1dfbf43952f25ea0798b10ba --- drivers/char/random.c | 22 ++++++++++++++++++++++ include/linux/random.h | 1 + 2 files changed, 23 insertions(+) diff --git a/drivers/char/random.c b/drivers/char/random.c index d0da5d852d41..b583e5336630 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1818,6 +1818,28 @@ unsigned int get_random_int(void) } EXPORT_SYMBOL(get_random_int); +/* + * Same as get_random_int(), but returns unsigned long. + */ +unsigned long get_random_long(void) +{ + __u32 *hash; + unsigned long ret; + + if (arch_get_random_long(&ret)) + return ret; + + hash = get_cpu_var(get_random_int_hash); + + hash[0] += current->pid + jiffies + random_get_entropy(); + md5_transform(hash, random_int_secret); + ret = *(unsigned long *)hash; + put_cpu_var(get_random_int_hash); + + return ret; +} +EXPORT_SYMBOL(get_random_long); + /* * randomize_range() returns a start address such that * diff --git a/include/linux/random.h b/include/linux/random.h index a75840c1aa71..9c29122037f9 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -34,6 +34,7 @@ extern const struct file_operations random_fops, urandom_fops; #endif unsigned int get_random_int(void); +unsigned long get_random_long(void); unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len); u32 prandom_u32(void); From 818ad76fe6fb93c8dcbfdd4e479026fdfe14df07 Mon Sep 17 00:00:00 2001 From: dcashman Date: Wed, 24 Feb 2016 13:31:22 -0800 Subject: [PATCH 030/797] FROMLIST: mm: ASLR: use get_random_long() (cherry picked from commit https://lkml.org/lkml/2016/2/4/833) Replace calls to get_random_int() followed by a cast to (unsigned long) with calls to get_random_long(). Also address shifting bug which, in case of x86 removed entropy mask for mmap_rnd_bits values > 31 bits. Bug: 26963541 Signed-off-by: Daniel Cashman Signed-off-by: Daniel Cashman Change-Id: I36c156c9b8d7d157134895fddd4cd6efddcbee86 --- arch/arm/mm/mmap.c | 2 +- arch/arm64/mm/mmap.c | 4 ++-- arch/mips/mm/mmap.c | 4 ++-- arch/powerpc/kernel/process.c | 4 ++-- arch/powerpc/mm/mmap.c | 4 ++-- arch/sparc/kernel/sys_sparc_64.c | 2 +- arch/x86/mm/mmap.c | 6 +++--- fs/binfmt_elf.c | 2 +- 8 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 4b4058db0781..66353caa35b9 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -173,7 +173,7 @@ unsigned long arch_mmap_rnd(void) { unsigned long rnd; - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); return rnd << PAGE_SHIFT; } diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 4c893b5189dd..232f787a088a 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -53,10 +53,10 @@ unsigned long arch_mmap_rnd(void) #ifdef CONFIG_COMPAT if (test_thread_flag(TIF_32BIT)) - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1); + rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); else #endif - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); return rnd << PAGE_SHIFT; } diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 5c81fdd032c3..353037699512 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -146,7 +146,7 @@ unsigned long arch_mmap_rnd(void) { unsigned long rnd; - rnd = (unsigned long)get_random_int(); + rnd = get_random_long(); rnd <<= PAGE_SHIFT; if (TASK_IS_32BIT_ADDR) rnd &= 0xfffffful; @@ -174,7 +174,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) static inline unsigned long brk_rnd(void) { - unsigned long rnd = get_random_int(); + unsigned long rnd = get_random_long(); rnd = rnd << PAGE_SHIFT; /* 8MB for 32bit, 256MB for 64bit */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ef2ad2d682da..36795d1e7558 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1659,9 +1659,9 @@ static inline unsigned long brk_rnd(void) /* 8MB for 32bit, 1GB for 64bit */ if (is_32bit_task()) - rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT))); + rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT))); else - rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT))); + rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT))); return rnd << PAGE_SHIFT; } diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 0f0502e12f6c..4087705ba90f 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -59,9 +59,9 @@ unsigned long arch_mmap_rnd(void) /* 8MB for 32bit, 1GB for 64bit */ if (is_32bit_task()) - rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT)); + rnd = get_random_long() % (1<<(23-PAGE_SHIFT)); else - rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT)); + rnd = get_random_long() % (1UL<<(30-PAGE_SHIFT)); return rnd << PAGE_SHIFT; } diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index c690c8e16a96..b489e9759518 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -264,7 +264,7 @@ static unsigned long mmap_rnd(void) unsigned long rnd = 0UL; if (current->flags & PF_RANDOMIZE) { - unsigned long val = get_random_int(); + unsigned long val = get_random_long(); if (test_thread_flag(TIF_32BIT)) rnd = (val % (1UL << (23UL-PAGE_SHIFT))); else diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 96bd1e2bffaf..72bb52f93c3d 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -71,12 +71,12 @@ unsigned long arch_mmap_rnd(void) if (mmap_is_ia32()) #ifdef CONFIG_COMPAT - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1); + rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); #else - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); #endif else - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); return rnd << PAGE_SHIFT; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 3a93755e880f..0c52941dd62c 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -651,7 +651,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top) if ((current->flags & PF_RANDOMIZE) && !(current->personality & ADDR_NO_RANDOMIZE)) { - random_variable = (unsigned long) get_random_int(); + random_variable = get_random_long(); random_variable &= STACK_RND_MASK; random_variable <<= PAGE_SHIFT; } From 2b081fe250a6aef7375a6d6f9783547b11c171c6 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Wed, 16 Mar 2016 18:15:47 +0800 Subject: [PATCH 031/797] FROMLIST: mmc: block: fix ABI regression of mmc_blk_ioctl If mmc_blk_ioctl returns -EINVAL, blkdev_ioctl continues to work without returning err to user-space. But now we check CAP_SYS_RAWIO firstly, so we return -EPERM to blkdev_ioctl, which make blkdev_ioctl return -EPERM to user-space directly. So this will break all the ioctl with BLKROSET. Now we find Android-adb suffer it for the following log: remount of /system failed; couldn't make block device writable: Operation not permitted openat(AT_FDCWD, "/dev/block/platform/ff420000.dwmmc/by-name/system", O_RDONLY) = 3 ioctl(3, BLKROSET, 0) = -1 EPERM (Operation not permitted) Fixes: a5f5774c55a2 ("mmc: block: Add new ioctl to send multi commands") Change-Id: Ie9ba728e366abf4ab73fd6102d2a2aa0d4ee5c66 Signed-off-by: Shawn Lin Signed-off-by: John Stultz --- drivers/mmc/card/block.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 90e9738a129a..f2ce13ab7ae6 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -593,6 +593,14 @@ static int mmc_blk_ioctl_cmd(struct block_device *bdev, struct mmc_card *card; int err = 0, ioc_err = 0; + /* + * The caller must have CAP_SYS_RAWIO, and must be calling this on the + * whole block device, not on a partition. This prevents overspray + * between sibling partitions. + */ + if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains)) + return -EPERM; + idata = mmc_blk_ioctl_copy_from_user(ic_ptr); if (IS_ERR(idata)) return PTR_ERR(idata); @@ -635,6 +643,14 @@ static int mmc_blk_ioctl_multi_cmd(struct block_device *bdev, int i, err = 0, ioc_err = 0; __u64 num_of_cmds; + /* + * The caller must have CAP_SYS_RAWIO, and must be calling this on the + * whole block device, not on a partition. This prevents overspray + * between sibling partitions. + */ + if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains)) + return -EPERM; + if (copy_from_user(&num_of_cmds, &user->num_of_cmds, sizeof(num_of_cmds))) return -EFAULT; @@ -690,14 +706,6 @@ static int mmc_blk_ioctl_multi_cmd(struct block_device *bdev, static int mmc_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { - /* - * The caller must have CAP_SYS_RAWIO, and must be calling this on the - * whole block device, not on a partition. This prevents overspray - * between sibling partitions. - */ - if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains)) - return -EPERM; - switch (cmd) { case MMC_IOC_CMD: return mmc_blk_ioctl_cmd(bdev, From a898ec7786b54a2b18f072df16e142656617cefe Mon Sep 17 00:00:00 2001 From: Mark Kuo Date: Thu, 20 Aug 2015 13:01:46 +0800 Subject: [PATCH 032/797] CHROMIUM: usb: gadget: f_mtp: Add SuperSpeed support Add SuperSpeed endpoint and companion descriptors. BUG=chrome-os-partner:43682 TEST=Smaug enumerates as a SuperSpeed device. Change-Id: I2bf3125d180fcb07222a5740fa67f3526cf3e95c Signed-off-by: Hui Fu Signed-off-by: Henry Lin Signed-off-by: Mark Kuo Signed-off-by: Andrew Bresticker Reviewed-on: https://chromium-review.googlesource.com/294950 --- drivers/usb/gadget/function/f_mtp.c | 76 ++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c index 8f80a7e91314..148f8fcecc80 100644 --- a/drivers/usb/gadget/function/f_mtp.c +++ b/drivers/usb/gadget/function/f_mtp.c @@ -135,6 +135,34 @@ static struct usb_interface_descriptor ptp_interface_desc = { .bInterfaceProtocol = 1, }; +static struct usb_endpoint_descriptor mtp_ss_in_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_IN, + .bmAttributes = USB_ENDPOINT_XFER_BULK, + .wMaxPacketSize = __constant_cpu_to_le16(1024), +}; + +static struct usb_ss_ep_comp_descriptor mtp_ss_in_comp_desc = { + .bLength = sizeof(mtp_ss_in_comp_desc), + .bDescriptorType = USB_DT_SS_ENDPOINT_COMP, + /* .bMaxBurst = DYNAMIC, */ +}; + +static struct usb_endpoint_descriptor mtp_ss_out_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_OUT, + .bmAttributes = USB_ENDPOINT_XFER_BULK, + .wMaxPacketSize = __constant_cpu_to_le16(1024), +}; + +static struct usb_ss_ep_comp_descriptor mtp_ss_out_comp_desc = { + .bLength = sizeof(mtp_ss_out_comp_desc), + .bDescriptorType = USB_DT_SS_ENDPOINT_COMP, + /* .bMaxBurst = DYNAMIC, */ +}; + static struct usb_endpoint_descriptor mtp_highspeed_in_desc = { .bLength = USB_DT_ENDPOINT_SIZE, .bDescriptorType = USB_DT_ENDPOINT, @@ -174,6 +202,12 @@ static struct usb_endpoint_descriptor mtp_intr_desc = { .bInterval = 6, }; +static struct usb_ss_ep_comp_descriptor mtp_intr_ss_comp_desc = { + .bLength = sizeof(mtp_intr_ss_comp_desc), + .bDescriptorType = USB_DT_SS_ENDPOINT_COMP, + .wBytesPerInterval = cpu_to_le16(2), +}; + static struct usb_descriptor_header *fs_mtp_descs[] = { (struct usb_descriptor_header *) &mtp_interface_desc, (struct usb_descriptor_header *) &mtp_fullspeed_in_desc, @@ -190,6 +224,17 @@ static struct usb_descriptor_header *hs_mtp_descs[] = { NULL, }; +static struct usb_descriptor_header *ss_mtp_descs[] = { + (struct usb_descriptor_header *) &mtp_interface_desc, + (struct usb_descriptor_header *) &mtp_ss_in_desc, + (struct usb_descriptor_header *) &mtp_ss_in_comp_desc, + (struct usb_descriptor_header *) &mtp_ss_out_desc, + (struct usb_descriptor_header *) &mtp_ss_out_comp_desc, + (struct usb_descriptor_header *) &mtp_intr_desc, + (struct usb_descriptor_header *) &mtp_intr_ss_comp_desc, + NULL, +}; + static struct usb_descriptor_header *fs_ptp_descs[] = { (struct usb_descriptor_header *) &ptp_interface_desc, (struct usb_descriptor_header *) &mtp_fullspeed_in_desc, @@ -206,6 +251,17 @@ static struct usb_descriptor_header *hs_ptp_descs[] = { NULL, }; +static struct usb_descriptor_header *ss_ptp_descs[] = { + (struct usb_descriptor_header *) &ptp_interface_desc, + (struct usb_descriptor_header *) &mtp_ss_in_desc, + (struct usb_descriptor_header *) &mtp_ss_in_comp_desc, + (struct usb_descriptor_header *) &mtp_ss_out_desc, + (struct usb_descriptor_header *) &mtp_ss_out_comp_desc, + (struct usb_descriptor_header *) &mtp_intr_desc, + (struct usb_descriptor_header *) &mtp_intr_ss_comp_desc, + NULL, +}; + static struct usb_string mtp_string_defs[] = { /* Naming interface "MTP" so libmtp will recognize us */ [INTERFACE_STRING_INDEX].s = "MTP", @@ -1131,10 +1187,24 @@ mtp_function_bind(struct usb_configuration *c, struct usb_function *f) mtp_highspeed_out_desc.bEndpointAddress = mtp_fullspeed_out_desc.bEndpointAddress; } + /* support super speed hardware */ + if (gadget_is_superspeed(c->cdev->gadget)) { + unsigned max_burst; + + /* Calculate bMaxBurst, we know packet size is 1024 */ + max_burst = min_t(unsigned, MTP_BULK_BUFFER_SIZE / 1024, 15); + mtp_ss_in_desc.bEndpointAddress = + mtp_fullspeed_in_desc.bEndpointAddress; + mtp_ss_in_comp_desc.bMaxBurst = max_burst; + mtp_ss_out_desc.bEndpointAddress = + mtp_fullspeed_out_desc.bEndpointAddress; + mtp_ss_out_comp_desc.bMaxBurst = max_burst; + } DBG(cdev, "%s speed %s: IN/%s, OUT/%s\n", - gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full", - f->name, dev->ep_in->name, dev->ep_out->name); + gadget_is_superspeed(c->cdev->gadget) ? "super" : + (gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full"), + f->name, dev->ep_in->name, dev->ep_out->name); return 0; } @@ -1410,9 +1480,11 @@ struct usb_function *function_alloc_mtp_ptp(struct usb_function_instance *fi, if (mtp_config) { dev->function.fs_descriptors = fs_mtp_descs; dev->function.hs_descriptors = hs_mtp_descs; + dev->function.ss_descriptors = ss_mtp_descs; } else { dev->function.fs_descriptors = fs_ptp_descs; dev->function.hs_descriptors = hs_ptp_descs; + dev->function.ss_descriptors = ss_ptp_descs; } dev->function.bind = mtp_function_bind; dev->function.unbind = mtp_function_unbind; From 714ef4a05ab0239c7ad27e96780e7fe6f05ec5d2 Mon Sep 17 00:00:00 2001 From: Mark Kuo Date: Fri, 11 Sep 2015 16:12:59 +0800 Subject: [PATCH 033/797] CHROMIUM: usb: gadget: f_mtp: fix usb_ss_ep_comp_descriptor wBytesPerInterval in SuperSpeed Endpoint Companion Descriptor needs to be set large enough to reserve enough bus time for associated periodic endpoint. Originally, wBytesPerInterval for mtp's interrupt IN endpoint is set to 2 and its single interrupt transfer will be split into many 2 bytes interrupt transfers. So, we change wBytesPerInterval to INTR_BUFFER_SIZE to ensure interrupt transfer will not be split. BUG=none TEST=Smaug works as a MTP device Change-Id: I49c0df892b2d9e0193a684eef23f73664ced9f91 Signed-off-by: Henry Lin Signed-off-by: Mark Kuo Reviewed-on: https://chromium-review.googlesource.com/299091 Reviewed-by: Andrew Bresticker --- drivers/usb/gadget/function/f_mtp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c index 148f8fcecc80..7f5c390885fe 100644 --- a/drivers/usb/gadget/function/f_mtp.c +++ b/drivers/usb/gadget/function/f_mtp.c @@ -205,7 +205,7 @@ static struct usb_endpoint_descriptor mtp_intr_desc = { static struct usb_ss_ep_comp_descriptor mtp_intr_ss_comp_desc = { .bLength = sizeof(mtp_intr_ss_comp_desc), .bDescriptorType = USB_DT_SS_ENDPOINT_COMP, - .wBytesPerInterval = cpu_to_le16(2), + .wBytesPerInterval = cpu_to_le16(INTR_BUFFER_SIZE), }; static struct usb_descriptor_header *fs_mtp_descs[] = { From 324e42bec18db251c70c73f6ac1b3e09a9535042 Mon Sep 17 00:00:00 2001 From: Mark Kuo Date: Mon, 11 Jan 2016 17:49:16 +0800 Subject: [PATCH 034/797] CHROMIUM: usb: gadget: audio_source: add .free_func callback When userspace unbinds gadget functions through configfs, the .free_func() callback is always invoked. (in config_usb_cfg_unlink()) Implement it as a no-op to avoid the following crash: [ 68.125679] configfs-gadget gadget: unbind function 'accessory'/ffffffc0720bf000 [ 68.133202] configfs-gadget gadget: unbind function 'audio_source'/ffffffc0012ca3c0 [ 68.142668] tegra-xudc 700d0000.usb-device: ep 0 disabled [ 68.148186] Bad mode in Synchronous Abort handler detected, code 0x86000006 [ 68.155144] CPU: 2 PID: 1 Comm: init Tainted: G U W 3.18.0-09419-g87296c3-dirty #561 [ 68.163743] Hardware name: Google Tegra210 Smaug Rev 1,3+ (DT) [ 68.169566] task: ffffffc0bc8d0000 ti: ffffffc0bc8bc000 task.ti: ffffffc0bc8bc000 [ 68.177039] PC is at 0x0 [ 68.179577] LR is at usb_put_function+0x14/0x1c .... BUG=chrome-os-partner:49140 TEST="setprop sys.usb.config accessory,audio_source" on A44 and then switch back to default: "setprop sys.usb.config mtp,adb", no crash will be seen. Change-Id: I5b6141964aab861e86e3afb139ded02d4d122dab Signed-off-by: Mark Kuo Reviewed-on: https://chromium-review.googlesource.com/321013 Commit-Ready: Andrew Bresticker Tested-by: Andrew Bresticker Reviewed-by: Andrew Bresticker --- drivers/usb/gadget/function/f_audio_source.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/gadget/function/f_audio_source.c b/drivers/usb/gadget/function/f_audio_source.c index 39645be93502..bcd817439dbf 100644 --- a/drivers/usb/gadget/function/f_audio_source.c +++ b/drivers/usb/gadget/function/f_audio_source.c @@ -583,6 +583,11 @@ static void audio_disable(struct usb_function *f) usb_ep_disable(audio->in_ep); } +static void audio_free_func(struct usb_function *f) +{ + /* no-op */ +} + /*-------------------------------------------------------------------------*/ static void audio_build_desc(struct audio_dev *audio) @@ -827,6 +832,7 @@ static struct audio_dev _audio_dev = { .set_alt = audio_set_alt, .setup = audio_setup, .disable = audio_disable, + .free_func = audio_free_func, }, .lock = __SPIN_LOCK_UNLOCKED(_audio_dev.lock), .idle_reqs = LIST_HEAD_INIT(_audio_dev.idle_reqs), From 584d9e82958e16fd3f38c9206f4ef9b64f632a18 Mon Sep 17 00:00:00 2001 From: Mark Kuo Date: Mon, 11 Jan 2016 19:07:12 +0800 Subject: [PATCH 035/797] CHROMIUM: usb: gadget: f_accessory: add .raw_request callback After this upstream commit: 3c86726cfe38952f0366f86acfbbb025813ec1c2, .raw_request is mandatory in hid_ll_driver structure, hence add an empty raw_request() function. BUG=chrome-os-partner:49140 TEST=none Change-Id: Idd0bbe6960aad2c557376e4a24827d7e1df8e023 Signed-off-by: Mark Kuo Reviewed-on: https://chromium-review.googlesource.com/321038 Commit-Ready: Andrew Bresticker Tested-by: Andrew Bresticker Reviewed-by: Andrew Bresticker --- drivers/usb/gadget/function/f_accessory.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c index 1be93a7ca4a1..c62123560143 100644 --- a/drivers/usb/gadget/function/f_accessory.c +++ b/drivers/usb/gadget/function/f_accessory.c @@ -404,12 +404,19 @@ static void acc_hid_close(struct hid_device *hid) { } +static int acc_hid_raw_request(struct hid_device *hid, unsigned char reportnum, + __u8 *buf, size_t len, unsigned char rtype, int reqtype) +{ + return 0; +} + static struct hid_ll_driver acc_hid_ll_driver = { .parse = acc_hid_parse, .start = acc_hid_start, .stop = acc_hid_stop, .open = acc_hid_open, .close = acc_hid_close, + .raw_request = acc_hid_raw_request, }; static struct acc_hid_dev *acc_hid_new(struct acc_dev *dev, From abf8eef2ed2e5270047a2ae524d161e0f2c04350 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Tue, 6 Oct 2015 20:32:01 -0700 Subject: [PATCH 036/797] ANDROID: usb: gadget: Add support for MTP OS desc Windows requires OS specific descriptors for automatic install of drivers for MTP devices. https://msdn.microsoft.com/en-us/library/windows/ hardware/gg463179.aspx BUG=24583401 BUG=chrome-os-partner:43409 Change-Id: I9397072ca3d183efbc9571c6cde3790f10d8851e Signed-off-by: Badhri Jagan Sridharan Reviewed-on: https://chromium-review.googlesource.com/304346 Commit-Ready: Andrew Bresticker Tested-by: Andrew Bresticker Reviewed-by: Andrew Bresticker Signed-off-by: Amit Pundir --- drivers/usb/gadget/function/f_mtp.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c index 7f5c390885fe..74195be9b054 100644 --- a/drivers/usb/gadget/function/f_mtp.c +++ b/drivers/usb/gadget/function/f_mtp.c @@ -346,6 +346,8 @@ struct mtp_instance { struct usb_function_instance func_inst; const char *name; struct mtp_dev *dev; + char mtp_ext_compat_id[16]; + struct usb_os_desc mtp_os_desc; }; /* temporary variable used between mtp_open() and mtp_gadget_bind() */ @@ -1157,6 +1159,7 @@ mtp_function_bind(struct usb_configuration *c, struct usb_function *f) struct mtp_dev *dev = func_to_mtp(f); int id; int ret; + struct mtp_instance *fi_mtp; dev->cdev = cdev; DBG(cdev, "mtp_function_bind dev: %p\n", dev); @@ -1174,6 +1177,18 @@ mtp_function_bind(struct usb_configuration *c, struct usb_function *f) mtp_string_defs[INTERFACE_STRING_INDEX].id = ret; mtp_interface_desc.iInterface = ret; } + + fi_mtp = container_of(f->fi, struct mtp_instance, func_inst); + + if (cdev->use_os_string) { + f->os_desc_table = kzalloc(sizeof(*f->os_desc_table), + GFP_KERNEL); + if (!f->os_desc_table) + return -ENOMEM; + f->os_desc_n = 1; + f->os_desc_table[0].os_desc = &fi_mtp->mtp_os_desc; + } + /* allocate endpoints */ ret = mtp_create_bulk_endpoints(dev, &mtp_fullspeed_in_desc, &mtp_fullspeed_out_desc, &mtp_intr_desc); @@ -1223,6 +1238,8 @@ mtp_function_unbind(struct usb_configuration *c, struct usb_function *f) while ((req = mtp_req_get(dev, &dev->intr_idle))) mtp_request_free(req, dev->ep_intr); dev->state = STATE_OFFLINE; + kfree(f->os_desc_table); + f->os_desc_n = 0; } static int mtp_function_set_alt(struct usb_function *f, @@ -1406,6 +1423,7 @@ static void mtp_free_inst(struct usb_function_instance *fi) fi_mtp = to_fi_mtp(fi); kfree(fi_mtp->name); mtp_cleanup(); + kfree(fi_mtp->mtp_os_desc.group.default_groups); kfree(fi_mtp); } @@ -1413,6 +1431,8 @@ struct usb_function_instance *alloc_inst_mtp_ptp(bool mtp_config) { struct mtp_instance *fi_mtp; int ret = 0; + struct usb_os_desc *descs[1]; + char *names[1]; fi_mtp = kzalloc(sizeof(*fi_mtp), GFP_KERNEL); if (!fi_mtp) @@ -1420,6 +1440,13 @@ struct usb_function_instance *alloc_inst_mtp_ptp(bool mtp_config) fi_mtp->func_inst.set_inst_name = mtp_set_inst_name; fi_mtp->func_inst.free_func_inst = mtp_free_inst; + fi_mtp->mtp_os_desc.ext_compat_id = fi_mtp->mtp_ext_compat_id; + INIT_LIST_HEAD(&fi_mtp->mtp_os_desc.ext_prop); + descs[0] = &fi_mtp->mtp_os_desc; + names[0] = "MTP"; + usb_os_desc_prepare_interf_dir(&fi_mtp->func_inst.group, 1, + descs, names, THIS_MODULE); + if (mtp_config) { ret = mtp_setup_configfs(fi_mtp); if (ret) { From c4190692cd1fe8b3ba4a3953b3fc0d539d01560d Mon Sep 17 00:00:00 2001 From: Daniel Campello Date: Mon, 20 Jul 2015 16:23:50 -0700 Subject: [PATCH 037/797] Included sdcardfs source code for kernel 3.0 Only included the source code as is for kernel 3.0. Following patches take care of porting this file system to version 3.10. Change-Id: I09e76db77cd98a059053ba5b6fd88572a4b75b5b Signed-off-by: Daniel Campello --- fs/Kconfig | 1 + fs/Makefile | 5 +- fs/sdcardfs/Kconfig | 18 + fs/sdcardfs/Makefile | 7 + fs/sdcardfs/dentry.c | 182 ++++++++ fs/sdcardfs/derived_perm.c | 290 ++++++++++++ fs/sdcardfs/file.c | 357 +++++++++++++++ fs/sdcardfs/hashtable.h | 190 ++++++++ fs/sdcardfs/inode.c | 886 +++++++++++++++++++++++++++++++++++++ fs/sdcardfs/lookup.c | 386 ++++++++++++++++ fs/sdcardfs/main.c | 425 ++++++++++++++++++ fs/sdcardfs/mmap.c | 82 ++++ fs/sdcardfs/multiuser.h | 37 ++ fs/sdcardfs/packagelist.c | 458 +++++++++++++++++++ fs/sdcardfs/sdcardfs.h | 493 +++++++++++++++++++++ fs/sdcardfs/strtok.h | 75 ++++ fs/sdcardfs/super.c | 229 ++++++++++ include/linux/namei.h | 3 + include/uapi/linux/magic.h | 2 + 19 files changed, 4124 insertions(+), 2 deletions(-) create mode 100644 fs/sdcardfs/Kconfig create mode 100644 fs/sdcardfs/Makefile create mode 100644 fs/sdcardfs/dentry.c create mode 100644 fs/sdcardfs/derived_perm.c create mode 100644 fs/sdcardfs/file.c create mode 100644 fs/sdcardfs/hashtable.h create mode 100644 fs/sdcardfs/inode.c create mode 100644 fs/sdcardfs/lookup.c create mode 100644 fs/sdcardfs/main.c create mode 100644 fs/sdcardfs/mmap.c create mode 100644 fs/sdcardfs/multiuser.h create mode 100644 fs/sdcardfs/packagelist.c create mode 100644 fs/sdcardfs/sdcardfs.h create mode 100644 fs/sdcardfs/strtok.h create mode 100644 fs/sdcardfs/super.c diff --git a/fs/Kconfig b/fs/Kconfig index 6ce72d8d1ee1..a5d2dc39ba07 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -199,6 +199,7 @@ if MISC_FILESYSTEMS source "fs/adfs/Kconfig" source "fs/affs/Kconfig" source "fs/ecryptfs/Kconfig" +source "fs/sdcardfs/Kconfig" source "fs/hfs/Kconfig" source "fs/hfsplus/Kconfig" source "fs/befs/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index 79f522575cba..3b54070cd629 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -3,7 +3,7 @@ # # 14 Sep 2000, Christoph Hellwig # Rewritten to use lists instead of if-statements. -# +# obj-y := open.o read_write.o file_table.o super.o \ char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ @@ -59,7 +59,7 @@ obj-y += devpts/ obj-$(CONFIG_PROFILING) += dcookies.o obj-$(CONFIG_DLM) += dlm/ - + # Do not add any filesystems before this line obj-$(CONFIG_FSCACHE) += fscache/ obj-$(CONFIG_REISERFS_FS) += reiserfs/ @@ -81,6 +81,7 @@ obj-$(CONFIG_ISO9660_FS) += isofs/ obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ obj-$(CONFIG_HFS_FS) += hfs/ obj-$(CONFIG_ECRYPT_FS) += ecryptfs/ +obj-$(CONFIG_SDCARD_FS) += sdcardfs/ obj-$(CONFIG_VXFS_FS) += freevxfs/ obj-$(CONFIG_NFS_FS) += nfs/ obj-$(CONFIG_EXPORTFS) += exportfs/ diff --git a/fs/sdcardfs/Kconfig b/fs/sdcardfs/Kconfig new file mode 100644 index 000000000000..657f4958e8d6 --- /dev/null +++ b/fs/sdcardfs/Kconfig @@ -0,0 +1,18 @@ +config SDCARD_FS + tristate "sdcard file system" + depends on EXPERIMENTAL + default n + help + Sdcardfs is based on Wrapfs file system. + +config SDCARD_FS_FADV_NOACTIVE + bool "sdcardfs fadvise noactive support" + depends on FADV_NOACTIVE + default y + help + Sdcardfs supports fadvise noactive mode. + +config SDCARD_FS_CI_SEARCH + tristate "sdcardfs case-insensitive search support" + depends on SDCARD_FS + default y diff --git a/fs/sdcardfs/Makefile b/fs/sdcardfs/Makefile new file mode 100644 index 000000000000..b84fbb2b45a4 --- /dev/null +++ b/fs/sdcardfs/Makefile @@ -0,0 +1,7 @@ +SDCARDFS_VERSION="0.1" + +EXTRA_CFLAGS += -DSDCARDFS_VERSION=\"$(SDCARDFS_VERSION)\" + +obj-$(CONFIG_SDCARD_FS) += sdcardfs.o + +sdcardfs-y := dentry.o file.o inode.o main.o super.o lookup.o mmap.o packagelist.o derived_perm.o diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c new file mode 100644 index 000000000000..4572a5403bb2 --- /dev/null +++ b/fs/sdcardfs/dentry.c @@ -0,0 +1,182 @@ +/* + * fs/sdcardfs/dentry.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" +#include "linux/ctype.h" + +/* + * returns: -ERRNO if error (returned to user) + * 0: tell VFS to invalidate dentry + * 1: dentry is valid + */ +static int sdcardfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + int err = 1; + struct path parent_lower_path, lower_path; + struct dentry *parent_dentry = NULL; + struct dentry *parent_lower_dentry = NULL; + struct dentry *lower_cur_parent_dentry = NULL; + struct dentry *lower_dentry = NULL; + + if (nd && nd->flags & LOOKUP_RCU) + return -ECHILD; + + spin_lock(&dentry->d_lock); + if (IS_ROOT(dentry)) { + spin_unlock(&dentry->d_lock); + return 1; + } + spin_unlock(&dentry->d_lock); + + /* check uninitialized obb_dentry and + * whether the base obbpath has been changed or not */ + if (is_obbpath_invalid(dentry)) { + d_drop(dentry); + return 0; + } + + parent_dentry = dget_parent(dentry); + sdcardfs_get_lower_path(parent_dentry, &parent_lower_path); + sdcardfs_get_real_lower(dentry, &lower_path); + parent_lower_dentry = parent_lower_path.dentry; + lower_dentry = lower_path.dentry; + lower_cur_parent_dentry = dget_parent(lower_dentry); + + spin_lock(&lower_dentry->d_lock); + if (d_unhashed(lower_dentry)) { + spin_unlock(&lower_dentry->d_lock); + d_drop(dentry); + err = 0; + goto out; + } + spin_unlock(&lower_dentry->d_lock); + + if (parent_lower_dentry != lower_cur_parent_dentry) { + d_drop(dentry); + err = 0; + goto out; + } + + if (dentry < lower_dentry) { + spin_lock(&dentry->d_lock); + spin_lock(&lower_dentry->d_lock); + } else { + spin_lock(&lower_dentry->d_lock); + spin_lock(&dentry->d_lock); + } + + if (dentry->d_name.len != lower_dentry->d_name.len) { + __d_drop(dentry); + err = 0; + } else if (strncasecmp(dentry->d_name.name, lower_dentry->d_name.name, + dentry->d_name.len) != 0) { + __d_drop(dentry); + err = 0; + } + + if (dentry < lower_dentry) { + spin_unlock(&lower_dentry->d_lock); + spin_unlock(&dentry->d_lock); + } else { + spin_unlock(&dentry->d_lock); + spin_unlock(&lower_dentry->d_lock); + } + +out: + dput(parent_dentry); + dput(lower_cur_parent_dentry); + sdcardfs_put_lower_path(parent_dentry, &parent_lower_path); + sdcardfs_put_real_lower(dentry, &lower_path); + return err; +} + +static void sdcardfs_d_release(struct dentry *dentry) +{ + /* release and reset the lower paths */ + if(has_graft_path(dentry)) { + sdcardfs_put_reset_orig_path(dentry); + } + sdcardfs_put_reset_lower_path(dentry); + free_dentry_private_data(dentry); + return; +} + +static int sdcardfs_hash_ci(const struct dentry *dentry, + const struct inode *inode, struct qstr *qstr) +{ + /* + * This function is copy of vfat_hashi. + * FIXME Should we support national language? + * Refer to vfat_hashi() + * struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io; + */ + const unsigned char *name; + unsigned int len; + unsigned long hash; + + name = qstr->name; + //len = vfat_striptail_len(qstr); + len = qstr->len; + + hash = init_name_hash(); + while (len--) + //hash = partial_name_hash(nls_tolower(t, *name++), hash); + hash = partial_name_hash(tolower(*name++), hash); + qstr->hash = end_name_hash(hash); + + return 0; +} + +/* + * Case insensitive compare of two vfat names. + */ +static int sdcardfs_cmp_ci(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) +{ + /* This function is copy of vfat_cmpi */ + // FIXME Should we support national language? + //struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io; + //unsigned int alen, blen; + + /* A filename cannot end in '.' or we treat it like it has none */ + /* + alen = vfat_striptail_len(name); + blen = __vfat_striptail_len(len, str); + if (alen == blen) { + if (nls_strnicmp(t, name->name, str, alen) == 0) + return 0; + } + */ + if (name->len == len) { + if (strncasecmp(name->name, str, len) == 0) + return 0; + } + return 1; +} + +const struct dentry_operations sdcardfs_ci_dops = { + .d_revalidate = sdcardfs_d_revalidate, + .d_release = sdcardfs_d_release, + .d_hash = sdcardfs_hash_ci, + .d_compare = sdcardfs_cmp_ci, +}; + diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c new file mode 100644 index 000000000000..00c33a471dcc --- /dev/null +++ b/fs/sdcardfs/derived_perm.c @@ -0,0 +1,290 @@ +/* + * fs/sdcardfs/derived_perm.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" + +/* copy derived state from parent inode */ +static void inherit_derived_state(struct inode *parent, struct inode *child) +{ + struct sdcardfs_inode_info *pi = SDCARDFS_I(parent); + struct sdcardfs_inode_info *ci = SDCARDFS_I(child); + + ci->perm = PERM_INHERIT; + ci->userid = pi->userid; + ci->d_uid = pi->d_uid; + ci->d_gid = pi->d_gid; + ci->d_mode = pi->d_mode; +} + +/* helper function for derived state */ +void setup_derived_state(struct inode *inode, perm_t perm, + userid_t userid, uid_t uid, gid_t gid, mode_t mode) +{ + struct sdcardfs_inode_info *info = SDCARDFS_I(inode); + + info->perm = perm; + info->userid = userid; + info->d_uid = uid; + info->d_gid = gid; + info->d_mode = mode; +} + +void get_derived_permission(struct dentry *parent, struct dentry *dentry) +{ + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + struct sdcardfs_inode_info *info = SDCARDFS_I(dentry->d_inode); + struct sdcardfs_inode_info *parent_info= SDCARDFS_I(parent->d_inode); + appid_t appid; + + /* By default, each inode inherits from its parent. + * the properties are maintained on its private fields + * because the inode attributes will be modified with that of + * its lower inode. + * The derived state will be updated on the last + * stage of each system call by fix_derived_permission(inode). + */ + + inherit_derived_state(parent->d_inode, dentry->d_inode); + + //printk(KERN_INFO "sdcardfs: derived: %s, %s, %d\n", parent->d_name.name, + // dentry->d_name.name, parent_info->perm); + + if (sbi->options.derive == DERIVE_NONE) { + return; + } + + /* Derive custom permissions based on parent and current node */ + switch (parent_info->perm) { + case PERM_INHERIT: + /* Already inherited above */ + break; + case PERM_LEGACY_PRE_ROOT: + /* Legacy internal layout places users at top level */ + info->perm = PERM_ROOT; + info->userid = simple_strtoul(dentry->d_name.name, NULL, 10); + break; + case PERM_ROOT: + /* Assume masked off by default. */ + info->d_mode = 00770; + if (!strcasecmp(dentry->d_name.name, "Android")) { + /* App-specific directories inside; let anyone traverse */ + info->perm = PERM_ANDROID; + info->d_mode = 00771; + } else if (sbi->options.split_perms) { + if (!strcasecmp(dentry->d_name.name, "DCIM") + || !strcasecmp(dentry->d_name.name, "Pictures")) { + info->d_gid = AID_SDCARD_PICS; + } else if (!strcasecmp(dentry->d_name.name, "Alarms") + || !strcasecmp(dentry->d_name.name, "Movies") + || !strcasecmp(dentry->d_name.name, "Music") + || !strcasecmp(dentry->d_name.name, "Notifications") + || !strcasecmp(dentry->d_name.name, "Podcasts") + || !strcasecmp(dentry->d_name.name, "Ringtones")) { + info->d_gid = AID_SDCARD_AV; + } + } + break; + case PERM_ANDROID: + if (!strcasecmp(dentry->d_name.name, "data")) { + /* App-specific directories inside; let anyone traverse */ + info->perm = PERM_ANDROID_DATA; + info->d_mode = 00771; + } else if (!strcasecmp(dentry->d_name.name, "obb")) { + /* App-specific directories inside; let anyone traverse */ + info->perm = PERM_ANDROID_OBB; + info->d_mode = 00771; + // FIXME : this feature will be implemented later. + /* Single OBB directory is always shared */ + } else if (!strcasecmp(dentry->d_name.name, "user")) { + /* User directories must only be accessible to system, protected + * by sdcard_all. Zygote will bind mount the appropriate user- + * specific path. */ + info->perm = PERM_ANDROID_USER; + info->d_gid = AID_SDCARD_ALL; + info->d_mode = 00770; + } + break; + /* same policy will be applied on PERM_ANDROID_DATA + * and PERM_ANDROID_OBB */ + case PERM_ANDROID_DATA: + case PERM_ANDROID_OBB: + appid = get_appid(sbi->pkgl_id, dentry->d_name.name); + if (appid != 0) { + info->d_uid = multiuser_get_uid(parent_info->userid, appid); + } + info->d_mode = 00770; + break; + case PERM_ANDROID_USER: + /* Root of a secondary user */ + info->perm = PERM_ROOT; + info->userid = simple_strtoul(dentry->d_name.name, NULL, 10); + info->d_gid = AID_SDCARD_R; + info->d_mode = 00771; + break; + } +} + +/* main function for updating derived permission */ +inline void update_derived_permission(struct dentry *dentry) +{ + struct dentry *parent; + + if(!dentry || !dentry->d_inode) { + printk(KERN_ERR "sdcardfs: %s: invalid dentry\n", __func__); + return; + } + /* FIXME: + * 1. need to check whether the dentry is updated or not + * 2. remove the root dentry update + */ + if(IS_ROOT(dentry)) { + //setup_default_pre_root_state(dentry->d_inode); + } else { + parent = dget_parent(dentry); + if(parent) { + get_derived_permission(parent, dentry); + dput(parent); + } + } + fix_derived_permission(dentry->d_inode); +} + +int need_graft_path(struct dentry *dentry) +{ + int ret = 0; + struct dentry *parent = dget_parent(dentry); + struct sdcardfs_inode_info *parent_info= SDCARDFS_I(parent->d_inode); + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + + if(parent_info->perm == PERM_ANDROID && + !strcasecmp(dentry->d_name.name, "obb")) { + + /* /Android/obb is the base obbpath of DERIVED_UNIFIED */ + if(!(sbi->options.derive == DERIVE_UNIFIED + && parent_info->userid == 0)) { + ret = 1; + } + } + dput(parent); + return ret; +} + +int is_obbpath_invalid(struct dentry *dent) +{ + int ret = 0; + struct sdcardfs_dentry_info *di = SDCARDFS_D(dent); + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dent->d_sb); + char *path_buf, *obbpath_s; + + /* check the base obbpath has been changed. + * this routine can check an uninitialized obb dentry as well. + * regarding the uninitialized obb, refer to the sdcardfs_mkdir() */ + spin_lock(&di->lock); + if(di->orig_path.dentry) { + if(!di->lower_path.dentry) { + ret = 1; + } else { + path_get(&di->lower_path); + //lower_parent = lock_parent(lower_path->dentry); + + path_buf = kmalloc(PATH_MAX, GFP_ATOMIC); + if(!path_buf) { + ret = 1; + printk(KERN_ERR "sdcardfs: " + "fail to allocate path_buf in %s.\n", __func__); + } else { + obbpath_s = d_path(&di->lower_path, path_buf, PATH_MAX); + if (d_unhashed(di->lower_path.dentry) || + strcasecmp(sbi->obbpath_s, obbpath_s)) { + ret = 1; + } + kfree(path_buf); + } + + //unlock_dir(lower_parent); + path_put(&di->lower_path); + } + } + spin_unlock(&di->lock); + return ret; +} + +int is_base_obbpath(struct dentry *dentry) +{ + int ret = 0; + struct dentry *parent = dget_parent(dentry); + struct sdcardfs_inode_info *parent_info= SDCARDFS_I(parent->d_inode); + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + + spin_lock(&SDCARDFS_D(dentry)->lock); + /* DERIVED_LEGACY */ + if(parent_info->perm == PERM_LEGACY_PRE_ROOT && + !strcasecmp(dentry->d_name.name, "obb")) { + ret = 1; + } + /* DERIVED_UNIFIED :/Android/obb is the base obbpath */ + else if (parent_info->perm == PERM_ANDROID && + !strcasecmp(dentry->d_name.name, "obb")) { + if((sbi->options.derive == DERIVE_UNIFIED + && parent_info->userid == 0)) { + ret = 1; + } + } + spin_unlock(&SDCARDFS_D(dentry)->lock); + dput(parent); + return ret; +} + +/* The lower_path will be stored to the dentry's orig_path + * and the base obbpath will be copyed to the lower_path variable. + * if an error returned, there's no change in the lower_path + * returns: -ERRNO if error (0: no error) */ +int setup_obb_dentry(struct dentry *dentry, struct path *lower_path) +{ + int err = 0; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + struct path obbpath; + + /* A local obb dentry must have its own orig_path to support rmdir + * and mkdir of itself. Usually, we expect that the sbi->obbpath + * is avaiable on this stage. */ + sdcardfs_set_orig_path(dentry, lower_path); + + err = kern_path(sbi->obbpath_s, + LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &obbpath); + + if(!err) { + /* the obbpath base has been found */ + printk(KERN_INFO "sdcardfs: " + "the sbi->obbpath is found\n"); + pathcpy(lower_path, &obbpath); + } else { + /* if the sbi->obbpath is not available, we can optionally + * setup the lower_path with its orig_path. + * but, the current implementation just returns an error + * because the sdcard daemon also regards this case as + * a lookup fail. */ + printk(KERN_INFO "sdcardfs: " + "the sbi->obbpath is not available\n"); + } + return err; +} + + diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c new file mode 100644 index 000000000000..bcacb947c874 --- /dev/null +++ b/fs/sdcardfs/file.c @@ -0,0 +1,357 @@ +/* + * fs/sdcardfs/file.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" +#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE +#include +#endif + +static ssize_t sdcardfs_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int err; + struct file *lower_file; + struct dentry *dentry = file->f_path.dentry; +#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE + struct backing_dev_info *bdi; +#endif + + lower_file = sdcardfs_lower_file(file); + +#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE + if (file->f_mode & FMODE_NOACTIVE) { + if (!(lower_file->f_mode & FMODE_NOACTIVE)) { + bdi = lower_file->f_mapping->backing_dev_info; + lower_file->f_ra.ra_pages = bdi->ra_pages * 2; + spin_lock(&lower_file->f_lock); + lower_file->f_mode |= FMODE_NOACTIVE; + spin_unlock(&lower_file->f_lock); + } + } +#endif + + err = vfs_read(lower_file, buf, count, ppos); + /* update our inode atime upon a successful lower read */ + if (err >= 0) + fsstack_copy_attr_atime(dentry->d_inode, + lower_file->f_path.dentry->d_inode); + + return err; +} + +static ssize_t sdcardfs_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + int err = 0; + struct file *lower_file; + struct dentry *dentry = file->f_path.dentry; + + /* check disk space */ + if (!check_min_free_space(dentry, count, 0)) { + printk(KERN_INFO "No minimum free space.\n"); + return -ENOSPC; + } + + lower_file = sdcardfs_lower_file(file); + err = vfs_write(lower_file, buf, count, ppos); + /* update our inode times+sizes upon a successful lower write */ + if (err >= 0) { + fsstack_copy_inode_size(dentry->d_inode, + lower_file->f_path.dentry->d_inode); + fsstack_copy_attr_times(dentry->d_inode, + lower_file->f_path.dentry->d_inode); + } + + return err; +} + +static int sdcardfs_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + int err = 0; + struct file *lower_file = NULL; + struct dentry *dentry = file->f_path.dentry; + + lower_file = sdcardfs_lower_file(file); + + lower_file->f_pos = file->f_pos; + err = vfs_readdir(lower_file, filldir, dirent); + file->f_pos = lower_file->f_pos; + if (err >= 0) /* copy the atime */ + fsstack_copy_attr_atime(dentry->d_inode, + lower_file->f_path.dentry->d_inode); + return err; +} + +static long sdcardfs_unlocked_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + long err = -ENOTTY; + struct file *lower_file; + + lower_file = sdcardfs_lower_file(file); + + /* XXX: use vfs_ioctl if/when VFS exports it */ + if (!lower_file || !lower_file->f_op) + goto out; + if (lower_file->f_op->unlocked_ioctl) + err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); + +out: + return err; +} + +#ifdef CONFIG_COMPAT +static long sdcardfs_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + long err = -ENOTTY; + struct file *lower_file; + + lower_file = sdcardfs_lower_file(file); + + /* XXX: use vfs_ioctl if/when VFS exports it */ + if (!lower_file || !lower_file->f_op) + goto out; + if (lower_file->f_op->compat_ioctl) + err = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); + +out: + return err; +} +#endif + +static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma) +{ + int err = 0; + bool willwrite; + struct file *lower_file; + const struct vm_operations_struct *saved_vm_ops = NULL; + + /* this might be deferred to mmap's writepage */ + willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags); + + /* + * File systems which do not implement ->writepage may use + * generic_file_readonly_mmap as their ->mmap op. If you call + * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL. + * But we cannot call the lower ->mmap op, so we can't tell that + * writeable mappings won't work. Therefore, our only choice is to + * check if the lower file system supports the ->writepage, and if + * not, return EINVAL (the same error that + * generic_file_readonly_mmap returns in that case). + */ + lower_file = sdcardfs_lower_file(file); + if (willwrite && !lower_file->f_mapping->a_ops->writepage) { + err = -EINVAL; + printk(KERN_ERR "sdcardfs: lower file system does not " + "support writeable mmap\n"); + goto out; + } + + /* + * find and save lower vm_ops. + * + * XXX: the VFS should have a cleaner way of finding the lower vm_ops + */ + if (!SDCARDFS_F(file)->lower_vm_ops) { + err = lower_file->f_op->mmap(lower_file, vma); + if (err) { + printk(KERN_ERR "sdcardfs: lower mmap failed %d\n", err); + goto out; + } + saved_vm_ops = vma->vm_ops; /* save: came from lower ->mmap */ + err = do_munmap(current->mm, vma->vm_start, + vma->vm_end - vma->vm_start); + if (err) { + printk(KERN_ERR "sdcardfs: do_munmap failed %d\n", err); + goto out; + } + } + + /* + * Next 3 lines are all I need from generic_file_mmap. I definitely + * don't want its test for ->readpage which returns -ENOEXEC. + */ + file_accessed(file); + vma->vm_ops = &sdcardfs_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; + + file->f_mapping->a_ops = &sdcardfs_aops; /* set our aops */ + if (!SDCARDFS_F(file)->lower_vm_ops) /* save for our ->fault */ + SDCARDFS_F(file)->lower_vm_ops = saved_vm_ops; + +out: + return err; +} + +static int sdcardfs_open(struct inode *inode, struct file *file) +{ + int err = 0; + struct file *lower_file = NULL; + struct path lower_path; + struct dentry *dentry = file->f_path.dentry; + struct dentry *parent = dget_parent(dentry); + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + const struct cred *saved_cred = NULL; + int has_rw; + + /* don't open unhashed/deleted files */ + if (d_unhashed(dentry)) { + err = -ENOENT; + goto out_err; + } + + has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); + + if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name, + sbi->options.derive, + open_flags_to_access_mode(file->f_flags), has_rw)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + err = -EACCES; + goto out_err; + } + + /* save current_cred and override it */ + OVERRIDE_CRED(sbi, saved_cred); + + file->private_data = + kzalloc(sizeof(struct sdcardfs_file_info), GFP_KERNEL); + if (!SDCARDFS_F(file)) { + err = -ENOMEM; + goto out_revert_cred; + } + + /* open lower object and link sdcardfs's file struct to lower's */ + sdcardfs_get_lower_path(file->f_path.dentry, &lower_path); + lower_file = dentry_open(lower_path.dentry, lower_path.mnt, + file->f_flags, current_cred()); + if (IS_ERR(lower_file)) { + err = PTR_ERR(lower_file); + lower_file = sdcardfs_lower_file(file); + if (lower_file) { + sdcardfs_set_lower_file(file, NULL); + fput(lower_file); /* fput calls dput for lower_dentry */ + } + } else { + sdcardfs_set_lower_file(file, lower_file); + } + + if (err) + kfree(SDCARDFS_F(file)); + else { + fsstack_copy_attr_all(inode, sdcardfs_lower_inode(inode)); + fix_derived_permission(inode); + } + +out_revert_cred: + REVERT_CRED(saved_cred); +out_err: + dput(parent); + return err; +} + +static int sdcardfs_flush(struct file *file, fl_owner_t id) +{ + int err = 0; + struct file *lower_file = NULL; + + lower_file = sdcardfs_lower_file(file); + if (lower_file && lower_file->f_op && lower_file->f_op->flush) + err = lower_file->f_op->flush(lower_file, id); + + return err; +} + +/* release all lower object references & free the file info structure */ +static int sdcardfs_file_release(struct inode *inode, struct file *file) +{ + struct file *lower_file; + + lower_file = sdcardfs_lower_file(file); + if (lower_file) { + sdcardfs_set_lower_file(file, NULL); + fput(lower_file); + } + + kfree(SDCARDFS_F(file)); + return 0; +} + +static int +sdcardfs_fsync(struct file *file, int datasync) +{ + int err; + struct file *lower_file; + struct path lower_path; + struct dentry *dentry = file->f_path.dentry; + + lower_file = sdcardfs_lower_file(file); + sdcardfs_get_lower_path(dentry, &lower_path); + err = vfs_fsync(lower_file, datasync); + sdcardfs_put_lower_path(dentry, &lower_path); + + return err; +} + +static int sdcardfs_fasync(int fd, struct file *file, int flag) +{ + int err = 0; + struct file *lower_file = NULL; + + lower_file = sdcardfs_lower_file(file); + if (lower_file->f_op && lower_file->f_op->fasync) + err = lower_file->f_op->fasync(fd, lower_file, flag); + + return err; +} + +const struct file_operations sdcardfs_main_fops = { + .llseek = generic_file_llseek, + .read = sdcardfs_read, + .write = sdcardfs_write, + .unlocked_ioctl = sdcardfs_unlocked_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = sdcardfs_compat_ioctl, +#endif + .mmap = sdcardfs_mmap, + .open = sdcardfs_open, + .flush = sdcardfs_flush, + .release = sdcardfs_file_release, + .fsync = sdcardfs_fsync, + .fasync = sdcardfs_fasync, +}; + +/* trimmed directory options */ +const struct file_operations sdcardfs_dir_fops = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = sdcardfs_readdir, + .unlocked_ioctl = sdcardfs_unlocked_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = sdcardfs_compat_ioctl, +#endif + .open = sdcardfs_open, + .release = sdcardfs_file_release, + .flush = sdcardfs_flush, + .fsync = sdcardfs_fsync, + .fasync = sdcardfs_fasync, +}; diff --git a/fs/sdcardfs/hashtable.h b/fs/sdcardfs/hashtable.h new file mode 100644 index 000000000000..1e770f3df148 --- /dev/null +++ b/fs/sdcardfs/hashtable.h @@ -0,0 +1,190 @@ +/* + * Statically sized hash table implementation + * (C) 2012 Sasha Levin + */ + +#ifndef _LINUX_HASHTABLE_H +#define _LINUX_HASHTABLE_H + +#include +#include +#include +#include +#include + +#define DEFINE_HASHTABLE(name, bits) \ + struct hlist_head name[1 << (bits)] = \ + { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } + +#define DECLARE_HASHTABLE(name, bits) \ + struct hlist_head name[1 << (bits)] + +#define HASH_SIZE(name) (ARRAY_SIZE(name)) +#define HASH_BITS(name) ilog2(HASH_SIZE(name)) + +/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */ +#define hash_min(val, bits) \ + (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits)) + +static inline void __hash_init(struct hlist_head *ht, unsigned int sz) +{ + unsigned int i; + + for (i = 0; i < sz; i++) + INIT_HLIST_HEAD(&ht[i]); +} + +/** + * hash_init - initialize a hash table + * @hashtable: hashtable to be initialized + * + * Calculates the size of the hashtable from the given parameter, otherwise + * same as hash_init_size. + * + * This has to be a macro since HASH_BITS() will not work on pointers since + * it calculates the size during preprocessing. + */ +#define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable)) + +/** + * hash_add - add an object to a hashtable + * @hashtable: hashtable to add to + * @node: the &struct hlist_node of the object to be added + * @key: the key of the object to be added + */ +#define hash_add(hashtable, node, key) \ + hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) + +/** + * hash_add_rcu - add an object to a rcu enabled hashtable + * @hashtable: hashtable to add to + * @node: the &struct hlist_node of the object to be added + * @key: the key of the object to be added + */ +#define hash_add_rcu(hashtable, node, key) \ + hlist_add_head_rcu(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) + +/** + * hash_hashed - check whether an object is in any hashtable + * @node: the &struct hlist_node of the object to be checked + */ +static inline bool hash_hashed(struct hlist_node *node) +{ + return !hlist_unhashed(node); +} + +static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz) +{ + unsigned int i; + + for (i = 0; i < sz; i++) + if (!hlist_empty(&ht[i])) + return false; + + return true; +} + +/** + * hash_empty - check whether a hashtable is empty + * @hashtable: hashtable to check + * + * This has to be a macro since HASH_BITS() will not work on pointers since + * it calculates the size during preprocessing. + */ +#define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable)) + +/** + * hash_del - remove an object from a hashtable + * @node: &struct hlist_node of the object to remove + */ +static inline void hash_del(struct hlist_node *node) +{ + hlist_del_init(node); +} + +/** + * hash_del_rcu - remove an object from a rcu enabled hashtable + * @node: &struct hlist_node of the object to remove + */ +static inline void hash_del_rcu(struct hlist_node *node) +{ + hlist_del_init_rcu(node); +} + +/** + * hash_for_each - iterate over a hashtable + * @name: hashtable to iterate + * @bkt: integer to use as bucket loop cursor + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + */ +#define hash_for_each(name, bkt, obj, member, pos) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry(obj, pos, &name[bkt], member) + +/** + * hash_for_each_rcu - iterate over a rcu enabled hashtable + * @name: hashtable to iterate + * @bkt: integer to use as bucket loop cursor + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + */ +#define hash_for_each_rcu(name, bkt, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry_rcu(obj, &name[bkt], member) + +/** + * hash_for_each_safe - iterate over a hashtable safe against removal of + * hash entry + * @name: hashtable to iterate + * @bkt: integer to use as bucket loop cursor + * @tmp: a &struct used for temporary storage + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + */ +#define hash_for_each_safe(name, bkt, tmp, obj, member, pos) \ + for ((bkt) = 0, obj = NULL; (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry_safe(obj, pos, tmp, &name[bkt], member) + +/** + * hash_for_each_possible - iterate over all possible objects hashing to the + * same bucket + * @name: hashtable to iterate + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + * @key: the key of the objects to iterate over + */ +#define hash_for_each_possible(name, obj, member, key, pos) \ + hlist_for_each_entry(obj, pos, &name[hash_min(key, HASH_BITS(name))], member) + +/** + * hash_for_each_possible_rcu - iterate over all possible objects hashing to the + * same bucket in an rcu enabled hashtable + * in a rcu enabled hashtable + * @name: hashtable to iterate + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + * @key: the key of the objects to iterate over + */ +#define hash_for_each_possible_rcu(name, obj, member, key) \ + hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\ + member) + +/** + * hash_for_each_possible_safe - iterate over all possible objects hashing to the + * same bucket safe against removals + * @name: hashtable to iterate + * @obj: the type * to use as a loop cursor for each entry + * @tmp: a &struct used for temporary storage + * @member: the name of the hlist_node within the struct + * @key: the key of the objects to iterate over + */ +#define hash_for_each_possible_safe(name, obj, tmp, member, key) \ + hlist_for_each_entry_safe(obj, tmp,\ + &name[hash_min(key, HASH_BITS(name))], member) + + +#endif diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c new file mode 100644 index 000000000000..e8ed04250ed1 --- /dev/null +++ b/fs/sdcardfs/inode.c @@ -0,0 +1,886 @@ +/* + * fs/sdcardfs/inode.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" + +/* Do not directly use this function. Use OVERRIDE_CRED() instead. */ +const struct cred * override_fsids(struct sdcardfs_sb_info* sbi) +{ + struct cred * cred; + const struct cred * old_cred; + + cred = prepare_creds(); + if (!cred) + return NULL; + + cred->fsuid = sbi->options.fs_low_uid; + cred->fsgid = sbi->options.fs_low_gid; + + old_cred = override_creds(cred); + + return old_cred; +} + +/* Do not directly use this function, use REVERT_CRED() instead. */ +void revert_fsids(const struct cred * old_cred) +{ + const struct cred * cur_cred; + + cur_cred = current->cred; + revert_creds(old_cred); + put_cred(cur_cred); +} + +static int sdcardfs_create(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) +{ + int err = 0; + struct dentry *lower_dentry; + struct dentry *lower_parent_dentry = NULL; + struct path lower_path, saved_path; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + const struct cred *saved_cred = NULL; + + int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); + if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + err = -EACCES; + goto out_eacces; + } + + /* save current_cred and override it */ + OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred); + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_parent_dentry = lock_parent(lower_dentry); + + err = mnt_want_write(lower_path.mnt); + if (err) + goto out_unlock; + + pathcpy(&saved_path, &nd->path); + pathcpy(&nd->path, &lower_path); + + /* set last 16bytes of mode field to 0664 */ + mode = (mode & S_IFMT) | 00664; + err = vfs_create(lower_parent_dentry->d_inode, lower_dentry, mode, nd); + + pathcpy(&nd->path, &saved_path); + if (err) + goto out; + + err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path); + if (err) + goto out; + fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); + fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); + +out: + mnt_drop_write(lower_path.mnt); +out_unlock: + unlock_dir(lower_parent_dentry); + sdcardfs_put_lower_path(dentry, &lower_path); + REVERT_CRED(saved_cred); +out_eacces: + return err; +} + +#if 0 +static int sdcardfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry) +{ + struct dentry *lower_old_dentry; + struct dentry *lower_new_dentry; + struct dentry *lower_dir_dentry; + u64 file_size_save; + int err; + struct path lower_old_path, lower_new_path; + + OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb)); + + file_size_save = i_size_read(old_dentry->d_inode); + sdcardfs_get_lower_path(old_dentry, &lower_old_path); + sdcardfs_get_lower_path(new_dentry, &lower_new_path); + lower_old_dentry = lower_old_path.dentry; + lower_new_dentry = lower_new_path.dentry; + lower_dir_dentry = lock_parent(lower_new_dentry); + + err = mnt_want_write(lower_new_path.mnt); + if (err) + goto out_unlock; + + err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, + lower_new_dentry); + if (err || !lower_new_dentry->d_inode) + goto out; + + err = sdcardfs_interpose(new_dentry, dir->i_sb, &lower_new_path); + if (err) + goto out; + fsstack_copy_attr_times(dir, lower_new_dentry->d_inode); + fsstack_copy_inode_size(dir, lower_new_dentry->d_inode); + old_dentry->d_inode->i_nlink = + sdcardfs_lower_inode(old_dentry->d_inode)->i_nlink; + i_size_write(new_dentry->d_inode, file_size_save); +out: + mnt_drop_write(lower_new_path.mnt); +out_unlock: + unlock_dir(lower_dir_dentry); + sdcardfs_put_lower_path(old_dentry, &lower_old_path); + sdcardfs_put_lower_path(new_dentry, &lower_new_path); + REVERT_CRED(); + return err; +} +#endif + +static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) +{ + int err; + struct dentry *lower_dentry; + struct inode *lower_dir_inode = sdcardfs_lower_inode(dir); + struct dentry *lower_dir_dentry; + struct path lower_path; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + const struct cred *saved_cred = NULL; + + int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); + if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + err = -EACCES; + goto out_eacces; + } + + /* save current_cred and override it */ + OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred); + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + dget(lower_dentry); + lower_dir_dentry = lock_parent(lower_dentry); + + err = mnt_want_write(lower_path.mnt); + if (err) + goto out_unlock; + err = vfs_unlink(lower_dir_inode, lower_dentry); + + /* + * Note: unlinking on top of NFS can cause silly-renamed files. + * Trying to delete such files results in EBUSY from NFS + * below. Silly-renamed files will get deleted by NFS later on, so + * we just need to detect them here and treat such EBUSY errors as + * if the upper file was successfully deleted. + */ + if (err == -EBUSY && lower_dentry->d_flags & DCACHE_NFSFS_RENAMED) + err = 0; + if (err) + goto out; + fsstack_copy_attr_times(dir, lower_dir_inode); + fsstack_copy_inode_size(dir, lower_dir_inode); + dentry->d_inode->i_nlink = + sdcardfs_lower_inode(dentry->d_inode)->i_nlink; + dentry->d_inode->i_ctime = dir->i_ctime; + d_drop(dentry); /* this is needed, else LTP fails (VFS won't do it) */ +out: + mnt_drop_write(lower_path.mnt); +out_unlock: + unlock_dir(lower_dir_dentry); + dput(lower_dentry); + sdcardfs_put_lower_path(dentry, &lower_path); + REVERT_CRED(saved_cred); +out_eacces: + return err; +} + +#if 0 +static int sdcardfs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + int err = 0; + struct dentry *lower_dentry; + struct dentry *lower_parent_dentry = NULL; + struct path lower_path; + + OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb)); + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_parent_dentry = lock_parent(lower_dentry); + + err = mnt_want_write(lower_path.mnt); + if (err) + goto out_unlock; + err = vfs_symlink(lower_parent_dentry->d_inode, lower_dentry, symname); + if (err) + goto out; + err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path); + if (err) + goto out; + fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); + fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); + +out: + mnt_drop_write(lower_path.mnt); +out_unlock: + unlock_dir(lower_parent_dentry); + sdcardfs_put_lower_path(dentry, &lower_path); + REVERT_CRED(); + return err; +} +#endif + +static int touch(char *abs_path, mode_t mode) { + struct file *filp = filp_open(abs_path, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, mode); + if (IS_ERR(filp)) { + if (PTR_ERR(filp) == -EEXIST) { + return 0; + } + else { + printk(KERN_ERR "sdcardfs: failed to open(%s): %ld\n", + abs_path, PTR_ERR(filp)); + return PTR_ERR(filp); + } + } + filp_close(filp, current->files); + return 0; +} + +static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + int err = 0; + int make_nomedia_in_obb = 0; + struct dentry *lower_dentry; + struct dentry *lower_parent_dentry = NULL; + struct path lower_path; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + const struct cred *saved_cred = NULL; + struct sdcardfs_inode_info *pi = SDCARDFS_I(dir); + char *page_buf; + char *nomedia_dir_name; + char *nomedia_fullpath; + int fullpath_namelen; + int touch_err = 0; + + int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); + if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + err = -EACCES; + goto out_eacces; + } + + /* save current_cred and override it */ + OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred); + + /* check disk space */ + if (!check_min_free_space(dentry, 0, 1)) { + printk(KERN_INFO "sdcardfs: No minimum free space.\n"); + err = -ENOSPC; + goto out_revert; + } + + /* the lower_dentry is negative here */ + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_parent_dentry = lock_parent(lower_dentry); + + err = mnt_want_write(lower_path.mnt); + if (err) + goto out_unlock; + + /* set last 16bytes of mode field to 0775 */ + mode = (mode & S_IFMT) | 00775; + err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry, mode); + + if (err) + goto out; + + /* if it is a local obb dentry, setup it with the base obbpath */ + if(need_graft_path(dentry)) { + + err = setup_obb_dentry(dentry, &lower_path); + if(err) { + /* if the sbi->obbpath is not available, the lower_path won't be + * changed by setup_obb_dentry() but the lower path is saved to + * its orig_path. this dentry will be revalidated later. + * but now, the lower_path should be NULL */ + sdcardfs_put_reset_lower_path(dentry); + + /* the newly created lower path which saved to its orig_path or + * the lower_path is the base obbpath. + * therefore, an additional path_get is required */ + path_get(&lower_path); + } else + make_nomedia_in_obb = 1; + } + + err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path); + if (err) + goto out; + + fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); + fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); + /* update number of links on parent directory */ + dir->i_nlink = sdcardfs_lower_inode(dir)->i_nlink; + + if ((sbi->options.derive == DERIVE_UNIFIED) && (!strcasecmp(dentry->d_name.name, "obb")) + && (pi->perm == PERM_ANDROID) && (pi->userid == 0)) + make_nomedia_in_obb = 1; + + /* When creating /Android/data and /Android/obb, mark them as .nomedia */ + if (make_nomedia_in_obb || + ((pi->perm == PERM_ANDROID) && (!strcasecmp(dentry->d_name.name, "data")))) { + + page_buf = (char *)__get_free_page(GFP_KERNEL); + if (!page_buf) { + printk(KERN_ERR "sdcardfs: failed to allocate page buf\n"); + goto out; + } + + nomedia_dir_name = d_absolute_path(&lower_path, page_buf, PAGE_SIZE); + if (IS_ERR(nomedia_dir_name)) { + free_page((unsigned long)page_buf); + printk(KERN_ERR "sdcardfs: failed to get .nomedia dir name\n"); + goto out; + } + + fullpath_namelen = page_buf + PAGE_SIZE - nomedia_dir_name - 1; + fullpath_namelen += strlen("/.nomedia"); + nomedia_fullpath = kzalloc(fullpath_namelen + 1, GFP_KERNEL); + if (!nomedia_fullpath) { + free_page((unsigned long)page_buf); + printk(KERN_ERR "sdcardfs: failed to allocate .nomedia fullpath buf\n"); + goto out; + } + + strcpy(nomedia_fullpath, nomedia_dir_name); + free_page((unsigned long)page_buf); + strcat(nomedia_fullpath, "/.nomedia"); + touch_err = touch(nomedia_fullpath, 0664); + if (touch_err) { + printk(KERN_ERR "sdcardfs: failed to touch(%s): %d\n", + nomedia_fullpath, touch_err); + kfree(nomedia_fullpath); + goto out; + } + kfree(nomedia_fullpath); + } +out: + mnt_drop_write(lower_path.mnt); +out_unlock: + unlock_dir(lower_parent_dentry); + sdcardfs_put_lower_path(dentry, &lower_path); +out_revert: + REVERT_CRED(saved_cred); +out_eacces: + return err; +} + +static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct dentry *lower_dentry; + struct dentry *lower_dir_dentry; + int err; + struct path lower_path; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + const struct cred *saved_cred = NULL; + //char *path_s = NULL; + + int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); + if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + err = -EACCES; + goto out_eacces; + } + + /* save current_cred and override it */ + OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred); + + /* sdcardfs_get_real_lower(): in case of remove an user's obb dentry + * the dentry on the original path should be deleted. */ + sdcardfs_get_real_lower(dentry, &lower_path); + + lower_dentry = lower_path.dentry; + lower_dir_dentry = lock_parent(lower_dentry); + + err = mnt_want_write(lower_path.mnt); + if (err) + goto out_unlock; + err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); + if (err) + goto out; + + d_drop(dentry); /* drop our dentry on success (why not VFS's job?) */ + if (dentry->d_inode) + clear_nlink(dentry->d_inode); + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); + fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode); + dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; + +out: + mnt_drop_write(lower_path.mnt); +out_unlock: + unlock_dir(lower_dir_dentry); + sdcardfs_put_real_lower(dentry, &lower_path); + REVERT_CRED(saved_cred); +out_eacces: + return err; +} + +#if 0 +static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, int mode, + dev_t dev) +{ + int err = 0; + struct dentry *lower_dentry; + struct dentry *lower_parent_dentry = NULL; + struct path lower_path; + + OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb)); + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_parent_dentry = lock_parent(lower_dentry); + + err = mnt_want_write(lower_path.mnt); + if (err) + goto out_unlock; + err = vfs_mknod(lower_parent_dentry->d_inode, lower_dentry, mode, dev); + if (err) + goto out; + + err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path); + if (err) + goto out; + fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); + fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); + +out: + mnt_drop_write(lower_path.mnt); +out_unlock: + unlock_dir(lower_parent_dentry); + sdcardfs_put_lower_path(dentry, &lower_path); + REVERT_CRED(); + return err; +} +#endif + +/* + * The locking rules in sdcardfs_rename are complex. We could use a simpler + * superblock-level name-space lock for renames and copy-ups. + */ +static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + int err = 0; + struct dentry *lower_old_dentry = NULL; + struct dentry *lower_new_dentry = NULL; + struct dentry *lower_old_dir_dentry = NULL; + struct dentry *lower_new_dir_dentry = NULL; + struct dentry *trap = NULL; + struct dentry *new_parent = NULL; + struct path lower_old_path, lower_new_path; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(old_dentry->d_sb); + const struct cred *saved_cred = NULL; + + int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); + if(!check_caller_access_to_name(old_dir, old_dentry->d_name.name, + sbi->options.derive, 1, has_rw) || + !check_caller_access_to_name(new_dir, new_dentry->d_name.name, + sbi->options.derive, 1, has_rw)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " new_dentry: %s, task:%s\n", + __func__, new_dentry->d_name.name, current->comm); + err = -EACCES; + goto out_eacces; + } + + /* save current_cred and override it */ + OVERRIDE_CRED(SDCARDFS_SB(old_dir->i_sb), saved_cred); + + sdcardfs_get_real_lower(old_dentry, &lower_old_path); + sdcardfs_get_lower_path(new_dentry, &lower_new_path); + lower_old_dentry = lower_old_path.dentry; + lower_new_dentry = lower_new_path.dentry; + lower_old_dir_dentry = dget_parent(lower_old_dentry); + lower_new_dir_dentry = dget_parent(lower_new_dentry); + + trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + /* source should not be ancestor of target */ + if (trap == lower_old_dentry) { + err = -EINVAL; + goto out; + } + /* target should not be ancestor of source */ + if (trap == lower_new_dentry) { + err = -ENOTEMPTY; + goto out; + } + + err = mnt_want_write(lower_old_path.mnt); + if (err) + goto out; + err = mnt_want_write(lower_new_path.mnt); + if (err) + goto out_drop_old_write; + + err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, + lower_new_dir_dentry->d_inode, lower_new_dentry); + if (err) + goto out_err; + + /* Copy attrs from lower dir, but i_uid/i_gid */ + fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); + fsstack_copy_inode_size(new_dir, lower_new_dir_dentry->d_inode); + fix_derived_permission(new_dir); + if (new_dir != old_dir) { + fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); + fsstack_copy_inode_size(old_dir, lower_old_dir_dentry->d_inode); + fix_derived_permission(old_dir); + /* update the derived permission of the old_dentry + * with its new parent + */ + new_parent = dget_parent(new_dentry); + if(new_parent) { + if(old_dentry->d_inode) { + get_derived_permission(new_parent, old_dentry); + fix_derived_permission(old_dentry->d_inode); + } + dput(new_parent); + } + } + +out_err: + mnt_drop_write(lower_new_path.mnt); +out_drop_old_write: + mnt_drop_write(lower_old_path.mnt); +out: + unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + dput(lower_old_dir_dentry); + dput(lower_new_dir_dentry); + sdcardfs_put_real_lower(old_dentry, &lower_old_path); + sdcardfs_put_lower_path(new_dentry, &lower_new_path); + REVERT_CRED(saved_cred); +out_eacces: + return err; +} + +#if 0 +static int sdcardfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) +{ + int err; + struct dentry *lower_dentry; + struct path lower_path; + /* XXX readlink does not requires overriding credential */ + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + if (!lower_dentry->d_inode->i_op || + !lower_dentry->d_inode->i_op->readlink) { + err = -EINVAL; + goto out; + } + + err = lower_dentry->d_inode->i_op->readlink(lower_dentry, + buf, bufsiz); + if (err < 0) + goto out; + fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode); + +out: + sdcardfs_put_lower_path(dentry, &lower_path); + return err; +} +#endif + +#if 0 +static void *sdcardfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + char *buf; + int len = PAGE_SIZE, err; + mm_segment_t old_fs; + + /* This is freed by the put_link method assuming a successful call. */ + buf = kmalloc(len, GFP_KERNEL); + if (!buf) { + buf = ERR_PTR(-ENOMEM); + goto out; + } + + /* read the symlink, and then we will follow it */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sdcardfs_readlink(dentry, buf, len); + set_fs(old_fs); + if (err < 0) { + kfree(buf); + buf = ERR_PTR(err); + } else { + buf[err] = '\0'; + } +out: + nd_set_link(nd, buf); + return NULL; +} +#endif + +#if 0 +/* this @nd *IS* still used */ +static void sdcardfs_put_link(struct dentry *dentry, struct nameidata *nd, + void *cookie) +{ + char *buf = nd_get_link(nd); + if (!IS_ERR(buf)) /* free the char* */ + kfree(buf); +} +#endif + +static int sdcardfs_permission(struct inode *inode, int mask, unsigned int flags) +{ + int err; + + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + + /* + * Permission check on sdcardfs inode. + * Calling process should have AID_SDCARD_RW permission + */ + err = generic_permission(inode, mask, 0, inode->i_op->check_acl); + + /* XXX + * Original sdcardfs code calls inode_permission(lower_inode,.. ) + * for checking inode permission. But doing such things here seems + * duplicated work, because the functions called after this func, + * such as vfs_create, vfs_unlink, vfs_rename, and etc, + * does exactly same thing, i.e., they calls inode_permission(). + * So we just let they do the things. + * If there are any security hole, just uncomment following if block. + */ +#if 0 + if (!err) { + /* + * Permission check on lower_inode(=EXT4). + * we check it with AID_MEDIA_RW permission + */ + struct inode *lower_inode; + OVERRIDE_CRED(SDCARDFS_SB(inode->sb)); + + lower_inode = sdcardfs_lower_inode(inode); + err = inode_permission(lower_inode, mask); + + REVERT_CRED(); + } +#endif + return err; + +} + +static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct dentry *lower_dentry; + struct inode *inode; + struct inode *lower_inode; + struct path lower_path; + struct dentry *parent; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + + parent = dget_parent(dentry); + if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name, + sbi->options.derive, 0, 0)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + dput(parent); + return -EACCES; + } + dput(parent); + + inode = dentry->d_inode; + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_inode = sdcardfs_lower_inode(inode); + + fsstack_copy_attr_all(inode, lower_inode); + fsstack_copy_inode_size(inode, lower_inode); + /* if the dentry has been moved from other location + * so, on this stage, its derived permission must be + * rechecked from its private field. + */ + fix_derived_permission(inode); + + generic_fillattr(inode, stat); + sdcardfs_put_lower_path(dentry, &lower_path); + return 0; +} + +static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) +{ + int err = 0; + struct dentry *lower_dentry; + struct inode *inode; + struct inode *lower_inode; + struct path lower_path; + struct iattr lower_ia; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + struct dentry *parent; + int has_rw; + + inode = dentry->d_inode; + + /* + * Check if user has permission to change inode. We don't check if + * this user can change the lower inode: that should happen when + * calling notify_change on the lower inode. + */ + err = inode_change_ok(inode, ia); + + /* no vfs_XXX operations required, cred overriding will be skipped. wj*/ + if (!err) { + /* check the Android group ID */ + has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); + parent = dget_parent(dentry); + if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name, + sbi->options.derive, 1, has_rw)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + err = -EACCES; + } + dput(parent); + } + + if (err) + goto out_err; + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_inode = sdcardfs_lower_inode(inode); + + /* prepare our own lower struct iattr (with the lower file) */ + memcpy(&lower_ia, ia, sizeof(lower_ia)); + if (ia->ia_valid & ATTR_FILE) + lower_ia.ia_file = sdcardfs_lower_file(ia->ia_file); + + lower_ia.ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE); + + /* + * If shrinking, first truncate upper level to cancel writing dirty + * pages beyond the new eof; and also if its' maxbytes is more + * limiting (fail with -EFBIG before making any change to the lower + * level). There is no need to vmtruncate the upper level + * afterwards in the other cases: we fsstack_copy_inode_size from + * the lower level. + */ + if (current->mm) + down_write(¤t->mm->mmap_sem); + if (ia->ia_valid & ATTR_SIZE) { + err = inode_newsize_ok(inode, ia->ia_size); + if (err) { + if (current->mm) + up_write(¤t->mm->mmap_sem); + goto out; + } + truncate_setsize(inode, ia->ia_size); + } + + /* + * mode change is for clearing setuid/setgid bits. Allow lower fs + * to interpret this in its own way. + */ + if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) + lower_ia.ia_valid &= ~ATTR_MODE; + + /* notify the (possibly copied-up) lower inode */ + /* + * Note: we use lower_dentry->d_inode, because lower_inode may be + * unlinked (no inode->i_sb and i_ino==0. This happens if someone + * tries to open(), unlink(), then ftruncate() a file. + */ + mutex_lock(&lower_dentry->d_inode->i_mutex); + err = notify_change(lower_dentry, &lower_ia); /* note: lower_ia */ + mutex_unlock(&lower_dentry->d_inode->i_mutex); + if (current->mm) + up_write(¤t->mm->mmap_sem); + if (err) + goto out; + + /* get attributes from the lower inode */ + fsstack_copy_attr_all(inode, lower_inode); + /* update derived permission of the upper inode */ + fix_derived_permission(inode); + + /* + * Not running fsstack_copy_inode_size(inode, lower_inode), because + * VFS should update our inode size, and notify_change on + * lower_inode should update its size. + */ + +out: + sdcardfs_put_lower_path(dentry, &lower_path); +out_err: + return err; +} + +const struct inode_operations sdcardfs_symlink_iops = { + .permission = sdcardfs_permission, + .setattr = sdcardfs_setattr, + /* XXX Following operations are implemented, + * but FUSE(sdcard) or FAT does not support them + * These methods are *NOT* perfectly tested. + .readlink = sdcardfs_readlink, + .follow_link = sdcardfs_follow_link, + .put_link = sdcardfs_put_link, + */ +}; + +const struct inode_operations sdcardfs_dir_iops = { + .create = sdcardfs_create, + .lookup = sdcardfs_lookup, + .permission = sdcardfs_permission, + .unlink = sdcardfs_unlink, + .mkdir = sdcardfs_mkdir, + .rmdir = sdcardfs_rmdir, + .rename = sdcardfs_rename, + .setattr = sdcardfs_setattr, + .getattr = sdcardfs_getattr, + /* XXX Following operations are implemented, + * but FUSE(sdcard) or FAT does not support them + * These methods are *NOT* perfectly tested. + .symlink = sdcardfs_symlink, + .link = sdcardfs_link, + .mknod = sdcardfs_mknod, + */ +}; + +const struct inode_operations sdcardfs_main_iops = { + .permission = sdcardfs_permission, + .setattr = sdcardfs_setattr, + .getattr = sdcardfs_getattr, +}; diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c new file mode 100644 index 000000000000..c0b12375b1bf --- /dev/null +++ b/fs/sdcardfs/lookup.c @@ -0,0 +1,386 @@ +/* + * fs/sdcardfs/lookup.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" +#include "linux/delay.h" + +/* The dentry cache is just so we have properly sized dentries */ +static struct kmem_cache *sdcardfs_dentry_cachep; + +int sdcardfs_init_dentry_cache(void) +{ + sdcardfs_dentry_cachep = + kmem_cache_create("sdcardfs_dentry", + sizeof(struct sdcardfs_dentry_info), + 0, SLAB_RECLAIM_ACCOUNT, NULL); + + return sdcardfs_dentry_cachep ? 0 : -ENOMEM; +} + +void sdcardfs_destroy_dentry_cache(void) +{ + if (sdcardfs_dentry_cachep) + kmem_cache_destroy(sdcardfs_dentry_cachep); +} + +void free_dentry_private_data(struct dentry *dentry) +{ + if (!dentry || !dentry->d_fsdata) + return; + kmem_cache_free(sdcardfs_dentry_cachep, dentry->d_fsdata); + dentry->d_fsdata = NULL; +} + +/* allocate new dentry private data */ +int new_dentry_private_data(struct dentry *dentry) +{ + struct sdcardfs_dentry_info *info = SDCARDFS_D(dentry); + + /* use zalloc to init dentry_info.lower_path */ + info = kmem_cache_zalloc(sdcardfs_dentry_cachep, GFP_ATOMIC); + if (!info) + return -ENOMEM; + + spin_lock_init(&info->lock); + dentry->d_fsdata = info; + + return 0; +} + +static int sdcardfs_inode_test(struct inode *inode, void *candidate_lower_inode) +{ + struct inode *current_lower_inode = sdcardfs_lower_inode(inode); + if (current_lower_inode == (struct inode *)candidate_lower_inode) + return 1; /* found a match */ + else + return 0; /* no match */ +} + +static int sdcardfs_inode_set(struct inode *inode, void *lower_inode) +{ + /* we do actual inode initialization in sdcardfs_iget */ + return 0; +} + +static struct inode *sdcardfs_iget(struct super_block *sb, + struct inode *lower_inode) +{ + struct sdcardfs_inode_info *info; + struct inode *inode; /* the new inode to return */ + int err; + + inode = iget5_locked(sb, /* our superblock */ + /* + * hashval: we use inode number, but we can + * also use "(unsigned long)lower_inode" + * instead. + */ + lower_inode->i_ino, /* hashval */ + sdcardfs_inode_test, /* inode comparison function */ + sdcardfs_inode_set, /* inode init function */ + lower_inode); /* data passed to test+set fxns */ + if (!inode) { + err = -EACCES; + iput(lower_inode); + return ERR_PTR(err); + } + /* if found a cached inode, then just return it */ + if (!(inode->i_state & I_NEW)) + return inode; + + /* initialize new inode */ + info = SDCARDFS_I(inode); + + inode->i_ino = lower_inode->i_ino; + if (!igrab(lower_inode)) { + err = -ESTALE; + return ERR_PTR(err); + } + sdcardfs_set_lower_inode(inode, lower_inode); + + inode->i_version++; + + /* use different set of inode ops for symlinks & directories */ + if (S_ISDIR(lower_inode->i_mode)) + inode->i_op = &sdcardfs_dir_iops; + else if (S_ISLNK(lower_inode->i_mode)) + inode->i_op = &sdcardfs_symlink_iops; + else + inode->i_op = &sdcardfs_main_iops; + + /* use different set of file ops for directories */ + if (S_ISDIR(lower_inode->i_mode)) + inode->i_fop = &sdcardfs_dir_fops; + else + inode->i_fop = &sdcardfs_main_fops; + + inode->i_mapping->a_ops = &sdcardfs_aops; + + inode->i_atime.tv_sec = 0; + inode->i_atime.tv_nsec = 0; + inode->i_mtime.tv_sec = 0; + inode->i_mtime.tv_nsec = 0; + inode->i_ctime.tv_sec = 0; + inode->i_ctime.tv_nsec = 0; + + /* properly initialize special inodes */ + if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) || + S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode)) + init_special_inode(inode, lower_inode->i_mode, + lower_inode->i_rdev); + + /* all well, copy inode attributes */ + fsstack_copy_attr_all(inode, lower_inode); + fsstack_copy_inode_size(inode, lower_inode); + + fix_derived_permission(inode); + + unlock_new_inode(inode); + return inode; +} + +/* + * Connect a sdcardfs inode dentry/inode with several lower ones. This is + * the classic stackable file system "vnode interposition" action. + * + * @dentry: sdcardfs's dentry which interposes on lower one + * @sb: sdcardfs's super_block + * @lower_path: the lower path (caller does path_get/put) + */ +int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, + struct path *lower_path) +{ + int err = 0; + struct inode *inode; + struct inode *lower_inode; + struct super_block *lower_sb; + + lower_inode = lower_path->dentry->d_inode; + lower_sb = sdcardfs_lower_super(sb); + + /* check that the lower file system didn't cross a mount point */ + if (lower_inode->i_sb != lower_sb) { + err = -EXDEV; + goto out; + } + + /* + * We allocate our new inode below by calling sdcardfs_iget, + * which will initialize some of the new inode's fields + */ + + /* inherit lower inode number for sdcardfs's inode */ + inode = sdcardfs_iget(sb, lower_inode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + + d_add(dentry, inode); + update_derived_permission(dentry); +out: + return err; +} + +/* + * Main driver function for sdcardfs's lookup. + * + * Returns: NULL (ok), ERR_PTR if an error occurred. + * Fills in lower_parent_path with on success. + */ +static struct dentry *__sdcardfs_lookup(struct dentry *dentry, + struct nameidata *nd, struct path *lower_parent_path) +{ + int err = 0; + struct vfsmount *lower_dir_mnt; + struct dentry *lower_dir_dentry = NULL; + struct dentry *lower_dentry; + const char *name; + struct nameidata lower_nd; + struct path lower_path; + struct qstr this; + struct sdcardfs_sb_info *sbi; + + sbi = SDCARDFS_SB(dentry->d_sb); + /* must initialize dentry operations */ + d_set_d_op(dentry, &sdcardfs_ci_dops); + + if (IS_ROOT(dentry)) + goto out; + + name = dentry->d_name.name; + + /* now start the actual lookup procedure */ + lower_dir_dentry = lower_parent_path->dentry; + lower_dir_mnt = lower_parent_path->mnt; + + /* Use vfs_path_lookup to check if the dentry exists or not */ + if (sbi->options.lower_fs == LOWER_FS_EXT4) { + err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, + LOOKUP_CASE_INSENSITIVE, &lower_nd); + } else if (sbi->options.lower_fs == LOWER_FS_FAT) { + err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, + &lower_nd); + } + + /* no error: handle positive dentries */ + if (!err) { + /* check if the dentry is an obb dentry + * if true, the lower_inode must be replaced with + * the inode of the graft path */ + + if(need_graft_path(dentry)) { + + /* setup_obb_dentry() + * The lower_path will be stored to the dentry's orig_path + * and the base obbpath will be copyed to the lower_path variable. + * if an error returned, there's no change in the lower_path + * returns: -ERRNO if error (0: no error) */ + err = setup_obb_dentry(dentry, &lower_nd.path); + + if(err) { + /* if the sbi->obbpath is not available, we can optionally + * setup the lower_path with its orig_path. + * but, the current implementation just returns an error + * because the sdcard daemon also regards this case as + * a lookup fail. */ + printk(KERN_INFO "sdcardfs: base obbpath is not available\n"); + sdcardfs_put_reset_orig_path(dentry); + goto out; + } + } + + sdcardfs_set_lower_path(dentry, &lower_nd.path); + err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_nd.path); + if (err) /* path_put underlying path on error */ + sdcardfs_put_reset_lower_path(dentry); + goto out; + } + + /* + * We don't consider ENOENT an error, and we want to return a + * negative dentry. + */ + if (err && err != -ENOENT) + goto out; + + /* instatiate a new negative dentry */ + this.name = name; + this.len = strlen(name); + this.hash = full_name_hash(this.name, this.len); + lower_dentry = d_lookup(lower_dir_dentry, &this); + if (lower_dentry) + goto setup_lower; + + lower_dentry = d_alloc(lower_dir_dentry, &this); + if (!lower_dentry) { + err = -ENOMEM; + goto out; + } + d_add(lower_dentry, NULL); /* instantiate and hash */ + +setup_lower: + lower_path.dentry = lower_dentry; + lower_path.mnt = mntget(lower_dir_mnt); + sdcardfs_set_lower_path(dentry, &lower_path); + + /* + * If the intent is to create a file, then don't return an error, so + * the VFS will continue the process of making this negative dentry + * into a positive one. + */ + if (nd) { + if (nd->flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET)) + err = 0; + } else + err = 0; + +out: + return ERR_PTR(err); +} + +/* + * On success: + * fills dentry object appropriate values and returns NULL. + * On fail (== error) + * returns error ptr + * + * @dir : Parent inode. It is locked (dir->i_mutex) + * @dentry : Target dentry to lookup. we should set each of fields. + * (dentry->d_name is initialized already) + * @nd : nameidata of parent inode + */ +struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct dentry *ret = NULL, *parent; + struct path lower_parent_path; + int err = 0; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + const struct cred *saved_cred = NULL; + + parent = dget_parent(dentry); + + if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name, + sbi->options.derive, 0, 0)) { + ret = ERR_PTR(-EACCES); + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + goto out_err; + } + + /* save current_cred and override it */ + OVERRIDE_CRED_PTR(SDCARDFS_SB(dir->i_sb), saved_cred); + + sdcardfs_get_lower_path(parent, &lower_parent_path); + + /* allocate dentry private data. We free it in ->d_release */ + err = new_dentry_private_data(dentry); + if (err) { + ret = ERR_PTR(err); + goto out; + } + + ret = __sdcardfs_lookup(dentry, nd, &lower_parent_path); + if (IS_ERR(ret)) + { + goto out; + } + if (ret) + dentry = ret; + if (dentry->d_inode) { + fsstack_copy_attr_times(dentry->d_inode, + sdcardfs_lower_inode(dentry->d_inode)); + /* get drived permission */ + get_derived_permission(parent, dentry); + fix_derived_permission(dentry->d_inode); + } + /* update parent directory's atime */ + fsstack_copy_attr_atime(parent->d_inode, + sdcardfs_lower_inode(parent->d_inode)); + +out: + sdcardfs_put_lower_path(parent, &lower_parent_path); + REVERT_CRED(saved_cred); +out_err: + dput(parent); + return ret; +} diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c new file mode 100644 index 000000000000..1fdceffec72c --- /dev/null +++ b/fs/sdcardfs/main.c @@ -0,0 +1,425 @@ +/* + * fs/sdcardfs/main.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" +#include +#include +#include + +enum { + Opt_uid, + Opt_gid, + Opt_wgid, + Opt_debug, + Opt_split, + Opt_derive, + Opt_lower_fs, + Opt_reserved_mb, + Opt_err, +}; + +static const match_table_t sdcardfs_tokens = { + {Opt_uid, "uid=%u"}, + {Opt_gid, "gid=%u"}, + {Opt_wgid, "wgid=%u"}, + {Opt_debug, "debug"}, + {Opt_split, "split"}, + {Opt_derive, "derive=%s"}, + {Opt_lower_fs, "lower_fs=%s"}, + {Opt_reserved_mb, "reserved_mb=%u"}, + {Opt_err, NULL} +}; + +static int parse_options(struct super_block *sb, char *options, int silent, + int *debug, struct sdcardfs_mount_options *opts) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + char *string_option; + + /* by default, we use AID_MEDIA_RW as uid, gid */ + opts->fs_low_uid = AID_MEDIA_RW; + opts->fs_low_gid = AID_MEDIA_RW; + /* by default, we use AID_SDCARD_RW as write_gid */ + opts->write_gid = AID_SDCARD_RW; + /* default permission policy + * (DERIVE_NONE | DERIVE_LEGACY | DERIVE_UNIFIED) */ + opts->derive = DERIVE_NONE; + opts->split_perms = 0; + /* by default, we use LOWER_FS_EXT4 as lower fs type */ + opts->lower_fs = LOWER_FS_EXT4; + /* by default, 0MB is reserved */ + opts->reserved_mb = 0; + + *debug = 0; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + + token = match_token(p, sdcardfs_tokens, args); + + switch (token) { + case Opt_debug: + *debug = 1; + break; + case Opt_uid: + if (match_int(&args[0], &option)) + return 0; + opts->fs_low_uid = option; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return 0; + opts->fs_low_gid = option; + break; + case Opt_wgid: + if (match_int(&args[0], &option)) + return 0; + opts->write_gid = option; + break; + case Opt_split: + opts->split_perms=1; + break; + case Opt_derive: + string_option = match_strdup(&args[0]); + if (!strcmp("none", string_option)) { + opts->derive = DERIVE_NONE; + } else if (!strcmp("legacy", string_option)) { + opts->derive = DERIVE_LEGACY; + } else if (!strcmp("unified", string_option)) { + opts->derive = DERIVE_UNIFIED; + } else { + kfree(string_option); + goto invalid_option; + } + kfree(string_option); + break; + case Opt_lower_fs: + string_option = match_strdup(&args[0]); + if (!strcmp("ext4", string_option)) { + opts->lower_fs = LOWER_FS_EXT4; + } else if (!strcmp("fat", string_option)) { + opts->lower_fs = LOWER_FS_FAT; + } else { + kfree(string_option); + goto invalid_option; + } + kfree(string_option); + break; + case Opt_reserved_mb: + if (match_int(&args[0], &option)) + return 0; + opts->reserved_mb = option; + break; + /* unknown option */ + default: +invalid_option: + if (!silent) { + printk( KERN_ERR "Unrecognized mount option \"%s\" " + "or missing value", p); + } + return -EINVAL; + } + } + + if (*debug) { + printk( KERN_INFO "sdcardfs : options - debug:%d\n", *debug); + printk( KERN_INFO "sdcardfs : options - uid:%d\n", + opts->fs_low_uid); + printk( KERN_INFO "sdcardfs : options - gid:%d\n", + opts->fs_low_gid); + } + + return 0; +} + +/* + * our custom d_alloc_root work-alike + * + * we can't use d_alloc_root if we want to use our own interpose function + * unchanged, so we simply call our own "fake" d_alloc_root + */ +static struct dentry *sdcardfs_d_alloc_root(struct super_block *sb) +{ + struct dentry *ret = NULL; + + if (sb) { + static const struct qstr name = { + .name = "/", + .len = 1 + }; + + ret = d_alloc(NULL, &name); + if (ret) { + d_set_d_op(ret, &sdcardfs_ci_dops); + ret->d_sb = sb; + ret->d_parent = ret; + } + } + return ret; +} + +/* + * There is no need to lock the sdcardfs_super_info's rwsem as there is no + * way anyone can have a reference to the superblock at this point in time. + */ +static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, + void *raw_data, int silent) +{ + int err = 0; + int debug; + struct super_block *lower_sb; + struct path lower_path; + struct sdcardfs_sb_info *sb_info; + void *pkgl_id; + + printk(KERN_INFO "sdcardfs version 2.0\n"); + + if (!dev_name) { + printk(KERN_ERR + "sdcardfs: read_super: missing dev_name argument\n"); + err = -EINVAL; + goto out; + } + + printk(KERN_INFO "sdcardfs: dev_name -> %s\n", dev_name); + printk(KERN_INFO "sdcardfs: options -> %s\n", (char *)raw_data); + + /* parse lower path */ + err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, + &lower_path); + if (err) { + printk(KERN_ERR "sdcardfs: error accessing " + "lower directory '%s'\n", dev_name); + goto out; + } + + /* allocate superblock private data */ + sb->s_fs_info = kzalloc(sizeof(struct sdcardfs_sb_info), GFP_KERNEL); + if (!SDCARDFS_SB(sb)) { + printk(KERN_CRIT "sdcardfs: read_super: out of memory\n"); + err = -ENOMEM; + goto out_free; + } + + sb_info = sb->s_fs_info; + + /* parse options */ + err = parse_options(sb, raw_data, silent, &debug, &sb_info->options); + if (err) { + printk(KERN_ERR "sdcardfs: invalid options\n"); + goto out_freesbi; + } + + if (sb_info->options.derive != DERIVE_NONE) { + pkgl_id = packagelist_create(sb_info->options.write_gid); + if(IS_ERR(pkgl_id)) + goto out_freesbi; + else + sb_info->pkgl_id = pkgl_id; + } + + /* set the lower superblock field of upper superblock */ + lower_sb = lower_path.dentry->d_sb; + atomic_inc(&lower_sb->s_active); + sdcardfs_set_lower_super(sb, lower_sb); + + /* inherit maxbytes from lower file system */ + sb->s_maxbytes = lower_sb->s_maxbytes; + + /* + * Our c/m/atime granularity is 1 ns because we may stack on file + * systems whose granularity is as good. + */ + sb->s_time_gran = 1; + + sb->s_magic = SDCARDFS_SUPER_MAGIC; + sb->s_op = &sdcardfs_sops; + + /* see comment next to the definition of sdcardfs_d_alloc_root */ + sb->s_root = sdcardfs_d_alloc_root(sb); + if (!sb->s_root) { + err = -ENOMEM; + goto out_sput; + } + + /* link the upper and lower dentries */ + sb->s_root->d_fsdata = NULL; + err = new_dentry_private_data(sb->s_root); + if (err) + goto out_freeroot; + + /* set the lower dentries for s_root */ + sdcardfs_set_lower_path(sb->s_root, &lower_path); + + /* call interpose to create the upper level inode */ + err = sdcardfs_interpose(sb->s_root, sb, &lower_path); + if (!err) { + /* setup permission policy */ + switch(sb_info->options.derive) { + case DERIVE_NONE: + setup_derived_state(sb->s_root->d_inode, + PERM_ROOT, 0, AID_ROOT, AID_SDCARD_RW, 00775); + sb_info->obbpath_s = NULL; + break; + case DERIVE_LEGACY: + /* Legacy behavior used to support internal multiuser layout which + * places user_id at the top directory level, with the actual roots + * just below that. Shared OBB path is also at top level. */ + setup_derived_state(sb->s_root->d_inode, + PERM_LEGACY_PRE_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771); + /* initialize the obbpath string and lookup the path + * sb_info->obb_path will be deactivated by path_put + * on sdcardfs_put_super */ + sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); + snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); + err = prepare_dir(sb_info->obbpath_s, + sb_info->options.fs_low_uid, + sb_info->options.fs_low_gid, 00755); + if(err) + printk(KERN_ERR "sdcardfs: %s: %d, error on creating %s\n", + __func__,__LINE__, sb_info->obbpath_s); + break; + case DERIVE_UNIFIED: + /* Unified multiuser layout which places secondary user_id under + * /Android/user and shared OBB path under /Android/obb. */ + setup_derived_state(sb->s_root->d_inode, + PERM_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771); + + sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); + snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); + break; + } + fix_derived_permission(sb->s_root->d_inode); + + if (!silent) + printk(KERN_INFO "sdcardfs: mounted on top of %s type %s\n", + dev_name, lower_sb->s_type->name); + goto out; + } + /* else error: fall through */ + + free_dentry_private_data(sb->s_root); +out_freeroot: + dput(sb->s_root); +out_sput: + /* drop refs we took earlier */ + atomic_dec(&lower_sb->s_active); + packagelist_destroy(sb_info->pkgl_id); +out_freesbi: + kfree(SDCARDFS_SB(sb)); + sb->s_fs_info = NULL; +out_free: + path_put(&lower_path); + +out: + return err; +} + +/* A feature which supports mount_nodev() with options */ +static struct dentry *mount_nodev_with_options(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + int (*fill_super)(struct super_block *, const char *, void *, int)) + +{ + int error; + struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); + + if (IS_ERR(s)) + return ERR_CAST(s); + + s->s_flags = flags; + + error = fill_super(s, dev_name, data, flags & MS_SILENT ? 1 : 0); + if (error) { + deactivate_locked_super(s); + return ERR_PTR(error); + } + s->s_flags |= MS_ACTIVE; + return dget(s->s_root); +} + +struct dentry *sdcardfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + /* + * dev_name is a lower_path_name, + * raw_data is a option string. + */ + return mount_nodev_with_options(fs_type, flags, dev_name, + raw_data, sdcardfs_read_super); +} + +static struct file_system_type sdcardfs_fs_type = { + .owner = THIS_MODULE, + .name = SDCARDFS_NAME, + .mount = sdcardfs_mount, + .kill_sb = generic_shutdown_super, + .fs_flags = FS_REVAL_DOT, +}; + +static int __init init_sdcardfs_fs(void) +{ + int err; + + pr_info("Registering sdcardfs " SDCARDFS_VERSION "\n"); + + err = sdcardfs_init_inode_cache(); + if (err) + goto out; + err = sdcardfs_init_dentry_cache(); + if (err) + goto out; + err = packagelist_init(); + if (err) + goto out; + err = register_filesystem(&sdcardfs_fs_type); +out: + if (err) { + sdcardfs_destroy_inode_cache(); + sdcardfs_destroy_dentry_cache(); + packagelist_exit(); + } + return err; +} + +static void __exit exit_sdcardfs_fs(void) +{ + sdcardfs_destroy_inode_cache(); + sdcardfs_destroy_dentry_cache(); + packagelist_exit(); + unregister_filesystem(&sdcardfs_fs_type); + pr_info("Completed sdcardfs module unload\n"); +} + +MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University" + " (http://www.fsl.cs.sunysb.edu/)"); +MODULE_DESCRIPTION("Wrapfs " SDCARDFS_VERSION + " (http://wrapfs.filesystems.org/)"); +MODULE_LICENSE("GPL"); + +module_init(init_sdcardfs_fs); +module_exit(exit_sdcardfs_fs); diff --git a/fs/sdcardfs/mmap.c b/fs/sdcardfs/mmap.c new file mode 100644 index 000000000000..c807d7f18f8b --- /dev/null +++ b/fs/sdcardfs/mmap.c @@ -0,0 +1,82 @@ +/* + * fs/sdcardfs/mmap.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" + +static int sdcardfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + int err; + struct file *file, *lower_file; + const struct vm_operations_struct *lower_vm_ops; + struct vm_area_struct lower_vma; + + memcpy(&lower_vma, vma, sizeof(struct vm_area_struct)); + file = lower_vma.vm_file; + lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops; + BUG_ON(!lower_vm_ops); + + lower_file = sdcardfs_lower_file(file); + /* + * XXX: vm_ops->fault may be called in parallel. Because we have to + * resort to temporarily changing the vma->vm_file to point to the + * lower file, a concurrent invocation of sdcardfs_fault could see a + * different value. In this workaround, we keep a different copy of + * the vma structure in our stack, so we never expose a different + * value of the vma->vm_file called to us, even temporarily. A + * better fix would be to change the calling semantics of ->fault to + * take an explicit file pointer. + */ + lower_vma.vm_file = lower_file; + err = lower_vm_ops->fault(&lower_vma, vmf); + return err; +} + +static ssize_t sdcardfs_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, + unsigned long nr_segs) +{ + /* + * This function returns zero on purpose in order to support direct IO. + * __dentry_open checks a_ops->direct_IO and returns EINVAL if it is null. + * + * However, this function won't be called by certain file operations + * including generic fs functions. * reads and writes are delivered to + * the lower file systems and the direct IOs will be handled by them. + * + * NOTE: exceptionally, on the recent kernels (since Linux 3.8.x), + * swap_writepage invokes this function directly. + */ + printk(KERN_INFO "%s, operation is not supported\n", __func__); + return 0; +} + +/* + * XXX: the default address_space_ops for sdcardfs is empty. We cannot set + * our inode->i_mapping->a_ops to NULL because too many code paths expect + * the a_ops vector to be non-NULL. + */ +const struct address_space_operations sdcardfs_aops = { + /* empty on purpose */ + .direct_IO = sdcardfs_direct_IO, +}; + +const struct vm_operations_struct sdcardfs_vm_ops = { + .fault = sdcardfs_fault, +}; diff --git a/fs/sdcardfs/multiuser.h b/fs/sdcardfs/multiuser.h new file mode 100644 index 000000000000..923ba101dfa9 --- /dev/null +++ b/fs/sdcardfs/multiuser.h @@ -0,0 +1,37 @@ +/* + * fs/sdcardfs/multiuser.h + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#define MULTIUSER_APP_PER_USER_RANGE 100000 + +typedef uid_t userid_t; +typedef uid_t appid_t; + +static inline userid_t multiuser_get_user_id(uid_t uid) { + return uid / MULTIUSER_APP_PER_USER_RANGE; +} + +static inline appid_t multiuser_get_app_id(uid_t uid) { + return uid % MULTIUSER_APP_PER_USER_RANGE; +} + +static inline uid_t multiuser_get_uid(userid_t userId, appid_t appId) { + return userId * MULTIUSER_APP_PER_USER_RANGE + (appId % MULTIUSER_APP_PER_USER_RANGE); +} + diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c new file mode 100644 index 000000000000..c786d8f92203 --- /dev/null +++ b/fs/sdcardfs/packagelist.c @@ -0,0 +1,458 @@ +/* + * fs/sdcardfs/packagelist.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" +#include "strtok.h" +#include "hashtable.h" +#include +#include +#include +#include + +#define STRING_BUF_SIZE (512) + +struct hashtable_entry { + struct hlist_node hlist; + void *key; + int value; +}; + +struct packagelist_data { + DECLARE_HASHTABLE(package_to_appid,8); + DECLARE_HASHTABLE(appid_with_rw,7); + struct mutex hashtable_lock; + struct task_struct *thread_id; + gid_t write_gid; + char *strtok_last; + char read_buf[STRING_BUF_SIZE]; + char event_buf[STRING_BUF_SIZE]; + char app_name_buf[STRING_BUF_SIZE]; + char gids_buf[STRING_BUF_SIZE]; +}; + +static struct kmem_cache *hashtable_entry_cachep; + +/* Path to system-provided mapping of package name to appIds */ +static const char* const kpackageslist_file = "/data/system/packages.list"; +/* Supplementary groups to execute with */ +static const gid_t kgroups[1] = { AID_PACKAGE_INFO }; + +static unsigned int str_hash(void *key) { + int i; + unsigned int h = strlen(key); + char *data = (char *)key; + + for (i = 0; i < strlen(key); i++) { + h = h * 31 + *data; + data++; + } + return h; +} + +static int contain_appid_key(struct packagelist_data *pkgl_dat, void *appid) { + struct hashtable_entry *hash_cur; + struct hlist_node *h_n; + + hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, (unsigned int)appid, h_n) + if (appid == hash_cur->key) + return 1; + return 0; +} + +/* Return if the calling UID holds sdcard_rw. */ +int get_caller_has_rw_locked(void *pkgl_id, derive_t derive) { + struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id; + appid_t appid; + int ret; + + /* No additional permissions enforcement */ + if (derive == DERIVE_NONE) { + return 1; + } + + appid = multiuser_get_app_id(current_fsuid()); + mutex_lock(&pkgl_dat->hashtable_lock); + ret = contain_appid_key(pkgl_dat, (void *)appid); + mutex_unlock(&pkgl_dat->hashtable_lock); + return ret; +} + +appid_t get_appid(void *pkgl_id, const char *app_name) +{ + struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id; + struct hashtable_entry *hash_cur; + struct hlist_node *h_n; + unsigned int hash = str_hash((void *)app_name); + appid_t ret_id; + + //printk(KERN_INFO "sdcardfs: %s: %s, %u\n", __func__, (char *)app_name, hash); + mutex_lock(&pkgl_dat->hashtable_lock); + hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash, h_n) { + //printk(KERN_INFO "sdcardfs: %s: %s\n", __func__, (char *)hash_cur->key); + if (!strcasecmp(app_name, hash_cur->key)) { + ret_id = (appid_t)hash_cur->value; + mutex_unlock(&pkgl_dat->hashtable_lock); + //printk(KERN_INFO "=> app_id: %d\n", (int)ret_id); + return ret_id; + } + } + mutex_unlock(&pkgl_dat->hashtable_lock); + //printk(KERN_INFO "=> app_id: %d\n", 0); + return 0; +} + +/* Kernel has already enforced everything we returned through + * derive_permissions_locked(), so this is used to lock down access + * even further, such as enforcing that apps hold sdcard_rw. */ +int check_caller_access_to_name(struct inode *parent_node, const char* name, + derive_t derive, int w_ok, int has_rw) { + + /* Always block security-sensitive files at root */ + if (parent_node && SDCARDFS_I(parent_node)->perm == PERM_ROOT) { + if (!strcasecmp(name, "autorun.inf") + || !strcasecmp(name, ".android_secure") + || !strcasecmp(name, "android_secure")) { + return 0; + } + } + + /* No additional permissions enforcement */ + if (derive == DERIVE_NONE) { + return 1; + } + + /* Root always has access; access for any other UIDs should always + * be controlled through packages.list. */ + if (current_fsuid() == 0) { + return 1; + } + + /* If asking to write, verify that caller either owns the + * parent or holds sdcard_rw. */ + if (w_ok) { + if (parent_node && + (current_fsuid() == SDCARDFS_I(parent_node)->d_uid)) { + return 1; + } + return has_rw; + } + + /* No extra permissions to enforce */ + return 1; +} + +/* This function is used when file opening. The open flags must be + * checked before calling check_caller_access_to_name() */ +int open_flags_to_access_mode(int open_flags) { + if((open_flags & O_ACCMODE) == O_RDONLY) { + return 0; /* R_OK */ + } else if ((open_flags & O_ACCMODE) == O_WRONLY) { + return 1; /* W_OK */ + } else { + /* Probably O_RDRW, but treat as default to be safe */ + return 1; /* R_OK | W_OK */ + } +} + +static int insert_str_to_int(struct packagelist_data *pkgl_dat, void *key, int value) { + struct hashtable_entry *hash_cur; + struct hashtable_entry *new_entry; + struct hlist_node *h_n; + unsigned int hash = str_hash(key); + + //printk(KERN_INFO "sdcardfs: %s: %s: %d, %u\n", __func__, (char *)key, value, hash); + hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash, h_n) { + if (!strcasecmp(key, hash_cur->key)) { + hash_cur->value = value; + return 0; + } + } + new_entry = kmem_cache_alloc(hashtable_entry_cachep, GFP_KERNEL); + if (!new_entry) + return -ENOMEM; + new_entry->key = kstrdup(key, GFP_KERNEL); + new_entry->value = value; + hash_add(pkgl_dat->package_to_appid, &new_entry->hlist, hash); + return 0; +} + +static void remove_str_to_int(struct hashtable_entry *h_entry) { + //printk(KERN_INFO "sdcardfs: %s: %s: %d\n", __func__, (char *)h_entry->key, h_entry->value); + kfree(h_entry->key); + kmem_cache_free(hashtable_entry_cachep, h_entry); +} + +static int insert_int_to_null(struct packagelist_data *pkgl_dat, void *key, int value) { + struct hashtable_entry *hash_cur; + struct hashtable_entry *new_entry; + struct hlist_node *h_n; + + //printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)key, value); + hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, + (unsigned int)key, h_n) { + if (key == hash_cur->key) { + hash_cur->value = value; + return 0; + } + } + new_entry = kmem_cache_alloc(hashtable_entry_cachep, GFP_KERNEL); + if (!new_entry) + return -ENOMEM; + new_entry->key = key; + new_entry->value = value; + hash_add(pkgl_dat->appid_with_rw, &new_entry->hlist, + (unsigned int)new_entry->key); + return 0; +} + +static void remove_int_to_null(struct hashtable_entry *h_entry) { + //printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)h_entry->key, h_entry->value); + kmem_cache_free(hashtable_entry_cachep, h_entry); +} + +static void remove_all_hashentrys(struct packagelist_data *pkgl_dat) +{ + struct hashtable_entry *hash_cur; + struct hlist_node *h_n; + struct hlist_node *h_t; + int i; + + hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist, h_n) + remove_str_to_int(hash_cur); + hash_for_each_safe(pkgl_dat->appid_with_rw, i, h_t, hash_cur, hlist, h_n) + remove_int_to_null(hash_cur); + + hash_init(pkgl_dat->package_to_appid); + hash_init(pkgl_dat->appid_with_rw); +} + +static int read_package_list(struct packagelist_data *pkgl_dat) { + int ret; + int fd; + int read_amount; + + printk(KERN_INFO "sdcardfs: read_package_list\n"); + + mutex_lock(&pkgl_dat->hashtable_lock); + + remove_all_hashentrys(pkgl_dat); + + fd = sys_open(kpackageslist_file, O_RDONLY, 0); + if (fd < 0) { + printk(KERN_ERR "sdcardfs: failed to open package list\n"); + mutex_unlock(&pkgl_dat->hashtable_lock); + return fd; + } + + while ((read_amount = sys_read(fd, pkgl_dat->read_buf, + sizeof(pkgl_dat->read_buf))) > 0) { + int appid; + char *token; + int one_line_len = 0; + int additional_read; + unsigned long ret_gid; + + while (one_line_len < read_amount) { + if (pkgl_dat->read_buf[one_line_len] == '\n') { + one_line_len++; + break; + } + one_line_len++; + } + additional_read = read_amount - one_line_len; + if (additional_read > 0) + sys_lseek(fd, -additional_read, SEEK_CUR); + + if (sscanf(pkgl_dat->read_buf, "%s %d %*d %*s %*s %s", + pkgl_dat->app_name_buf, &appid, + pkgl_dat->gids_buf) == 3) { + ret = insert_str_to_int(pkgl_dat, pkgl_dat->app_name_buf, appid); + if (ret) { + sys_close(fd); + mutex_unlock(&pkgl_dat->hashtable_lock); + return ret; + } + + token = strtok_r(pkgl_dat->gids_buf, ",", &pkgl_dat->strtok_last); + while (token != NULL) { + if (!kstrtoul(token, 10, &ret_gid) && + (ret_gid == pkgl_dat->write_gid)) { + ret = insert_int_to_null(pkgl_dat, (void *)appid, 1); + if (ret) { + sys_close(fd); + mutex_unlock(&pkgl_dat->hashtable_lock); + return ret; + } + break; + } + token = strtok_r(NULL, ",", &pkgl_dat->strtok_last); + } + } + } + + sys_close(fd); + mutex_unlock(&pkgl_dat->hashtable_lock); + return 0; +} + +static int packagelist_reader(void *thread_data) +{ + struct packagelist_data *pkgl_dat = (struct packagelist_data *)thread_data; + struct inotify_event *event; + bool active = false; + int event_pos; + int event_size; + int res = 0; + int nfd; + + allow_signal(SIGINT); + + nfd = sys_inotify_init(); + if (nfd < 0) { + printk(KERN_ERR "sdcardfs: inotify_init failed: %d\n", nfd); + return nfd; + } + + while (!kthread_should_stop()) { + if (signal_pending(current)) { + ssleep(1); + continue; + } + + if (!active) { + res = sys_inotify_add_watch(nfd, kpackageslist_file, IN_DELETE_SELF); + if (res < 0) { + if (res == -ENOENT || res == -EACCES) { + /* Framework may not have created yet, sleep and retry */ + printk(KERN_ERR "sdcardfs: missing packages.list; retrying\n"); + ssleep(2); + printk(KERN_ERR "sdcardfs: missing packages.list_end; retrying\n"); + continue; + } else { + printk(KERN_ERR "sdcardfs: inotify_add_watch failed: %d\n", res); + goto interruptable_sleep; + } + } + /* Watch above will tell us about any future changes, so + * read the current state. */ + res = read_package_list(pkgl_dat); + if (res) { + printk(KERN_ERR "sdcardfs: read_package_list failed: %d\n", res); + goto interruptable_sleep; + } + active = true; + } + + event_pos = 0; + res = sys_read(nfd, pkgl_dat->event_buf, sizeof(pkgl_dat->event_buf)); + if (res < (int) sizeof(*event)) { + if (res == -EINTR) + continue; + printk(KERN_ERR "sdcardfs: failed to read inotify event: %d\n", res); + goto interruptable_sleep; + } + + while (res >= (int) sizeof(*event)) { + event = (struct inotify_event *) (pkgl_dat->event_buf + event_pos); + + printk(KERN_INFO "sdcardfs: inotify event: %08x\n", event->mask); + if ((event->mask & IN_IGNORED) == IN_IGNORED) { + /* Previously watched file was deleted, probably due to move + * that swapped in new data; re-arm the watch and read. */ + active = false; + } + + event_size = sizeof(*event) + event->len; + res -= event_size; + event_pos += event_size; + } + continue; + +interruptable_sleep: + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } + flush_signals(current); + sys_close(nfd); + return res; +} + +void * packagelist_create(gid_t write_gid) +{ + struct packagelist_data *pkgl_dat; + struct task_struct *packagelist_thread; + + pkgl_dat = kmalloc(sizeof(*pkgl_dat), GFP_KERNEL | __GFP_ZERO); + if (!pkgl_dat) { + printk(KERN_ERR "sdcardfs: creating kthread failed\n"); + return ERR_PTR(-ENOMEM); + } + + mutex_init(&pkgl_dat->hashtable_lock); + hash_init(pkgl_dat->package_to_appid); + hash_init(pkgl_dat->appid_with_rw); + pkgl_dat->write_gid = write_gid; + + packagelist_thread = kthread_run(packagelist_reader, (void *)pkgl_dat, "pkgld"); + if (IS_ERR(packagelist_thread)) { + printk(KERN_ERR "sdcardfs: creating kthread failed\n"); + kfree(pkgl_dat); + return packagelist_thread; + } + pkgl_dat->thread_id = packagelist_thread; + + printk(KERN_INFO "sdcardfs: created packagelist pkgld/%d\n", + (int)pkgl_dat->thread_id->pid); + + return (void *)pkgl_dat; +} + +void packagelist_destroy(void *pkgl_id) +{ + struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id; + pid_t pkgl_pid = pkgl_dat->thread_id->pid; + + force_sig_info(SIGINT, SEND_SIG_PRIV, pkgl_dat->thread_id); + kthread_stop(pkgl_dat->thread_id); + remove_all_hashentrys(pkgl_dat); + printk(KERN_INFO "sdcardfs: destroyed packagelist pkgld/%d\n", (int)pkgl_pid); + kfree(pkgl_dat); +} + +int packagelist_init(void) +{ + hashtable_entry_cachep = + kmem_cache_create("packagelist_hashtable_entry", + sizeof(struct hashtable_entry), 0, 0, NULL); + if (!hashtable_entry_cachep) { + printk(KERN_ERR "sdcardfs: failed creating pkgl_hashtable entry slab cache\n"); + return -ENOMEM; + } + + return 0; +} + +void packagelist_exit(void) +{ + if (hashtable_entry_cachep) + kmem_cache_destroy(hashtable_entry_cachep); +} + + diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h new file mode 100644 index 000000000000..90f8b24e4a52 --- /dev/null +++ b/fs/sdcardfs/sdcardfs.h @@ -0,0 +1,493 @@ +/* + * fs/sdcardfs/sdcardfs.h + * + * The sdcardfs v2.0 + * This file system replaces the sdcard daemon on Android + * On version 2.0, some of the daemon functions have been ported + * to support the multi-user concepts of Android 4.4 + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#ifndef _SDCARDFS_H_ +#define _SDCARDFS_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "multiuser.h" + +/* the file system name */ +#define SDCARDFS_NAME "sdcardfs" + +/* sdcardfs root inode number */ +#define SDCARDFS_ROOT_INO 1 + +/* useful for tracking code reachability */ +#define UDBG printk(KERN_DEFAULT "DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__) + +#define SDCARDFS_DIRENT_SIZE 256 + +/* temporary static uid settings for development */ +#define AID_ROOT 0 /* uid for accessing /mnt/sdcard & extSdcard */ +#define AID_MEDIA_RW 1023 /* internal media storage write access */ + +#define AID_SDCARD_RW 1015 /* external storage write access */ +#define AID_SDCARD_R 1028 /* external storage read access */ +#define AID_SDCARD_PICS 1033 /* external storage photos access */ +#define AID_SDCARD_AV 1034 /* external storage audio/video access */ +#define AID_SDCARD_ALL 1035 /* access all users external storage */ + +#define AID_PACKAGE_INFO 1027 + +#define fix_derived_permission(x) \ + do { \ + (x)->i_uid = SDCARDFS_I(x)->d_uid; \ + (x)->i_gid = SDCARDFS_I(x)->d_gid; \ + (x)->i_mode = ((x)->i_mode & S_IFMT) | SDCARDFS_I(x)->d_mode;\ + } while (0) + +/* OVERRIDE_CRED() and REVERT_CRED() + * OVERRID_CRED() + * backup original task->cred + * and modifies task->cred->fsuid/fsgid to specified value. + * REVERT_CRED() + * restore original task->cred->fsuid/fsgid. + * These two macro should be used in pair, and OVERRIDE_CRED() should be + * placed at the beginning of a function, right after variable declaration. + */ +#define OVERRIDE_CRED(sdcardfs_sbi, saved_cred) \ + saved_cred = override_fsids(sdcardfs_sbi); \ + if (!saved_cred) { return -ENOMEM; } + +#define OVERRIDE_CRED_PTR(sdcardfs_sbi, saved_cred) \ + saved_cred = override_fsids(sdcardfs_sbi); \ + if (!saved_cred) { return ERR_PTR(-ENOMEM); } + +#define REVERT_CRED(saved_cred) revert_fsids(saved_cred) + +#define DEBUG_CRED() \ + printk("KAKJAGI: %s:%d fsuid %d fsgid %d\n", \ + __FUNCTION__, __LINE__, \ + (int)current->cred->fsuid, \ + (int)current->cred->fsgid); + +/* Android 4.4 support */ + +/* Permission mode for a specific node. Controls how file permissions + * are derived for children nodes. */ +typedef enum { + /* Nothing special; this node should just inherit from its parent. */ + PERM_INHERIT, + /* This node is one level above a normal root; used for legacy layouts + * which use the first level to represent user_id. */ + PERM_LEGACY_PRE_ROOT, + /* This node is "/" */ + PERM_ROOT, + /* This node is "/Android" */ + PERM_ANDROID, + /* This node is "/Android/data" */ + PERM_ANDROID_DATA, + /* This node is "/Android/obb" */ + PERM_ANDROID_OBB, + /* This node is "/Android/user" */ + PERM_ANDROID_USER, +} perm_t; + +/* Permissions structure to derive */ +typedef enum { + DERIVE_NONE, + DERIVE_LEGACY, + DERIVE_UNIFIED, +} derive_t; + +typedef enum { + LOWER_FS_EXT4, + LOWER_FS_FAT, +} lower_fs_t; + +struct sdcardfs_sb_info; +struct sdcardfs_mount_options; + +/* Do not directly use this function. Use OVERRIDE_CRED() instead. */ +const struct cred * override_fsids(struct sdcardfs_sb_info* sbi); +/* Do not directly use this function, use REVERT_CRED() instead. */ +void revert_fsids(const struct cred * old_cred); + +/* operations vectors defined in specific files */ +extern const struct file_operations sdcardfs_main_fops; +extern const struct file_operations sdcardfs_dir_fops; +extern const struct inode_operations sdcardfs_main_iops; +extern const struct inode_operations sdcardfs_dir_iops; +extern const struct inode_operations sdcardfs_symlink_iops; +extern const struct super_operations sdcardfs_sops; +extern const struct dentry_operations sdcardfs_ci_dops; +extern const struct address_space_operations sdcardfs_aops, sdcardfs_dummy_aops; +extern const struct vm_operations_struct sdcardfs_vm_ops; + +extern int sdcardfs_init_inode_cache(void); +extern void sdcardfs_destroy_inode_cache(void); +extern int sdcardfs_init_dentry_cache(void); +extern void sdcardfs_destroy_dentry_cache(void); +extern int new_dentry_private_data(struct dentry *dentry); +extern void free_dentry_private_data(struct dentry *dentry); +extern struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd); +extern int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, + struct path *lower_path); + +/* file private data */ +struct sdcardfs_file_info { + struct file *lower_file; + const struct vm_operations_struct *lower_vm_ops; +}; + +/* sdcardfs inode data in memory */ +struct sdcardfs_inode_info { + struct inode *lower_inode; + /* state derived based on current position in hierachy + * caution: d_mode does not include file types + */ + perm_t perm; + userid_t userid; + uid_t d_uid; + gid_t d_gid; + mode_t d_mode; + + struct inode vfs_inode; +}; + +/* sdcardfs dentry data in memory */ +struct sdcardfs_dentry_info { + spinlock_t lock; /* protects lower_path */ + struct path lower_path; + struct path orig_path; +}; + +struct sdcardfs_mount_options { + uid_t fs_low_uid; + gid_t fs_low_gid; + gid_t write_gid; + int split_perms; + derive_t derive; + lower_fs_t lower_fs; + unsigned int reserved_mb; +}; + +/* sdcardfs super-block data in memory */ +struct sdcardfs_sb_info { + struct super_block *lower_sb; + /* derived perm policy : some of options have been added + * to sdcardfs_mount_options (Android 4.4 support) */ + struct sdcardfs_mount_options options; + spinlock_t lock; /* protects obbpath */ + char *obbpath_s; + struct path obbpath; + void *pkgl_id; +}; + +/* + * inode to private data + * + * Since we use containers and the struct inode is _inside_ the + * sdcardfs_inode_info structure, SDCARDFS_I will always (given a non-NULL + * inode pointer), return a valid non-NULL pointer. + */ +static inline struct sdcardfs_inode_info *SDCARDFS_I(const struct inode *inode) +{ + return container_of(inode, struct sdcardfs_inode_info, vfs_inode); +} + +/* dentry to private data */ +#define SDCARDFS_D(dent) ((struct sdcardfs_dentry_info *)(dent)->d_fsdata) + +/* superblock to private data */ +#define SDCARDFS_SB(super) ((struct sdcardfs_sb_info *)(super)->s_fs_info) + +/* file to private Data */ +#define SDCARDFS_F(file) ((struct sdcardfs_file_info *)((file)->private_data)) + +/* file to lower file */ +static inline struct file *sdcardfs_lower_file(const struct file *f) +{ + return SDCARDFS_F(f)->lower_file; +} + +static inline void sdcardfs_set_lower_file(struct file *f, struct file *val) +{ + SDCARDFS_F(f)->lower_file = val; +} + +/* inode to lower inode. */ +static inline struct inode *sdcardfs_lower_inode(const struct inode *i) +{ + return SDCARDFS_I(i)->lower_inode; +} + +static inline void sdcardfs_set_lower_inode(struct inode *i, struct inode *val) +{ + SDCARDFS_I(i)->lower_inode = val; +} + +/* superblock to lower superblock */ +static inline struct super_block *sdcardfs_lower_super( + const struct super_block *sb) +{ + return SDCARDFS_SB(sb)->lower_sb; +} + +static inline void sdcardfs_set_lower_super(struct super_block *sb, + struct super_block *val) +{ + SDCARDFS_SB(sb)->lower_sb = val; +} + +/* path based (dentry/mnt) macros */ +static inline void pathcpy(struct path *dst, const struct path *src) +{ + dst->dentry = src->dentry; + dst->mnt = src->mnt; +} + +/* sdcardfs_get_pname functions calls path_get() + * therefore, the caller must call "proper" path_put functions + */ +#define SDCARDFS_DENT_FUNC(pname) \ +static inline void sdcardfs_get_##pname(const struct dentry *dent, \ + struct path *pname) \ +{ \ + spin_lock(&SDCARDFS_D(dent)->lock); \ + pathcpy(pname, &SDCARDFS_D(dent)->pname); \ + path_get(pname); \ + spin_unlock(&SDCARDFS_D(dent)->lock); \ + return; \ +} \ +static inline void sdcardfs_put_##pname(const struct dentry *dent, \ + struct path *pname) \ +{ \ + path_put(pname); \ + return; \ +} \ +static inline void sdcardfs_set_##pname(const struct dentry *dent, \ + struct path *pname) \ +{ \ + spin_lock(&SDCARDFS_D(dent)->lock); \ + pathcpy(&SDCARDFS_D(dent)->pname, pname); \ + spin_unlock(&SDCARDFS_D(dent)->lock); \ + return; \ +} \ +static inline void sdcardfs_reset_##pname(const struct dentry *dent) \ +{ \ + spin_lock(&SDCARDFS_D(dent)->lock); \ + SDCARDFS_D(dent)->pname.dentry = NULL; \ + SDCARDFS_D(dent)->pname.mnt = NULL; \ + spin_unlock(&SDCARDFS_D(dent)->lock); \ + return; \ +} \ +static inline void sdcardfs_put_reset_##pname(const struct dentry *dent) \ +{ \ + struct path pname; \ + spin_lock(&SDCARDFS_D(dent)->lock); \ + if(SDCARDFS_D(dent)->pname.dentry) { \ + pathcpy(&pname, &SDCARDFS_D(dent)->pname); \ + SDCARDFS_D(dent)->pname.dentry = NULL; \ + SDCARDFS_D(dent)->pname.mnt = NULL; \ + spin_unlock(&SDCARDFS_D(dent)->lock); \ + path_put(&pname); \ + } else \ + spin_unlock(&SDCARDFS_D(dent)->lock); \ + return; \ +} + +SDCARDFS_DENT_FUNC(lower_path) +SDCARDFS_DENT_FUNC(orig_path) + +static inline int has_graft_path(const struct dentry *dent) +{ + int ret = 0; + + spin_lock(&SDCARDFS_D(dent)->lock); + if (SDCARDFS_D(dent)->orig_path.dentry != NULL) + ret = 1; + spin_unlock(&SDCARDFS_D(dent)->lock); + + return ret; +} + +static inline void sdcardfs_get_real_lower(const struct dentry *dent, + struct path *real_lower) +{ + /* in case of a local obb dentry + * the orig_path should be returned + */ + if(has_graft_path(dent)) + sdcardfs_get_orig_path(dent, real_lower); + else + sdcardfs_get_lower_path(dent, real_lower); +} + +static inline void sdcardfs_put_real_lower(const struct dentry *dent, + struct path *real_lower) +{ + if(has_graft_path(dent)) + sdcardfs_put_orig_path(dent, real_lower); + else + sdcardfs_put_lower_path(dent, real_lower); +} + +/* for packagelist.c */ +extern int get_caller_has_rw_locked(void *pkgl_id, derive_t derive); +extern appid_t get_appid(void *pkgl_id, const char *app_name); +extern int check_caller_access_to_name(struct inode *parent_node, const char* name, + derive_t derive, int w_ok, int has_rw); +extern int open_flags_to_access_mode(int open_flags); +extern void * packagelist_create(gid_t write_gid); +extern void packagelist_destroy(void *pkgl_id); +extern int packagelist_init(void); +extern void packagelist_exit(void); + +/* for derived_perm.c */ +extern void setup_derived_state(struct inode *inode, perm_t perm, + userid_t userid, uid_t uid, gid_t gid, mode_t mode); +extern void get_derived_permission(struct dentry *parent, struct dentry *dentry); +extern void update_derived_permission(struct dentry *dentry); +extern int need_graft_path(struct dentry *dentry); +extern int is_base_obbpath(struct dentry *dentry); +extern int is_obbpath_invalid(struct dentry *dentry); +extern int setup_obb_dentry(struct dentry *dentry, struct path *lower_path); + +/* locking helpers */ +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *dir = dget_parent(dentry); + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); + return dir; +} + +static inline void unlock_dir(struct dentry *dir) +{ + mutex_unlock(&dir->d_inode->i_mutex); + dput(dir); +} + +static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t mode) +{ + int err; + struct dentry *dent; + struct iattr attrs; + struct nameidata nd; + + err = kern_path_parent(path_s, &nd); + if (err) { + if (err == -EEXIST) + err = 0; + goto out; + } + + dent = lookup_create(&nd, 1); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + if (err == -EEXIST) + err = 0; + goto out_unlock; + } + + err = vfs_mkdir(nd.path.dentry->d_inode, dent, mode); + if (err) { + if (err == -EEXIST) + err = 0; + goto out_dput; + } + + attrs.ia_uid = uid; + attrs.ia_gid = gid; + attrs.ia_valid = ATTR_UID | ATTR_GID; + mutex_lock(&dent->d_inode->i_mutex); + notify_change(dent, &attrs); + mutex_unlock(&dent->d_inode->i_mutex); + +out_dput: + dput(dent); + +out_unlock: + /* parent dentry locked by lookup_create */ + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + path_put(&nd.path); + +out: + return err; +} + +/* + * Return 1, if a disk has enough free space, otherwise 0. + * We assume that any files can not be overwritten. + */ +static inline int check_min_free_space(struct dentry *dentry, size_t size, int dir) +{ + int err; + struct path lower_path; + struct kstatfs statfs; + u64 avail; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + + if (sbi->options.reserved_mb) { + /* Get fs stat of lower filesystem. */ + sdcardfs_get_lower_path(dentry, &lower_path); + err = vfs_statfs(&lower_path, &statfs); + sdcardfs_put_lower_path(dentry, &lower_path); + + if (unlikely(err)) + return 0; + + /* Invalid statfs informations. */ + if (unlikely(statfs.f_bsize == 0)) + return 0; + + /* if you are checking directory, set size to f_bsize. */ + if (unlikely(dir)) + size = statfs.f_bsize; + + /* available size */ + avail = statfs.f_bavail * statfs.f_bsize; + + /* not enough space */ + if ((u64)size > avail) + return 0; + + /* enough space */ + if ((avail - size) > (sbi->options.reserved_mb * 1024 * 1024)) + return 1; + + return 0; + } else + return 1; +} + +#endif /* not _SDCARDFS_H_ */ diff --git a/fs/sdcardfs/strtok.h b/fs/sdcardfs/strtok.h new file mode 100644 index 000000000000..50ab25aa0bc4 --- /dev/null +++ b/fs/sdcardfs/strtok.h @@ -0,0 +1,75 @@ +/* + * fs/sdcardfs/strtok.h + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +static char * +strtok_r(char *s, const char *delim, char **last) +{ + char *spanp; + int c, sc; + char *tok; + + + /* if (s == NULL && (s = *last) == NULL) + return NULL; */ + if (s == NULL) { + s = *last; + if (s == NULL) + return NULL; + } + + /* + * Skip (span) leading delimiters (s += strspn(s, delim), sort of). + */ +cont: + c = *s++; + for (spanp = (char *)delim; (sc = *spanp++) != 0;) { + if (c == sc) + goto cont; + } + + if (c == 0) { /* no non-delimiter characters */ + *last = NULL; + return NULL; + } + tok = s - 1; + + /* + * Scan token (scan for delimiters: s += strcspn(s, delim), sort of). + * Note that delim must have one NUL; we stop if we see that, too. + */ + for (;;) { + c = *s++; + spanp = (char *)delim; + do { + sc = *spanp++; + if (sc == c) { + if (c == 0) + s = NULL; + else + s[-1] = 0; + *last = s; + return tok; + } + } while (sc != 0); + } + + /* NOTREACHED */ +} + diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c new file mode 100644 index 000000000000..1d206c82dfdf --- /dev/null +++ b/fs/sdcardfs/super.c @@ -0,0 +1,229 @@ +/* + * fs/sdcardfs/super.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" + +/* + * The inode cache is used with alloc_inode for both our inode info and the + * vfs inode. + */ +static struct kmem_cache *sdcardfs_inode_cachep; + +/* final actions when unmounting a file system */ +static void sdcardfs_put_super(struct super_block *sb) +{ + struct sdcardfs_sb_info *spd; + struct super_block *s; + + spd = SDCARDFS_SB(sb); + if (!spd) + return; + + if(spd->obbpath_s) { + kfree(spd->obbpath_s); + path_put(&spd->obbpath); + } + + /* decrement lower super references */ + s = sdcardfs_lower_super(sb); + sdcardfs_set_lower_super(sb, NULL); + atomic_dec(&s->s_active); + + if(spd->pkgl_id) + packagelist_destroy(spd->pkgl_id); + + kfree(spd); + sb->s_fs_info = NULL; +} + +static int sdcardfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + int err; + struct path lower_path; + u32 min_blocks; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + + sdcardfs_get_lower_path(dentry, &lower_path); + err = vfs_statfs(&lower_path, buf); + sdcardfs_put_lower_path(dentry, &lower_path); + + if (sbi->options.reserved_mb) { + /* Invalid statfs informations. */ + if (buf->f_bsize == 0) { + printk(KERN_ERR "Returned block size is zero.\n"); + return -EINVAL; + } + + min_blocks = ((sbi->options.reserved_mb * 1024 * 1024)/buf->f_bsize); + buf->f_blocks -= min_blocks; + + if (buf->f_bavail > min_blocks) + buf->f_bavail -= min_blocks; + else + buf->f_bavail = 0; + + /* Make reserved blocks invisiable to media storage */ + buf->f_bfree = buf->f_bavail; + } + + /* set return buf to our f/s to avoid confusing user-level utils */ + buf->f_type = SDCARDFS_SUPER_MAGIC; + + return err; +} + +/* + * @flags: numeric mount options + * @options: mount options string + */ +static int sdcardfs_remount_fs(struct super_block *sb, int *flags, char *options) +{ + int err = 0; + + /* + * The VFS will take care of "ro" and "rw" flags among others. We + * can safely accept a few flags (RDONLY, MANDLOCK), and honor + * SILENT, but anything else left over is an error. + */ + if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT)) != 0) { + printk(KERN_ERR + "sdcardfs: remount flags 0x%x unsupported\n", *flags); + err = -EINVAL; + } + + return err; +} + +/* + * Called by iput() when the inode reference count reached zero + * and the inode is not hashed anywhere. Used to clear anything + * that needs to be, before the inode is completely destroyed and put + * on the inode free list. + */ +static void sdcardfs_evict_inode(struct inode *inode) +{ + struct inode *lower_inode; + + truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); + /* + * Decrement a reference to a lower_inode, which was incremented + * by our read_inode when it was created initially. + */ + lower_inode = sdcardfs_lower_inode(inode); + sdcardfs_set_lower_inode(inode, NULL); + iput(lower_inode); +} + +static struct inode *sdcardfs_alloc_inode(struct super_block *sb) +{ + struct sdcardfs_inode_info *i; + + i = kmem_cache_alloc(sdcardfs_inode_cachep, GFP_KERNEL); + if (!i) + return NULL; + + /* memset everything up to the inode to 0 */ + memset(i, 0, offsetof(struct sdcardfs_inode_info, vfs_inode)); + + i->vfs_inode.i_version = 1; + return &i->vfs_inode; +} + +static void sdcardfs_destroy_inode(struct inode *inode) +{ + kmem_cache_free(sdcardfs_inode_cachep, SDCARDFS_I(inode)); +} + +/* sdcardfs inode cache constructor */ +static void init_once(void *obj) +{ + struct sdcardfs_inode_info *i = obj; + + inode_init_once(&i->vfs_inode); +} + +int sdcardfs_init_inode_cache(void) +{ + int err = 0; + + sdcardfs_inode_cachep = + kmem_cache_create("sdcardfs_inode_cache", + sizeof(struct sdcardfs_inode_info), 0, + SLAB_RECLAIM_ACCOUNT, init_once); + if (!sdcardfs_inode_cachep) + err = -ENOMEM; + return err; +} + +/* sdcardfs inode cache destructor */ +void sdcardfs_destroy_inode_cache(void) +{ + if (sdcardfs_inode_cachep) + kmem_cache_destroy(sdcardfs_inode_cachep); +} + +/* + * Used only in nfs, to kill any pending RPC tasks, so that subsequent + * code can actually succeed and won't leave tasks that need handling. + */ +static void sdcardfs_umount_begin(struct super_block *sb) +{ + struct super_block *lower_sb; + + lower_sb = sdcardfs_lower_super(sb); + if (lower_sb && lower_sb->s_op && lower_sb->s_op->umount_begin) + lower_sb->s_op->umount_begin(lower_sb); +} + +static int sdcardfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(mnt->mnt_sb); + struct sdcardfs_mount_options *opts = &sbi->options; + + if (opts->fs_low_uid != 0) + seq_printf(m, ",uid=%u", opts->fs_low_uid); + if (opts->fs_low_gid != 0) + seq_printf(m, ",gid=%u", opts->fs_low_gid); + + if (opts->derive == DERIVE_NONE) + seq_printf(m, ",derive=none"); + else if (opts->derive == DERIVE_LEGACY) + seq_printf(m, ",derive=legacy"); + else if (opts->derive == DERIVE_UNIFIED) + seq_printf(m, ",derive=unified"); + + if (opts->reserved_mb != 0) + seq_printf(m, ",reserved=%uMB", opts->reserved_mb); + + return 0; +}; + +const struct super_operations sdcardfs_sops = { + .put_super = sdcardfs_put_super, + .statfs = sdcardfs_statfs, + .remount_fs = sdcardfs_remount_fs, + .evict_inode = sdcardfs_evict_inode, + .umount_begin = sdcardfs_umount_begin, + .show_options = sdcardfs_show_options, + .alloc_inode = sdcardfs_alloc_inode, + .destroy_inode = sdcardfs_destroy_inode, + .drop_inode = generic_delete_inode, +}; diff --git a/include/linux/namei.h b/include/linux/namei.h index d8c6334cd150..ef3b4f74eaf0 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -43,6 +43,9 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_JUMPED 0x1000 #define LOOKUP_ROOT 0x2000 #define LOOKUP_EMPTY 0x4000 +#ifdef CONFIG_SDCARD_FS_CI_SEARCH +#define LOOKUP_CASE_INSENSITIVE 0x8000 +#endif extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty); diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index accb036bbc9c..cfb5c406f344 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -52,6 +52,8 @@ #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" +#define SDCARDFS_SUPER_MAGIC 0xb550ca10 + #define SMB_SUPER_MAGIC 0x517B #define CGROUP_SUPER_MAGIC 0x27e0eb From d2d6d73bc37156b6402fdaef787e0be86b6d5901 Mon Sep 17 00:00:00 2001 From: Daniel Campello Date: Mon, 20 Jul 2015 16:27:37 -0700 Subject: [PATCH 038/797] Port of sdcardfs to 4.4 Change-Id: I25b99ecf214e72ebf6a57ec3085972542a8d7951 Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/Kconfig | 1 - fs/sdcardfs/dentry.c | 9 +- fs/sdcardfs/file.c | 47 ++++--- fs/sdcardfs/hashtable.h | 190 -------------------------- fs/sdcardfs/inode.c | 280 +++++++++++++++----------------------- fs/sdcardfs/lookup.c | 25 ++-- fs/sdcardfs/main.c | 113 ++++++++------- fs/sdcardfs/mmap.c | 5 +- fs/sdcardfs/packagelist.c | 39 +++--- fs/sdcardfs/sdcardfs.h | 41 +++--- fs/sdcardfs/super.c | 6 +- include/linux/namei.h | 2 + 12 files changed, 252 insertions(+), 506 deletions(-) delete mode 100644 fs/sdcardfs/hashtable.h diff --git a/fs/sdcardfs/Kconfig b/fs/sdcardfs/Kconfig index 657f4958e8d6..d995f3eaae6d 100644 --- a/fs/sdcardfs/Kconfig +++ b/fs/sdcardfs/Kconfig @@ -1,6 +1,5 @@ config SDCARD_FS tristate "sdcard file system" - depends on EXPERIMENTAL default n help Sdcardfs is based on Wrapfs file system. diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c index 4572a5403bb2..dbbcfd091fc7 100644 --- a/fs/sdcardfs/dentry.c +++ b/fs/sdcardfs/dentry.c @@ -26,7 +26,7 @@ * 0: tell VFS to invalidate dentry * 1: dentry is valid */ -static int sdcardfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) +static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags) { int err = 1; struct path parent_lower_path, lower_path; @@ -35,7 +35,7 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) struct dentry *lower_cur_parent_dentry = NULL; struct dentry *lower_dentry = NULL; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; spin_lock(&dentry->d_lock); @@ -119,7 +119,7 @@ static void sdcardfs_d_release(struct dentry *dentry) } static int sdcardfs_hash_ci(const struct dentry *dentry, - const struct inode *inode, struct qstr *qstr) + struct qstr *qstr) { /* * This function is copy of vfat_hashi. @@ -148,8 +148,7 @@ static int sdcardfs_hash_ci(const struct dentry *dentry, * Case insensitive compare of two vfat names. */ static int sdcardfs_cmp_ci(const struct dentry *parent, - const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, + const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { /* This function is copy of vfat_cmpi */ diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c index bcacb947c874..f9c5eaafc619 100644 --- a/fs/sdcardfs/file.c +++ b/fs/sdcardfs/file.c @@ -50,8 +50,8 @@ static ssize_t sdcardfs_read(struct file *file, char __user *buf, err = vfs_read(lower_file, buf, count, ppos); /* update our inode atime upon a successful lower read */ if (err >= 0) - fsstack_copy_attr_atime(dentry->d_inode, - lower_file->f_path.dentry->d_inode); + fsstack_copy_attr_atime(d_inode(dentry), + file_inode(lower_file)); return err; } @@ -59,7 +59,7 @@ static ssize_t sdcardfs_read(struct file *file, char __user *buf, static ssize_t sdcardfs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - int err = 0; + int err; struct file *lower_file; struct dentry *dentry = file->f_path.dentry; @@ -73,29 +73,29 @@ static ssize_t sdcardfs_write(struct file *file, const char __user *buf, err = vfs_write(lower_file, buf, count, ppos); /* update our inode times+sizes upon a successful lower write */ if (err >= 0) { - fsstack_copy_inode_size(dentry->d_inode, - lower_file->f_path.dentry->d_inode); - fsstack_copy_attr_times(dentry->d_inode, - lower_file->f_path.dentry->d_inode); + fsstack_copy_inode_size(d_inode(dentry), + file_inode(lower_file)); + fsstack_copy_attr_times(d_inode(dentry), + file_inode(lower_file)); } return err; } -static int sdcardfs_readdir(struct file *file, void *dirent, filldir_t filldir) +static int sdcardfs_readdir(struct file *file, struct dir_context *ctx) { - int err = 0; + int err; struct file *lower_file = NULL; struct dentry *dentry = file->f_path.dentry; lower_file = sdcardfs_lower_file(file); lower_file->f_pos = file->f_pos; - err = vfs_readdir(lower_file, filldir, dirent); + err = iterate_dir(lower_file, ctx); file->f_pos = lower_file->f_pos; if (err >= 0) /* copy the atime */ - fsstack_copy_attr_atime(dentry->d_inode, - lower_file->f_path.dentry->d_inode); + fsstack_copy_attr_atime(d_inode(dentry), + file_inode(lower_file)); return err; } @@ -191,7 +191,6 @@ static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma) */ file_accessed(file); vma->vm_ops = &sdcardfs_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; file->f_mapping->a_ops = &sdcardfs_aops; /* set our aops */ if (!SDCARDFS_F(file)->lower_vm_ops) /* save for our ->fault */ @@ -242,8 +241,8 @@ static int sdcardfs_open(struct inode *inode, struct file *file) /* open lower object and link sdcardfs's file struct to lower's */ sdcardfs_get_lower_path(file->f_path.dentry, &lower_path); - lower_file = dentry_open(lower_path.dentry, lower_path.mnt, - file->f_flags, current_cred()); + lower_file = dentry_open(&lower_path, file->f_flags, current_cred()); + path_put(&lower_path); if (IS_ERR(lower_file)) { err = PTR_ERR(lower_file); lower_file = sdcardfs_lower_file(file); @@ -275,8 +274,10 @@ static int sdcardfs_flush(struct file *file, fl_owner_t id) struct file *lower_file = NULL; lower_file = sdcardfs_lower_file(file); - if (lower_file && lower_file->f_op && lower_file->f_op->flush) + if (lower_file && lower_file->f_op && lower_file->f_op->flush) { + filemap_write_and_wait(file->f_mapping); err = lower_file->f_op->flush(lower_file, id); + } return err; } @@ -296,19 +297,23 @@ static int sdcardfs_file_release(struct inode *inode, struct file *file) return 0; } -static int -sdcardfs_fsync(struct file *file, int datasync) +static int sdcardfs_fsync(struct file *file, loff_t start, loff_t end, + int datasync) { int err; struct file *lower_file; struct path lower_path; struct dentry *dentry = file->f_path.dentry; + err = __generic_file_fsync(file, start, end, datasync); + if (err) + goto out; + lower_file = sdcardfs_lower_file(file); sdcardfs_get_lower_path(dentry, &lower_path); - err = vfs_fsync(lower_file, datasync); + err = vfs_fsync_range(lower_file, start, end, datasync); sdcardfs_put_lower_path(dentry, &lower_path); - +out: return err; } @@ -344,7 +349,7 @@ const struct file_operations sdcardfs_main_fops = { const struct file_operations sdcardfs_dir_fops = { .llseek = generic_file_llseek, .read = generic_read_dir, - .readdir = sdcardfs_readdir, + .iterate = sdcardfs_readdir, .unlocked_ioctl = sdcardfs_unlocked_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = sdcardfs_compat_ioctl, diff --git a/fs/sdcardfs/hashtable.h b/fs/sdcardfs/hashtable.h deleted file mode 100644 index 1e770f3df148..000000000000 --- a/fs/sdcardfs/hashtable.h +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Statically sized hash table implementation - * (C) 2012 Sasha Levin - */ - -#ifndef _LINUX_HASHTABLE_H -#define _LINUX_HASHTABLE_H - -#include -#include -#include -#include -#include - -#define DEFINE_HASHTABLE(name, bits) \ - struct hlist_head name[1 << (bits)] = \ - { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } - -#define DECLARE_HASHTABLE(name, bits) \ - struct hlist_head name[1 << (bits)] - -#define HASH_SIZE(name) (ARRAY_SIZE(name)) -#define HASH_BITS(name) ilog2(HASH_SIZE(name)) - -/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */ -#define hash_min(val, bits) \ - (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits)) - -static inline void __hash_init(struct hlist_head *ht, unsigned int sz) -{ - unsigned int i; - - for (i = 0; i < sz; i++) - INIT_HLIST_HEAD(&ht[i]); -} - -/** - * hash_init - initialize a hash table - * @hashtable: hashtable to be initialized - * - * Calculates the size of the hashtable from the given parameter, otherwise - * same as hash_init_size. - * - * This has to be a macro since HASH_BITS() will not work on pointers since - * it calculates the size during preprocessing. - */ -#define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable)) - -/** - * hash_add - add an object to a hashtable - * @hashtable: hashtable to add to - * @node: the &struct hlist_node of the object to be added - * @key: the key of the object to be added - */ -#define hash_add(hashtable, node, key) \ - hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) - -/** - * hash_add_rcu - add an object to a rcu enabled hashtable - * @hashtable: hashtable to add to - * @node: the &struct hlist_node of the object to be added - * @key: the key of the object to be added - */ -#define hash_add_rcu(hashtable, node, key) \ - hlist_add_head_rcu(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) - -/** - * hash_hashed - check whether an object is in any hashtable - * @node: the &struct hlist_node of the object to be checked - */ -static inline bool hash_hashed(struct hlist_node *node) -{ - return !hlist_unhashed(node); -} - -static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz) -{ - unsigned int i; - - for (i = 0; i < sz; i++) - if (!hlist_empty(&ht[i])) - return false; - - return true; -} - -/** - * hash_empty - check whether a hashtable is empty - * @hashtable: hashtable to check - * - * This has to be a macro since HASH_BITS() will not work on pointers since - * it calculates the size during preprocessing. - */ -#define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable)) - -/** - * hash_del - remove an object from a hashtable - * @node: &struct hlist_node of the object to remove - */ -static inline void hash_del(struct hlist_node *node) -{ - hlist_del_init(node); -} - -/** - * hash_del_rcu - remove an object from a rcu enabled hashtable - * @node: &struct hlist_node of the object to remove - */ -static inline void hash_del_rcu(struct hlist_node *node) -{ - hlist_del_init_rcu(node); -} - -/** - * hash_for_each - iterate over a hashtable - * @name: hashtable to iterate - * @bkt: integer to use as bucket loop cursor - * @obj: the type * to use as a loop cursor for each entry - * @member: the name of the hlist_node within the struct - */ -#define hash_for_each(name, bkt, obj, member, pos) \ - for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ - (bkt)++)\ - hlist_for_each_entry(obj, pos, &name[bkt], member) - -/** - * hash_for_each_rcu - iterate over a rcu enabled hashtable - * @name: hashtable to iterate - * @bkt: integer to use as bucket loop cursor - * @obj: the type * to use as a loop cursor for each entry - * @member: the name of the hlist_node within the struct - */ -#define hash_for_each_rcu(name, bkt, obj, member) \ - for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ - (bkt)++)\ - hlist_for_each_entry_rcu(obj, &name[bkt], member) - -/** - * hash_for_each_safe - iterate over a hashtable safe against removal of - * hash entry - * @name: hashtable to iterate - * @bkt: integer to use as bucket loop cursor - * @tmp: a &struct used for temporary storage - * @obj: the type * to use as a loop cursor for each entry - * @member: the name of the hlist_node within the struct - */ -#define hash_for_each_safe(name, bkt, tmp, obj, member, pos) \ - for ((bkt) = 0, obj = NULL; (bkt) < HASH_SIZE(name);\ - (bkt)++)\ - hlist_for_each_entry_safe(obj, pos, tmp, &name[bkt], member) - -/** - * hash_for_each_possible - iterate over all possible objects hashing to the - * same bucket - * @name: hashtable to iterate - * @obj: the type * to use as a loop cursor for each entry - * @member: the name of the hlist_node within the struct - * @key: the key of the objects to iterate over - */ -#define hash_for_each_possible(name, obj, member, key, pos) \ - hlist_for_each_entry(obj, pos, &name[hash_min(key, HASH_BITS(name))], member) - -/** - * hash_for_each_possible_rcu - iterate over all possible objects hashing to the - * same bucket in an rcu enabled hashtable - * in a rcu enabled hashtable - * @name: hashtable to iterate - * @obj: the type * to use as a loop cursor for each entry - * @member: the name of the hlist_node within the struct - * @key: the key of the objects to iterate over - */ -#define hash_for_each_possible_rcu(name, obj, member, key) \ - hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\ - member) - -/** - * hash_for_each_possible_safe - iterate over all possible objects hashing to the - * same bucket safe against removals - * @name: hashtable to iterate - * @obj: the type * to use as a loop cursor for each entry - * @tmp: a &struct used for temporary storage - * @member: the name of the hlist_node within the struct - * @key: the key of the objects to iterate over - */ -#define hash_for_each_possible_safe(name, obj, tmp, member, key) \ - hlist_for_each_entry_safe(obj, tmp,\ - &name[hash_min(key, HASH_BITS(name))], member) - - -#endif diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index e8ed04250ed1..75c622bac2f5 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -30,8 +30,8 @@ const struct cred * override_fsids(struct sdcardfs_sb_info* sbi) if (!cred) return NULL; - cred->fsuid = sbi->options.fs_low_uid; - cred->fsgid = sbi->options.fs_low_gid; + cred->fsuid = make_kuid(&init_user_ns, sbi->options.fs_low_uid); + cred->fsgid = make_kgid(&init_user_ns, sbi->options.fs_low_gid); old_cred = override_creds(cred); @@ -49,12 +49,12 @@ void revert_fsids(const struct cred * old_cred) } static int sdcardfs_create(struct inode *dir, struct dentry *dentry, - int mode, struct nameidata *nd) + umode_t mode, bool want_excl) { - int err = 0; + int err; struct dentry *lower_dentry; struct dentry *lower_parent_dentry = NULL; - struct path lower_path, saved_path; + struct path lower_path; struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); const struct cred *saved_cred = NULL; @@ -74,18 +74,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, lower_dentry = lower_path.dentry; lower_parent_dentry = lock_parent(lower_dentry); - err = mnt_want_write(lower_path.mnt); - if (err) - goto out_unlock; - - pathcpy(&saved_path, &nd->path); - pathcpy(&nd->path, &lower_path); - /* set last 16bytes of mode field to 0664 */ mode = (mode & S_IFMT) | 00664; - err = vfs_create(lower_parent_dentry->d_inode, lower_dentry, mode, nd); - - pathcpy(&nd->path, &saved_path); + err = vfs_create(d_inode(lower_parent_dentry), lower_dentry, mode, want_excl); if (err) goto out; @@ -93,11 +84,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, if (err) goto out; fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); - fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); + fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); out: - mnt_drop_write(lower_path.mnt); -out_unlock: unlock_dir(lower_parent_dentry); sdcardfs_put_lower_path(dentry, &lower_path); REVERT_CRED(saved_cred); @@ -118,33 +107,27 @@ static int sdcardfs_link(struct dentry *old_dentry, struct inode *dir, OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb)); - file_size_save = i_size_read(old_dentry->d_inode); + file_size_save = i_size_read(d_inode(old_dentry)); sdcardfs_get_lower_path(old_dentry, &lower_old_path); sdcardfs_get_lower_path(new_dentry, &lower_new_path); lower_old_dentry = lower_old_path.dentry; lower_new_dentry = lower_new_path.dentry; lower_dir_dentry = lock_parent(lower_new_dentry); - err = mnt_want_write(lower_new_path.mnt); - if (err) - goto out_unlock; - - err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, - lower_new_dentry); - if (err || !lower_new_dentry->d_inode) + err = vfs_link(lower_old_dentry, d_inode(lower_dir_dentry), + lower_new_dentry, NULL); + if (err || !d_inode(lower_new_dentry)) goto out; err = sdcardfs_interpose(new_dentry, dir->i_sb, &lower_new_path); if (err) goto out; - fsstack_copy_attr_times(dir, lower_new_dentry->d_inode); - fsstack_copy_inode_size(dir, lower_new_dentry->d_inode); - old_dentry->d_inode->i_nlink = - sdcardfs_lower_inode(old_dentry->d_inode)->i_nlink; - i_size_write(new_dentry->d_inode, file_size_save); + fsstack_copy_attr_times(dir, d_inode(lower_new_dentry)); + fsstack_copy_inode_size(dir, d_inode(lower_new_dentry)); + set_nlink(d_inode(old_dentry), + sdcardfs_lower_inode(d_inode(old_dentry))->i_nlink); + i_size_write(d_inode(new_dentry), file_size_save); out: - mnt_drop_write(lower_new_path.mnt); -out_unlock: unlock_dir(lower_dir_dentry); sdcardfs_put_lower_path(old_dentry, &lower_old_path); sdcardfs_put_lower_path(new_dentry, &lower_new_path); @@ -180,10 +163,7 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) dget(lower_dentry); lower_dir_dentry = lock_parent(lower_dentry); - err = mnt_want_write(lower_path.mnt); - if (err) - goto out_unlock; - err = vfs_unlink(lower_dir_inode, lower_dentry); + err = vfs_unlink(lower_dir_inode, lower_dentry, NULL); /* * Note: unlinking on top of NFS can cause silly-renamed files. @@ -198,13 +178,11 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) goto out; fsstack_copy_attr_times(dir, lower_dir_inode); fsstack_copy_inode_size(dir, lower_dir_inode); - dentry->d_inode->i_nlink = - sdcardfs_lower_inode(dentry->d_inode)->i_nlink; - dentry->d_inode->i_ctime = dir->i_ctime; + set_nlink(d_inode(dentry), + sdcardfs_lower_inode(d_inode(dentry))->i_nlink); + d_inode(dentry)->i_ctime = dir->i_ctime; d_drop(dentry); /* this is needed, else LTP fails (VFS won't do it) */ out: - mnt_drop_write(lower_path.mnt); -out_unlock: unlock_dir(lower_dir_dentry); dput(lower_dentry); sdcardfs_put_lower_path(dentry, &lower_path); @@ -217,7 +195,7 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) static int sdcardfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - int err = 0; + int err; struct dentry *lower_dentry; struct dentry *lower_parent_dentry = NULL; struct path lower_path; @@ -228,21 +206,16 @@ static int sdcardfs_symlink(struct inode *dir, struct dentry *dentry, lower_dentry = lower_path.dentry; lower_parent_dentry = lock_parent(lower_dentry); - err = mnt_want_write(lower_path.mnt); - if (err) - goto out_unlock; - err = vfs_symlink(lower_parent_dentry->d_inode, lower_dentry, symname); + err = vfs_symlink(d_inode(lower_parent_dentry), lower_dentry, symname); if (err) goto out; err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path); if (err) goto out; fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); - fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); + fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); out: - mnt_drop_write(lower_path.mnt); -out_unlock: unlock_dir(lower_parent_dentry); sdcardfs_put_lower_path(dentry, &lower_path); REVERT_CRED(); @@ -266,9 +239,9 @@ static int touch(char *abs_path, mode_t mode) { return 0; } -static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { - int err = 0; + int err; int make_nomedia_in_obb = 0; struct dentry *lower_dentry; struct dentry *lower_parent_dentry = NULL; @@ -306,13 +279,9 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) lower_dentry = lower_path.dentry; lower_parent_dentry = lock_parent(lower_dentry); - err = mnt_want_write(lower_path.mnt); - if (err) - goto out_unlock; - /* set last 16bytes of mode field to 0775 */ mode = (mode & S_IFMT) | 00775; - err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry, mode); + err = vfs_mkdir(d_inode(lower_parent_dentry), lower_dentry, mode); if (err) goto out; @@ -341,9 +310,9 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) goto out; fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); - fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); + fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); /* update number of links on parent directory */ - dir->i_nlink = sdcardfs_lower_inode(dir)->i_nlink; + set_nlink(dir, sdcardfs_lower_inode(dir)->i_nlink); if ((sbi->options.derive == DERIVE_UNIFIED) && (!strcasecmp(dentry->d_name.name, "obb")) && (pi->perm == PERM_ANDROID) && (pi->userid == 0)) @@ -388,8 +357,6 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) kfree(nomedia_fullpath); } out: - mnt_drop_write(lower_path.mnt); -out_unlock: unlock_dir(lower_parent_dentry); sdcardfs_put_lower_path(dentry, &lower_path); out_revert: @@ -427,23 +394,18 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) lower_dentry = lower_path.dentry; lower_dir_dentry = lock_parent(lower_dentry); - err = mnt_want_write(lower_path.mnt); - if (err) - goto out_unlock; - err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); + err = vfs_rmdir(d_inode(lower_dir_dentry), lower_dentry); if (err) goto out; d_drop(dentry); /* drop our dentry on success (why not VFS's job?) */ - if (dentry->d_inode) - clear_nlink(dentry->d_inode); - fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); - fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode); - dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; + if (d_inode(dentry)) + clear_nlink(d_inode(dentry)); + fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry)); + fsstack_copy_inode_size(dir, d_inode(lower_dir_dentry)); + set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink); out: - mnt_drop_write(lower_path.mnt); -out_unlock: unlock_dir(lower_dir_dentry); sdcardfs_put_real_lower(dentry, &lower_path); REVERT_CRED(saved_cred); @@ -452,10 +414,10 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) } #if 0 -static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, int mode, +static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { - int err = 0; + int err; struct dentry *lower_dentry; struct dentry *lower_parent_dentry = NULL; struct path lower_path; @@ -466,10 +428,7 @@ static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, int mode, lower_dentry = lower_path.dentry; lower_parent_dentry = lock_parent(lower_dentry); - err = mnt_want_write(lower_path.mnt); - if (err) - goto out_unlock; - err = vfs_mknod(lower_parent_dentry->d_inode, lower_dentry, mode, dev); + err = vfs_mknod(d_inode(lower_parent_dentry), lower_dentry, mode, dev); if (err) goto out; @@ -477,11 +436,9 @@ static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, int mode, if (err) goto out; fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); - fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); + fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); out: - mnt_drop_write(lower_path.mnt); -out_unlock: unlock_dir(lower_parent_dentry); sdcardfs_put_lower_path(dentry, &lower_path); REVERT_CRED(); @@ -541,43 +498,33 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } - err = mnt_want_write(lower_old_path.mnt); + err = vfs_rename(d_inode(lower_old_dir_dentry), lower_old_dentry, + d_inode(lower_new_dir_dentry), lower_new_dentry, + NULL, 0); if (err) goto out; - err = mnt_want_write(lower_new_path.mnt); - if (err) - goto out_drop_old_write; - - err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, - lower_new_dir_dentry->d_inode, lower_new_dentry); - if (err) - goto out_err; /* Copy attrs from lower dir, but i_uid/i_gid */ - fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); - fsstack_copy_inode_size(new_dir, lower_new_dir_dentry->d_inode); + fsstack_copy_attr_all(new_dir, d_inode(lower_new_dir_dentry)); + fsstack_copy_inode_size(new_dir, d_inode(lower_new_dir_dentry)); fix_derived_permission(new_dir); if (new_dir != old_dir) { - fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); - fsstack_copy_inode_size(old_dir, lower_old_dir_dentry->d_inode); + fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry)); + fsstack_copy_inode_size(old_dir, d_inode(lower_old_dir_dentry)); fix_derived_permission(old_dir); /* update the derived permission of the old_dentry * with its new parent */ new_parent = dget_parent(new_dentry); if(new_parent) { - if(old_dentry->d_inode) { + if(d_inode(old_dentry)) { get_derived_permission(new_parent, old_dentry); - fix_derived_permission(old_dentry->d_inode); + fix_derived_permission(d_inode(old_dentry)); } dput(new_parent); } } -out_err: - mnt_drop_write(lower_new_path.mnt); -out_drop_old_write: - mnt_drop_write(lower_old_path.mnt); out: unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); dput(lower_old_dir_dentry); @@ -599,17 +546,17 @@ static int sdcardfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz sdcardfs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; - if (!lower_dentry->d_inode->i_op || - !lower_dentry->d_inode->i_op->readlink) { + if (!d_inode(lower_dentry)->i_op || + !d_inode(lower_dentry)->i_op->readlink) { err = -EINVAL; goto out; } - err = lower_dentry->d_inode->i_op->readlink(lower_dentry, + err = d_inode(lower_dentry)->i_op->readlink(lower_dentry, buf, bufsiz); if (err < 0) goto out; - fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode); + fsstack_copy_attr_atime(d_inode(dentry), d_inode(lower_dentry)); out: sdcardfs_put_lower_path(dentry, &lower_path); @@ -618,7 +565,7 @@ static int sdcardfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz #endif #if 0 -static void *sdcardfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie) { char *buf; int len = PAGE_SIZE, err; @@ -628,7 +575,7 @@ static void *sdcardfs_follow_link(struct dentry *dentry, struct nameidata *nd) buf = kmalloc(len, GFP_KERNEL); if (!buf) { buf = ERR_PTR(-ENOMEM); - goto out; + return buf; } /* read the symlink, and then we will follow it */ @@ -642,35 +589,19 @@ static void *sdcardfs_follow_link(struct dentry *dentry, struct nameidata *nd) } else { buf[err] = '\0'; } -out: - nd_set_link(nd, buf); - return NULL; + return *cookie = buf; } #endif -#if 0 -/* this @nd *IS* still used */ -static void sdcardfs_put_link(struct dentry *dentry, struct nameidata *nd, - void *cookie) -{ - char *buf = nd_get_link(nd); - if (!IS_ERR(buf)) /* free the char* */ - kfree(buf); -} -#endif - -static int sdcardfs_permission(struct inode *inode, int mask, unsigned int flags) +static int sdcardfs_permission(struct inode *inode, int mask) { int err; - if (flags & IPERM_FLAG_RCU) - return -ECHILD; - /* * Permission check on sdcardfs inode. * Calling process should have AID_SDCARD_RW permission */ - err = generic_permission(inode, mask, 0, inode->i_op->check_acl); + err = generic_permission(inode, mask); /* XXX * Original sdcardfs code calls inode_permission(lower_inode,.. ) @@ -700,49 +631,9 @@ static int sdcardfs_permission(struct inode *inode, int mask, unsigned int flags } -static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) -{ - struct dentry *lower_dentry; - struct inode *inode; - struct inode *lower_inode; - struct path lower_path; - struct dentry *parent; - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); - - parent = dget_parent(dentry); - if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name, - sbi->options.derive, 0, 0)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); - dput(parent); - return -EACCES; - } - dput(parent); - - inode = dentry->d_inode; - - sdcardfs_get_lower_path(dentry, &lower_path); - lower_dentry = lower_path.dentry; - lower_inode = sdcardfs_lower_inode(inode); - - fsstack_copy_attr_all(inode, lower_inode); - fsstack_copy_inode_size(inode, lower_inode); - /* if the dentry has been moved from other location - * so, on this stage, its derived permission must be - * rechecked from its private field. - */ - fix_derived_permission(inode); - - generic_fillattr(inode, stat); - sdcardfs_put_lower_path(dentry, &lower_path); - return 0; -} - static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) { - int err = 0; + int err; struct dentry *lower_dentry; struct inode *inode; struct inode *lower_inode; @@ -752,7 +643,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) struct dentry *parent; int has_rw; - inode = dentry->d_inode; + inode = d_inode(dentry); /* * Check if user has permission to change inode. We don't check if @@ -766,7 +657,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) /* check the Android group ID */ has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); parent = dget_parent(dentry); - if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name, + if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name, sbi->options.derive, 1, has_rw)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", @@ -819,13 +710,14 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) /* notify the (possibly copied-up) lower inode */ /* - * Note: we use lower_dentry->d_inode, because lower_inode may be + * Note: we use d_inode(lower_dentry), because lower_inode may be * unlinked (no inode->i_sb and i_ino==0. This happens if someone * tries to open(), unlink(), then ftruncate() a file. */ - mutex_lock(&lower_dentry->d_inode->i_mutex); - err = notify_change(lower_dentry, &lower_ia); /* note: lower_ia */ - mutex_unlock(&lower_dentry->d_inode->i_mutex); + mutex_lock(&d_inode(lower_dentry)->i_mutex); + err = notify_change(lower_dentry, &lower_ia, /* note: lower_ia */ + NULL); + mutex_unlock(&d_inode(lower_dentry)->i_mutex); if (current->mm) up_write(¤t->mm->mmap_sem); if (err) @@ -848,6 +740,46 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) return err; } +static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct dentry *lower_dentry; + struct inode *inode; + struct inode *lower_inode; + struct path lower_path; + struct dentry *parent; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + + parent = dget_parent(dentry); + if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name, + sbi->options.derive, 0, 0)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + dput(parent); + return -EACCES; + } + dput(parent); + + inode = d_inode(dentry); + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_inode = sdcardfs_lower_inode(inode); + + fsstack_copy_attr_all(inode, lower_inode); + fsstack_copy_inode_size(inode, lower_inode); + /* if the dentry has been moved from other location + * so, on this stage, its derived permission must be + * rechecked from its private field. + */ + fix_derived_permission(inode); + + generic_fillattr(inode, stat); + sdcardfs_put_lower_path(dentry, &lower_path); + return 0; +} + const struct inode_operations sdcardfs_symlink_iops = { .permission = sdcardfs_permission, .setattr = sdcardfs_setattr, @@ -856,14 +788,16 @@ const struct inode_operations sdcardfs_symlink_iops = { * These methods are *NOT* perfectly tested. .readlink = sdcardfs_readlink, .follow_link = sdcardfs_follow_link, - .put_link = sdcardfs_put_link, + .put_link = kfree_put_link, */ }; const struct inode_operations sdcardfs_dir_iops = { .create = sdcardfs_create, .lookup = sdcardfs_lookup, +#if 0 .permission = sdcardfs_permission, +#endif .unlink = sdcardfs_unlink, .mkdir = sdcardfs_mkdir, .rmdir = sdcardfs_rmdir, diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index c0b12375b1bf..a4b94df99f32 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -79,8 +79,7 @@ static int sdcardfs_inode_set(struct inode *inode, void *lower_inode) return 0; } -static struct inode *sdcardfs_iget(struct super_block *sb, - struct inode *lower_inode) +struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode) { struct sdcardfs_inode_info *info; struct inode *inode; /* the new inode to return */ @@ -206,14 +205,13 @@ int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, * Fills in lower_parent_path with on success. */ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, - struct nameidata *nd, struct path *lower_parent_path) + unsigned int flags, struct path *lower_parent_path) { int err = 0; struct vfsmount *lower_dir_mnt; struct dentry *lower_dir_dentry = NULL; struct dentry *lower_dentry; const char *name; - struct nameidata lower_nd; struct path lower_path; struct qstr this; struct sdcardfs_sb_info *sbi; @@ -234,10 +232,10 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, /* Use vfs_path_lookup to check if the dentry exists or not */ if (sbi->options.lower_fs == LOWER_FS_EXT4) { err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, - LOOKUP_CASE_INSENSITIVE, &lower_nd); + LOOKUP_CASE_INSENSITIVE, &lower_path); } else if (sbi->options.lower_fs == LOWER_FS_FAT) { err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, - &lower_nd); + &lower_path); } /* no error: handle positive dentries */ @@ -253,7 +251,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, * and the base obbpath will be copyed to the lower_path variable. * if an error returned, there's no change in the lower_path * returns: -ERRNO if error (0: no error) */ - err = setup_obb_dentry(dentry, &lower_nd.path); + err = setup_obb_dentry(dentry, &lower_path); if(err) { /* if the sbi->obbpath is not available, we can optionally @@ -267,8 +265,8 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, } } - sdcardfs_set_lower_path(dentry, &lower_nd.path); - err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_nd.path); + sdcardfs_set_lower_path(dentry, &lower_path); + err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_path); if (err) /* path_put underlying path on error */ sdcardfs_put_reset_lower_path(dentry); goto out; @@ -306,10 +304,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, * the VFS will continue the process of making this negative dentry * into a positive one. */ - if (nd) { - if (nd->flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET)) - err = 0; - } else + if (flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET)) err = 0; out: @@ -328,7 +323,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, * @nd : nameidata of parent inode */ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct dentry *ret = NULL, *parent; struct path lower_parent_path; @@ -359,7 +354,7 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - ret = __sdcardfs_lookup(dentry, nd, &lower_parent_path); + ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path); if (IS_ERR(ret)) { goto out; diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 1fdceffec72c..9d04ae8ceb46 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -156,6 +156,7 @@ static int parse_options(struct super_block *sb, char *options, int silent, return 0; } +#if 0 /* * our custom d_alloc_root work-alike * @@ -181,6 +182,7 @@ static struct dentry *sdcardfs_d_alloc_root(struct super_block *sb) } return ret; } +#endif /* * There is no need to lock the sdcardfs_super_info's rwsem as there is no @@ -195,6 +197,7 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, struct path lower_path; struct sdcardfs_sb_info *sb_info; void *pkgl_id; + struct inode *inode; printk(KERN_INFO "sdcardfs version 2.0\n"); @@ -259,12 +262,18 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, sb->s_magic = SDCARDFS_SUPER_MAGIC; sb->s_op = &sdcardfs_sops; - /* see comment next to the definition of sdcardfs_d_alloc_root */ - sb->s_root = sdcardfs_d_alloc_root(sb); - if (!sb->s_root) { - err = -ENOMEM; + /* get a new inode and allocate our root dentry */ + inode = sdcardfs_iget(sb, lower_path.dentry->d_inode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); goto out_sput; } + sb->s_root = d_make_root(inode); + if (!sb->s_root) { + err = -ENOMEM; + goto out_iput; + } + d_set_d_op(sb->s_root, &sdcardfs_ci_dops); /* link the upper and lower dentries */ sb->s_root->d_fsdata = NULL; @@ -275,56 +284,60 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, /* set the lower dentries for s_root */ sdcardfs_set_lower_path(sb->s_root, &lower_path); - /* call interpose to create the upper level inode */ - err = sdcardfs_interpose(sb->s_root, sb, &lower_path); - if (!err) { - /* setup permission policy */ - switch(sb_info->options.derive) { - case DERIVE_NONE: - setup_derived_state(sb->s_root->d_inode, + /* + * No need to call interpose because we already have a positive + * dentry, which was instantiated by d_make_root. Just need to + * d_rehash it. + */ + d_rehash(sb->s_root); + + /* setup permission policy */ + switch(sb_info->options.derive) { + case DERIVE_NONE: + setup_derived_state(sb->s_root->d_inode, PERM_ROOT, 0, AID_ROOT, AID_SDCARD_RW, 00775); - sb_info->obbpath_s = NULL; - break; - case DERIVE_LEGACY: - /* Legacy behavior used to support internal multiuser layout which - * places user_id at the top directory level, with the actual roots - * just below that. Shared OBB path is also at top level. */ - setup_derived_state(sb->s_root->d_inode, - PERM_LEGACY_PRE_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771); - /* initialize the obbpath string and lookup the path - * sb_info->obb_path will be deactivated by path_put - * on sdcardfs_put_super */ - sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); - snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); - err = prepare_dir(sb_info->obbpath_s, - sb_info->options.fs_low_uid, - sb_info->options.fs_low_gid, 00755); - if(err) - printk(KERN_ERR "sdcardfs: %s: %d, error on creating %s\n", - __func__,__LINE__, sb_info->obbpath_s); - break; - case DERIVE_UNIFIED: - /* Unified multiuser layout which places secondary user_id under - * /Android/user and shared OBB path under /Android/obb. */ - setup_derived_state(sb->s_root->d_inode, - PERM_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771); + sb_info->obbpath_s = NULL; + break; + case DERIVE_LEGACY: + /* Legacy behavior used to support internal multiuser layout which + * places user_id at the top directory level, with the actual roots + * just below that. Shared OBB path is also at top level. */ + setup_derived_state(sb->s_root->d_inode, + PERM_LEGACY_PRE_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771); + /* initialize the obbpath string and lookup the path + * sb_info->obb_path will be deactivated by path_put + * on sdcardfs_put_super */ + sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); + snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); + err = prepare_dir(sb_info->obbpath_s, + sb_info->options.fs_low_uid, + sb_info->options.fs_low_gid, 00755); + if(err) + printk(KERN_ERR "sdcardfs: %s: %d, error on creating %s\n", + __func__,__LINE__, sb_info->obbpath_s); + break; + case DERIVE_UNIFIED: + /* Unified multiuser layout which places secondary user_id under + * /Android/user and shared OBB path under /Android/obb. */ + setup_derived_state(sb->s_root->d_inode, + PERM_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771); - sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); - snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); - break; - } - fix_derived_permission(sb->s_root->d_inode); - - if (!silent) - printk(KERN_INFO "sdcardfs: mounted on top of %s type %s\n", - dev_name, lower_sb->s_type->name); - goto out; + sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); + snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); + break; } - /* else error: fall through */ + fix_derived_permission(sb->s_root->d_inode); - free_dentry_private_data(sb->s_root); + if (!silent) + printk(KERN_INFO "sdcardfs: mounted on top of %s type %s\n", + dev_name, lower_sb->s_type->name); + goto out; /* all is well */ + + /* no longer needed: free_dentry_private_data(sb->s_root); */ out_freeroot: dput(sb->s_root); +out_iput: + iput(inode); out_sput: /* drop refs we took earlier */ atomic_dec(&lower_sb->s_active); @@ -346,7 +359,7 @@ static struct dentry *mount_nodev_with_options(struct file_system_type *fs_type, { int error; - struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); + struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); @@ -378,7 +391,7 @@ static struct file_system_type sdcardfs_fs_type = { .name = SDCARDFS_NAME, .mount = sdcardfs_mount, .kill_sb = generic_shutdown_super, - .fs_flags = FS_REVAL_DOT, + .fs_flags = 0, }; static int __init init_sdcardfs_fs(void) diff --git a/fs/sdcardfs/mmap.c b/fs/sdcardfs/mmap.c index c807d7f18f8b..e21f64675a80 100644 --- a/fs/sdcardfs/mmap.c +++ b/fs/sdcardfs/mmap.c @@ -48,9 +48,8 @@ static int sdcardfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return err; } -static ssize_t sdcardfs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) +static ssize_t sdcardfs_direct_IO(struct kiocb *iocb, + struct iov_iter *iter, loff_t pos) { /* * This function returns zero on purpose in order to support direct IO. diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c index c786d8f92203..d7ba8d4a423e 100644 --- a/fs/sdcardfs/packagelist.c +++ b/fs/sdcardfs/packagelist.c @@ -20,7 +20,7 @@ #include "sdcardfs.h" #include "strtok.h" -#include "hashtable.h" +#include #include #include #include @@ -29,8 +29,8 @@ #define STRING_BUF_SIZE (512) struct hashtable_entry { - struct hlist_node hlist; - void *key; + struct hlist_node hlist; + void *key; int value; }; @@ -67,12 +67,12 @@ static unsigned int str_hash(void *key) { } static int contain_appid_key(struct packagelist_data *pkgl_dat, void *appid) { - struct hashtable_entry *hash_cur; - struct hlist_node *h_n; + struct hashtable_entry *hash_cur; - hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, (unsigned int)appid, h_n) - if (appid == hash_cur->key) - return 1; + hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, (unsigned int)appid) + + if (appid == hash_cur->key) + return 1; return 0; } @@ -87,7 +87,7 @@ int get_caller_has_rw_locked(void *pkgl_id, derive_t derive) { return 1; } - appid = multiuser_get_app_id(current_fsuid()); + appid = multiuser_get_app_id(from_kuid(&init_user_ns, current_fsuid())); mutex_lock(&pkgl_dat->hashtable_lock); ret = contain_appid_key(pkgl_dat, (void *)appid); mutex_unlock(&pkgl_dat->hashtable_lock); @@ -98,13 +98,12 @@ appid_t get_appid(void *pkgl_id, const char *app_name) { struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id; struct hashtable_entry *hash_cur; - struct hlist_node *h_n; unsigned int hash = str_hash((void *)app_name); appid_t ret_id; //printk(KERN_INFO "sdcardfs: %s: %s, %u\n", __func__, (char *)app_name, hash); mutex_lock(&pkgl_dat->hashtable_lock); - hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash, h_n) { + hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { //printk(KERN_INFO "sdcardfs: %s: %s\n", __func__, (char *)hash_cur->key); if (!strcasecmp(app_name, hash_cur->key)) { ret_id = (appid_t)hash_cur->value; @@ -140,7 +139,7 @@ int check_caller_access_to_name(struct inode *parent_node, const char* name, /* Root always has access; access for any other UIDs should always * be controlled through packages.list. */ - if (current_fsuid() == 0) { + if (from_kuid(&init_user_ns, current_fsuid()) == 0) { return 1; } @@ -148,7 +147,8 @@ int check_caller_access_to_name(struct inode *parent_node, const char* name, * parent or holds sdcard_rw. */ if (w_ok) { if (parent_node && - (current_fsuid() == SDCARDFS_I(parent_node)->d_uid)) { + (from_kuid(&init_user_ns, current_fsuid()) == + SDCARDFS_I(parent_node)->d_uid)) { return 1; } return has_rw; @@ -174,11 +174,10 @@ int open_flags_to_access_mode(int open_flags) { static int insert_str_to_int(struct packagelist_data *pkgl_dat, void *key, int value) { struct hashtable_entry *hash_cur; struct hashtable_entry *new_entry; - struct hlist_node *h_n; unsigned int hash = str_hash(key); //printk(KERN_INFO "sdcardfs: %s: %s: %d, %u\n", __func__, (char *)key, value, hash); - hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash, h_n) { + hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { if (!strcasecmp(key, hash_cur->key)) { hash_cur->value = value; return 0; @@ -202,11 +201,10 @@ static void remove_str_to_int(struct hashtable_entry *h_entry) { static int insert_int_to_null(struct packagelist_data *pkgl_dat, void *key, int value) { struct hashtable_entry *hash_cur; struct hashtable_entry *new_entry; - struct hlist_node *h_n; //printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)key, value); hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, - (unsigned int)key, h_n) { + (unsigned int)key) { if (key == hash_cur->key) { hash_cur->value = value; return 0; @@ -230,14 +228,13 @@ static void remove_int_to_null(struct hashtable_entry *h_entry) { static void remove_all_hashentrys(struct packagelist_data *pkgl_dat) { struct hashtable_entry *hash_cur; - struct hlist_node *h_n; struct hlist_node *h_t; int i; - hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist, h_n) + hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist) remove_str_to_int(hash_cur); - hash_for_each_safe(pkgl_dat->appid_with_rw, i, h_t, hash_cur, hlist, h_n) - remove_int_to_null(hash_cur); + hash_for_each_safe(pkgl_dat->appid_with_rw, i, h_t, hash_cur, hlist) + remove_int_to_null(hash_cur); hash_init(pkgl_dat->package_to_appid); hash_init(pkgl_dat->appid_with_rw); diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 90f8b24e4a52..51f6c7912584 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -69,8 +69,8 @@ #define fix_derived_permission(x) \ do { \ - (x)->i_uid = SDCARDFS_I(x)->d_uid; \ - (x)->i_gid = SDCARDFS_I(x)->d_gid; \ + (x)->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(x)->d_uid); \ + (x)->i_gid = make_kgid(&init_user_ns, SDCARDFS_I(x)->d_gid); \ (x)->i_mode = ((x)->i_mode & S_IFMT) | SDCARDFS_I(x)->d_mode;\ } while (0) @@ -159,7 +159,9 @@ extern void sdcardfs_destroy_dentry_cache(void); extern int new_dentry_private_data(struct dentry *dentry); extern void free_dentry_private_data(struct dentry *dentry); extern struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd); + unsigned int flags); +extern struct inode *sdcardfs_iget(struct super_block *sb, + struct inode *lower_inode); extern int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, struct path *lower_path); @@ -387,13 +389,13 @@ extern int setup_obb_dentry(struct dentry *dentry, struct path *lower_path); static inline struct dentry *lock_parent(struct dentry *dentry) { struct dentry *dir = dget_parent(dentry); - mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT); return dir; } static inline void unlock_dir(struct dentry *dir) { - mutex_unlock(&dir->d_inode->i_mutex); + mutex_unlock(&d_inode(dir)->i_mutex); dput(dir); } @@ -402,16 +404,9 @@ static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t m int err; struct dentry *dent; struct iattr attrs; - struct nameidata nd; + struct path parent; - err = kern_path_parent(path_s, &nd); - if (err) { - if (err == -EEXIST) - err = 0; - goto out; - } - - dent = lookup_create(&nd, 1); + dent = kern_path_locked(path_s, &parent); if (IS_ERR(dent)) { err = PTR_ERR(dent); if (err == -EEXIST) @@ -419,29 +414,27 @@ static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t m goto out_unlock; } - err = vfs_mkdir(nd.path.dentry->d_inode, dent, mode); + err = vfs_mkdir(d_inode(parent.dentry), dent, mode); if (err) { if (err == -EEXIST) err = 0; goto out_dput; } - attrs.ia_uid = uid; - attrs.ia_gid = gid; + attrs.ia_uid = make_kuid(&init_user_ns, uid); + attrs.ia_gid = make_kgid(&init_user_ns, gid); attrs.ia_valid = ATTR_UID | ATTR_GID; - mutex_lock(&dent->d_inode->i_mutex); - notify_change(dent, &attrs); - mutex_unlock(&dent->d_inode->i_mutex); + mutex_lock(&d_inode(dent)->i_mutex); + notify_change(dent, &attrs, NULL); + mutex_unlock(&d_inode(dent)->i_mutex); out_dput: dput(dent); out_unlock: /* parent dentry locked by lookup_create */ - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - path_put(&nd.path); - -out: + mutex_unlock(&d_inode(parent.dentry)->i_mutex); + path_put(&parent); return err; } diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index 1d206c82dfdf..f153ce1b8cf3 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -122,7 +122,7 @@ static void sdcardfs_evict_inode(struct inode *inode) struct inode *lower_inode; truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); /* * Decrement a reference to a lower_inode, which was incremented * by our read_inode when it was created initially. @@ -193,9 +193,9 @@ static void sdcardfs_umount_begin(struct super_block *sb) lower_sb->s_op->umount_begin(lower_sb); } -static int sdcardfs_show_options(struct seq_file *m, struct vfsmount *mnt) +static int sdcardfs_show_options(struct seq_file *m, struct dentry *root) { - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(mnt->mnt_sb); + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(root->d_sb); struct sdcardfs_mount_options *opts = &sbi->options; if (opts->fs_low_uid != 0) diff --git a/include/linux/namei.h b/include/linux/namei.h index ef3b4f74eaf0..f2b8acbdb928 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -78,6 +78,8 @@ extern struct dentry *user_path_create(int, const char __user *, struct path *, extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int); +extern int vfs_path_lookup(struct dentry *, struct vfsmount *, + const char *, unsigned int, struct path *); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); From ef5f9857ff64f07f41bc7fca9b1f402e4638f9a5 Mon Sep 17 00:00:00 2001 From: Daniel Campello Date: Mon, 20 Jul 2015 16:33:46 -0700 Subject: [PATCH 039/797] Changed type-casting in packagelist management Fixed existing type-casting in packagelist management code. All warnings at compile time were taken care of. Change-Id: I1ea97786d1d1325f31b9f09ae966af1f896a2af5 Signed-off-by: Daniel Campello --- fs/sdcardfs/packagelist.c | 40 ++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c index d7ba8d4a423e..f11591da141d 100644 --- a/fs/sdcardfs/packagelist.c +++ b/fs/sdcardfs/packagelist.c @@ -31,7 +31,7 @@ struct hashtable_entry { struct hlist_node hlist; void *key; - int value; + unsigned int value; }; struct packagelist_data { @@ -54,7 +54,7 @@ static const char* const kpackageslist_file = "/data/system/packages.list"; /* Supplementary groups to execute with */ static const gid_t kgroups[1] = { AID_PACKAGE_INFO }; -static unsigned int str_hash(void *key) { +static unsigned int str_hash(const char *key) { int i; unsigned int h = strlen(key); char *data = (char *)key; @@ -66,13 +66,13 @@ static unsigned int str_hash(void *key) { return h; } -static int contain_appid_key(struct packagelist_data *pkgl_dat, void *appid) { +static int contain_appid_key(struct packagelist_data *pkgl_dat, unsigned int appid) { struct hashtable_entry *hash_cur; - hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, (unsigned int)appid) - - if (appid == hash_cur->key) + hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, appid) + if ((void *)(uintptr_t)appid == hash_cur->key) return 1; + return 0; } @@ -89,7 +89,7 @@ int get_caller_has_rw_locked(void *pkgl_id, derive_t derive) { appid = multiuser_get_app_id(from_kuid(&init_user_ns, current_fsuid())); mutex_lock(&pkgl_dat->hashtable_lock); - ret = contain_appid_key(pkgl_dat, (void *)appid); + ret = contain_appid_key(pkgl_dat, appid); mutex_unlock(&pkgl_dat->hashtable_lock); return ret; } @@ -98,7 +98,7 @@ appid_t get_appid(void *pkgl_id, const char *app_name) { struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id; struct hashtable_entry *hash_cur; - unsigned int hash = str_hash((void *)app_name); + unsigned int hash = str_hash(app_name); appid_t ret_id; //printk(KERN_INFO "sdcardfs: %s: %s, %u\n", __func__, (char *)app_name, hash); @@ -171,7 +171,9 @@ int open_flags_to_access_mode(int open_flags) { } } -static int insert_str_to_int(struct packagelist_data *pkgl_dat, void *key, int value) { +static int insert_str_to_int(struct packagelist_data *pkgl_dat, char *key, + unsigned int value) +{ struct hashtable_entry *hash_cur; struct hashtable_entry *new_entry; unsigned int hash = str_hash(key); @@ -198,14 +200,15 @@ static void remove_str_to_int(struct hashtable_entry *h_entry) { kmem_cache_free(hashtable_entry_cachep, h_entry); } -static int insert_int_to_null(struct packagelist_data *pkgl_dat, void *key, int value) { +static int insert_int_to_null(struct packagelist_data *pkgl_dat, unsigned int key, + unsigned int value) +{ struct hashtable_entry *hash_cur; struct hashtable_entry *new_entry; //printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)key, value); - hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, - (unsigned int)key) { - if (key == hash_cur->key) { + hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, key) { + if ((void *)(uintptr_t)key == hash_cur->key) { hash_cur->value = value; return 0; } @@ -213,10 +216,9 @@ static int insert_int_to_null(struct packagelist_data *pkgl_dat, void *key, int new_entry = kmem_cache_alloc(hashtable_entry_cachep, GFP_KERNEL); if (!new_entry) return -ENOMEM; - new_entry->key = key; + new_entry->key = (void *)(uintptr_t)key; new_entry->value = value; - hash_add(pkgl_dat->appid_with_rw, &new_entry->hlist, - (unsigned int)new_entry->key); + hash_add(pkgl_dat->appid_with_rw, &new_entry->hlist, key); return 0; } @@ -260,7 +262,7 @@ static int read_package_list(struct packagelist_data *pkgl_dat) { while ((read_amount = sys_read(fd, pkgl_dat->read_buf, sizeof(pkgl_dat->read_buf))) > 0) { - int appid; + unsigned int appid; char *token; int one_line_len = 0; int additional_read; @@ -277,7 +279,7 @@ static int read_package_list(struct packagelist_data *pkgl_dat) { if (additional_read > 0) sys_lseek(fd, -additional_read, SEEK_CUR); - if (sscanf(pkgl_dat->read_buf, "%s %d %*d %*s %*s %s", + if (sscanf(pkgl_dat->read_buf, "%s %u %*d %*s %*s %s", pkgl_dat->app_name_buf, &appid, pkgl_dat->gids_buf) == 3) { ret = insert_str_to_int(pkgl_dat, pkgl_dat->app_name_buf, appid); @@ -291,7 +293,7 @@ static int read_package_list(struct packagelist_data *pkgl_dat) { while (token != NULL) { if (!kstrtoul(token, 10, &ret_gid) && (ret_gid == pkgl_dat->write_gid)) { - ret = insert_int_to_null(pkgl_dat, (void *)appid, 1); + ret = insert_int_to_null(pkgl_dat, appid, 1); if (ret) { sys_close(fd); mutex_unlock(&pkgl_dat->hashtable_lock); From b0f3f87bc1533c64aa1b1a86dd583c92919462ca Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 3 Feb 2016 21:08:21 -0800 Subject: [PATCH 040/797] sdcardfs: Bring up to date with Android M permissions: In M, the workings of sdcardfs were changed significantly. This brings sdcardfs into line with the changes. Change-Id: I10e91a84a884c838feef7aa26c0a2b21f02e052e --- fs/sdcardfs/Kconfig | 1 + fs/sdcardfs/derived_perm.c | 119 ++++---- fs/sdcardfs/file.c | 10 +- fs/sdcardfs/inode.c | 78 +++--- fs/sdcardfs/lookup.c | 40 +-- fs/sdcardfs/main.c | 141 +++++----- fs/sdcardfs/packagelist.c | 538 ++++++++++++++++++------------------- fs/sdcardfs/sdcardfs.h | 134 ++++++--- fs/sdcardfs/strtok.h | 75 ------ fs/sdcardfs/super.c | 11 +- 10 files changed, 518 insertions(+), 629 deletions(-) delete mode 100644 fs/sdcardfs/strtok.h diff --git a/fs/sdcardfs/Kconfig b/fs/sdcardfs/Kconfig index d995f3eaae6d..ab25f88ebb37 100644 --- a/fs/sdcardfs/Kconfig +++ b/fs/sdcardfs/Kconfig @@ -1,5 +1,6 @@ config SDCARD_FS tristate "sdcard file system" + depends on CONFIGFS_FS default n help Sdcardfs is based on Wrapfs file system. diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index 00c33a471dcc..128b3e56851f 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -29,24 +29,23 @@ static void inherit_derived_state(struct inode *parent, struct inode *child) ci->perm = PERM_INHERIT; ci->userid = pi->userid; ci->d_uid = pi->d_uid; - ci->d_gid = pi->d_gid; - ci->d_mode = pi->d_mode; + ci->under_android = pi->under_android; } /* helper function for derived state */ void setup_derived_state(struct inode *inode, perm_t perm, - userid_t userid, uid_t uid, gid_t gid, mode_t mode) + userid_t userid, uid_t uid, bool under_android) { struct sdcardfs_inode_info *info = SDCARDFS_I(inode); info->perm = perm; info->userid = userid; info->d_uid = uid; - info->d_gid = gid; - info->d_mode = mode; + info->under_android = under_android; } -void get_derived_permission(struct dentry *parent, struct dentry *dentry) +/* While renaming, there is a point where we want the path from dentry, but the name from newdentry */ +void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, struct dentry *newdentry) { struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); struct sdcardfs_inode_info *info = SDCARDFS_I(dentry->d_inode); @@ -63,86 +62,68 @@ void get_derived_permission(struct dentry *parent, struct dentry *dentry) inherit_derived_state(parent->d_inode, dentry->d_inode); - //printk(KERN_INFO "sdcardfs: derived: %s, %s, %d\n", parent->d_name.name, - // dentry->d_name.name, parent_info->perm); - - if (sbi->options.derive == DERIVE_NONE) { - return; - } - /* Derive custom permissions based on parent and current node */ switch (parent_info->perm) { case PERM_INHERIT: /* Already inherited above */ break; - case PERM_LEGACY_PRE_ROOT: + case PERM_PRE_ROOT: /* Legacy internal layout places users at top level */ info->perm = PERM_ROOT; - info->userid = simple_strtoul(dentry->d_name.name, NULL, 10); + info->userid = simple_strtoul(newdentry->d_name.name, NULL, 10); break; case PERM_ROOT: /* Assume masked off by default. */ - info->d_mode = 00770; - if (!strcasecmp(dentry->d_name.name, "Android")) { + if (!strcasecmp(newdentry->d_name.name, "Android")) { /* App-specific directories inside; let anyone traverse */ info->perm = PERM_ANDROID; - info->d_mode = 00771; - } else if (sbi->options.split_perms) { - if (!strcasecmp(dentry->d_name.name, "DCIM") - || !strcasecmp(dentry->d_name.name, "Pictures")) { - info->d_gid = AID_SDCARD_PICS; - } else if (!strcasecmp(dentry->d_name.name, "Alarms") - || !strcasecmp(dentry->d_name.name, "Movies") - || !strcasecmp(dentry->d_name.name, "Music") - || !strcasecmp(dentry->d_name.name, "Notifications") - || !strcasecmp(dentry->d_name.name, "Podcasts") - || !strcasecmp(dentry->d_name.name, "Ringtones")) { - info->d_gid = AID_SDCARD_AV; - } + info->under_android = true; } break; case PERM_ANDROID: - if (!strcasecmp(dentry->d_name.name, "data")) { + if (!strcasecmp(newdentry->d_name.name, "data")) { /* App-specific directories inside; let anyone traverse */ info->perm = PERM_ANDROID_DATA; - info->d_mode = 00771; - } else if (!strcasecmp(dentry->d_name.name, "obb")) { + } else if (!strcasecmp(newdentry->d_name.name, "obb")) { /* App-specific directories inside; let anyone traverse */ info->perm = PERM_ANDROID_OBB; - info->d_mode = 00771; - // FIXME : this feature will be implemented later. /* Single OBB directory is always shared */ - } else if (!strcasecmp(dentry->d_name.name, "user")) { - /* User directories must only be accessible to system, protected - * by sdcard_all. Zygote will bind mount the appropriate user- - * specific path. */ - info->perm = PERM_ANDROID_USER; - info->d_gid = AID_SDCARD_ALL; - info->d_mode = 00770; + } else if (!strcasecmp(newdentry->d_name.name, "media")) { + /* App-specific directories inside; let anyone traverse */ + info->perm = PERM_ANDROID_MEDIA; } break; - /* same policy will be applied on PERM_ANDROID_DATA - * and PERM_ANDROID_OBB */ case PERM_ANDROID_DATA: case PERM_ANDROID_OBB: - appid = get_appid(sbi->pkgl_id, dentry->d_name.name); + case PERM_ANDROID_MEDIA: + appid = get_appid(sbi->pkgl_id, newdentry->d_name.name); if (appid != 0) { info->d_uid = multiuser_get_uid(parent_info->userid, appid); } - info->d_mode = 00770; - break; - case PERM_ANDROID_USER: - /* Root of a secondary user */ - info->perm = PERM_ROOT; - info->userid = simple_strtoul(dentry->d_name.name, NULL, 10); - info->d_gid = AID_SDCARD_R; - info->d_mode = 00771; break; } } +void get_derived_permission(struct dentry *parent, struct dentry *dentry) +{ + get_derived_permission_new(parent, dentry, dentry); +} + +void get_derive_permissions_recursive(struct dentry *parent) { + struct dentry *dentry; + list_for_each_entry(dentry, &parent->d_subdirs, d_child) { + if (dentry && dentry->d_inode) { + mutex_lock(&dentry->d_inode->i_mutex); + get_derived_permission(parent, dentry); + fix_derived_permission(dentry->d_inode); + get_derive_permissions_recursive(dentry); + mutex_unlock(&dentry->d_inode->i_mutex); + } + } +} + /* main function for updating derived permission */ -inline void update_derived_permission(struct dentry *dentry) +inline void update_derived_permission_lock(struct dentry *dentry) { struct dentry *parent; @@ -154,6 +135,7 @@ inline void update_derived_permission(struct dentry *dentry) * 1. need to check whether the dentry is updated or not * 2. remove the root dentry update */ + mutex_lock(&dentry->d_inode->i_mutex); if(IS_ROOT(dentry)) { //setup_default_pre_root_state(dentry->d_inode); } else { @@ -164,6 +146,7 @@ inline void update_derived_permission(struct dentry *dentry) } } fix_derived_permission(dentry->d_inode); + mutex_unlock(&dentry->d_inode->i_mutex); } int need_graft_path(struct dentry *dentry) @@ -177,7 +160,7 @@ int need_graft_path(struct dentry *dentry) !strcasecmp(dentry->d_name.name, "obb")) { /* /Android/obb is the base obbpath of DERIVED_UNIFIED */ - if(!(sbi->options.derive == DERIVE_UNIFIED + if(!(sbi->options.multiuser == false && parent_info->userid == 0)) { ret = 1; } @@ -207,8 +190,7 @@ int is_obbpath_invalid(struct dentry *dent) path_buf = kmalloc(PATH_MAX, GFP_ATOMIC); if(!path_buf) { ret = 1; - printk(KERN_ERR "sdcardfs: " - "fail to allocate path_buf in %s.\n", __func__); + printk(KERN_ERR "sdcardfs: fail to allocate path_buf in %s.\n", __func__); } else { obbpath_s = d_path(&di->lower_path, path_buf, PATH_MAX); if (d_unhashed(di->lower_path.dentry) || @@ -234,21 +216,16 @@ int is_base_obbpath(struct dentry *dentry) struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); spin_lock(&SDCARDFS_D(dentry)->lock); - /* DERIVED_LEGACY */ - if(parent_info->perm == PERM_LEGACY_PRE_ROOT && + if (sbi->options.multiuser) { + if(parent_info->perm == PERM_PRE_ROOT && + !strcasecmp(dentry->d_name.name, "obb")) { + ret = 1; + } + } else if (parent_info->perm == PERM_ANDROID && !strcasecmp(dentry->d_name.name, "obb")) { ret = 1; } - /* DERIVED_UNIFIED :/Android/obb is the base obbpath */ - else if (parent_info->perm == PERM_ANDROID && - !strcasecmp(dentry->d_name.name, "obb")) { - if((sbi->options.derive == DERIVE_UNIFIED - && parent_info->userid == 0)) { - ret = 1; - } - } spin_unlock(&SDCARDFS_D(dentry)->lock); - dput(parent); return ret; } @@ -272,8 +249,7 @@ int setup_obb_dentry(struct dentry *dentry, struct path *lower_path) if(!err) { /* the obbpath base has been found */ - printk(KERN_INFO "sdcardfs: " - "the sbi->obbpath is found\n"); + printk(KERN_INFO "sdcardfs: the sbi->obbpath is found\n"); pathcpy(lower_path, &obbpath); } else { /* if the sbi->obbpath is not available, we can optionally @@ -281,8 +257,7 @@ int setup_obb_dentry(struct dentry *dentry, struct path *lower_path) * but, the current implementation just returns an error * because the sdcard daemon also regards this case as * a lookup fail. */ - printk(KERN_INFO "sdcardfs: " - "the sbi->obbpath is not available\n"); + printk(KERN_INFO "sdcardfs: the sbi->obbpath is not available\n"); } return err; } diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c index f9c5eaafc619..c249fa982d3c 100644 --- a/fs/sdcardfs/file.c +++ b/fs/sdcardfs/file.c @@ -209,7 +209,6 @@ static int sdcardfs_open(struct inode *inode, struct file *file) struct dentry *parent = dget_parent(dentry); struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); const struct cred *saved_cred = NULL; - int has_rw; /* don't open unhashed/deleted files */ if (d_unhashed(dentry)) { @@ -217,11 +216,7 @@ static int sdcardfs_open(struct inode *inode, struct file *file) goto out_err; } - has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); - - if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name, - sbi->options.derive, - open_flags_to_access_mode(file->f_flags), has_rw)) { + if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", __func__, dentry->d_name.name, current->comm); @@ -257,8 +252,7 @@ static int sdcardfs_open(struct inode *inode, struct file *file) if (err) kfree(SDCARDFS_F(file)); else { - fsstack_copy_attr_all(inode, sdcardfs_lower_inode(inode)); - fix_derived_permission(inode); + sdcardfs_copy_and_fix_attrs(inode, sdcardfs_lower_inode(inode)); } out_revert_cred: diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 75c622bac2f5..2528da0d3ae1 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -55,11 +55,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, struct dentry *lower_dentry; struct dentry *lower_parent_dentry = NULL; struct path lower_path; - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); const struct cred *saved_cred = NULL; - int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); - if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) { + if(!check_caller_access_to_name(dir, dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", __func__, dentry->d_name.name, current->comm); @@ -80,7 +78,7 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, if (err) goto out; - err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path); + err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, SDCARDFS_I(dir)->userid); if (err) goto out; fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); @@ -143,11 +141,9 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) struct inode *lower_dir_inode = sdcardfs_lower_inode(dir); struct dentry *lower_dir_dentry; struct path lower_path; - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); const struct cred *saved_cred = NULL; - int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); - if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) { + if(!check_caller_access_to_name(dir, dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", __func__, dentry->d_name.name, current->comm); @@ -255,8 +251,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode int fullpath_namelen; int touch_err = 0; - int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); - if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) { + if(!check_caller_access_to_name(dir, dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", __func__, dentry->d_name.name, current->comm); @@ -293,19 +288,19 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode if(err) { /* if the sbi->obbpath is not available, the lower_path won't be * changed by setup_obb_dentry() but the lower path is saved to - * its orig_path. this dentry will be revalidated later. + * its orig_path. this dentry will be revalidated later. * but now, the lower_path should be NULL */ sdcardfs_put_reset_lower_path(dentry); /* the newly created lower path which saved to its orig_path or * the lower_path is the base obbpath. - * therefore, an additional path_get is required */ + * therefore, an additional path_get is required */ path_get(&lower_path); } else make_nomedia_in_obb = 1; } - err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path); + err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, pi->userid); if (err) goto out; @@ -314,7 +309,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode /* update number of links on parent directory */ set_nlink(dir, sdcardfs_lower_inode(dir)->i_nlink); - if ((sbi->options.derive == DERIVE_UNIFIED) && (!strcasecmp(dentry->d_name.name, "obb")) + if ((!sbi->options.multiuser) && (!strcasecmp(dentry->d_name.name, "obb")) && (pi->perm == PERM_ANDROID) && (pi->userid == 0)) make_nomedia_in_obb = 1; @@ -371,12 +366,9 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) struct dentry *lower_dir_dentry; int err; struct path lower_path; - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); const struct cred *saved_cred = NULL; - //char *path_s = NULL; - int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); - if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) { + if(!check_caller_access_to_name(dir, dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", __func__, dentry->d_name.name, current->comm); @@ -461,14 +453,10 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *trap = NULL; struct dentry *new_parent = NULL; struct path lower_old_path, lower_new_path; - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(old_dentry->d_sb); const struct cred *saved_cred = NULL; - int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); - if(!check_caller_access_to_name(old_dir, old_dentry->d_name.name, - sbi->options.derive, 1, has_rw) || - !check_caller_access_to_name(new_dir, new_dentry->d_name.name, - sbi->options.derive, 1, has_rw)) { + if(!check_caller_access_to_name(old_dir, old_dentry->d_name.name) || + !check_caller_access_to_name(new_dir, new_dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " new_dentry: %s, task:%s\n", __func__, new_dentry->d_name.name, current->comm); @@ -505,26 +493,31 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; /* Copy attrs from lower dir, but i_uid/i_gid */ - fsstack_copy_attr_all(new_dir, d_inode(lower_new_dir_dentry)); + sdcardfs_copy_and_fix_attrs(new_dir, d_inode(lower_new_dir_dentry)); fsstack_copy_inode_size(new_dir, d_inode(lower_new_dir_dentry)); - fix_derived_permission(new_dir); + if (new_dir != old_dir) { - fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry)); + sdcardfs_copy_and_fix_attrs(old_dir, d_inode(lower_old_dir_dentry)); fsstack_copy_inode_size(old_dir, d_inode(lower_old_dir_dentry)); - fix_derived_permission(old_dir); + /* update the derived permission of the old_dentry * with its new parent */ new_parent = dget_parent(new_dentry); if(new_parent) { if(d_inode(old_dentry)) { - get_derived_permission(new_parent, old_dentry); - fix_derived_permission(d_inode(old_dentry)); + update_derived_permission_lock(old_dentry); } dput(new_parent); } } - + /* At this point, not all dentry information has been moved, so + * we pass along new_dentry for the name.*/ + mutex_lock(&d_inode(old_dentry)->i_mutex); + get_derived_permission_new(new_dentry->d_parent, old_dentry, new_dentry); + fix_derived_permission(d_inode(old_dentry)); + get_derive_permissions_recursive(old_dentry); + mutex_unlock(&d_inode(old_dentry)->i_mutex); out: unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); dput(lower_old_dir_dentry); @@ -639,9 +632,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) struct inode *lower_inode; struct path lower_path; struct iattr lower_ia; - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); struct dentry *parent; - int has_rw; inode = d_inode(dentry); @@ -655,10 +646,8 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) /* no vfs_XXX operations required, cred overriding will be skipped. wj*/ if (!err) { /* check the Android group ID */ - has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); parent = dget_parent(dentry); - if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name, - sbi->options.derive, 1, has_rw)) { + if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", __func__, dentry->d_name.name, current->comm); @@ -723,10 +712,8 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) if (err) goto out; - /* get attributes from the lower inode */ - fsstack_copy_attr_all(inode, lower_inode); - /* update derived permission of the upper inode */ - fix_derived_permission(inode); + /* get attributes from the lower inode and update derived permissions */ + sdcardfs_copy_and_fix_attrs(inode, lower_inode); /* * Not running fsstack_copy_inode_size(inode, lower_inode), because @@ -748,11 +735,9 @@ static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct inode *lower_inode; struct path lower_path; struct dentry *parent; - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); parent = dget_parent(dentry); - if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name, - sbi->options.derive, 0, 0)) { + if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", __func__, dentry->d_name.name, current->comm); @@ -767,13 +752,10 @@ static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, lower_dentry = lower_path.dentry; lower_inode = sdcardfs_lower_inode(inode); - fsstack_copy_attr_all(inode, lower_inode); + + sdcardfs_copy_and_fix_attrs(inode, lower_inode); fsstack_copy_inode_size(inode, lower_inode); - /* if the dentry has been moved from other location - * so, on this stage, its derived permission must be - * rechecked from its private field. - */ - fix_derived_permission(inode); + generic_fillattr(inode, stat); sdcardfs_put_lower_path(dentry, &lower_path); diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index a4b94df99f32..f80abcb6b467 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -64,10 +64,17 @@ int new_dentry_private_data(struct dentry *dentry) return 0; } -static int sdcardfs_inode_test(struct inode *inode, void *candidate_lower_inode) +struct inode_data { + struct inode *lower_inode; + userid_t id; +}; + +static int sdcardfs_inode_test(struct inode *inode, void *candidate_data/*void *candidate_lower_inode*/) { struct inode *current_lower_inode = sdcardfs_lower_inode(inode); - if (current_lower_inode == (struct inode *)candidate_lower_inode) + userid_t current_userid = SDCARDFS_I(inode)->userid; + if (current_lower_inode == ((struct inode_data *)candidate_data)->lower_inode && + current_userid == ((struct inode_data *)candidate_data)->id) return 1; /* found a match */ else return 0; /* no match */ @@ -79,12 +86,15 @@ static int sdcardfs_inode_set(struct inode *inode, void *lower_inode) return 0; } -struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode) +struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, userid_t id) { struct sdcardfs_inode_info *info; + struct inode_data data; struct inode *inode; /* the new inode to return */ int err; + data.id = id; + data.lower_inode = lower_inode; inode = iget5_locked(sb, /* our superblock */ /* * hashval: we use inode number, but we can @@ -94,7 +104,7 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode) lower_inode->i_ino, /* hashval */ sdcardfs_inode_test, /* inode comparison function */ sdcardfs_inode_set, /* inode init function */ - lower_inode); /* data passed to test+set fxns */ + &data); /* data passed to test+set fxns */ if (!inode) { err = -EACCES; iput(lower_inode); @@ -146,11 +156,9 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode) lower_inode->i_rdev); /* all well, copy inode attributes */ - fsstack_copy_attr_all(inode, lower_inode); + sdcardfs_copy_and_fix_attrs(inode, lower_inode); fsstack_copy_inode_size(inode, lower_inode); - fix_derived_permission(inode); - unlock_new_inode(inode); return inode; } @@ -164,7 +172,7 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode) * @lower_path: the lower path (caller does path_get/put) */ int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, - struct path *lower_path) + struct path *lower_path, userid_t id) { int err = 0; struct inode *inode; @@ -186,14 +194,14 @@ int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, */ /* inherit lower inode number for sdcardfs's inode */ - inode = sdcardfs_iget(sb, lower_inode); + inode = sdcardfs_iget(sb, lower_inode, id); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out; } d_add(dentry, inode); - update_derived_permission(dentry); + update_derived_permission_lock(dentry); out: return err; } @@ -205,7 +213,7 @@ int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, * Fills in lower_parent_path with on success. */ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, - unsigned int flags, struct path *lower_parent_path) + unsigned int flags, struct path *lower_parent_path, userid_t id) { int err = 0; struct vfsmount *lower_dir_mnt; @@ -266,7 +274,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, } sdcardfs_set_lower_path(dentry, &lower_path); - err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_path); + err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_path, id); if (err) /* path_put underlying path on error */ sdcardfs_put_reset_lower_path(dentry); goto out; @@ -328,13 +336,11 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, struct dentry *ret = NULL, *parent; struct path lower_parent_path; int err = 0; - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); const struct cred *saved_cred = NULL; parent = dget_parent(dentry); - if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name, - sbi->options.derive, 0, 0)) { + if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name)) { ret = ERR_PTR(-EACCES); printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", @@ -354,7 +360,7 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path); + ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path, SDCARDFS_I(dir)->userid); if (IS_ERR(ret)) { goto out; @@ -365,8 +371,10 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, fsstack_copy_attr_times(dentry->d_inode, sdcardfs_lower_inode(dentry->d_inode)); /* get drived permission */ + mutex_lock(&dentry->d_inode->i_mutex); get_derived_permission(parent, dentry); fix_derived_permission(dentry->d_inode); + mutex_unlock(&dentry->d_inode->i_mutex); } /* update parent directory's atime */ fsstack_copy_attr_atime(parent->d_inode, diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 9d04ae8ceb46..80aa355d801e 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -24,25 +24,27 @@ #include enum { - Opt_uid, + Opt_fsuid, + Opt_fsgid, Opt_gid, - Opt_wgid, Opt_debug, - Opt_split, - Opt_derive, Opt_lower_fs, + Opt_mask, + Opt_multiuser, // May need? + Opt_userid, Opt_reserved_mb, Opt_err, }; static const match_table_t sdcardfs_tokens = { - {Opt_uid, "uid=%u"}, + {Opt_fsuid, "fsuid=%u"}, + {Opt_fsgid, "fsgid=%u"}, {Opt_gid, "gid=%u"}, - {Opt_wgid, "wgid=%u"}, {Opt_debug, "debug"}, - {Opt_split, "split"}, - {Opt_derive, "derive=%s"}, {Opt_lower_fs, "lower_fs=%s"}, + {Opt_mask, "mask=%u"}, + {Opt_userid, "userid=%d"}, + {Opt_multiuser, "multiuser"}, {Opt_reserved_mb, "reserved_mb=%u"}, {Opt_err, NULL} }; @@ -58,12 +60,10 @@ static int parse_options(struct super_block *sb, char *options, int silent, /* by default, we use AID_MEDIA_RW as uid, gid */ opts->fs_low_uid = AID_MEDIA_RW; opts->fs_low_gid = AID_MEDIA_RW; - /* by default, we use AID_SDCARD_RW as write_gid */ - opts->write_gid = AID_SDCARD_RW; - /* default permission policy - * (DERIVE_NONE | DERIVE_LEGACY | DERIVE_UNIFIED) */ - opts->derive = DERIVE_NONE; - opts->split_perms = 0; + opts->mask = 0; + opts->multiuser = false; + opts->fs_user_id = 0; + opts->gid = 0; /* by default, we use LOWER_FS_EXT4 as lower fs type */ opts->lower_fs = LOWER_FS_EXT4; /* by default, 0MB is reserved */ @@ -85,37 +85,33 @@ static int parse_options(struct super_block *sb, char *options, int silent, case Opt_debug: *debug = 1; break; - case Opt_uid: + case Opt_fsuid: if (match_int(&args[0], &option)) return 0; opts->fs_low_uid = option; break; - case Opt_gid: + case Opt_fsgid: if (match_int(&args[0], &option)) return 0; opts->fs_low_gid = option; break; - case Opt_wgid: + case Opt_gid: if (match_int(&args[0], &option)) return 0; - opts->write_gid = option; + opts->gid = option; break; - case Opt_split: - opts->split_perms=1; + case Opt_userid: + if (match_int(&args[0], &option)) + return 0; + opts->fs_user_id = option; break; - case Opt_derive: - string_option = match_strdup(&args[0]); - if (!strcmp("none", string_option)) { - opts->derive = DERIVE_NONE; - } else if (!strcmp("legacy", string_option)) { - opts->derive = DERIVE_LEGACY; - } else if (!strcmp("unified", string_option)) { - opts->derive = DERIVE_UNIFIED; - } else { - kfree(string_option); - goto invalid_option; - } - kfree(string_option); + case Opt_mask: + if (match_int(&args[0], &option)) + return 0; + opts->mask = option; + break; + case Opt_multiuser: + opts->multiuser = true; break; case Opt_lower_fs: string_option = match_strdup(&args[0]); @@ -184,6 +180,11 @@ static struct dentry *sdcardfs_d_alloc_root(struct super_block *sb) } #endif +DEFINE_MUTEX(sdcardfs_super_list_lock); +LIST_HEAD(sdcardfs_super_list); +EXPORT_SYMBOL_GPL(sdcardfs_super_list_lock); +EXPORT_SYMBOL_GPL(sdcardfs_super_list); + /* * There is no need to lock the sdcardfs_super_info's rwsem as there is no * way anyone can have a reference to the superblock at this point in time. @@ -196,7 +197,6 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, struct super_block *lower_sb; struct path lower_path; struct sdcardfs_sb_info *sb_info; - void *pkgl_id; struct inode *inode; printk(KERN_INFO "sdcardfs version 2.0\n"); @@ -215,8 +215,7 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &lower_path); if (err) { - printk(KERN_ERR "sdcardfs: error accessing " - "lower directory '%s'\n", dev_name); + printk(KERN_ERR "sdcardfs: error accessing lower directory '%s'\n", dev_name); goto out; } @@ -229,7 +228,6 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, } sb_info = sb->s_fs_info; - /* parse options */ err = parse_options(sb, raw_data, silent, &debug, &sb_info->options); if (err) { @@ -237,14 +235,6 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, goto out_freesbi; } - if (sb_info->options.derive != DERIVE_NONE) { - pkgl_id = packagelist_create(sb_info->options.write_gid); - if(IS_ERR(pkgl_id)) - goto out_freesbi; - else - sb_info->pkgl_id = pkgl_id; - } - /* set the lower superblock field of upper superblock */ lower_sb = lower_path.dentry->d_sb; atomic_inc(&lower_sb->s_active); @@ -263,7 +253,7 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, sb->s_op = &sdcardfs_sops; /* get a new inode and allocate our root dentry */ - inode = sdcardfs_iget(sb, lower_path.dentry->d_inode); + inode = sdcardfs_iget(sb, lower_path.dentry->d_inode, 0); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_sput; @@ -292,41 +282,22 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, d_rehash(sb->s_root); /* setup permission policy */ - switch(sb_info->options.derive) { - case DERIVE_NONE: - setup_derived_state(sb->s_root->d_inode, - PERM_ROOT, 0, AID_ROOT, AID_SDCARD_RW, 00775); - sb_info->obbpath_s = NULL; - break; - case DERIVE_LEGACY: - /* Legacy behavior used to support internal multiuser layout which - * places user_id at the top directory level, with the actual roots - * just below that. Shared OBB path is also at top level. */ - setup_derived_state(sb->s_root->d_inode, - PERM_LEGACY_PRE_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771); - /* initialize the obbpath string and lookup the path - * sb_info->obb_path will be deactivated by path_put - * on sdcardfs_put_super */ - sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); - snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); - err = prepare_dir(sb_info->obbpath_s, + sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); + mutex_lock(&sdcardfs_super_list_lock); + if(sb_info->options.multiuser) { + setup_derived_state(sb->s_root->d_inode, PERM_PRE_ROOT, sb_info->options.fs_user_id, AID_ROOT, false); + snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); + /*err = prepare_dir(sb_info->obbpath_s, sb_info->options.fs_low_uid, - sb_info->options.fs_low_gid, 00755); - if(err) - printk(KERN_ERR "sdcardfs: %s: %d, error on creating %s\n", - __func__,__LINE__, sb_info->obbpath_s); - break; - case DERIVE_UNIFIED: - /* Unified multiuser layout which places secondary user_id under - * /Android/user and shared OBB path under /Android/obb. */ - setup_derived_state(sb->s_root->d_inode, - PERM_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771); - - sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); - snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); - break; + sb_info->options.fs_low_gid, 00755);*/ + } else { + setup_derived_state(sb->s_root->d_inode, PERM_ROOT, sb_info->options.fs_low_uid, AID_ROOT, false); + snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); } fix_derived_permission(sb->s_root->d_inode); + sb_info->sb = sb; + list_add(&sb_info->list, &sdcardfs_super_list); + mutex_unlock(&sdcardfs_super_list_lock); if (!silent) printk(KERN_INFO "sdcardfs: mounted on top of %s type %s\n", @@ -341,7 +312,6 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, out_sput: /* drop refs we took earlier */ atomic_dec(&lower_sb->s_active); - packagelist_destroy(sb_info->pkgl_id); out_freesbi: kfree(SDCARDFS_SB(sb)); sb->s_fs_info = NULL; @@ -386,11 +356,22 @@ struct dentry *sdcardfs_mount(struct file_system_type *fs_type, int flags, raw_data, sdcardfs_read_super); } +void sdcardfs_kill_sb(struct super_block *sb) { + struct sdcardfs_sb_info *sbi; + if (sb->s_magic == SDCARDFS_SUPER_MAGIC) { + sbi = SDCARDFS_SB(sb); + mutex_lock(&sdcardfs_super_list_lock); + list_del(&sbi->list); + mutex_unlock(&sdcardfs_super_list_lock); + } + generic_shutdown_super(sb); +} + static struct file_system_type sdcardfs_fs_type = { .owner = THIS_MODULE, .name = SDCARDFS_NAME, .mount = sdcardfs_mount, - .kill_sb = generic_shutdown_super, + .kill_sb = sdcardfs_kill_sb, .fs_flags = 0, }; diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c index f11591da141d..ba3478d94107 100644 --- a/fs/sdcardfs/packagelist.c +++ b/fs/sdcardfs/packagelist.c @@ -19,13 +19,16 @@ */ #include "sdcardfs.h" -#include "strtok.h" #include -#include -#include -#include #include + +#include +#include +#include + +#include + #define STRING_BUF_SIZE (512) struct hashtable_entry { @@ -34,25 +37,20 @@ struct hashtable_entry { unsigned int value; }; -struct packagelist_data { - DECLARE_HASHTABLE(package_to_appid,8); - DECLARE_HASHTABLE(appid_with_rw,7); - struct mutex hashtable_lock; - struct task_struct *thread_id; - gid_t write_gid; - char *strtok_last; - char read_buf[STRING_BUF_SIZE]; - char event_buf[STRING_BUF_SIZE]; - char app_name_buf[STRING_BUF_SIZE]; - char gids_buf[STRING_BUF_SIZE]; +struct sb_list { + struct super_block *sb; + struct list_head list; }; -static struct kmem_cache *hashtable_entry_cachep; +struct packagelist_data { + DECLARE_HASHTABLE(package_to_appid,8); + struct mutex hashtable_lock; -/* Path to system-provided mapping of package name to appIds */ -static const char* const kpackageslist_file = "/data/system/packages.list"; -/* Supplementary groups to execute with */ -static const gid_t kgroups[1] = { AID_PACKAGE_INFO }; +}; + +static struct packagelist_data *pkgl_data_all; + +static struct kmem_cache *hashtable_entry_cachep; static unsigned int str_hash(const char *key) { int i; @@ -66,62 +64,29 @@ static unsigned int str_hash(const char *key) { return h; } -static int contain_appid_key(struct packagelist_data *pkgl_dat, unsigned int appid) { - struct hashtable_entry *hash_cur; - - hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, appid) - if ((void *)(uintptr_t)appid == hash_cur->key) - return 1; - - return 0; -} - -/* Return if the calling UID holds sdcard_rw. */ -int get_caller_has_rw_locked(void *pkgl_id, derive_t derive) { - struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id; - appid_t appid; - int ret; - - /* No additional permissions enforcement */ - if (derive == DERIVE_NONE) { - return 1; - } - - appid = multiuser_get_app_id(from_kuid(&init_user_ns, current_fsuid())); - mutex_lock(&pkgl_dat->hashtable_lock); - ret = contain_appid_key(pkgl_dat, appid); - mutex_unlock(&pkgl_dat->hashtable_lock); - return ret; -} - appid_t get_appid(void *pkgl_id, const char *app_name) { - struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id; + struct packagelist_data *pkgl_dat = pkgl_data_all; struct hashtable_entry *hash_cur; unsigned int hash = str_hash(app_name); appid_t ret_id; - //printk(KERN_INFO "sdcardfs: %s: %s, %u\n", __func__, (char *)app_name, hash); mutex_lock(&pkgl_dat->hashtable_lock); hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { - //printk(KERN_INFO "sdcardfs: %s: %s\n", __func__, (char *)hash_cur->key); if (!strcasecmp(app_name, hash_cur->key)) { ret_id = (appid_t)hash_cur->value; mutex_unlock(&pkgl_dat->hashtable_lock); - //printk(KERN_INFO "=> app_id: %d\n", (int)ret_id); return ret_id; } } mutex_unlock(&pkgl_dat->hashtable_lock); - //printk(KERN_INFO "=> app_id: %d\n", 0); return 0; } /* Kernel has already enforced everything we returned through * derive_permissions_locked(), so this is used to lock down access * even further, such as enforcing that apps hold sdcard_rw. */ -int check_caller_access_to_name(struct inode *parent_node, const char* name, - derive_t derive, int w_ok, int has_rw) { +int check_caller_access_to_name(struct inode *parent_node, const char* name) { /* Always block security-sensitive files at root */ if (parent_node && SDCARDFS_I(parent_node)->perm == PERM_ROOT) { @@ -132,28 +97,12 @@ int check_caller_access_to_name(struct inode *parent_node, const char* name, } } - /* No additional permissions enforcement */ - if (derive == DERIVE_NONE) { - return 1; - } - /* Root always has access; access for any other UIDs should always * be controlled through packages.list. */ if (from_kuid(&init_user_ns, current_fsuid()) == 0) { return 1; } - /* If asking to write, verify that caller either owns the - * parent or holds sdcard_rw. */ - if (w_ok) { - if (parent_node && - (from_kuid(&init_user_ns, current_fsuid()) == - SDCARDFS_I(parent_node)->d_uid)) { - return 1; - } - return has_rw; - } - /* No extra permissions to enforce */ return 1; } @@ -171,14 +120,13 @@ int open_flags_to_access_mode(int open_flags) { } } -static int insert_str_to_int(struct packagelist_data *pkgl_dat, char *key, +static int insert_str_to_int_lock(struct packagelist_data *pkgl_dat, char *key, unsigned int value) { struct hashtable_entry *hash_cur; struct hashtable_entry *new_entry; unsigned int hash = str_hash(key); - //printk(KERN_INFO "sdcardfs: %s: %s: %d, %u\n", __func__, (char *)key, value, hash); hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { if (!strcasecmp(key, hash_cur->key)) { hash_cur->value = value; @@ -194,247 +142,277 @@ static int insert_str_to_int(struct packagelist_data *pkgl_dat, char *key, return 0; } -static void remove_str_to_int(struct hashtable_entry *h_entry) { - //printk(KERN_INFO "sdcardfs: %s: %s: %d\n", __func__, (char *)h_entry->key, h_entry->value); - kfree(h_entry->key); - kmem_cache_free(hashtable_entry_cachep, h_entry); +static void fixup_perms(struct super_block *sb) { + if (sb && sb->s_magic == SDCARDFS_SUPER_MAGIC) { + mutex_lock(&sb->s_root->d_inode->i_mutex); + get_derive_permissions_recursive(sb->s_root); + mutex_unlock(&sb->s_root->d_inode->i_mutex); + } } -static int insert_int_to_null(struct packagelist_data *pkgl_dat, unsigned int key, - unsigned int value) -{ - struct hashtable_entry *hash_cur; - struct hashtable_entry *new_entry; +static int insert_str_to_int(struct packagelist_data *pkgl_dat, char *key, + unsigned int value) { + int ret; + struct sdcardfs_sb_info *sbinfo; + mutex_lock(&sdcardfs_super_list_lock); + mutex_lock(&pkgl_dat->hashtable_lock); + ret = insert_str_to_int_lock(pkgl_dat, key, value); + mutex_unlock(&pkgl_dat->hashtable_lock); - //printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)key, value); - hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, key) { - if ((void *)(uintptr_t)key == hash_cur->key) { - hash_cur->value = value; - return 0; + list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { + if (sbinfo) { + fixup_perms(sbinfo->sb); } } - new_entry = kmem_cache_alloc(hashtable_entry_cachep, GFP_KERNEL); - if (!new_entry) - return -ENOMEM; - new_entry->key = (void *)(uintptr_t)key; - new_entry->value = value; - hash_add(pkgl_dat->appid_with_rw, &new_entry->hlist, key); - return 0; + mutex_unlock(&sdcardfs_super_list_lock); + return ret; } -static void remove_int_to_null(struct hashtable_entry *h_entry) { - //printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)h_entry->key, h_entry->value); +static void remove_str_to_int_lock(struct hashtable_entry *h_entry) { + kfree(h_entry->key); + hash_del(&h_entry->hlist); kmem_cache_free(hashtable_entry_cachep, h_entry); } +static void remove_str_to_int(struct packagelist_data *pkgl_dat, const char *key) +{ + struct sdcardfs_sb_info *sbinfo; + struct hashtable_entry *hash_cur; + unsigned int hash = str_hash(key); + mutex_lock(&sdcardfs_super_list_lock); + mutex_lock(&pkgl_dat->hashtable_lock); + hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { + if (!strcasecmp(key, hash_cur->key)) { + remove_str_to_int_lock(hash_cur); + break; + } + } + mutex_unlock(&pkgl_dat->hashtable_lock); + list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { + if (sbinfo) { + fixup_perms(sbinfo->sb); + } + } + mutex_unlock(&sdcardfs_super_list_lock); + return; +} + static void remove_all_hashentrys(struct packagelist_data *pkgl_dat) { struct hashtable_entry *hash_cur; struct hlist_node *h_t; int i; - - hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist) - remove_str_to_int(hash_cur); - hash_for_each_safe(pkgl_dat->appid_with_rw, i, h_t, hash_cur, hlist) - remove_int_to_null(hash_cur); - - hash_init(pkgl_dat->package_to_appid); - hash_init(pkgl_dat->appid_with_rw); -} - -static int read_package_list(struct packagelist_data *pkgl_dat) { - int ret; - int fd; - int read_amount; - - printk(KERN_INFO "sdcardfs: read_package_list\n"); - mutex_lock(&pkgl_dat->hashtable_lock); - - remove_all_hashentrys(pkgl_dat); - - fd = sys_open(kpackageslist_file, O_RDONLY, 0); - if (fd < 0) { - printk(KERN_ERR "sdcardfs: failed to open package list\n"); - mutex_unlock(&pkgl_dat->hashtable_lock); - return fd; - } - - while ((read_amount = sys_read(fd, pkgl_dat->read_buf, - sizeof(pkgl_dat->read_buf))) > 0) { - unsigned int appid; - char *token; - int one_line_len = 0; - int additional_read; - unsigned long ret_gid; - - while (one_line_len < read_amount) { - if (pkgl_dat->read_buf[one_line_len] == '\n') { - one_line_len++; - break; - } - one_line_len++; - } - additional_read = read_amount - one_line_len; - if (additional_read > 0) - sys_lseek(fd, -additional_read, SEEK_CUR); - - if (sscanf(pkgl_dat->read_buf, "%s %u %*d %*s %*s %s", - pkgl_dat->app_name_buf, &appid, - pkgl_dat->gids_buf) == 3) { - ret = insert_str_to_int(pkgl_dat, pkgl_dat->app_name_buf, appid); - if (ret) { - sys_close(fd); - mutex_unlock(&pkgl_dat->hashtable_lock); - return ret; - } - - token = strtok_r(pkgl_dat->gids_buf, ",", &pkgl_dat->strtok_last); - while (token != NULL) { - if (!kstrtoul(token, 10, &ret_gid) && - (ret_gid == pkgl_dat->write_gid)) { - ret = insert_int_to_null(pkgl_dat, appid, 1); - if (ret) { - sys_close(fd); - mutex_unlock(&pkgl_dat->hashtable_lock); - return ret; - } - break; - } - token = strtok_r(NULL, ",", &pkgl_dat->strtok_last); - } - } - } - - sys_close(fd); + hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist) + remove_str_to_int_lock(hash_cur); mutex_unlock(&pkgl_dat->hashtable_lock); - return 0; + hash_init(pkgl_dat->package_to_appid); } -static int packagelist_reader(void *thread_data) -{ - struct packagelist_data *pkgl_dat = (struct packagelist_data *)thread_data; - struct inotify_event *event; - bool active = false; - int event_pos; - int event_size; - int res = 0; - int nfd; - - allow_signal(SIGINT); - - nfd = sys_inotify_init(); - if (nfd < 0) { - printk(KERN_ERR "sdcardfs: inotify_init failed: %d\n", nfd); - return nfd; - } - - while (!kthread_should_stop()) { - if (signal_pending(current)) { - ssleep(1); - continue; - } - - if (!active) { - res = sys_inotify_add_watch(nfd, kpackageslist_file, IN_DELETE_SELF); - if (res < 0) { - if (res == -ENOENT || res == -EACCES) { - /* Framework may not have created yet, sleep and retry */ - printk(KERN_ERR "sdcardfs: missing packages.list; retrying\n"); - ssleep(2); - printk(KERN_ERR "sdcardfs: missing packages.list_end; retrying\n"); - continue; - } else { - printk(KERN_ERR "sdcardfs: inotify_add_watch failed: %d\n", res); - goto interruptable_sleep; - } - } - /* Watch above will tell us about any future changes, so - * read the current state. */ - res = read_package_list(pkgl_dat); - if (res) { - printk(KERN_ERR "sdcardfs: read_package_list failed: %d\n", res); - goto interruptable_sleep; - } - active = true; - } - - event_pos = 0; - res = sys_read(nfd, pkgl_dat->event_buf, sizeof(pkgl_dat->event_buf)); - if (res < (int) sizeof(*event)) { - if (res == -EINTR) - continue; - printk(KERN_ERR "sdcardfs: failed to read inotify event: %d\n", res); - goto interruptable_sleep; - } - - while (res >= (int) sizeof(*event)) { - event = (struct inotify_event *) (pkgl_dat->event_buf + event_pos); - - printk(KERN_INFO "sdcardfs: inotify event: %08x\n", event->mask); - if ((event->mask & IN_IGNORED) == IN_IGNORED) { - /* Previously watched file was deleted, probably due to move - * that swapped in new data; re-arm the watch and read. */ - active = false; - } - - event_size = sizeof(*event) + event->len; - res -= event_size; - event_pos += event_size; - } - continue; - -interruptable_sleep: - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - } - flush_signals(current); - sys_close(nfd); - return res; -} - -void * packagelist_create(gid_t write_gid) +static struct packagelist_data * packagelist_create(void) { struct packagelist_data *pkgl_dat; - struct task_struct *packagelist_thread; pkgl_dat = kmalloc(sizeof(*pkgl_dat), GFP_KERNEL | __GFP_ZERO); if (!pkgl_dat) { - printk(KERN_ERR "sdcardfs: creating kthread failed\n"); + printk(KERN_ERR "sdcardfs: Failed to create hash\n"); return ERR_PTR(-ENOMEM); } mutex_init(&pkgl_dat->hashtable_lock); hash_init(pkgl_dat->package_to_appid); - hash_init(pkgl_dat->appid_with_rw); - pkgl_dat->write_gid = write_gid; - packagelist_thread = kthread_run(packagelist_reader, (void *)pkgl_dat, "pkgld"); - if (IS_ERR(packagelist_thread)) { - printk(KERN_ERR "sdcardfs: creating kthread failed\n"); - kfree(pkgl_dat); - return packagelist_thread; - } - pkgl_dat->thread_id = packagelist_thread; - - printk(KERN_INFO "sdcardfs: created packagelist pkgld/%d\n", - (int)pkgl_dat->thread_id->pid); - - return (void *)pkgl_dat; + return pkgl_dat; } -void packagelist_destroy(void *pkgl_id) +static void packagelist_destroy(struct packagelist_data *pkgl_dat) { - struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id; - pid_t pkgl_pid = pkgl_dat->thread_id->pid; - - force_sig_info(SIGINT, SEND_SIG_PRIV, pkgl_dat->thread_id); - kthread_stop(pkgl_dat->thread_id); remove_all_hashentrys(pkgl_dat); - printk(KERN_INFO "sdcardfs: destroyed packagelist pkgld/%d\n", (int)pkgl_pid); + printk(KERN_INFO "sdcardfs: destroyed packagelist pkgld\n"); kfree(pkgl_dat); } +struct package_appid { + struct config_item item; + int add_pid; +}; + +static inline struct package_appid *to_package_appid(struct config_item *item) +{ + return item ? container_of(item, struct package_appid, item) : NULL; +} + +static ssize_t package_appid_attr_show(struct config_item *item, + char *page) +{ + ssize_t count; + count = sprintf(page, "%d\n", get_appid(pkgl_data_all, item->ci_name)); + return count; +} + +static ssize_t package_appid_attr_store(struct config_item *item, + const char *page, size_t count) +{ + struct package_appid *package_appid = to_package_appid(item); + unsigned long tmp; + char *p = (char *) page; + int ret; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + ret = insert_str_to_int(pkgl_data_all, item->ci_name, (unsigned int)tmp); + package_appid->add_pid = tmp; + if (ret) + return ret; + + return count; +} + +static struct configfs_attribute package_appid_attr_add_pid = { + .ca_owner = THIS_MODULE, + .ca_name = "appid", + .ca_mode = S_IRUGO | S_IWUGO, + .show = package_appid_attr_show, + .store = package_appid_attr_store, +}; + +static struct configfs_attribute *package_appid_attrs[] = { + &package_appid_attr_add_pid, + NULL, +}; + +static void package_appid_release(struct config_item *item) +{ + printk(KERN_INFO "sdcardfs: removing %s\n", item->ci_dentry->d_name.name); + /* item->ci_name is freed already, so we rely on the dentry */ + remove_str_to_int(pkgl_data_all, item->ci_dentry->d_name.name); + kfree(to_package_appid(item)); +} + +static struct configfs_item_operations package_appid_item_ops = { + .release = package_appid_release, +}; + +static struct config_item_type package_appid_type = { + .ct_item_ops = &package_appid_item_ops, + .ct_attrs = package_appid_attrs, + .ct_owner = THIS_MODULE, +}; + + +struct sdcardfs_packages { + struct config_group group; +}; + +static inline struct sdcardfs_packages *to_sdcardfs_packages(struct config_item *item) +{ + return item ? container_of(to_config_group(item), struct sdcardfs_packages, group) : NULL; +} + +static struct config_item *sdcardfs_packages_make_item(struct config_group *group, const char *name) +{ + struct package_appid *package_appid; + + package_appid = kzalloc(sizeof(struct package_appid), GFP_KERNEL); + if (!package_appid) + return ERR_PTR(-ENOMEM); + + config_item_init_type_name(&package_appid->item, name, + &package_appid_type); + + package_appid->add_pid = 0; + + return &package_appid->item; +} + +static ssize_t packages_attr_show(struct config_item *item, + char *page) +{ + struct hashtable_entry *hash_cur; + struct hlist_node *h_t; + int i; + int count = 0; + mutex_lock(&pkgl_data_all->hashtable_lock); + hash_for_each_safe(pkgl_data_all->package_to_appid, i, h_t, hash_cur, hlist) + count += snprintf(page + count, PAGE_SIZE - count, "%s %d\n", (char *)hash_cur->key, hash_cur->value); + mutex_unlock(&pkgl_data_all->hashtable_lock); + + + return count; +} + +static struct configfs_attribute sdcardfs_packages_attr_description = { + .ca_owner = THIS_MODULE, + .ca_name = "packages_gid.list", + .ca_mode = S_IRUGO, + .show = packages_attr_show, +}; + +static struct configfs_attribute *sdcardfs_packages_attrs[] = { + &sdcardfs_packages_attr_description, + NULL, +}; + +static void sdcardfs_packages_release(struct config_item *item) +{ + + printk(KERN_INFO "sdcardfs: destroyed something?\n"); + kfree(to_sdcardfs_packages(item)); +} + +static struct configfs_item_operations sdcardfs_packages_item_ops = { + .release = sdcardfs_packages_release, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations sdcardfs_packages_group_ops = { + .make_item = sdcardfs_packages_make_item, +}; + +static struct config_item_type sdcardfs_packages_type = { + .ct_item_ops = &sdcardfs_packages_item_ops, + .ct_group_ops = &sdcardfs_packages_group_ops, + .ct_attrs = sdcardfs_packages_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem sdcardfs_packages_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "sdcardfs", + .ci_type = &sdcardfs_packages_type, + }, + }, +}; + +static int __init configfs_sdcardfs_init(void) +{ + int ret; + struct configfs_subsystem *subsys = &sdcardfs_packages_subsys; + + config_group_init(&subsys->su_group); + mutex_init(&subsys->su_mutex); + ret = configfs_register_subsystem(subsys); + if (ret) { + printk(KERN_ERR "Error %d while registering subsystem %s\n", + ret, + subsys->su_group.cg_item.ci_namebuf); + } + return ret; +} + +static void __exit configfs_sdcardfs_exit(void) +{ + configfs_unregister_subsystem(&sdcardfs_packages_subsys); +} + int packagelist_init(void) { hashtable_entry_cachep = @@ -445,13 +423,15 @@ int packagelist_init(void) return -ENOMEM; } + pkgl_data_all = packagelist_create(); + configfs_sdcardfs_init(); return 0; } void packagelist_exit(void) { + configfs_sdcardfs_exit(); + packagelist_destroy(pkgl_data_all); if (hashtable_entry_cachep) kmem_cache_destroy(hashtable_entry_cachep); } - - diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 51f6c7912584..1b85f4e70324 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -42,6 +42,7 @@ #include #include #include +#include #include "multiuser.h" /* the file system name */ @@ -70,10 +71,11 @@ #define fix_derived_permission(x) \ do { \ (x)->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(x)->d_uid); \ - (x)->i_gid = make_kgid(&init_user_ns, SDCARDFS_I(x)->d_gid); \ - (x)->i_mode = ((x)->i_mode & S_IFMT) | SDCARDFS_I(x)->d_mode;\ + (x)->i_gid = make_kgid(&init_user_ns, get_gid(SDCARDFS_I(x))); \ + (x)->i_mode = ((x)->i_mode & S_IFMT) | get_mode(SDCARDFS_I(x));\ } while (0) + /* OVERRIDE_CRED() and REVERT_CRED() * OVERRID_CRED() * backup original task->cred @@ -99,35 +101,28 @@ (int)current->cred->fsuid, \ (int)current->cred->fsgid); -/* Android 4.4 support */ +/* Android 5.0 support */ /* Permission mode for a specific node. Controls how file permissions * are derived for children nodes. */ typedef enum { - /* Nothing special; this node should just inherit from its parent. */ - PERM_INHERIT, - /* This node is one level above a normal root; used for legacy layouts - * which use the first level to represent user_id. */ - PERM_LEGACY_PRE_ROOT, - /* This node is "/" */ - PERM_ROOT, - /* This node is "/Android" */ - PERM_ANDROID, - /* This node is "/Android/data" */ - PERM_ANDROID_DATA, - /* This node is "/Android/obb" */ - PERM_ANDROID_OBB, - /* This node is "/Android/user" */ - PERM_ANDROID_USER, + /* Nothing special; this node should just inherit from its parent. */ + PERM_INHERIT, + /* This node is one level above a normal root; used for legacy layouts + * which use the first level to represent user_id. */ + PERM_PRE_ROOT, + /* This node is "/" */ + PERM_ROOT, + /* This node is "/Android" */ + PERM_ANDROID, + /* This node is "/Android/data" */ + PERM_ANDROID_DATA, + /* This node is "/Android/obb" */ + PERM_ANDROID_OBB, + /* This node is "/Android/media" */ + PERM_ANDROID_MEDIA, } perm_t; -/* Permissions structure to derive */ -typedef enum { - DERIVE_NONE, - DERIVE_LEGACY, - DERIVE_UNIFIED, -} derive_t; - typedef enum { LOWER_FS_EXT4, LOWER_FS_FAT, @@ -161,9 +156,9 @@ extern void free_dentry_private_data(struct dentry *dentry); extern struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); extern struct inode *sdcardfs_iget(struct super_block *sb, - struct inode *lower_inode); + struct inode *lower_inode, userid_t id); extern int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, - struct path *lower_path); + struct path *lower_path, userid_t id); /* file private data */ struct sdcardfs_file_info { @@ -174,18 +169,16 @@ struct sdcardfs_file_info { /* sdcardfs inode data in memory */ struct sdcardfs_inode_info { struct inode *lower_inode; - /* state derived based on current position in hierachy - * caution: d_mode does not include file types - */ + /* state derived based on current position in hierachy */ perm_t perm; userid_t userid; uid_t d_uid; - gid_t d_gid; - mode_t d_mode; + bool under_android; struct inode vfs_inode; }; + /* sdcardfs dentry data in memory */ struct sdcardfs_dentry_info { spinlock_t lock; /* protects lower_path */ @@ -196,15 +189,17 @@ struct sdcardfs_dentry_info { struct sdcardfs_mount_options { uid_t fs_low_uid; gid_t fs_low_gid; - gid_t write_gid; - int split_perms; - derive_t derive; + userid_t fs_user_id; + gid_t gid; lower_fs_t lower_fs; + mode_t mask; + bool multiuser; unsigned int reserved_mb; }; /* sdcardfs super-block data in memory */ struct sdcardfs_sb_info { + struct super_block *sb; struct super_block *lower_sb; /* derived perm policy : some of options have been added * to sdcardfs_mount_options (Android 4.4 support) */ @@ -213,6 +208,7 @@ struct sdcardfs_sb_info { char *obbpath_s; struct path obbpath; void *pkgl_id; + struct list_head list; }; /* @@ -331,6 +327,44 @@ static inline void sdcardfs_put_reset_##pname(const struct dentry *dent) \ SDCARDFS_DENT_FUNC(lower_path) SDCARDFS_DENT_FUNC(orig_path) +static inline int get_gid(struct sdcardfs_inode_info *info) { + struct sdcardfs_sb_info *sb_info = SDCARDFS_SB(info->vfs_inode.i_sb); + if (sb_info->options.gid == AID_SDCARD_RW) { + /* As an optimization, certain trusted system components only run + * as owner but operate across all users. Since we're now handing + * out the sdcard_rw GID only to trusted apps, we're okay relaxing + * the user boundary enforcement for the default view. The UIDs + * assigned to app directories are still multiuser aware. */ + return AID_SDCARD_RW; + } else { + return multiuser_get_uid(info->userid, sb_info->options.gid); + } +} +static inline int get_mode(struct sdcardfs_inode_info *info) { + int owner_mode; + int filtered_mode; + struct sdcardfs_sb_info *sb_info = SDCARDFS_SB(info->vfs_inode.i_sb); + int visible_mode = 0775 & ~sb_info->options.mask; + + if (info->perm == PERM_PRE_ROOT) { + /* Top of multi-user view should always be visible to ensure + * secondary users can traverse inside. */ + visible_mode = 0711; + } else if (info->under_android) { + /* Block "other" access to Android directories, since only apps + * belonging to a specific user should be in there; we still + * leave +x open for the default view. */ + if (sb_info->options.gid == AID_SDCARD_RW) { + visible_mode = visible_mode & ~0006; + } else { + visible_mode = visible_mode & ~0007; + } + } + owner_mode = info->lower_inode->i_mode & 0700; + filtered_mode = visible_mode & (owner_mode | (owner_mode >> 3) | (owner_mode >> 6)); + return filtered_mode; +} + static inline int has_graft_path(const struct dentry *dent) { int ret = 0; @@ -364,22 +398,24 @@ static inline void sdcardfs_put_real_lower(const struct dentry *dent, sdcardfs_put_lower_path(dent, real_lower); } +extern struct mutex sdcardfs_super_list_lock; +extern struct list_head sdcardfs_super_list; + /* for packagelist.c */ -extern int get_caller_has_rw_locked(void *pkgl_id, derive_t derive); extern appid_t get_appid(void *pkgl_id, const char *app_name); -extern int check_caller_access_to_name(struct inode *parent_node, const char* name, - derive_t derive, int w_ok, int has_rw); +extern int check_caller_access_to_name(struct inode *parent_node, const char* name); extern int open_flags_to_access_mode(int open_flags); -extern void * packagelist_create(gid_t write_gid); -extern void packagelist_destroy(void *pkgl_id); extern int packagelist_init(void); extern void packagelist_exit(void); /* for derived_perm.c */ extern void setup_derived_state(struct inode *inode, perm_t perm, - userid_t userid, uid_t uid, gid_t gid, mode_t mode); + userid_t userid, uid_t uid, bool under_android); extern void get_derived_permission(struct dentry *parent, struct dentry *dentry); -extern void update_derived_permission(struct dentry *dentry); +extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, struct dentry *newdentry); +extern void get_derive_permissions_recursive(struct dentry *parent); + +extern void update_derived_permission_lock(struct dentry *dentry); extern int need_graft_path(struct dentry *dentry); extern int is_base_obbpath(struct dentry *dentry); extern int is_obbpath_invalid(struct dentry *dentry); @@ -483,4 +519,18 @@ static inline int check_min_free_space(struct dentry *dentry, size_t size, int d return 1; } +/* Copies attrs and maintains sdcardfs managed attrs */ +static inline void sdcardfs_copy_and_fix_attrs(struct inode *dest, const struct inode *src) +{ + dest->i_mode = (src->i_mode & S_IFMT) | get_mode(SDCARDFS_I(dest)); + dest->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(dest)->d_uid); + dest->i_gid = make_kgid(&init_user_ns, get_gid(SDCARDFS_I(dest))); + dest->i_rdev = src->i_rdev; + dest->i_atime = src->i_atime; + dest->i_mtime = src->i_mtime; + dest->i_ctime = src->i_ctime; + dest->i_blkbits = src->i_blkbits; + dest->i_flags = src->i_flags; + set_nlink(dest, src->i_nlink); +} #endif /* not _SDCARDFS_H_ */ diff --git a/fs/sdcardfs/strtok.h b/fs/sdcardfs/strtok.h deleted file mode 100644 index 50ab25aa0bc4..000000000000 --- a/fs/sdcardfs/strtok.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * fs/sdcardfs/strtok.h - * - * Copyright (c) 2013 Samsung Electronics Co. Ltd - * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, - * Sunghwan Yun, Sungjong Seo - * - * This program has been developed as a stackable file system based on - * the WrapFS which written by - * - * Copyright (c) 1998-2011 Erez Zadok - * Copyright (c) 2009 Shrikar Archak - * Copyright (c) 2003-2011 Stony Brook University - * Copyright (c) 2003-2011 The Research Foundation of SUNY - * - * This file is dual licensed. It may be redistributed and/or modified - * under the terms of the Apache 2.0 License OR version 2 of the GNU - * General Public License. - */ - -static char * -strtok_r(char *s, const char *delim, char **last) -{ - char *spanp; - int c, sc; - char *tok; - - - /* if (s == NULL && (s = *last) == NULL) - return NULL; */ - if (s == NULL) { - s = *last; - if (s == NULL) - return NULL; - } - - /* - * Skip (span) leading delimiters (s += strspn(s, delim), sort of). - */ -cont: - c = *s++; - for (spanp = (char *)delim; (sc = *spanp++) != 0;) { - if (c == sc) - goto cont; - } - - if (c == 0) { /* no non-delimiter characters */ - *last = NULL; - return NULL; - } - tok = s - 1; - - /* - * Scan token (scan for delimiters: s += strcspn(s, delim), sort of). - * Note that delim must have one NUL; we stop if we see that, too. - */ - for (;;) { - c = *s++; - spanp = (char *)delim; - do { - sc = *spanp++; - if (sc == c) { - if (c == 0) - s = NULL; - else - s[-1] = 0; - *last = s; - return tok; - } - } while (sc != 0); - } - - /* NOTREACHED */ -} - diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index f153ce1b8cf3..1d6490128c99 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -46,9 +46,6 @@ static void sdcardfs_put_super(struct super_block *sb) sdcardfs_set_lower_super(sb, NULL); atomic_dec(&s->s_active); - if(spd->pkgl_id) - packagelist_destroy(spd->pkgl_id); - kfree(spd); sb->s_fs_info = NULL; } @@ -203,12 +200,8 @@ static int sdcardfs_show_options(struct seq_file *m, struct dentry *root) if (opts->fs_low_gid != 0) seq_printf(m, ",gid=%u", opts->fs_low_gid); - if (opts->derive == DERIVE_NONE) - seq_printf(m, ",derive=none"); - else if (opts->derive == DERIVE_LEGACY) - seq_printf(m, ",derive=legacy"); - else if (opts->derive == DERIVE_UNIFIED) - seq_printf(m, ",derive=unified"); + if (opts->multiuser) + seq_printf(m, ",multiuser"); if (opts->reserved_mb != 0) seq_printf(m, ",reserved=%uMB", opts->reserved_mb); From 735236039d54a5a426e774ee275f9cb2585f30dc Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 11 Feb 2016 16:44:15 -0800 Subject: [PATCH 041/797] vfs: add d_canonical_path for stacked filesystem support Inotify does not currently know when a filesystem is acting as a wrapper around another fs. This means that inotify watchers will miss any modifications to the base file, as well as any made in a separate stacked fs that points to the same file. d_canonical_path solves this problem by allowing the fs to map a dentry to a path in the lower fs. Inotify can use it to find the appropriate place to watch to be informed of all changes to a file. Change-Id: I09563baffad1711a045e45c1bd0bd8713c2cc0b6 Signed-off-by: Daniel Rosenberg --- fs/notify/inotify/inotify_user.c | 17 ++++++++++++++--- include/linux/dcache.h | 1 + 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index b8d08d0d0a4d..9a0cd02cf3dc 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -702,6 +702,8 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, struct fsnotify_group *group; struct inode *inode; struct path path; + struct path alteredpath; + struct path *canonical_path = &path; struct fd f; int ret; unsigned flags = 0; @@ -741,13 +743,22 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, if (ret) goto fput_and_out; + /* support stacked filesystems */ + if(path.dentry && path.dentry->d_op) { + if (path.dentry->d_op->d_canonical_path) { + path.dentry->d_op->d_canonical_path(path.dentry, &alteredpath); + canonical_path = &alteredpath; + path_put(&path); + } + } + /* inode held in place by reference to path; group by fget on fd */ - inode = path.dentry->d_inode; + inode = canonical_path->dentry->d_inode; group = f.file->private_data; /* create/update an inode mark */ ret = inotify_update_watch(group, inode, mask); - path_put(&path); + path_put(canonical_path); fput_and_out: fdput(f); return ret; @@ -814,7 +825,7 @@ static int __init inotify_user_setup(void) BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); - BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21); + BUG_ON(hweight32(ALL_INOTIFY_BITS) != 22); inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 8a2e009c8a5a..e7041ec48af8 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -161,6 +161,7 @@ struct dentry_operations { struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool); struct inode *(*d_select_inode)(struct dentry *, unsigned); + void (*d_canonical_path)(const struct dentry *, struct path *); } ____cacheline_aligned; /* From 815899d23490b00d21bdc4e1d4c9bfff41670270 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 11 Feb 2016 16:53:36 -0800 Subject: [PATCH 042/797] sdcardfs: Add support for d_canonical_path Change-Id: I5d6f0e71b8ca99aec4b0894412f1dfd1cfe12add Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/dentry.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c index dbbcfd091fc7..ba165ef11e27 100644 --- a/fs/sdcardfs/dentry.c +++ b/fs/sdcardfs/dentry.c @@ -177,5 +177,6 @@ const struct dentry_operations sdcardfs_ci_dops = { .d_release = sdcardfs_d_release, .d_hash = sdcardfs_hash_ci, .d_compare = sdcardfs_cmp_ci, + .d_canonical_path = sdcardfs_get_real_lower, }; From 7f2bcf32a33b8b4e91a0cbf3b3b735fa8e561458 Mon Sep 17 00:00:00 2001 From: Thierry Strudel Date: Wed, 23 Mar 2016 10:02:15 -0700 Subject: [PATCH 043/797] trace: cpufreq: fix typo in min/max cpufreq Change-Id: Ieed402d3a912b7a318826e101efe2c24b07ebfe4 Signed-off-by: Thierry Strudel --- include/trace/events/power.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/power.h b/include/trace/events/power.h index de77035567c4..9af0d898016a 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -135,7 +135,7 @@ TRACE_EVENT(cpu_frequency_limits, TP_fast_assign( __entry->min_freq = min_freq; - __entry->max_freq = min_freq; + __entry->max_freq = max_freq; __entry->cpu_id = cpu_id; ), From 317b853517bb19ee58894f211d962267755a5f0d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 23 Mar 2016 08:32:23 -0700 Subject: [PATCH 044/797] fs: sdcardfs: Declare LOOKUP_CASE_INSENSITIVE unconditionally Attempts to build sdcardfs as module fail with fs/sdcardfs/lookup.c: In function '__sdcardfs_lookup': fs/sdcardfs/lookup.c:243:5: error: 'LOOKUP_CASE_INSENSITIVE' undeclared This occurs because the define is enclosed with #ifdef CONFIG_SDCARD_FS_CI_SEARCH. If SDCARD_FS_CI_SEARCH is configured to be built as module, this does not work. Alternatives would be to use #if IS_ENABLED(CONFIG_SDCARD_FS_CI_SEARCH), or to declare SDCARD_FS_CI_SEARCH as bool, but that does not work because the define is used unconditionally in the source. Note that LOOKUP_CASE_INSENSITIVE is only set but not evaluated in the current source code, so setting the flag has no real effect. Fixes: 84a1b7d3d312 ("Included sdcardfs source code for kernel 3.0") Cc: Daniel Rosenberg Signed-off-by: Guenter Roeck --- include/linux/namei.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/namei.h b/include/linux/namei.h index f2b8acbdb928..47b53673ec61 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -43,9 +43,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_JUMPED 0x1000 #define LOOKUP_ROOT 0x2000 #define LOOKUP_EMPTY 0x4000 -#ifdef CONFIG_SDCARD_FS_CI_SEARCH #define LOOKUP_CASE_INSENSITIVE 0x8000 -#endif extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty); From ce15174caf1ce927a9255b23438977411082d20d Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 23 Mar 2016 12:09:25 -0700 Subject: [PATCH 045/797] inotify: Fix erroneous update of bit count Patch "vfs: add d_canonical_path for stacked filesystem support" erroneously updated the ALL_INOTIFY_BITS count. This changes it back Change-Id: Idb04edc736da276159d30f04c40cff9d6b1e070f --- fs/notify/inotify/inotify_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 9a0cd02cf3dc..f72f3b25b3f2 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -825,7 +825,7 @@ static int __init inotify_user_setup(void) BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); - BUG_ON(hweight32(ALL_INOTIFY_BITS) != 22); + BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21); inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC); From 1a98a1b1e6af9dbd7a0d4253b13f9d2d83124053 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 23 Mar 2016 16:39:30 -0700 Subject: [PATCH 046/797] sdcardfs: remove effectless config option CONFIG_SDCARD_FS_CI_SEARCH only guards a define for LOOKUP_CASE_INSENSITIVE, which is never used in the kernel. Remove both, along with the option matching that supports it. Change-Id: I363a8f31de8ee7a7a934d75300cc9ba8176e2edf Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/Kconfig | 5 ----- fs/sdcardfs/lookup.c | 7 +------ fs/sdcardfs/main.c | 15 --------------- fs/sdcardfs/sdcardfs.h | 6 ------ include/linux/namei.h | 1 - 5 files changed, 1 insertion(+), 33 deletions(-) diff --git a/fs/sdcardfs/Kconfig b/fs/sdcardfs/Kconfig index ab25f88ebb37..a1c103316ac7 100644 --- a/fs/sdcardfs/Kconfig +++ b/fs/sdcardfs/Kconfig @@ -11,8 +11,3 @@ config SDCARD_FS_FADV_NOACTIVE default y help Sdcardfs supports fadvise noactive mode. - -config SDCARD_FS_CI_SEARCH - tristate "sdcardfs case-insensitive search support" - depends on SDCARD_FS - default y diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index f80abcb6b467..a01b06a514fd 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -238,13 +238,8 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, lower_dir_mnt = lower_parent_path->mnt; /* Use vfs_path_lookup to check if the dentry exists or not */ - if (sbi->options.lower_fs == LOWER_FS_EXT4) { - err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, - LOOKUP_CASE_INSENSITIVE, &lower_path); - } else if (sbi->options.lower_fs == LOWER_FS_FAT) { - err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, + err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, &lower_path); - } /* no error: handle positive dentries */ if (!err) { diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 80aa355d801e..fa11a0458b84 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -41,7 +41,6 @@ static const match_table_t sdcardfs_tokens = { {Opt_fsgid, "fsgid=%u"}, {Opt_gid, "gid=%u"}, {Opt_debug, "debug"}, - {Opt_lower_fs, "lower_fs=%s"}, {Opt_mask, "mask=%u"}, {Opt_userid, "userid=%d"}, {Opt_multiuser, "multiuser"}, @@ -64,8 +63,6 @@ static int parse_options(struct super_block *sb, char *options, int silent, opts->multiuser = false; opts->fs_user_id = 0; opts->gid = 0; - /* by default, we use LOWER_FS_EXT4 as lower fs type */ - opts->lower_fs = LOWER_FS_EXT4; /* by default, 0MB is reserved */ opts->reserved_mb = 0; @@ -113,18 +110,6 @@ static int parse_options(struct super_block *sb, char *options, int silent, case Opt_multiuser: opts->multiuser = true; break; - case Opt_lower_fs: - string_option = match_strdup(&args[0]); - if (!strcmp("ext4", string_option)) { - opts->lower_fs = LOWER_FS_EXT4; - } else if (!strcmp("fat", string_option)) { - opts->lower_fs = LOWER_FS_FAT; - } else { - kfree(string_option); - goto invalid_option; - } - kfree(string_option); - break; case Opt_reserved_mb: if (match_int(&args[0], &option)) return 0; diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 1b85f4e70324..f111f898b630 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -123,11 +123,6 @@ typedef enum { PERM_ANDROID_MEDIA, } perm_t; -typedef enum { - LOWER_FS_EXT4, - LOWER_FS_FAT, -} lower_fs_t; - struct sdcardfs_sb_info; struct sdcardfs_mount_options; @@ -191,7 +186,6 @@ struct sdcardfs_mount_options { gid_t fs_low_gid; userid_t fs_user_id; gid_t gid; - lower_fs_t lower_fs; mode_t mask; bool multiuser; unsigned int reserved_mb; diff --git a/include/linux/namei.h b/include/linux/namei.h index 47b53673ec61..d53c25453aca 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -43,7 +43,6 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_JUMPED 0x1000 #define LOOKUP_ROOT 0x2000 #define LOOKUP_EMPTY 0x4000 -#define LOOKUP_CASE_INSENSITIVE 0x8000 extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty); From d023b7c5eef07aa8273710ff7625e7a512aa90a8 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 24 Mar 2016 10:32:35 -0700 Subject: [PATCH 047/797] fs: Export d_absolute_path The 0-day build bot reports the following build error, seen if SDCARD_FS is built as module. ERROR: "d_absolute_path" undefined! Fixes: 84a1b7d3d312 ("Included sdcardfs source code for kernel 3.0") Reported-by: Fengguang Wu Signed-off-by: Guenter Roeck --- fs/dcache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/dcache.c b/fs/dcache.c index 877bcbbd03ff..24190e8b7860 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3017,6 +3017,7 @@ char *d_absolute_path(const struct path *path, return ERR_PTR(error); return res; } +EXPORT_SYMBOL(d_absolute_path); /* * same as __d_path but appends "(deleted)" for unlinked files. From c4fcf3f1c10ce5cfd7ce57197798035ba0436799 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 28 Mar 2016 15:00:20 -0700 Subject: [PATCH 048/797] sdcardfs: Remove unused code Change-Id: Ie97cba27ce44818ac56cfe40954f164ad44eccf6 --- fs/sdcardfs/main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index fa11a0458b84..a6522286d731 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -54,7 +54,6 @@ static int parse_options(struct super_block *sb, char *options, int silent, char *p; substring_t args[MAX_OPT_ARGS]; int option; - char *string_option; /* by default, we use AID_MEDIA_RW as uid, gid */ opts->fs_low_uid = AID_MEDIA_RW; @@ -117,7 +116,6 @@ static int parse_options(struct super_block *sb, char *options, int silent, break; /* unknown option */ default: -invalid_option: if (!silent) { printk( KERN_ERR "Unrecognized mount option \"%s\" " "or missing value", p); From 30e5f6e073ced638dc973819d09362002275f476 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 28 Mar 2016 16:00:34 -0700 Subject: [PATCH 049/797] sdcardfs: remove unneeded __init and __exit Change-Id: I2a2d45d52f891332174c3000e8681c5167c1564f --- fs/sdcardfs/packagelist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c index ba3478d94107..10f0d6be718b 100644 --- a/fs/sdcardfs/packagelist.c +++ b/fs/sdcardfs/packagelist.c @@ -392,7 +392,7 @@ static struct configfs_subsystem sdcardfs_packages_subsys = { }, }; -static int __init configfs_sdcardfs_init(void) +static int configfs_sdcardfs_init(void) { int ret; struct configfs_subsystem *subsys = &sdcardfs_packages_subsys; @@ -408,7 +408,7 @@ static int __init configfs_sdcardfs_init(void) return ret; } -static void __exit configfs_sdcardfs_exit(void) +static void configfs_sdcardfs_exit(void) { configfs_unregister_subsystem(&sdcardfs_packages_subsys); } From a7b7a225c1ba4ea849a74e90b937b64c10c35e1f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 24 Mar 2016 10:39:14 -0700 Subject: [PATCH 050/797] mm: Export do_munmap The 0-day build bot reports the following build error, seen if SDCARD_FS is built as module. ERROR: "do_munmap" undefined! Fixes: 84a1b7d3d312 ("Included sdcardfs source code for kernel 3.0") Reported-by: Fengguang Wu Signed-off-by: Guenter Roeck --- mm/mmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/mmap.c b/mm/mmap.c index 6c561acdca92..a089cca8d79a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2634,6 +2634,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) return 0; } +EXPORT_SYMBOL(do_munmap); int vm_munmap(unsigned long start, size_t len) { From 47733418c908d9139692f286304c9db36fbe1a8d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 25 Nov 2015 16:03:31 -0500 Subject: [PATCH 051/797] UPSTREAM: dm: don't save and restore bi_private Device mapper used the field bi_private to point to dm_target_io. However, since kernel 3.15, the bi_private field is unused, and so the targets do not need to save and restore this field. This patch removes code that saves and restores bi_private from dm-cache, dm-snapshot and dm-verity. Change-Id: Ic72905ccb6d58ff94eafaa47ba54b2688d92d3d1 Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer (cherry picked from commit fe3265b180d6282648f03bc6ac3958c733df01c2) --- drivers/md/dm-cache-target.c | 3 --- drivers/md/dm-snap.c | 6 +----- drivers/md/dm-verity.c | 5 +---- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 2fd4c8296144..5780accffa30 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -118,14 +118,12 @@ static void iot_io_end(struct io_tracker *iot, sector_t len) */ struct dm_hook_info { bio_end_io_t *bi_end_io; - void *bi_private; }; static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio, bio_end_io_t *bi_end_io, void *bi_private) { h->bi_end_io = bio->bi_end_io; - h->bi_private = bio->bi_private; bio->bi_end_io = bi_end_io; bio->bi_private = bi_private; @@ -134,7 +132,6 @@ static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio, static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio) { bio->bi_end_io = h->bi_end_io; - bio->bi_private = h->bi_private; } /*----------------------------------------------------------------*/ diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 61f184ad081c..3766386080a4 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -207,7 +207,6 @@ struct dm_snap_pending_exception { */ struct bio *full_bio; bio_end_io_t *full_bio_end_io; - void *full_bio_private; }; /* @@ -1486,10 +1485,8 @@ static void pending_complete(void *context, int success) snapshot_bios = bio_list_get(&pe->snapshot_bios); origin_bios = bio_list_get(&pe->origin_bios); full_bio = pe->full_bio; - if (full_bio) { + if (full_bio) full_bio->bi_end_io = pe->full_bio_end_io; - full_bio->bi_private = pe->full_bio_private; - } increment_pending_exceptions_done_count(); up_write(&s->lock); @@ -1595,7 +1592,6 @@ static void start_full_bio(struct dm_snap_pending_exception *pe, pe->full_bio = bio; pe->full_bio_end_io = bio->bi_end_io; - pe->full_bio_private = bio->bi_private; callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, copy_callback, pe); diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index ccf41886ebcf..9e8891507c1c 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -83,9 +83,8 @@ struct dm_verity { struct dm_verity_io { struct dm_verity *v; - /* original values of bio->bi_end_io and bio->bi_private */ + /* original value of bio->bi_end_io */ bio_end_io_t *orig_bi_end_io; - void *orig_bi_private; sector_t block; unsigned n_blocks; @@ -453,7 +452,6 @@ static void verity_finish_io(struct dm_verity_io *io, int error) struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size); bio->bi_end_io = io->orig_bi_end_io; - bio->bi_private = io->orig_bi_private; bio->bi_error = error; bio_endio(bio); @@ -566,7 +564,6 @@ static int verity_map(struct dm_target *ti, struct bio *bio) io = dm_per_bio_data(bio, ti->per_bio_data_size); io->v = v; io->orig_bi_end_io = bio->bi_end_io; - io->orig_bi_private = bio->bi_private; io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT); io->n_blocks = bio->bi_iter.bi_size >> v->data_dev_block_bits; From 00e28c2ab88ad154a38a4b8233f94b4e1a4e35f0 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 5 Nov 2015 02:02:31 +0000 Subject: [PATCH 052/797] UPSTREAM: dm verity: clean up duplicate hashing code Handle dm-verity salting in one place to simplify the code. Change-Id: If923a01dc63ae5123af13ba1b0863b73e33ddf46 Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Signed-off-by: Mike Snitzer (cherry picked from commit 6dbeda3469ced777bc3138ed5918f7ae79670b7b) --- drivers/md/dm-verity.c | 266 +++++++++++++++++++++++------------------ 1 file changed, 149 insertions(+), 117 deletions(-) diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 9e8891507c1c..24517055bd8e 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -172,6 +172,84 @@ static sector_t verity_position_at_level(struct dm_verity *v, sector_t block, return block >> (level * v->hash_per_block_bits); } +/* + * Wrapper for crypto_shash_init, which handles verity salting. + */ +static int verity_hash_init(struct dm_verity *v, struct shash_desc *desc) +{ + int r; + + desc->tfm = v->tfm; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + + r = crypto_shash_init(desc); + + if (unlikely(r < 0)) { + DMERR("crypto_shash_init failed: %d", r); + return r; + } + + if (likely(v->version >= 1)) { + r = crypto_shash_update(desc, v->salt, v->salt_size); + + if (unlikely(r < 0)) { + DMERR("crypto_shash_update failed: %d", r); + return r; + } + } + + return 0; +} + +static int verity_hash_update(struct dm_verity *v, struct shash_desc *desc, + const u8 *data, size_t len) +{ + int r = crypto_shash_update(desc, data, len); + + if (unlikely(r < 0)) + DMERR("crypto_shash_update failed: %d", r); + + return r; +} + +static int verity_hash_final(struct dm_verity *v, struct shash_desc *desc, + u8 *digest) +{ + int r; + + if (unlikely(!v->version)) { + r = crypto_shash_update(desc, v->salt, v->salt_size); + + if (r < 0) { + DMERR("crypto_shash_update failed: %d", r); + return r; + } + } + + r = crypto_shash_final(desc, digest); + + if (unlikely(r < 0)) + DMERR("crypto_shash_final failed: %d", r); + + return r; +} + +static int verity_hash(struct dm_verity *v, struct shash_desc *desc, + const u8 *data, size_t len, u8 *digest) +{ + int r; + + r = verity_hash_init(v, desc); + if (unlikely(r < 0)) + return r; + + r = verity_hash_update(v, desc, data, len); + if (unlikely(r < 0)) + return r; + + return verity_hash_final(v, desc, digest); +} + static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level, sector_t *hash_block, unsigned *offset) { @@ -252,10 +330,10 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type, * If "skip_unverified" is false, unverified buffer is hashed and verified * against current value of io_want_digest(v, io). */ -static int verity_verify_level(struct dm_verity_io *io, sector_t block, - int level, bool skip_unverified) +static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, + sector_t block, int level, bool skip_unverified, + u8 *want_digest) { - struct dm_verity *v = io->v; struct dm_buffer *buf; struct buffer_aux *aux; u8 *data; @@ -272,74 +350,71 @@ static int verity_verify_level(struct dm_verity_io *io, sector_t block, aux = dm_bufio_get_aux_data(buf); if (!aux->hash_verified) { - struct shash_desc *desc; - u8 *result; - if (skip_unverified) { r = 1; goto release_ret_r; } - desc = io_hash_desc(v, io); - desc->tfm = v->tfm; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; - r = crypto_shash_init(desc); - if (r < 0) { - DMERR("crypto_shash_init failed: %d", r); + r = verity_hash(v, io_hash_desc(v, io), + data, 1 << v->hash_dev_block_bits, + io_real_digest(v, io)); + if (unlikely(r < 0)) goto release_ret_r; - } - if (likely(v->version >= 1)) { - r = crypto_shash_update(desc, v->salt, v->salt_size); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - goto release_ret_r; - } - } - - r = crypto_shash_update(desc, data, 1 << v->hash_dev_block_bits); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - goto release_ret_r; - } - - if (!v->version) { - r = crypto_shash_update(desc, v->salt, v->salt_size); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - goto release_ret_r; - } - } - - result = io_real_digest(v, io); - r = crypto_shash_final(desc, result); - if (r < 0) { - DMERR("crypto_shash_final failed: %d", r); - goto release_ret_r; - } - if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) { - if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA, - hash_block)) { - r = -EIO; - goto release_ret_r; - } - } else + if (likely(memcmp(io_real_digest(v, io), want_digest, + v->digest_size) == 0)) aux->hash_verified = 1; + else if (verity_handle_err(v, + DM_VERITY_BLOCK_TYPE_METADATA, + hash_block)) { + r = -EIO; + goto release_ret_r; + } } data += offset; - - memcpy(io_want_digest(v, io), data, v->digest_size); - - dm_bufio_release(buf); - return 0; + memcpy(want_digest, data, v->digest_size); + r = 0; release_ret_r: dm_bufio_release(buf); - return r; } +/* + * Find a hash for a given block, write it to digest and verify the integrity + * of the hash tree if necessary. + */ +static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, + sector_t block, u8 *digest) +{ + int i; + int r; + + if (likely(v->levels)) { + /* + * First, we try to get the requested hash for + * the current block. If the hash block itself is + * verified, zero is returned. If it isn't, this + * function returns 1 and we fall back to whole + * chain verification. + */ + r = verity_verify_level(v, io, block, 0, true, digest); + if (likely(r <= 0)) + return r; + } + + memcpy(digest, v->root_digest, v->digest_size); + + for (i = v->levels - 1; i >= 0; i--) { + r = verity_verify_level(v, io, block, i, false, digest); + if (unlikely(r)) + return r; + } + + return 0; +} + /* * Verify one "dm_verity_io" structure. */ @@ -349,54 +424,21 @@ static int verity_verify_io(struct dm_verity_io *io) struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size); unsigned b; - int i; for (b = 0; b < io->n_blocks; b++) { - struct shash_desc *desc; - u8 *result; int r; unsigned todo; + struct shash_desc *desc = io_hash_desc(v, io); - if (likely(v->levels)) { - /* - * First, we try to get the requested hash for - * the current block. If the hash block itself is - * verified, zero is returned. If it isn't, this - * function returns 0 and we fall back to whole - * chain verification. - */ - int r = verity_verify_level(io, io->block + b, 0, true); - if (likely(!r)) - goto test_block_hash; - if (r < 0) - return r; - } - - memcpy(io_want_digest(v, io), v->root_digest, v->digest_size); - - for (i = v->levels - 1; i >= 0; i--) { - int r = verity_verify_level(io, io->block + b, i, false); - if (unlikely(r)) - return r; - } - -test_block_hash: - desc = io_hash_desc(v, io); - desc->tfm = v->tfm; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; - r = crypto_shash_init(desc); - if (r < 0) { - DMERR("crypto_shash_init failed: %d", r); + r = verity_hash_for_block(v, io, io->block + b, + io_want_digest(v, io)); + if (unlikely(r < 0)) + return r; + + r = verity_hash_init(v, desc); + if (unlikely(r < 0)) return r; - } - if (likely(v->version >= 1)) { - r = crypto_shash_update(desc, v->salt, v->salt_size); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - return r; - } - } todo = 1 << v->data_dev_block_bits; do { u8 *page; @@ -407,37 +449,27 @@ static int verity_verify_io(struct dm_verity_io *io) len = bv.bv_len; if (likely(len >= todo)) len = todo; - r = crypto_shash_update(desc, page + bv.bv_offset, len); + r = verity_hash_update(v, desc, page + bv.bv_offset, + len); kunmap_atomic(page); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); + if (unlikely(r < 0)) return r; - } bio_advance_iter(bio, &io->iter, len); todo -= len; } while (todo); - if (!v->version) { - r = crypto_shash_update(desc, v->salt, v->salt_size); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - return r; - } - } - - result = io_real_digest(v, io); - r = crypto_shash_final(desc, result); - if (r < 0) { - DMERR("crypto_shash_final failed: %d", r); + r = verity_hash_final(v, desc, io_real_digest(v, io)); + if (unlikely(r < 0)) return r; - } - if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) { - if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, - io->block + b)) - return -EIO; - } + + if (likely(memcmp(io_real_digest(v, io), + io_want_digest(v, io), v->digest_size) == 0)) + continue; + else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, + io->block + b)) + return -EIO; } return 0; From e6d1b9a713f90c87a5a2b2393c42ef13c392e047 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 5 Nov 2015 02:02:32 +0000 Subject: [PATCH 053/797] UPSTREAM: dm verity: separate function for parsing opt args Move optional argument parsing into a separate function to make it easier to add more of them without making verity_ctr even longer. Change-Id: I9cd9df41c3326824f8cca5764075501987e78a52 Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Signed-off-by: Mike Snitzer (cherry picked from commit 753c1fd02807cb43a1c5d01d75d454054d46bdad) --- drivers/md/dm-verity.c | 71 +++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 28 deletions(-) diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 24517055bd8e..b0a53c3b926d 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -34,6 +34,8 @@ #define DM_VERITY_OPT_LOGGING "ignore_corruption" #define DM_VERITY_OPT_RESTART "restart_on_corruption" +#define DM_VERITY_OPTS_MAX 1 + static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR); @@ -721,6 +723,44 @@ static void verity_dtr(struct dm_target *ti) kfree(v); } +static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) +{ + int r; + unsigned argc; + struct dm_target *ti = v->ti; + const char *arg_name; + + static struct dm_arg _args[] = { + {0, DM_VERITY_OPTS_MAX, "Invalid number of feature args"}, + }; + + r = dm_read_arg_group(_args, as, &argc, &ti->error); + if (r) + return -EINVAL; + + if (!argc) + return 0; + + do { + arg_name = dm_shift_arg(as); + argc--; + + if (!strcasecmp(arg_name, DM_VERITY_OPT_LOGGING)) { + v->mode = DM_VERITY_MODE_LOGGING; + continue; + + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) { + v->mode = DM_VERITY_MODE_RESTART; + continue; + } + + ti->error = "Unrecognized verity feature request"; + return -EINVAL; + } while (argc && !r); + + return r; +} + /* * Target parameters: * The current format is version 1. @@ -739,18 +779,13 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) { struct dm_verity *v; struct dm_arg_set as; - const char *opt_string; - unsigned int num, opt_params; + unsigned int num; unsigned long long num_ll; int r; int i; sector_t hash_position; char dummy; - static struct dm_arg _args[] = { - {0, 1, "Invalid number of feature args"}, - }; - v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL); if (!v) { ti->error = "Cannot allocate verity structure"; @@ -895,29 +930,9 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) as.argc = argc; as.argv = argv; - r = dm_read_arg_group(_args, &as, &opt_params, &ti->error); - if (r) + r = verity_parse_opt_args(&as, v); + if (r < 0) goto bad; - - while (opt_params) { - opt_params--; - opt_string = dm_shift_arg(&as); - if (!opt_string) { - ti->error = "Not enough feature arguments"; - r = -EINVAL; - goto bad; - } - - if (!strcasecmp(opt_string, DM_VERITY_OPT_LOGGING)) - v->mode = DM_VERITY_MODE_LOGGING; - else if (!strcasecmp(opt_string, DM_VERITY_OPT_RESTART)) - v->mode = DM_VERITY_MODE_RESTART; - else { - ti->error = "Invalid feature arguments"; - r = -EINVAL; - goto bad; - } - } } v->hash_per_block_bits = From 612ce5278f2070c7d5d85b7e82b17e0231d5ede7 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 3 Dec 2015 15:36:00 -0500 Subject: [PATCH 054/797] UPSTREAM: dm verity: move dm-verity.c to dm-verity-target.c Prepare for extending dm-verity with an optional object. Follows the naming convention used by other DM targets (e.g. dm-cache and dm-era). Change-Id: If6d2f27b290adf14fa77f3745fdc13aaa417c8dc Signed-off-by: Sami Tolvanen Signed-off-by: Mike Snitzer (cherry picked from commit 03045cbafa2d663ad8d0a583ac219d202d824344) --- drivers/md/Makefile | 1 + drivers/md/{dm-verity.c => dm-verity-target.c} | 0 2 files changed, 1 insertion(+) rename drivers/md/{dm-verity.c => dm-verity-target.c} (100%) diff --git a/drivers/md/Makefile b/drivers/md/Makefile index f34979cd141a..94e9f6bb33d1 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -16,6 +16,7 @@ dm-cache-mq-y += dm-cache-policy-mq.o dm-cache-smq-y += dm-cache-policy-smq.o dm-cache-cleaner-y += dm-cache-policy-cleaner.o dm-era-y += dm-era-target.o +dm-verity-y += dm-verity-target.o md-mod-y += md.o bitmap.o raid456-y += raid5.o raid5-cache.o diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity-target.c similarity index 100% rename from drivers/md/dm-verity.c rename to drivers/md/dm-verity-target.c From 0be563d6136d94c126ecc5bb352dfa6088d6d8dd Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 3 Dec 2015 16:01:51 -0500 Subject: [PATCH 055/797] UPSTREAM: dm verity: factor out structures and functions useful to separate object Prepare for an optional verity object to make use of existing dm-verity structures and functions. Change-Id: Ib14c3834bfed222b33e068908fb5f71a53e1187b Signed-off-by: Sami Tolvanen Signed-off-by: Mike Snitzer (cherry picked from commit ffa393807cd69656d5b6bc9d9622e205071cbab8) --- drivers/md/dm-verity-target.c | 116 +++++----------------------------- drivers/md/dm-verity.h | 112 ++++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+), 100 deletions(-) create mode 100644 drivers/md/dm-verity.h diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index b0a53c3b926d..7e200ba631fb 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -14,12 +14,10 @@ * access behavior. */ -#include "dm-bufio.h" +#include "dm-verity.h" #include -#include #include -#include #define DM_MSG_PREFIX "verity" @@ -28,7 +26,6 @@ #define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144 -#define DM_VERITY_MAX_LEVELS 63 #define DM_VERITY_MAX_CORRUPTED_ERRS 100 #define DM_VERITY_OPT_LOGGING "ignore_corruption" @@ -40,72 +37,6 @@ static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR); -enum verity_mode { - DM_VERITY_MODE_EIO, - DM_VERITY_MODE_LOGGING, - DM_VERITY_MODE_RESTART -}; - -enum verity_block_type { - DM_VERITY_BLOCK_TYPE_DATA, - DM_VERITY_BLOCK_TYPE_METADATA -}; - -struct dm_verity { - struct dm_dev *data_dev; - struct dm_dev *hash_dev; - struct dm_target *ti; - struct dm_bufio_client *bufio; - char *alg_name; - struct crypto_shash *tfm; - u8 *root_digest; /* digest of the root block */ - u8 *salt; /* salt: its size is salt_size */ - unsigned salt_size; - sector_t data_start; /* data offset in 512-byte sectors */ - sector_t hash_start; /* hash start in blocks */ - sector_t data_blocks; /* the number of data blocks */ - sector_t hash_blocks; /* the number of hash blocks */ - unsigned char data_dev_block_bits; /* log2(data blocksize) */ - unsigned char hash_dev_block_bits; /* log2(hash blocksize) */ - unsigned char hash_per_block_bits; /* log2(hashes in hash block) */ - unsigned char levels; /* the number of tree levels */ - unsigned char version; - unsigned digest_size; /* digest size for the current hash algorithm */ - unsigned shash_descsize;/* the size of temporary space for crypto */ - int hash_failed; /* set to 1 if hash of any block failed */ - enum verity_mode mode; /* mode for handling verification errors */ - unsigned corrupted_errs;/* Number of errors for corrupted blocks */ - - struct workqueue_struct *verify_wq; - - /* starting blocks for each tree level. 0 is the lowest level. */ - sector_t hash_level_block[DM_VERITY_MAX_LEVELS]; -}; - -struct dm_verity_io { - struct dm_verity *v; - - /* original value of bio->bi_end_io */ - bio_end_io_t *orig_bi_end_io; - - sector_t block; - unsigned n_blocks; - - struct bvec_iter iter; - - struct work_struct work; - - /* - * Three variably-size fields follow this struct: - * - * u8 hash_desc[v->shash_descsize]; - * u8 real_digest[v->digest_size]; - * u8 want_digest[v->digest_size]; - * - * To access them use: io_hash_desc(), io_real_digest() and io_want_digest(). - */ -}; - struct dm_verity_prefetch_work { struct work_struct work; struct dm_verity *v; @@ -113,21 +44,6 @@ struct dm_verity_prefetch_work { unsigned n_blocks; }; -static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io) -{ - return (struct shash_desc *)(io + 1); -} - -static u8 *io_real_digest(struct dm_verity *v, struct dm_verity_io *io) -{ - return (u8 *)(io + 1) + v->shash_descsize; -} - -static u8 *io_want_digest(struct dm_verity *v, struct dm_verity_io *io) -{ - return (u8 *)(io + 1) + v->shash_descsize + v->digest_size; -} - /* * Auxiliary structure appended to each dm-bufio buffer. If the value * hash_verified is nonzero, hash of the block has been verified. @@ -236,8 +152,8 @@ static int verity_hash_final(struct dm_verity *v, struct shash_desc *desc, return r; } -static int verity_hash(struct dm_verity *v, struct shash_desc *desc, - const u8 *data, size_t len, u8 *digest) +int verity_hash(struct dm_verity *v, struct shash_desc *desc, + const u8 *data, size_t len, u8 *digest) { int r; @@ -325,12 +241,12 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type, * Verify hash of a metadata block pertaining to the specified data block * ("block" argument) at a specified level ("level" argument). * - * On successful return, io_want_digest(v, io) contains the hash value for - * a lower tree level or for the data block (if we're at the lowest leve). + * On successful return, verity_io_want_digest(v, io) contains the hash value + * for a lower tree level or for the data block (if we're at the lowest level). * * If "skip_unverified" is true, unverified buffer is skipped and 1 is returned. * If "skip_unverified" is false, unverified buffer is hashed and verified - * against current value of io_want_digest(v, io). + * against current value of verity_io_want_digest(v, io). */ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, sector_t block, int level, bool skip_unverified, @@ -357,13 +273,13 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, goto release_ret_r; } - r = verity_hash(v, io_hash_desc(v, io), + r = verity_hash(v, verity_io_hash_desc(v, io), data, 1 << v->hash_dev_block_bits, - io_real_digest(v, io)); + verity_io_real_digest(v, io)); if (unlikely(r < 0)) goto release_ret_r; - if (likely(memcmp(io_real_digest(v, io), want_digest, + if (likely(memcmp(verity_io_real_digest(v, io), want_digest, v->digest_size) == 0)) aux->hash_verified = 1; else if (verity_handle_err(v, @@ -387,8 +303,8 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, * Find a hash for a given block, write it to digest and verify the integrity * of the hash tree if necessary. */ -static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, - sector_t block, u8 *digest) +int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, + sector_t block, u8 *digest) { int i; int r; @@ -430,10 +346,10 @@ static int verity_verify_io(struct dm_verity_io *io) for (b = 0; b < io->n_blocks; b++) { int r; unsigned todo; - struct shash_desc *desc = io_hash_desc(v, io); + struct shash_desc *desc = verity_io_hash_desc(v, io); r = verity_hash_for_block(v, io, io->block + b, - io_want_digest(v, io)); + verity_io_want_digest(v, io)); if (unlikely(r < 0)) return r; @@ -462,12 +378,12 @@ static int verity_verify_io(struct dm_verity_io *io) todo -= len; } while (todo); - r = verity_hash_final(v, desc, io_real_digest(v, io)); + r = verity_hash_final(v, desc, verity_io_real_digest(v, io)); if (unlikely(r < 0)) return r; - if (likely(memcmp(io_real_digest(v, io), - io_want_digest(v, io), v->digest_size) == 0)) + if (likely(memcmp(verity_io_real_digest(v, io), + verity_io_want_digest(v, io), v->digest_size) == 0)) continue; else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, io->block + b)) diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h new file mode 100644 index 000000000000..c7ad4fd05188 --- /dev/null +++ b/drivers/md/dm-verity.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * Copyright (C) 2015 Google, Inc. + * + * Author: Mikulas Patocka + * + * Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors + * + * This file is released under the GPLv2. + */ + +#ifndef DM_VERITY_H +#define DM_VERITY_H + +#include "dm-bufio.h" +#include +#include + +#define DM_VERITY_MAX_LEVELS 63 + +enum verity_mode { + DM_VERITY_MODE_EIO, + DM_VERITY_MODE_LOGGING, + DM_VERITY_MODE_RESTART +}; + +enum verity_block_type { + DM_VERITY_BLOCK_TYPE_DATA, + DM_VERITY_BLOCK_TYPE_METADATA +}; + +struct dm_verity { + struct dm_dev *data_dev; + struct dm_dev *hash_dev; + struct dm_target *ti; + struct dm_bufio_client *bufio; + char *alg_name; + struct crypto_shash *tfm; + u8 *root_digest; /* digest of the root block */ + u8 *salt; /* salt: its size is salt_size */ + unsigned salt_size; + sector_t data_start; /* data offset in 512-byte sectors */ + sector_t hash_start; /* hash start in blocks */ + sector_t data_blocks; /* the number of data blocks */ + sector_t hash_blocks; /* the number of hash blocks */ + unsigned char data_dev_block_bits; /* log2(data blocksize) */ + unsigned char hash_dev_block_bits; /* log2(hash blocksize) */ + unsigned char hash_per_block_bits; /* log2(hashes in hash block) */ + unsigned char levels; /* the number of tree levels */ + unsigned char version; + unsigned digest_size; /* digest size for the current hash algorithm */ + unsigned shash_descsize;/* the size of temporary space for crypto */ + int hash_failed; /* set to 1 if hash of any block failed */ + enum verity_mode mode; /* mode for handling verification errors */ + unsigned corrupted_errs;/* Number of errors for corrupted blocks */ + + struct workqueue_struct *verify_wq; + + /* starting blocks for each tree level. 0 is the lowest level. */ + sector_t hash_level_block[DM_VERITY_MAX_LEVELS]; +}; + +struct dm_verity_io { + struct dm_verity *v; + + /* original value of bio->bi_end_io */ + bio_end_io_t *orig_bi_end_io; + + sector_t block; + unsigned n_blocks; + + struct bvec_iter iter; + + struct work_struct work; + + /* + * Three variably-size fields follow this struct: + * + * u8 hash_desc[v->shash_descsize]; + * u8 real_digest[v->digest_size]; + * u8 want_digest[v->digest_size]; + * + * To access them use: verity_io_hash_desc(), verity_io_real_digest() + * and verity_io_want_digest(). + */ +}; + +static inline struct shash_desc *verity_io_hash_desc(struct dm_verity *v, + struct dm_verity_io *io) +{ + return (struct shash_desc *)(io + 1); +} + +static inline u8 *verity_io_real_digest(struct dm_verity *v, + struct dm_verity_io *io) +{ + return (u8 *)(io + 1) + v->shash_descsize; +} + +static inline u8 *verity_io_want_digest(struct dm_verity *v, + struct dm_verity_io *io) +{ + return (u8 *)(io + 1) + v->shash_descsize + v->digest_size; +} + +extern int verity_hash(struct dm_verity *v, struct shash_desc *desc, + const u8 *data, size_t len, u8 *digest); + +extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, + sector_t block, u8 *digest); + +#endif /* DM_VERITY_H */ From c81331d2f4248c76f8d68a0efedb691fe1343596 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 3 Dec 2015 16:30:36 -0500 Subject: [PATCH 056/797] UPSTREAM: dm verity: factor out verity_for_bv_block() verity_for_bv_block() will be re-used by optional dm-verity object. Change-Id: I80e0f8e7c9f234fce3fbdf21cb05aba3041d7f98 Signed-off-by: Sami Tolvanen Signed-off-by: Mike Snitzer (cherry picked from commit bb4d73ac5e4f0a6c4853f35824f6cb2d396a2f9c) --- drivers/md/dm-verity-target.c | 72 ++++++++++++++++++++++++----------- drivers/md/dm-verity.h | 6 +++ 2 files changed, 55 insertions(+), 23 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 7e200ba631fb..2b0ee52d1ad8 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -333,19 +333,61 @@ int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, return 0; } +/* + * Calls function process for 1 << v->data_dev_block_bits bytes in the bio_vec + * starting from iter. + */ +int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter *iter, + int (*process)(struct dm_verity *v, + struct dm_verity_io *io, u8 *data, + size_t len)) +{ + unsigned todo = 1 << v->data_dev_block_bits; + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size); + + do { + int r; + u8 *page; + unsigned len; + struct bio_vec bv = bio_iter_iovec(bio, *iter); + + page = kmap_atomic(bv.bv_page); + len = bv.bv_len; + + if (likely(len >= todo)) + len = todo; + + r = process(v, io, page + bv.bv_offset, len); + kunmap_atomic(page); + + if (r < 0) + return r; + + bio_advance_iter(bio, iter, len); + todo -= len; + } while (todo); + + return 0; +} + +static int verity_bv_hash_update(struct dm_verity *v, struct dm_verity_io *io, + u8 *data, size_t len) +{ + return verity_hash_update(v, verity_io_hash_desc(v, io), data, len); +} + /* * Verify one "dm_verity_io" structure. */ static int verity_verify_io(struct dm_verity_io *io) { struct dm_verity *v = io->v; - struct bio *bio = dm_bio_from_per_bio_data(io, - v->ti->per_bio_data_size); + struct bvec_iter start; unsigned b; for (b = 0; b < io->n_blocks; b++) { int r; - unsigned todo; struct shash_desc *desc = verity_io_hash_desc(v, io); r = verity_hash_for_block(v, io, io->block + b, @@ -357,26 +399,10 @@ static int verity_verify_io(struct dm_verity_io *io) if (unlikely(r < 0)) return r; - todo = 1 << v->data_dev_block_bits; - do { - u8 *page; - unsigned len; - struct bio_vec bv = bio_iter_iovec(bio, io->iter); - - page = kmap_atomic(bv.bv_page); - len = bv.bv_len; - if (likely(len >= todo)) - len = todo; - r = verity_hash_update(v, desc, page + bv.bv_offset, - len); - kunmap_atomic(page); - - if (unlikely(r < 0)) - return r; - - bio_advance_iter(bio, &io->iter, len); - todo -= len; - } while (todo); + start = io->iter; + r = verity_for_bv_block(v, io, &io->iter, verity_bv_hash_update); + if (unlikely(r < 0)) + return r; r = verity_hash_final(v, desc, verity_io_real_digest(v, io)); if (unlikely(r < 0)) diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index c7ad4fd05188..f5af52df8e38 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -103,6 +103,12 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v, return (u8 *)(io + 1) + v->shash_descsize + v->digest_size; } +extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter *iter, + int (*process)(struct dm_verity *v, + struct dm_verity_io *io, + u8 *data, size_t len)); + extern int verity_hash(struct dm_verity *v, struct shash_desc *desc, const u8 *data, size_t len, u8 *digest); From d5fe9722ab2ef9ac6232164204d3b1d04d3ab75c Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 3 Dec 2015 14:26:30 +0000 Subject: [PATCH 057/797] UPSTREAM: dm verity: add support for forward error correction Add support for correcting corrupted blocks using Reed-Solomon. This code uses RS(255, N) interleaved across data and hash blocks. Each error-correcting block covers N bytes evenly distributed across the combined total data, so that each byte is a maximum distance away from the others. This makes it possible to recover from several consecutive corrupted blocks with relatively small space overhead. In addition, using verity hashes to locate erasures nearly doubles the effectiveness of error correction. Being able to detect corrupted blocks also improves performance, because only corrupted blocks need to corrected. For a 2 GiB partition, RS(255, 253) (two parity bytes for each 253-byte block) can correct up to 16 MiB of consecutive corrupted blocks if erasures can be located, and 8 MiB if they cannot, with 16 MiB space overhead. Change-Id: Ife4f8889f7fbf0974bf3ed4be6d3322ae9b4cb0e Signed-off-by: Sami Tolvanen Signed-off-by: Mike Snitzer (cherry picked from commit a739ff3f543afbb4a041c16cd0182c8e8d366e70) --- Documentation/device-mapper/verity.txt | 35 +- drivers/md/Kconfig | 12 + drivers/md/Makefile | 4 + drivers/md/dm-verity-fec.c | 812 +++++++++++++++++++++++++ drivers/md/dm-verity-fec.h | 152 +++++ drivers/md/dm-verity-target.c | 55 +- drivers/md/dm-verity.h | 10 + 7 files changed, 1071 insertions(+), 9 deletions(-) create mode 100644 drivers/md/dm-verity-fec.c create mode 100644 drivers/md/dm-verity-fec.h diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt index e15bc1a0fb98..d602c801ff59 100644 --- a/Documentation/device-mapper/verity.txt +++ b/Documentation/device-mapper/verity.txt @@ -18,11 +18,11 @@ Construction Parameters 0 is the original format used in the Chromium OS. The salt is appended when hashing, digests are stored continuously and - the rest of the block is padded with zeros. + the rest of the block is padded with zeroes. 1 is the current format that should be used for new devices. The salt is prepended when hashing and each digest is - padded with zeros to the power of two. + padded with zeroes to the power of two. This is the device containing data, the integrity of which needs to be @@ -79,6 +79,32 @@ restart_on_corruption not compatible with ignore_corruption and requires user space support to avoid restart loops. +use_fec_from_device + Use forward error correction (FEC) to recover from corruption if hash + verification fails. Use encoding data from the specified device. This + may be the same device where data and hash blocks reside, in which case + fec_start must be outside data and hash areas. + + If the encoding data covers additional metadata, it must be accessible + on the hash device after the hash blocks. + + Note: block sizes for data and hash devices must match. Also, if the + verity is encrypted the should be too. + +fec_roots + Number of generator roots. This equals to the number of parity bytes in + the encoding data. For example, in RS(M, N) encoding, the number of roots + is M-N. + +fec_blocks + The number of encoding data blocks on the FEC device. The block size for + the FEC device is . + +fec_start + This is the offset, in blocks, from the start of the + FEC device to the beginning of the encoding data. + + Theory of operation =================== @@ -98,6 +124,11 @@ per-block basis. This allows for a lightweight hash computation on first read into the page cache. Block hashes are stored linearly, aligned to the nearest block size. +If forward error correction (FEC) support is enabled any recovery of +corrupted data will be verified using the cryptographic hash of the +corresponding data. This is why combining error correction with +integrity checking is essential. + Hash Tree --------- diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 7913fdcfc849..d8b0ab6f3753 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -458,6 +458,18 @@ config DM_VERITY If unsure, say N. +config DM_VERITY_FEC + bool "Verity forward error correction support" + depends on DM_VERITY + select REED_SOLOMON + select REED_SOLOMON_DEC8 + ---help--- + Add forward error correction support to dm-verity. This option + makes it possible to use pre-generated error correction data to + recover from corrupted blocks. + + If unsure, say N. + config DM_SWITCH tristate "Switch target support (EXPERIMENTAL)" depends on BLK_DEV_DM diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 94e9f6bb33d1..62a65764e8e0 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -64,3 +64,7 @@ obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o ifeq ($(CONFIG_DM_UEVENT),y) dm-mod-objs += dm-uevent.o endif + +ifeq ($(CONFIG_DM_VERITY_FEC),y) +dm-verity-objs += dm-verity-fec.o +endif diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c new file mode 100644 index 000000000000..88143d36a1d2 --- /dev/null +++ b/drivers/md/dm-verity-fec.c @@ -0,0 +1,812 @@ +/* + * Copyright (C) 2015 Google, Inc. + * + * Author: Sami Tolvanen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include "dm-verity-fec.h" +#include + +#define DM_MSG_PREFIX "verity-fec" + +/* + * If error correction has been configured, returns true. + */ +bool verity_fec_is_enabled(struct dm_verity *v) +{ + return v->fec && v->fec->dev; +} + +/* + * Return a pointer to dm_verity_fec_io after dm_verity_io and its variable + * length fields. + */ +static inline struct dm_verity_fec_io *fec_io(struct dm_verity_io *io) +{ + return (struct dm_verity_fec_io *) verity_io_digest_end(io->v, io); +} + +/* + * Return an interleaved offset for a byte in RS block. + */ +static inline u64 fec_interleave(struct dm_verity *v, u64 offset) +{ + u32 mod; + + mod = do_div(offset, v->fec->rsn); + return offset + mod * (v->fec->rounds << v->data_dev_block_bits); +} + +/* + * Decode an RS block using Reed-Solomon. + */ +static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio, + u8 *data, u8 *fec, int neras) +{ + int i; + uint16_t par[DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN]; + + for (i = 0; i < v->fec->roots; i++) + par[i] = fec[i]; + + return decode_rs8(fio->rs, data, par, v->fec->rsn, NULL, neras, + fio->erasures, 0, NULL); +} + +/* + * Read error-correcting codes for the requested RS block. Returns a pointer + * to the data block. Caller is responsible for releasing buf. + */ +static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index, + unsigned *offset, struct dm_buffer **buf) +{ + u64 position, block; + u8 *res; + + position = (index + rsb) * v->fec->roots; + block = position >> v->data_dev_block_bits; + *offset = (unsigned)(position - (block << v->data_dev_block_bits)); + + res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf); + if (unlikely(IS_ERR(res))) { + DMERR("%s: FEC %llu: parity read failed (block %llu): %ld", + v->data_dev->name, (unsigned long long)rsb, + (unsigned long long)(v->fec->start + block), + PTR_ERR(res)); + *buf = NULL; + } + + return res; +} + +/* Loop over each preallocated buffer slot. */ +#define fec_for_each_prealloc_buffer(__i) \ + for (__i = 0; __i < DM_VERITY_FEC_BUF_PREALLOC; __i++) + +/* Loop over each extra buffer slot. */ +#define fec_for_each_extra_buffer(io, __i) \ + for (__i = DM_VERITY_FEC_BUF_PREALLOC; __i < DM_VERITY_FEC_BUF_MAX; __i++) + +/* Loop over each allocated buffer. */ +#define fec_for_each_buffer(io, __i) \ + for (__i = 0; __i < (io)->nbufs; __i++) + +/* Loop over each RS block in each allocated buffer. */ +#define fec_for_each_buffer_rs_block(io, __i, __j) \ + fec_for_each_buffer(io, __i) \ + for (__j = 0; __j < 1 << DM_VERITY_FEC_BUF_RS_BITS; __j++) + +/* + * Return a pointer to the current RS block when called inside + * fec_for_each_buffer_rs_block. + */ +static inline u8 *fec_buffer_rs_block(struct dm_verity *v, + struct dm_verity_fec_io *fio, + unsigned i, unsigned j) +{ + return &fio->bufs[i][j * v->fec->rsn]; +} + +/* + * Return an index to the current RS block when called inside + * fec_for_each_buffer_rs_block. + */ +static inline unsigned fec_buffer_rs_index(unsigned i, unsigned j) +{ + return (i << DM_VERITY_FEC_BUF_RS_BITS) + j; +} + +/* + * Decode all RS blocks from buffers and copy corrected bytes into fio->output + * starting from block_offset. + */ +static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, + u64 rsb, int byte_index, unsigned block_offset, + int neras) +{ + int r, corrected = 0, res; + struct dm_buffer *buf; + unsigned n, i, offset; + u8 *par, *block; + + par = fec_read_parity(v, rsb, block_offset, &offset, &buf); + if (IS_ERR(par)) + return PTR_ERR(par); + + /* + * Decode the RS blocks we have in bufs. Each RS block results in + * one corrected target byte and consumes fec->roots parity bytes. + */ + fec_for_each_buffer_rs_block(fio, n, i) { + block = fec_buffer_rs_block(v, fio, n, i); + res = fec_decode_rs8(v, fio, block, &par[offset], neras); + if (res < 0) { + dm_bufio_release(buf); + + r = res; + goto error; + } + + corrected += res; + fio->output[block_offset] = block[byte_index]; + + block_offset++; + if (block_offset >= 1 << v->data_dev_block_bits) + goto done; + + /* read the next block when we run out of parity bytes */ + offset += v->fec->roots; + if (offset >= 1 << v->data_dev_block_bits) { + dm_bufio_release(buf); + + par = fec_read_parity(v, rsb, block_offset, &offset, &buf); + if (unlikely(IS_ERR(par))) + return PTR_ERR(par); + } + } +done: + r = corrected; +error: + if (r < 0 && neras) + DMERR_LIMIT("%s: FEC %llu: failed to correct: %d", + v->data_dev->name, (unsigned long long)rsb, r); + else if (r > 0) + DMWARN_LIMIT("%s: FEC %llu: corrected %d errors", + v->data_dev->name, (unsigned long long)rsb, r); + + return r; +} + +/* + * Locate data block erasures using verity hashes. + */ +static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io, + u8 *want_digest, u8 *data) +{ + if (unlikely(verity_hash(v, verity_io_hash_desc(v, io), + data, 1 << v->data_dev_block_bits, + verity_io_real_digest(v, io)))) + return 0; + + return memcmp(verity_io_real_digest(v, io), want_digest, + v->digest_size) != 0; +} + +/* + * Read data blocks that are part of the RS block and deinterleave as much as + * fits into buffers. Check for erasure locations if @neras is non-NULL. + */ +static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io, + u64 rsb, u64 target, unsigned block_offset, + int *neras) +{ + int i, j, target_index = -1; + struct dm_buffer *buf; + struct dm_bufio_client *bufio; + struct dm_verity_fec_io *fio = fec_io(io); + u64 block, ileaved; + u8 *bbuf, *rs_block; + u8 want_digest[v->digest_size]; + unsigned n, k; + + if (neras) + *neras = 0; + + /* + * read each of the rsn data blocks that are part of the RS block, and + * interleave contents to available bufs + */ + for (i = 0; i < v->fec->rsn; i++) { + ileaved = fec_interleave(v, rsb * v->fec->rsn + i); + + /* + * target is the data block we want to correct, target_index is + * the index of this block within the rsn RS blocks + */ + if (ileaved == target) + target_index = i; + + block = ileaved >> v->data_dev_block_bits; + bufio = v->fec->data_bufio; + + if (block >= v->data_blocks) { + block -= v->data_blocks; + + /* + * blocks outside the area were assumed to contain + * zeros when encoding data was generated + */ + if (unlikely(block >= v->fec->hash_blocks)) + continue; + + block += v->hash_start; + bufio = v->bufio; + } + + bbuf = dm_bufio_read(bufio, block, &buf); + if (unlikely(IS_ERR(bbuf))) { + DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld", + v->data_dev->name, + (unsigned long long)rsb, + (unsigned long long)block, PTR_ERR(bbuf)); + + /* assume the block is corrupted */ + if (neras && *neras <= v->fec->roots) + fio->erasures[(*neras)++] = i; + + continue; + } + + /* locate erasures if the block is on the data device */ + if (bufio == v->fec->data_bufio && + verity_hash_for_block(v, io, block, want_digest) == 0) { + /* + * skip if we have already found the theoretical + * maximum number (i.e. fec->roots) of erasures + */ + if (neras && *neras <= v->fec->roots && + fec_is_erasure(v, io, want_digest, bbuf)) + fio->erasures[(*neras)++] = i; + } + + /* + * deinterleave and copy the bytes that fit into bufs, + * starting from block_offset + */ + fec_for_each_buffer_rs_block(fio, n, j) { + k = fec_buffer_rs_index(n, j) + block_offset; + + if (k >= 1 << v->data_dev_block_bits) + goto done; + + rs_block = fec_buffer_rs_block(v, fio, n, j); + rs_block[i] = bbuf[k]; + } +done: + dm_bufio_release(buf); + } + + return target_index; +} + +/* + * Allocate RS control structure and FEC buffers from preallocated mempools, + * and attempt to allocate as many extra buffers as available. + */ +static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio) +{ + unsigned n; + + if (!fio->rs) { + fio->rs = mempool_alloc(v->fec->rs_pool, 0); + if (unlikely(!fio->rs)) { + DMERR("failed to allocate RS"); + return -ENOMEM; + } + } + + fec_for_each_prealloc_buffer(n) { + if (fio->bufs[n]) + continue; + + fio->bufs[n] = mempool_alloc(v->fec->prealloc_pool, GFP_NOIO); + if (unlikely(!fio->bufs[n])) { + DMERR("failed to allocate FEC buffer"); + return -ENOMEM; + } + } + + /* try to allocate the maximum number of buffers */ + fec_for_each_extra_buffer(fio, n) { + if (fio->bufs[n]) + continue; + + fio->bufs[n] = mempool_alloc(v->fec->extra_pool, GFP_NOIO); + /* we can manage with even one buffer if necessary */ + if (unlikely(!fio->bufs[n])) + break; + } + fio->nbufs = n; + + if (!fio->output) { + fio->output = mempool_alloc(v->fec->output_pool, GFP_NOIO); + + if (!fio->output) { + DMERR("failed to allocate FEC page"); + return -ENOMEM; + } + } + + return 0; +} + +/* + * Initialize buffers and clear erasures. fec_read_bufs() assumes buffers are + * zeroed before deinterleaving. + */ +static void fec_init_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio) +{ + unsigned n; + + fec_for_each_buffer(fio, n) + memset(fio->bufs[n], 0, v->fec->rsn << DM_VERITY_FEC_BUF_RS_BITS); + + memset(fio->erasures, 0, sizeof(fio->erasures)); +} + +/* + * Decode all RS blocks in a single data block and return the target block + * (indicated by @offset) in fio->output. If @use_erasures is non-zero, uses + * hashes to locate erasures. + */ +static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io, + struct dm_verity_fec_io *fio, u64 rsb, u64 offset, + bool use_erasures) +{ + int r, neras = 0; + unsigned pos; + + r = fec_alloc_bufs(v, fio); + if (unlikely(r < 0)) + return r; + + for (pos = 0; pos < 1 << v->data_dev_block_bits; ) { + fec_init_bufs(v, fio); + + r = fec_read_bufs(v, io, rsb, offset, pos, + use_erasures ? &neras : NULL); + if (unlikely(r < 0)) + return r; + + r = fec_decode_bufs(v, fio, rsb, r, pos, neras); + if (r < 0) + return r; + + pos += fio->nbufs << DM_VERITY_FEC_BUF_RS_BITS; + } + + /* Always re-validate the corrected block against the expected hash */ + r = verity_hash(v, verity_io_hash_desc(v, io), fio->output, + 1 << v->data_dev_block_bits, + verity_io_real_digest(v, io)); + if (unlikely(r < 0)) + return r; + + if (memcmp(verity_io_real_digest(v, io), verity_io_want_digest(v, io), + v->digest_size)) { + DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)", + v->data_dev->name, (unsigned long long)rsb, neras); + return -EILSEQ; + } + + return 0; +} + +static int fec_bv_copy(struct dm_verity *v, struct dm_verity_io *io, u8 *data, + size_t len) +{ + struct dm_verity_fec_io *fio = fec_io(io); + + memcpy(data, &fio->output[fio->output_pos], len); + fio->output_pos += len; + + return 0; +} + +/* + * Correct errors in a block. Copies corrected block to dest if non-NULL, + * otherwise to a bio_vec starting from iter. + */ +int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, + enum verity_block_type type, sector_t block, u8 *dest, + struct bvec_iter *iter) +{ + int r; + struct dm_verity_fec_io *fio = fec_io(io); + u64 offset, res, rsb; + + if (!verity_fec_is_enabled(v)) + return -EOPNOTSUPP; + + if (type == DM_VERITY_BLOCK_TYPE_METADATA) + block += v->data_blocks; + + /* + * For RS(M, N), the continuous FEC data is divided into blocks of N + * bytes. Since block size may not be divisible by N, the last block + * is zero padded when decoding. + * + * Each byte of the block is covered by a different RS(M, N) code, + * and each code is interleaved over N blocks to make it less likely + * that bursty corruption will leave us in unrecoverable state. + */ + + offset = block << v->data_dev_block_bits; + + res = offset; + div64_u64(res, v->fec->rounds << v->data_dev_block_bits); + + /* + * The base RS block we can feed to the interleaver to find out all + * blocks required for decoding. + */ + rsb = offset - res * (v->fec->rounds << v->data_dev_block_bits); + + /* + * Locating erasures is slow, so attempt to recover the block without + * them first. Do a second attempt with erasures if the corruption is + * bad enough. + */ + r = fec_decode_rsb(v, io, fio, rsb, offset, false); + if (r < 0) { + r = fec_decode_rsb(v, io, fio, rsb, offset, true); + if (r < 0) + return r; + } + + if (dest) + memcpy(dest, fio->output, 1 << v->data_dev_block_bits); + else if (iter) { + fio->output_pos = 0; + r = verity_for_bv_block(v, io, iter, fec_bv_copy); + } + + return r; +} + +/* + * Clean up per-bio data. + */ +void verity_fec_finish_io(struct dm_verity_io *io) +{ + unsigned n; + struct dm_verity_fec *f = io->v->fec; + struct dm_verity_fec_io *fio = fec_io(io); + + if (!verity_fec_is_enabled(io->v)) + return; + + mempool_free(fio->rs, f->rs_pool); + + fec_for_each_prealloc_buffer(n) + mempool_free(fio->bufs[n], f->prealloc_pool); + + fec_for_each_extra_buffer(fio, n) + mempool_free(fio->bufs[n], f->extra_pool); + + mempool_free(fio->output, f->output_pool); +} + +/* + * Initialize per-bio data. + */ +void verity_fec_init_io(struct dm_verity_io *io) +{ + struct dm_verity_fec_io *fio = fec_io(io); + + if (!verity_fec_is_enabled(io->v)) + return; + + fio->rs = NULL; + memset(fio->bufs, 0, sizeof(fio->bufs)); + fio->nbufs = 0; + fio->output = NULL; +} + +/* + * Append feature arguments and values to the status table. + */ +unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz, + char *result, unsigned maxlen) +{ + if (!verity_fec_is_enabled(v)) + return sz; + + DMEMIT(" " DM_VERITY_OPT_FEC_DEV " %s " + DM_VERITY_OPT_FEC_BLOCKS " %llu " + DM_VERITY_OPT_FEC_START " %llu " + DM_VERITY_OPT_FEC_ROOTS " %d", + v->fec->dev->name, + (unsigned long long)v->fec->blocks, + (unsigned long long)v->fec->start, + v->fec->roots); + + return sz; +} + +void verity_fec_dtr(struct dm_verity *v) +{ + struct dm_verity_fec *f = v->fec; + + if (!verity_fec_is_enabled(v)) + goto out; + + mempool_destroy(f->rs_pool); + mempool_destroy(f->prealloc_pool); + mempool_destroy(f->extra_pool); + kmem_cache_destroy(f->cache); + + if (f->data_bufio) + dm_bufio_client_destroy(f->data_bufio); + if (f->bufio) + dm_bufio_client_destroy(f->bufio); + + if (f->dev) + dm_put_device(v->ti, f->dev); +out: + kfree(f); + v->fec = NULL; +} + +static void *fec_rs_alloc(gfp_t gfp_mask, void *pool_data) +{ + struct dm_verity *v = (struct dm_verity *)pool_data; + + return init_rs(8, 0x11d, 0, 1, v->fec->roots); +} + +static void fec_rs_free(void *element, void *pool_data) +{ + struct rs_control *rs = (struct rs_control *)element; + + if (rs) + free_rs(rs); +} + +bool verity_is_fec_opt_arg(const char *arg_name) +{ + return (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV) || + !strcasecmp(arg_name, DM_VERITY_OPT_FEC_BLOCKS) || + !strcasecmp(arg_name, DM_VERITY_OPT_FEC_START) || + !strcasecmp(arg_name, DM_VERITY_OPT_FEC_ROOTS)); +} + +int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, + unsigned *argc, const char *arg_name) +{ + int r; + struct dm_target *ti = v->ti; + const char *arg_value; + unsigned long long num_ll; + unsigned char num_c; + char dummy; + + if (!*argc) { + ti->error = "FEC feature arguments require a value"; + return -EINVAL; + } + + arg_value = dm_shift_arg(as); + (*argc)--; + + if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV)) { + r = dm_get_device(ti, arg_value, FMODE_READ, &v->fec->dev); + if (r) { + ti->error = "FEC device lookup failed"; + return r; + } + + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_BLOCKS)) { + if (sscanf(arg_value, "%llu%c", &num_ll, &dummy) != 1 || + ((sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) + >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll)) { + ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS; + return -EINVAL; + } + v->fec->blocks = num_ll; + + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_START)) { + if (sscanf(arg_value, "%llu%c", &num_ll, &dummy) != 1 || + ((sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) >> + (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll)) { + ti->error = "Invalid " DM_VERITY_OPT_FEC_START; + return -EINVAL; + } + v->fec->start = num_ll; + + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_ROOTS)) { + if (sscanf(arg_value, "%hhu%c", &num_c, &dummy) != 1 || !num_c || + num_c < (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MAX_RSN) || + num_c > (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN)) { + ti->error = "Invalid " DM_VERITY_OPT_FEC_ROOTS; + return -EINVAL; + } + v->fec->roots = num_c; + + } else { + ti->error = "Unrecognized verity FEC feature request"; + return -EINVAL; + } + + return 0; +} + +/* + * Allocate dm_verity_fec for v->fec. Must be called before verity_fec_ctr. + */ +int verity_fec_ctr_alloc(struct dm_verity *v) +{ + struct dm_verity_fec *f; + + f = kzalloc(sizeof(struct dm_verity_fec), GFP_KERNEL); + if (!f) { + v->ti->error = "Cannot allocate FEC structure"; + return -ENOMEM; + } + v->fec = f; + + return 0; +} + +/* + * Validate arguments and preallocate memory. Must be called after arguments + * have been parsed using verity_fec_parse_opt_args. + */ +int verity_fec_ctr(struct dm_verity *v) +{ + struct dm_verity_fec *f = v->fec; + struct dm_target *ti = v->ti; + u64 hash_blocks; + + if (!verity_fec_is_enabled(v)) { + verity_fec_dtr(v); + return 0; + } + + /* + * FEC is computed over data blocks, possible metadata, and + * hash blocks. In other words, FEC covers total of fec_blocks + * blocks consisting of the following: + * + * data blocks | hash blocks | metadata (optional) + * + * We allow metadata after hash blocks to support a use case + * where all data is stored on the same device and FEC covers + * the entire area. + * + * If metadata is included, we require it to be available on the + * hash device after the hash blocks. + */ + + hash_blocks = v->hash_blocks - v->hash_start; + + /* + * Require matching block sizes for data and hash devices for + * simplicity. + */ + if (v->data_dev_block_bits != v->hash_dev_block_bits) { + ti->error = "Block sizes must match to use FEC"; + return -EINVAL; + } + + if (!f->roots) { + ti->error = "Missing " DM_VERITY_OPT_FEC_ROOTS; + return -EINVAL; + } + f->rsn = DM_VERITY_FEC_RSM - f->roots; + + if (!f->blocks) { + ti->error = "Missing " DM_VERITY_OPT_FEC_BLOCKS; + return -EINVAL; + } + + f->rounds = f->blocks; + if (sector_div(f->rounds, f->rsn)) + f->rounds++; + + /* + * Due to optional metadata, f->blocks can be larger than + * data_blocks and hash_blocks combined. + */ + if (f->blocks < v->data_blocks + hash_blocks || !f->rounds) { + ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS; + return -EINVAL; + } + + /* + * Metadata is accessed through the hash device, so we require + * it to be large enough. + */ + f->hash_blocks = f->blocks - v->data_blocks; + if (dm_bufio_get_device_size(v->bufio) < f->hash_blocks) { + ti->error = "Hash device is too small for " + DM_VERITY_OPT_FEC_BLOCKS; + return -E2BIG; + } + + f->bufio = dm_bufio_client_create(f->dev->bdev, + 1 << v->data_dev_block_bits, + 1, 0, NULL, NULL); + if (IS_ERR(f->bufio)) { + ti->error = "Cannot initialize FEC bufio client"; + return PTR_ERR(f->bufio); + } + + if (dm_bufio_get_device_size(f->bufio) < + ((f->start + f->rounds * f->roots) >> v->data_dev_block_bits)) { + ti->error = "FEC device is too small"; + return -E2BIG; + } + + f->data_bufio = dm_bufio_client_create(v->data_dev->bdev, + 1 << v->data_dev_block_bits, + 1, 0, NULL, NULL); + if (IS_ERR(f->data_bufio)) { + ti->error = "Cannot initialize FEC data bufio client"; + return PTR_ERR(f->data_bufio); + } + + if (dm_bufio_get_device_size(f->data_bufio) < v->data_blocks) { + ti->error = "Data device is too small"; + return -E2BIG; + } + + /* Preallocate an rs_control structure for each worker thread */ + f->rs_pool = mempool_create(num_online_cpus(), fec_rs_alloc, + fec_rs_free, (void *) v); + if (!f->rs_pool) { + ti->error = "Cannot allocate RS pool"; + return -ENOMEM; + } + + f->cache = kmem_cache_create("dm_verity_fec_buffers", + f->rsn << DM_VERITY_FEC_BUF_RS_BITS, + 0, 0, NULL); + if (!f->cache) { + ti->error = "Cannot create FEC buffer cache"; + return -ENOMEM; + } + + /* Preallocate DM_VERITY_FEC_BUF_PREALLOC buffers for each thread */ + f->prealloc_pool = mempool_create_slab_pool(num_online_cpus() * + DM_VERITY_FEC_BUF_PREALLOC, + f->cache); + if (!f->prealloc_pool) { + ti->error = "Cannot allocate FEC buffer prealloc pool"; + return -ENOMEM; + } + + f->extra_pool = mempool_create_slab_pool(0, f->cache); + if (!f->extra_pool) { + ti->error = "Cannot allocate FEC buffer extra pool"; + return -ENOMEM; + } + + /* Preallocate an output buffer for each thread */ + f->output_pool = mempool_create_kmalloc_pool(num_online_cpus(), + 1 << v->data_dev_block_bits); + if (!f->output_pool) { + ti->error = "Cannot allocate FEC output pool"; + return -ENOMEM; + } + + /* Reserve space for our per-bio data */ + ti->per_bio_data_size += sizeof(struct dm_verity_fec_io); + + return 0; +} diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h new file mode 100644 index 000000000000..7fa0298b995e --- /dev/null +++ b/drivers/md/dm-verity-fec.h @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2015 Google, Inc. + * + * Author: Sami Tolvanen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#ifndef DM_VERITY_FEC_H +#define DM_VERITY_FEC_H + +#include "dm-verity.h" +#include + +/* Reed-Solomon(M, N) parameters */ +#define DM_VERITY_FEC_RSM 255 +#define DM_VERITY_FEC_MAX_RSN 253 +#define DM_VERITY_FEC_MIN_RSN 231 /* ~10% space overhead */ + +/* buffers for deinterleaving and decoding */ +#define DM_VERITY_FEC_BUF_PREALLOC 1 /* buffers to preallocate */ +#define DM_VERITY_FEC_BUF_RS_BITS 4 /* 1 << RS blocks per buffer */ +/* we need buffers for at most 1 << block size RS blocks */ +#define DM_VERITY_FEC_BUF_MAX \ + (1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS)) + +#define DM_VERITY_OPT_FEC_DEV "use_fec_from_device" +#define DM_VERITY_OPT_FEC_BLOCKS "fec_blocks" +#define DM_VERITY_OPT_FEC_START "fec_start" +#define DM_VERITY_OPT_FEC_ROOTS "fec_roots" + +/* configuration */ +struct dm_verity_fec { + struct dm_dev *dev; /* parity data device */ + struct dm_bufio_client *data_bufio; /* for data dev access */ + struct dm_bufio_client *bufio; /* for parity data access */ + sector_t start; /* parity data start in blocks */ + sector_t blocks; /* number of blocks covered */ + sector_t rounds; /* number of interleaving rounds */ + sector_t hash_blocks; /* blocks covered after v->hash_start */ + unsigned char roots; /* number of parity bytes, M-N of RS(M, N) */ + unsigned char rsn; /* N of RS(M, N) */ + mempool_t *rs_pool; /* mempool for fio->rs */ + mempool_t *prealloc_pool; /* mempool for preallocated buffers */ + mempool_t *extra_pool; /* mempool for extra buffers */ + mempool_t *output_pool; /* mempool for output */ + struct kmem_cache *cache; /* cache for buffers */ +}; + +/* per-bio data */ +struct dm_verity_fec_io { + struct rs_control *rs; /* Reed-Solomon state */ + int erasures[DM_VERITY_FEC_MAX_RSN]; /* erasures for decode_rs8 */ + u8 *bufs[DM_VERITY_FEC_BUF_MAX]; /* bufs for deinterleaving */ + unsigned nbufs; /* number of buffers allocated */ + u8 *output; /* buffer for corrected output */ + size_t output_pos; +}; + +#ifdef CONFIG_DM_VERITY_FEC + +/* each feature parameter requires a value */ +#define DM_VERITY_OPTS_FEC 8 + +extern bool verity_fec_is_enabled(struct dm_verity *v); + +extern int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, + enum verity_block_type type, sector_t block, + u8 *dest, struct bvec_iter *iter); + +extern unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz, + char *result, unsigned maxlen); + +extern void verity_fec_finish_io(struct dm_verity_io *io); +extern void verity_fec_init_io(struct dm_verity_io *io); + +extern bool verity_is_fec_opt_arg(const char *arg_name); +extern int verity_fec_parse_opt_args(struct dm_arg_set *as, + struct dm_verity *v, unsigned *argc, + const char *arg_name); + +extern void verity_fec_dtr(struct dm_verity *v); + +extern int verity_fec_ctr_alloc(struct dm_verity *v); +extern int verity_fec_ctr(struct dm_verity *v); + +#else /* !CONFIG_DM_VERITY_FEC */ + +#define DM_VERITY_OPTS_FEC 0 + +static inline bool verity_fec_is_enabled(struct dm_verity *v) +{ + return false; +} + +static inline int verity_fec_decode(struct dm_verity *v, + struct dm_verity_io *io, + enum verity_block_type type, + sector_t block, u8 *dest, + struct bvec_iter *iter) +{ + return -EOPNOTSUPP; +} + +static inline unsigned verity_fec_status_table(struct dm_verity *v, + unsigned sz, char *result, + unsigned maxlen) +{ + return sz; +} + +static inline void verity_fec_finish_io(struct dm_verity_io *io) +{ +} + +static inline void verity_fec_init_io(struct dm_verity_io *io) +{ +} + +static inline bool verity_is_fec_opt_arg(const char *arg_name) +{ + return false; +} + +static inline int verity_fec_parse_opt_args(struct dm_arg_set *as, + struct dm_verity *v, + unsigned *argc, + const char *arg_name) +{ + return -EINVAL; +} + +static inline void verity_fec_dtr(struct dm_verity *v) +{ +} + +static inline int verity_fec_ctr_alloc(struct dm_verity *v) +{ + return 0; +} + +static inline int verity_fec_ctr(struct dm_verity *v) +{ + return 0; +} + +#endif /* CONFIG_DM_VERITY_FEC */ + +#endif /* DM_VERITY_FEC_H */ diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 2b0ee52d1ad8..4f90ec2c6b7a 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -15,6 +15,7 @@ */ #include "dm-verity.h" +#include "dm-verity-fec.h" #include #include @@ -31,7 +32,7 @@ #define DM_VERITY_OPT_LOGGING "ignore_corruption" #define DM_VERITY_OPT_RESTART "restart_on_corruption" -#define DM_VERITY_OPTS_MAX 1 +#define DM_VERITY_OPTS_MAX (1 + DM_VERITY_OPTS_FEC) static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; @@ -282,6 +283,10 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, if (likely(memcmp(verity_io_real_digest(v, io), want_digest, v->digest_size) == 0)) aux->hash_verified = 1; + else if (verity_fec_decode(v, io, + DM_VERITY_BLOCK_TYPE_METADATA, + hash_block, data, NULL) == 0) + aux->hash_verified = 1; else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA, hash_block)) { @@ -411,8 +416,11 @@ static int verity_verify_io(struct dm_verity_io *io) if (likely(memcmp(verity_io_real_digest(v, io), verity_io_want_digest(v, io), v->digest_size) == 0)) continue; + else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, + io->block + b, NULL, &start) == 0) + continue; else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, - io->block + b)) + io->block + b)) return -EIO; } @@ -430,6 +438,8 @@ static void verity_finish_io(struct dm_verity_io *io, int error) bio->bi_end_io = io->orig_bi_end_io; bio->bi_error = error; + verity_fec_finish_io(io); + bio_endio(bio); } @@ -444,7 +454,7 @@ static void verity_end_io(struct bio *bio) { struct dm_verity_io *io = bio->bi_private; - if (bio->bi_error) { + if (bio->bi_error && !verity_fec_is_enabled(io->v)) { verity_finish_io(io, bio->bi_error); return; } @@ -547,6 +557,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio) bio->bi_private = io; io->iter = bio->bi_iter; + verity_fec_init_io(io); + verity_submit_prefetch(v, io); generic_make_request(bio); @@ -561,6 +573,7 @@ static void verity_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { struct dm_verity *v = ti->private; + unsigned args = 0; unsigned sz = 0; unsigned x; @@ -587,8 +600,15 @@ static void verity_status(struct dm_target *ti, status_type_t type, else for (x = 0; x < v->salt_size; x++) DMEMIT("%02x", v->salt[x]); + if (v->mode != DM_VERITY_MODE_EIO) + args++; + if (verity_fec_is_enabled(v)) + args += DM_VERITY_OPTS_FEC; + if (!args) + return; + DMEMIT(" %u", args); if (v->mode != DM_VERITY_MODE_EIO) { - DMEMIT(" 1 "); + DMEMIT(" "); switch (v->mode) { case DM_VERITY_MODE_LOGGING: DMEMIT(DM_VERITY_OPT_LOGGING); @@ -600,6 +620,7 @@ static void verity_status(struct dm_target *ti, status_type_t type, BUG(); } } + sz = verity_fec_status_table(v, sz, result, maxlen); break; } } @@ -662,6 +683,8 @@ static void verity_dtr(struct dm_target *ti) if (v->data_dev) dm_put_device(ti, v->data_dev); + verity_fec_dtr(v); + kfree(v); } @@ -694,6 +717,12 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) } else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) { v->mode = DM_VERITY_MODE_RESTART; continue; + + } else if (verity_is_fec_opt_arg(arg_name)) { + r = verity_fec_parse_opt_args(as, v, &argc, arg_name); + if (r) + return r; + continue; } ti->error = "Unrecognized verity feature request"; @@ -736,6 +765,10 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->private = v; v->ti = ti; + r = verity_fec_ctr_alloc(v); + if (r) + goto bad; + if ((dm_table_get_mode(ti->table) & ~FMODE_READ)) { ti->error = "Device must be readonly"; r = -EINVAL; @@ -924,8 +957,6 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io)); - /* WQ_UNBOUND greatly improves performance when running on ramdisk */ v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus()); if (!v->verify_wq) { @@ -934,6 +965,16 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } + ti->per_bio_data_size = sizeof(struct dm_verity_io) + + v->shash_descsize + v->digest_size * 2; + + r = verity_fec_ctr(v); + if (r) + goto bad; + + ti->per_bio_data_size = roundup(ti->per_bio_data_size, + __alignof__(struct dm_verity_io)); + return 0; bad: @@ -944,7 +985,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) static struct target_type verity_target = { .name = "verity", - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index f5af52df8e38..8e853722f6c6 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -29,6 +29,8 @@ enum verity_block_type { DM_VERITY_BLOCK_TYPE_METADATA }; +struct dm_verity_fec; + struct dm_verity { struct dm_dev *data_dev; struct dm_dev *hash_dev; @@ -58,6 +60,8 @@ struct dm_verity { /* starting blocks for each tree level. 0 is the lowest level. */ sector_t hash_level_block[DM_VERITY_MAX_LEVELS]; + + struct dm_verity_fec *fec; /* forward error correction */ }; struct dm_verity_io { @@ -103,6 +107,12 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v, return (u8 *)(io + 1) + v->shash_descsize + v->digest_size; } +static inline u8 *verity_io_digest_end(struct dm_verity *v, + struct dm_verity_io *io) +{ + return verity_io_want_digest(v, io) + v->digest_size; +} + extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, struct bvec_iter *iter, int (*process)(struct dm_verity *v, From 4cd7f1ecf798206533dfc11279c9264198e5c3bf Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 3 Dec 2015 14:26:31 +0000 Subject: [PATCH 058/797] UPSTREAM: dm verity: add ignore_zero_blocks feature If ignore_zero_blocks is enabled dm-verity will return zeroes for blocks matching a zero hash without validating the content. Change-Id: I728fa4b2586b29f2793ea5cb014289892819d249 Signed-off-by: Sami Tolvanen Signed-off-by: Mike Snitzer (cherry picked from commit 0cc37c2df4fa0aa702f9662edce4b7ce12c86b7a) --- Documentation/device-mapper/verity.txt | 5 ++ drivers/md/dm-verity-fec.c | 8 ++- drivers/md/dm-verity-target.c | 87 +++++++++++++++++++++++--- drivers/md/dm-verity.h | 3 +- 4 files changed, 93 insertions(+), 10 deletions(-) diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt index d602c801ff59..89fd8f9a259f 100644 --- a/Documentation/device-mapper/verity.txt +++ b/Documentation/device-mapper/verity.txt @@ -79,6 +79,11 @@ restart_on_corruption not compatible with ignore_corruption and requires user space support to avoid restart loops. +ignore_zero_blocks + Do not verify blocks that are expected to contain zeroes and always return + zeroes instead. This may be useful if the partition contains unused blocks + that are not guaranteed to contain zeroes. + use_fec_from_device Use forward error correction (FEC) to recover from corruption if hash verification fails. Use encoding data from the specified device. This diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 88143d36a1d2..1cc10c4de701 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -205,6 +205,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io, u64 rsb, u64 target, unsigned block_offset, int *neras) { + bool is_zero; int i, j, target_index = -1; struct dm_buffer *buf; struct dm_bufio_client *bufio; @@ -264,7 +265,12 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io, /* locate erasures if the block is on the data device */ if (bufio == v->fec->data_bufio && - verity_hash_for_block(v, io, block, want_digest) == 0) { + verity_hash_for_block(v, io, block, want_digest, + &is_zero) == 0) { + /* skip known zero blocks entirely */ + if (is_zero) + continue; + /* * skip if we have already found the theoretical * maximum number (i.e. fec->roots) of erasures diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 4f90ec2c6b7a..5c5d30cb6ec5 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -31,8 +31,9 @@ #define DM_VERITY_OPT_LOGGING "ignore_corruption" #define DM_VERITY_OPT_RESTART "restart_on_corruption" +#define DM_VERITY_OPT_IGN_ZEROES "ignore_zero_blocks" -#define DM_VERITY_OPTS_MAX (1 + DM_VERITY_OPTS_FEC) +#define DM_VERITY_OPTS_MAX (2 + DM_VERITY_OPTS_FEC) static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; @@ -309,10 +310,9 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, * of the hash tree if necessary. */ int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, - sector_t block, u8 *digest) + sector_t block, u8 *digest, bool *is_zero) { - int i; - int r; + int r = 0, i; if (likely(v->levels)) { /* @@ -324,7 +324,7 @@ int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, */ r = verity_verify_level(v, io, block, 0, true, digest); if (likely(r <= 0)) - return r; + goto out; } memcpy(digest, v->root_digest, v->digest_size); @@ -332,10 +332,15 @@ int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, for (i = v->levels - 1; i >= 0; i--) { r = verity_verify_level(v, io, block, i, false, digest); if (unlikely(r)) - return r; + goto out; } +out: + if (!r && v->zero_digest) + *is_zero = !memcmp(v->zero_digest, digest, v->digest_size); + else + *is_zero = false; - return 0; + return r; } /* @@ -382,11 +387,19 @@ static int verity_bv_hash_update(struct dm_verity *v, struct dm_verity_io *io, return verity_hash_update(v, verity_io_hash_desc(v, io), data, len); } +static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io, + u8 *data, size_t len) +{ + memset(data, 0, len); + return 0; +} + /* * Verify one "dm_verity_io" structure. */ static int verity_verify_io(struct dm_verity_io *io) { + bool is_zero; struct dm_verity *v = io->v; struct bvec_iter start; unsigned b; @@ -396,10 +409,24 @@ static int verity_verify_io(struct dm_verity_io *io) struct shash_desc *desc = verity_io_hash_desc(v, io); r = verity_hash_for_block(v, io, io->block + b, - verity_io_want_digest(v, io)); + verity_io_want_digest(v, io), + &is_zero); if (unlikely(r < 0)) return r; + if (is_zero) { + /* + * If we expect a zero block, don't validate, just + * return zeros. + */ + r = verity_for_bv_block(v, io, &io->iter, + verity_bv_zero); + if (unlikely(r < 0)) + return r; + + continue; + } + r = verity_hash_init(v, desc); if (unlikely(r < 0)) return r; @@ -604,6 +631,8 @@ static void verity_status(struct dm_target *ti, status_type_t type, args++; if (verity_fec_is_enabled(v)) args += DM_VERITY_OPTS_FEC; + if (v->zero_digest) + args++; if (!args) return; DMEMIT(" %u", args); @@ -620,6 +649,8 @@ static void verity_status(struct dm_target *ti, status_type_t type, BUG(); } } + if (v->zero_digest) + DMEMIT(" " DM_VERITY_OPT_IGN_ZEROES); sz = verity_fec_status_table(v, sz, result, maxlen); break; } @@ -671,6 +702,7 @@ static void verity_dtr(struct dm_target *ti) kfree(v->salt); kfree(v->root_digest); + kfree(v->zero_digest); if (v->tfm) crypto_free_shash(v->tfm); @@ -688,6 +720,37 @@ static void verity_dtr(struct dm_target *ti) kfree(v); } +static int verity_alloc_zero_digest(struct dm_verity *v) +{ + int r = -ENOMEM; + struct shash_desc *desc; + u8 *zero_data; + + v->zero_digest = kmalloc(v->digest_size, GFP_KERNEL); + + if (!v->zero_digest) + return r; + + desc = kmalloc(v->shash_descsize, GFP_KERNEL); + + if (!desc) + return r; /* verity_dtr will free zero_digest */ + + zero_data = kzalloc(1 << v->data_dev_block_bits, GFP_KERNEL); + + if (!zero_data) + goto out; + + r = verity_hash(v, desc, zero_data, 1 << v->data_dev_block_bits, + v->zero_digest); + +out: + kfree(desc); + kfree(zero_data); + + return r; +} + static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) { int r; @@ -718,6 +781,14 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) v->mode = DM_VERITY_MODE_RESTART; continue; + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_IGN_ZEROES)) { + r = verity_alloc_zero_digest(v); + if (r) { + ti->error = "Cannot allocate zero digest"; + return r; + } + continue; + } else if (verity_is_fec_opt_arg(arg_name)) { r = verity_fec_parse_opt_args(as, v, &argc, arg_name); if (r) diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 8e853722f6c6..fb419f422d73 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -40,6 +40,7 @@ struct dm_verity { struct crypto_shash *tfm; u8 *root_digest; /* digest of the root block */ u8 *salt; /* salt: its size is salt_size */ + u8 *zero_digest; /* digest for a zero block */ unsigned salt_size; sector_t data_start; /* data offset in 512-byte sectors */ sector_t hash_start; /* hash start in blocks */ @@ -123,6 +124,6 @@ extern int verity_hash(struct dm_verity *v, struct shash_desc *desc, const u8 *data, size_t len, u8 *digest); extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, - sector_t block, u8 *digest); + sector_t block, u8 *digest, bool *is_zero); #endif /* DM_VERITY_H */ From dc6fc413a1efb257a5947ca2412407297b92268a Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 16 Dec 2015 16:23:49 +0000 Subject: [PATCH 059/797] ANDROID: android: base-cfg: enable CONFIG_DM_VERITY_FEC Bug: 21893453 Change-Id: Idd0dfe4e3e527df2eff2f0d734effc40dce294c7 Signed-off-by: Sami Tolvanen (cherry picked from commit 9408350ed80005174918ce5147490035b2cf451b) --- android/configs/android-base.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index a4cffb1840d7..220ddb5304df 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -21,6 +21,7 @@ CONFIG_CGROUP_SCHED=y CONFIG_CP15_BARRIER_EMULATION=y CONFIG_DM_CRYPT=y CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y CONFIG_EMBEDDED=y CONFIG_FB=y CONFIG_HIGH_RES_TIMERS=y From 73a20ae3132fc35ba83b6c622fc60b8fc9fe7589 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 30 Mar 2016 14:10:13 -0700 Subject: [PATCH 060/797] ANDROID: dm verity fec: add sysfs attribute fec/corrected Add a sysfs entry that allows user space to determine whether dm-verity has come across correctable errors on the underlying block device. Bug: 22655252 Bug: 27928374 Change-Id: I80547a2aa944af2fb9ffde002650482877ade31b Signed-off-by: Sami Tolvanen (cherry picked from commit 7911fad5f0a2cf5afc2215657219a21e6630e001) --- drivers/md/dm-verity-fec.c | 45 +++++++++++++++++++++++++++++++++++++- drivers/md/dm-verity-fec.h | 3 +++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 1cc10c4de701..ad10d6d8ed28 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -11,6 +11,7 @@ #include "dm-verity-fec.h" #include +#include #define DM_MSG_PREFIX "verity-fec" @@ -175,9 +176,11 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, if (r < 0 && neras) DMERR_LIMIT("%s: FEC %llu: failed to correct: %d", v->data_dev->name, (unsigned long long)rsb, r); - else if (r > 0) + else if (r > 0) { DMWARN_LIMIT("%s: FEC %llu: corrected %d errors", v->data_dev->name, (unsigned long long)rsb, r); + atomic_add_unless(&v->fec->corrected, 1, INT_MAX); + } return r; } @@ -548,6 +551,7 @@ unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz, void verity_fec_dtr(struct dm_verity *v) { struct dm_verity_fec *f = v->fec; + struct kobject *kobj = &f->kobj_holder.kobj; if (!verity_fec_is_enabled(v)) goto out; @@ -564,6 +568,12 @@ void verity_fec_dtr(struct dm_verity *v) if (f->dev) dm_put_device(v->ti, f->dev); + + if (kobj->state_initialized) { + kobject_put(kobj); + wait_for_completion(dm_get_completion_from_kobject(kobj)); + } + out: kfree(f); v->fec = NULL; @@ -652,6 +662,27 @@ int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, return 0; } +static ssize_t corrected_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct dm_verity_fec *f = container_of(kobj, struct dm_verity_fec, + kobj_holder.kobj); + + return sprintf(buf, "%d\n", atomic_read(&f->corrected)); +} + +static struct kobj_attribute attr_corrected = __ATTR_RO(corrected); + +static struct attribute *fec_attrs[] = { + &attr_corrected.attr, + NULL +}; + +static struct kobj_type fec_ktype = { + .sysfs_ops = &kobj_sysfs_ops, + .default_attrs = fec_attrs +}; + /* * Allocate dm_verity_fec for v->fec. Must be called before verity_fec_ctr. */ @@ -675,8 +706,10 @@ int verity_fec_ctr_alloc(struct dm_verity *v) */ int verity_fec_ctr(struct dm_verity *v) { + int r; struct dm_verity_fec *f = v->fec; struct dm_target *ti = v->ti; + struct mapped_device *md = dm_table_get_md(ti->table); u64 hash_blocks; if (!verity_fec_is_enabled(v)) { @@ -684,6 +717,16 @@ int verity_fec_ctr(struct dm_verity *v) return 0; } + /* Create a kobject and sysfs attributes */ + init_completion(&f->kobj_holder.completion); + + r = kobject_init_and_add(&f->kobj_holder.kobj, &fec_ktype, + &disk_to_dev(dm_disk(md))->kobj, "%s", "fec"); + if (r) { + ti->error = "Cannot create kobject"; + return r; + } + /* * FEC is computed over data blocks, possible metadata, and * hash blocks. In other words, FEC covers total of fec_blocks diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h index 7fa0298b995e..8c4bee052a73 100644 --- a/drivers/md/dm-verity-fec.h +++ b/drivers/md/dm-verity-fec.h @@ -12,6 +12,7 @@ #ifndef DM_VERITY_FEC_H #define DM_VERITY_FEC_H +#include "dm.h" #include "dm-verity.h" #include @@ -48,6 +49,8 @@ struct dm_verity_fec { mempool_t *extra_pool; /* mempool for extra buffers */ mempool_t *output_pool; /* mempool for output */ struct kmem_cache *cache; /* cache for buffers */ + atomic_t corrected; /* corrected errors */ + struct dm_kobject_holder kobj_holder; /* for sysfs attributes */ }; /* per-bio data */ From 02018626853b6dd6dee77d1e1befeaf2d6a8098e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 4 Feb 2015 13:54:48 -0800 Subject: [PATCH 061/797] cpufreq: interactive: fix policy locking cpufreq_interactive_speedchange_task() is running as a separate kernel thread and is calling __cpufreq_driver_target(), which requires callers to hold policy->rwsem for writing to prevent racing with other parts of the kernel trying to adjust the frequency, for example kernel thermal throttling. Let's change the code to take policy->rwsem and while at it refactor the code a bit. This was originally 2 changes reviewed at: https://chromium-review.googlesource.com/246273 https://chromium-review.googlesource.com/256120 Change-Id: Icc2d97c6c1b929acd2ee32e8c81d81fd2af778ab Signed-off-by: Dmitry Torokhov Reviewed-by: Dylan Reid Reviewed-by: Douglas Anderson Signed-off-by: Dmitry Torokhov --- drivers/cpufreq/cpufreq_interactive.c | 99 ++++++++++++++++----------- 1 file changed, 60 insertions(+), 39 deletions(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 0be66df4a6e6..5224e897d460 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -511,6 +511,58 @@ static void cpufreq_interactive_idle_end(void) up_read(&pcpu->enable_sem); } +static void cpufreq_interactive_get_policy_info(struct cpufreq_policy *policy, + unsigned int *pmax_freq, + u64 *phvt, u64 *pfvt) +{ + struct cpufreq_interactive_cpuinfo *pcpu; + unsigned int max_freq = 0; + u64 hvt = ~0ULL, fvt = 0; + unsigned int i; + + for_each_cpu(i, policy->cpus) { + pcpu = &per_cpu(cpuinfo, i); + + fvt = max(fvt, pcpu->loc_floor_val_time); + if (pcpu->target_freq > max_freq) { + max_freq = pcpu->target_freq; + hvt = pcpu->loc_hispeed_val_time; + } else if (pcpu->target_freq == max_freq) { + hvt = min(hvt, pcpu->loc_hispeed_val_time); + } + } + + *pmax_freq = max_freq; + *phvt = hvt; + *pfvt = fvt; +} + +static void cpufreq_interactive_adjust_cpu(unsigned int cpu, + struct cpufreq_policy *policy) +{ + struct cpufreq_interactive_cpuinfo *pcpu; + u64 hvt, fvt; + unsigned int max_freq; + int i; + + cpufreq_interactive_get_policy_info(policy, &max_freq, &hvt, &fvt); + + for_each_cpu(i, policy->cpus) { + pcpu = &per_cpu(cpuinfo, i); + pcpu->pol_floor_val_time = fvt; + } + + if (max_freq != policy->cur) { + __cpufreq_driver_target(policy, max_freq, CPUFREQ_RELATION_H); + for_each_cpu(i, policy->cpus) { + pcpu = &per_cpu(cpuinfo, i); + pcpu->pol_hispeed_val_time = hvt; + } + } + + trace_cpufreq_interactive_setspeed(cpu, max_freq, policy->cur); +} + static int cpufreq_interactive_speedchange_task(void *data) { unsigned int cpu; @@ -539,49 +591,18 @@ static int cpufreq_interactive_speedchange_task(void *data) spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); for_each_cpu(cpu, &tmp_mask) { - unsigned int j; - unsigned int max_freq = 0; - struct cpufreq_interactive_cpuinfo *pjcpu; - u64 hvt = ~0ULL, fvt = 0; - pcpu = &per_cpu(cpuinfo, cpu); - if (!down_read_trylock(&pcpu->enable_sem)) - continue; - if (!pcpu->governor_enabled) { + + down_write(&pcpu->policy->rwsem); + + if (likely(down_read_trylock(&pcpu->enable_sem))) { + if (likely(pcpu->governor_enabled)) + cpufreq_interactive_adjust_cpu(cpu, + pcpu->policy); up_read(&pcpu->enable_sem); - continue; } - for_each_cpu(j, pcpu->policy->cpus) { - pjcpu = &per_cpu(cpuinfo, j); - - fvt = max(fvt, pjcpu->loc_floor_val_time); - if (pjcpu->target_freq > max_freq) { - max_freq = pjcpu->target_freq; - hvt = pjcpu->loc_hispeed_val_time; - } else if (pjcpu->target_freq == max_freq) { - hvt = min(hvt, pjcpu->loc_hispeed_val_time); - } - } - for_each_cpu(j, pcpu->policy->cpus) { - pjcpu = &per_cpu(cpuinfo, j); - pjcpu->pol_floor_val_time = fvt; - } - - if (max_freq != pcpu->policy->cur) { - __cpufreq_driver_target(pcpu->policy, - max_freq, - CPUFREQ_RELATION_H); - for_each_cpu(j, pcpu->policy->cpus) { - pjcpu = &per_cpu(cpuinfo, j); - pjcpu->pol_hispeed_val_time = hvt; - } - } - trace_cpufreq_interactive_setspeed(cpu, - pcpu->target_freq, - pcpu->policy->cur); - - up_read(&pcpu->enable_sem); + up_write(&pcpu->policy->rwsem); } } From b2d18bdfa38e1f1d13f19aab9fc664b9dd3712b4 Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Thu, 28 May 2015 12:08:11 +0800 Subject: [PATCH 062/797] cpufreq: interactive: only apply interactive boost when enabled Only apply the interactive boost when the interactive governor is enabled. This seems like the right thing to do. This was originally reviewed on https://chromium-review.googlesource.com/273501 Change-Id: I5f4a7320683eada099f9a4253e3d6b0f03057fe8 Signed-off-by: Daniel Kurtz Reviewed-by: Douglas Anderson Signed-off-by: Dmitry Torokhov --- drivers/cpufreq/cpufreq_interactive.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 5224e897d460..bd83be39f17b 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -622,9 +622,20 @@ static void cpufreq_interactive_boost(struct cpufreq_interactive_tunables *tunab for_each_online_cpu(i) { pcpu = &per_cpu(cpuinfo, i); - if (tunables != pcpu->policy->governor_data) + + if (!down_read_trylock(&pcpu->enable_sem)) continue; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + continue; + } + + if (tunables != pcpu->policy->governor_data) { + up_read(&pcpu->enable_sem); + continue; + } + spin_lock_irqsave(&pcpu->target_freq_lock, flags[1]); if (pcpu->target_freq < tunables->hispeed_freq) { pcpu->target_freq = tunables->hispeed_freq; @@ -634,6 +645,8 @@ static void cpufreq_interactive_boost(struct cpufreq_interactive_tunables *tunab anyboost = 1; } spin_unlock_irqrestore(&pcpu->target_freq_lock, flags[1]); + + up_read(&pcpu->enable_sem); } spin_unlock_irqrestore(&speedchange_cpumask_lock, flags[0]); From 48ad4abaff846bbe5e21a9d18b41c6f606ef84e4 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Thu, 31 Mar 2016 13:21:09 -0700 Subject: [PATCH 063/797] android: base-cfg: Add CONFIG_INET_DIAG_DESTROY Change-Id: I67430b05eca8fd520d2795d3db60faf2ec0fab9e Signed-off-by: Dmitry Shmidt --- android/configs/android-base.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index 220ddb5304df..fa53af0c37ad 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -29,6 +29,7 @@ CONFIG_INET6_AH=y CONFIG_INET6_ESP=y CONFIG_INET6_IPCOMP=y CONFIG_INET=y +CONFIG_INET_DIAG_DESTROY=y CONFIG_INET_ESP=y CONFIG_INET_XFRM_MODE_TUNNEL=y CONFIG_IP6_NF_FILTER=y From 7c328c732fcb44a3eec9e8701610a526b3348e54 Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Thu, 28 Jan 2016 11:12:25 -0800 Subject: [PATCH 064/797] ANDROID: mmc: Add CONFIG_MMC_SIMULATE_MAX_SPEED When CONFIG_MMC_SIMULATE_MAX_SPEED is enabled, Expose max_read_speed, max_write_speed and cache_size default module parameters and sysfs controls to simulate a slow eMMC device. Default values are 0 (off), 0 (off) and 4 MB respectively. Signed-off-by: Mark Salyzyn Bug: 26976972 Change-Id: I342bfbd8b85f9b790e3f0e1e4e51a900ae07e05d --- Documentation/block/00-INDEX | 6 + Documentation/block/mmc-max-speed.txt | 38 ++++ drivers/mmc/card/Kconfig | 12 ++ drivers/mmc/card/block.c | 300 ++++++++++++++++++++++++++ drivers/mmc/card/queue.h | 8 + 5 files changed, 364 insertions(+) create mode 100644 Documentation/block/mmc-max-speed.txt diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX index e840b47613f7..bc5148757edb 100644 --- a/Documentation/block/00-INDEX +++ b/Documentation/block/00-INDEX @@ -26,3 +26,9 @@ switching-sched.txt - Switching I/O schedulers at runtime writeback_cache_control.txt - Control of volatile write back caches +mmc-max-speed.txt + - eMMC layer speed simulation, related to /sys/block/mmcblk*/ + attributes: + max_read_speed + max_write_speed + cache_size diff --git a/Documentation/block/mmc-max-speed.txt b/Documentation/block/mmc-max-speed.txt new file mode 100644 index 000000000000..3f052b9fb999 --- /dev/null +++ b/Documentation/block/mmc-max-speed.txt @@ -0,0 +1,38 @@ +eMMC Block layer simulation speed controls in /sys/block/mmcblk*/ +=============================================== + +Turned on with CONFIG_MMC_SIMULATE_MAX_SPEED which enables MMC device speed +limiting. Used to test and simulate the behavior of the system when +confronted with a slow MMC. + +Enables max_read_speed, max_write_speed and cache_size attributes and module +default parameters to control the write or read maximum KB/second speed +behaviors. + +NB: There is room for improving the algorithm for aspects tied directly to +eMMC specific behavior. For instance, wear leveling and stalls from an +exhausted erase pool. We would expect that if there was a need to provide +similar speed simulation controls to other types of block devices, aspects of +their behavior are modelled separately (e.g. head seek times, heat assist, +shingling and rotational latency). + +/sys/block/mmcblk0/max_read_speed: + +Number of KB/second reads allowed to the block device. Used to test and +simulate the behavior of the system when confronted with a slow reading MMC. +Set to 0 or "off" to place no speed limit. + +/sys/block/mmcblk0/max_write_speed: + +Number of KB/second writes allowed to the block device. Used to test and +simulate the behavior of the system when confronted with a slow writing MMC. +Set to 0 or "off" to place no speed limit. + +/sys/block/mmcblk0/cache_size: + +Number of MB of high speed memory or high speed SLC cache expected on the +eMMC device being simulated. Used to help simulate the write-back behavior +more accurately. The assumption is the cache has no delay, but draws down +in the background to the MLC/TLC primary store at the max_write_speed rate. +Any write speed delays will show up when the cache is full, or when an I/O +request to flush is issued. diff --git a/drivers/mmc/card/Kconfig b/drivers/mmc/card/Kconfig index 5562308699bc..6142ec1b9dfb 100644 --- a/drivers/mmc/card/Kconfig +++ b/drivers/mmc/card/Kconfig @@ -68,3 +68,15 @@ config MMC_TEST This driver is only of interest to those developing or testing a host driver. Most people should say N here. + +config MMC_SIMULATE_MAX_SPEED + bool "Turn on maximum speed control per block device" + depends on MMC_BLOCK + help + Say Y here to enable MMC device speed limiting. Used to test and + simulate the behavior of the system when confronted with a slow MMC. + + Enables max_read_speed, max_write_speed and cache_size attributes to + control the write or read maximum KB/second speed behaviors. + + If unsure, say N here. diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index f2ce13ab7ae6..c15d3f380524 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -288,6 +288,250 @@ static ssize_t force_ro_store(struct device *dev, struct device_attribute *attr, return ret; } +#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED + +static int max_read_speed, max_write_speed, cache_size = 4; + +module_param(max_read_speed, int, S_IRUSR | S_IRGRP); +MODULE_PARM_DESC(max_read_speed, "maximum KB/s read speed 0=off"); +module_param(max_write_speed, int, S_IRUSR | S_IRGRP); +MODULE_PARM_DESC(max_write_speed, "maximum KB/s write speed 0=off"); +module_param(cache_size, int, S_IRUSR | S_IRGRP); +MODULE_PARM_DESC(cache_size, "MB high speed memory or SLC cache"); + +/* + * helper macros and expectations: + * size - unsigned long number of bytes + * jiffies - unsigned long HZ timestamp difference + * speed - unsigned KB/s transfer rate + */ +#define size_and_speed_to_jiffies(size, speed) \ + ((size) * HZ / (speed) / 1024UL) +#define jiffies_and_speed_to_size(jiffies, speed) \ + (((speed) * (jiffies) * 1024UL) / HZ) +#define jiffies_and_size_to_speed(jiffies, size) \ + ((size) * HZ / (jiffies) / 1024UL) + +/* Limits to report warning */ +/* jiffies_and_size_to_speed(10*HZ, queue_max_hw_sectors(q) * 512UL) ~ 25 */ +#define MIN_SPEED(q) 250 /* 10 times faster than a floppy disk */ +#define MAX_SPEED(q) jiffies_and_size_to_speed(1, queue_max_sectors(q) * 512UL) + +#define speed_valid(speed) ((speed) > 0) + +static const char off[] = "off\n"; + +static int max_speed_show(int speed, char *buf) +{ + if (speed) + return scnprintf(buf, PAGE_SIZE, "%uKB/s\n", speed); + else + return scnprintf(buf, PAGE_SIZE, off); +} + +static int max_speed_store(const char *buf, struct request_queue *q) +{ + unsigned int limit, set = 0; + + if (!strncasecmp(off, buf, sizeof(off) - 2)) + return set; + if (kstrtouint(buf, 0, &set) || (set > INT_MAX)) + return -EINVAL; + if (set == 0) + return set; + limit = MAX_SPEED(q); + if (set > limit) + pr_warn("max speed %u ineffective above %u\n", set, limit); + limit = MIN_SPEED(q); + if (set < limit) + pr_warn("max speed %u painful below %u\n", set, limit); + return set; +} + +static ssize_t max_write_speed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); + int ret = max_speed_show(atomic_read(&md->queue.max_write_speed), buf); + + mmc_blk_put(md); + return ret; +} + +static ssize_t max_write_speed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); + int set = max_speed_store(buf, md->queue.queue); + + if (set < 0) { + mmc_blk_put(md); + return set; + } + + atomic_set(&md->queue.max_write_speed, set); + mmc_blk_put(md); + return count; +} + +static const DEVICE_ATTR(max_write_speed, S_IRUGO | S_IWUSR, + max_write_speed_show, max_write_speed_store); + +static ssize_t max_read_speed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); + int ret = max_speed_show(atomic_read(&md->queue.max_read_speed), buf); + + mmc_blk_put(md); + return ret; +} + +static ssize_t max_read_speed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); + int set = max_speed_store(buf, md->queue.queue); + + if (set < 0) { + mmc_blk_put(md); + return set; + } + + atomic_set(&md->queue.max_read_speed, set); + mmc_blk_put(md); + return count; +} + +static const DEVICE_ATTR(max_read_speed, S_IRUGO | S_IWUSR, + max_read_speed_show, max_read_speed_store); + +static ssize_t cache_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); + struct mmc_queue *mq = &md->queue; + int cache_size = atomic_read(&mq->cache_size); + int ret; + + if (!cache_size) + ret = scnprintf(buf, PAGE_SIZE, off); + else { + int speed = atomic_read(&mq->max_write_speed); + + if (!speed_valid(speed)) + ret = scnprintf(buf, PAGE_SIZE, "%uMB\n", cache_size); + else { /* We accept race between cache_jiffies and cache_used */ + unsigned long size = jiffies_and_speed_to_size( + jiffies - mq->cache_jiffies, speed); + long used = atomic_long_read(&mq->cache_used); + + if (size >= used) + size = 0; + else + size = (used - size) * 100 / cache_size + / 1024UL / 1024UL; + + ret = scnprintf(buf, PAGE_SIZE, "%uMB %lu%% used\n", + cache_size, size); + } + } + + mmc_blk_put(md); + return ret; +} + +static ssize_t cache_size_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mmc_blk_data *md; + unsigned int set = 0; + + if (strncasecmp(off, buf, sizeof(off) - 2) + && (kstrtouint(buf, 0, &set) || (set > INT_MAX))) + return -EINVAL; + + md = mmc_blk_get(dev_to_disk(dev)); + atomic_set(&md->queue.cache_size, set); + mmc_blk_put(md); + return count; +} + +static const DEVICE_ATTR(cache_size, S_IRUGO | S_IWUSR, + cache_size_show, cache_size_store); + +/* correct for write-back */ +static long mmc_blk_cache_used(struct mmc_queue *mq, unsigned long waitfor) +{ + long used = 0; + int speed = atomic_read(&mq->max_write_speed); + + if (speed_valid(speed)) { + unsigned long size = jiffies_and_speed_to_size( + waitfor - mq->cache_jiffies, speed); + used = atomic_long_read(&mq->cache_used); + + if (size >= used) + used = 0; + else + used -= size; + } + + atomic_long_set(&mq->cache_used, used); + mq->cache_jiffies = waitfor; + + return used; +} + +static void mmc_blk_simulate_delay( + struct mmc_queue *mq, + struct request *req, + unsigned long waitfor) +{ + int max_speed; + + if (!req) + return; + + max_speed = (rq_data_dir(req) == READ) + ? atomic_read(&mq->max_read_speed) + : atomic_read(&mq->max_write_speed); + if (speed_valid(max_speed)) { + unsigned long bytes = blk_rq_bytes(req); + + if (rq_data_dir(req) != READ) { + int cache_size = atomic_read(&mq->cache_size); + + if (cache_size) { + unsigned long size = cache_size * 1024L * 1024L; + long used = mmc_blk_cache_used(mq, waitfor); + + used += bytes; + atomic_long_set(&mq->cache_used, used); + bytes = 0; + if (used > size) + bytes = used - size; + } + } + waitfor += size_and_speed_to_jiffies(bytes, max_speed); + if (time_is_after_jiffies(waitfor)) { + long msecs = jiffies_to_msecs(waitfor - jiffies); + + if (likely(msecs > 0)) + msleep(msecs); + } + } +} + +#else + +#define mmc_blk_simulate_delay(mq, req, waitfor) + +#endif + static int mmc_blk_open(struct block_device *bdev, fmode_t mode) { struct mmc_blk_data *md = mmc_blk_get(bdev->bd_disk); @@ -1264,6 +1508,23 @@ static int mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req) if (ret) ret = -EIO; +#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED + else if (atomic_read(&mq->cache_size)) { + long used = mmc_blk_cache_used(mq, jiffies); + + if (used) { + int speed = atomic_read(&mq->max_write_speed); + + if (speed_valid(speed)) { + unsigned long msecs = jiffies_to_msecs( + size_and_speed_to_jiffies( + used, speed)); + if (msecs) + msleep(msecs); + } + } + } +#endif blk_end_request_all(req, ret); return ret ? 0 : 1; @@ -1943,6 +2204,9 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) struct mmc_async_req *areq; const u8 packed_nr = 2; u8 reqs = 0; +#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED + unsigned long waitfor = jiffies; +#endif if (!rqc && !mq->mqrq_prev->req) return 0; @@ -1993,6 +2257,8 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) */ mmc_blk_reset_success(md, type); + mmc_blk_simulate_delay(mq, rqc, waitfor); + if (mmc_packed_cmd(mq_rq->cmd_type)) { ret = mmc_blk_end_packed_req(mq_rq); break; @@ -2411,6 +2677,14 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md) card->ext_csd.boot_ro_lockable) device_remove_file(disk_to_dev(md->disk), &md->power_ro_lock); +#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED + device_remove_file(disk_to_dev(md->disk), + &dev_attr_max_write_speed); + device_remove_file(disk_to_dev(md->disk), + &dev_attr_max_read_speed); + device_remove_file(disk_to_dev(md->disk), + &dev_attr_cache_size); +#endif del_gendisk(md->disk); } @@ -2446,6 +2720,24 @@ static int mmc_add_disk(struct mmc_blk_data *md) ret = device_create_file(disk_to_dev(md->disk), &md->force_ro); if (ret) goto force_ro_fail; +#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED + atomic_set(&md->queue.max_write_speed, max_write_speed); + ret = device_create_file(disk_to_dev(md->disk), + &dev_attr_max_write_speed); + if (ret) + goto max_write_speed_fail; + atomic_set(&md->queue.max_read_speed, max_read_speed); + ret = device_create_file(disk_to_dev(md->disk), + &dev_attr_max_read_speed); + if (ret) + goto max_read_speed_fail; + atomic_set(&md->queue.cache_size, cache_size); + atomic_long_set(&md->queue.cache_used, 0); + md->queue.cache_jiffies = jiffies; + ret = device_create_file(disk_to_dev(md->disk), &dev_attr_cache_size); + if (ret) + goto cache_size_fail; +#endif if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) && card->ext_csd.boot_ro_lockable) { @@ -2470,6 +2762,14 @@ static int mmc_add_disk(struct mmc_blk_data *md) return ret; power_ro_lock_fail: +#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED + device_remove_file(disk_to_dev(md->disk), &dev_attr_cache_size); +cache_size_fail: + device_remove_file(disk_to_dev(md->disk), &dev_attr_max_read_speed); +max_read_speed_fail: + device_remove_file(disk_to_dev(md->disk), &dev_attr_max_write_speed); +max_write_speed_fail: +#endif device_remove_file(disk_to_dev(md->disk), &md->force_ro); force_ro_fail: del_gendisk(md->disk); diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h index 36cddab57d77..d890d8832e21 100644 --- a/drivers/mmc/card/queue.h +++ b/drivers/mmc/card/queue.h @@ -58,6 +58,14 @@ struct mmc_queue { struct mmc_queue_req mqrq[2]; struct mmc_queue_req *mqrq_cur; struct mmc_queue_req *mqrq_prev; +#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED + atomic_t max_write_speed; + atomic_t max_read_speed; + atomic_t cache_size; + /* i/o tracking */ + atomic_long_t cache_used; + unsigned long cache_jiffies; +#endif }; extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *, From 0871a32c4d1b28aa266be3f33f2068a193196ac7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 11 Nov 2015 07:59:01 +0530 Subject: [PATCH 065/797] PM / OPP: Add debugfs support This patch adds debugfs support to OPP layer to export OPPs and their properties for all the devices. This creates a top level directory: /sys/kernel/debug/opp and then device specific directories (based on device names) inside it. For example: 'cpu0', 'cpu1', etc.. If multiple devices share the OPP table, then the real directory is created only for the first device. For all others, links are created to the real directory. Inside the device specific directory, a separate directory is created for each OPP. And within that files per opp property. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit deaa51465105a7eda19a627b10372f4f7c51a4df) Signed-off-by: Alex Shi --- drivers/base/power/opp/Makefile | 1 + drivers/base/power/opp/core.c | 21 ++- drivers/base/power/opp/debugfs.c | 219 +++++++++++++++++++++++++++++++ drivers/base/power/opp/opp.h | 42 ++++++ 4 files changed, 281 insertions(+), 2 deletions(-) create mode 100644 drivers/base/power/opp/debugfs.c diff --git a/drivers/base/power/opp/Makefile b/drivers/base/power/opp/Makefile index 33c1e18c41a4..19837ef04d8e 100644 --- a/drivers/base/power/opp/Makefile +++ b/drivers/base/power/opp/Makefile @@ -1,2 +1,3 @@ ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG obj-y += core.o cpu.o +obj-$(CONFIG_DEBUG_FS) += debugfs.o diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index b8e76f75073b..6aa172be6e8e 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -463,6 +463,7 @@ static void _kfree_list_dev_rcu(struct rcu_head *head) static void _remove_list_dev(struct device_list_opp *list_dev, struct device_opp *dev_opp) { + opp_debug_unregister(list_dev, dev_opp); list_del(&list_dev->node); call_srcu(&dev_opp->srcu_head.srcu, &list_dev->rcu_head, _kfree_list_dev_rcu); @@ -472,6 +473,7 @@ struct device_list_opp *_add_list_dev(const struct device *dev, struct device_opp *dev_opp) { struct device_list_opp *list_dev; + int ret; list_dev = kzalloc(sizeof(*list_dev), GFP_KERNEL); if (!list_dev) @@ -481,6 +483,12 @@ struct device_list_opp *_add_list_dev(const struct device *dev, list_dev->dev = dev; list_add_rcu(&list_dev->node, &dev_opp->dev_list); + /* Create debugfs entries for the dev_opp */ + ret = opp_debug_register(list_dev, dev_opp); + if (ret) + dev_err(dev, "%s: Failed to register opp debugfs (%d)\n", + __func__, ret); + return list_dev; } @@ -596,6 +604,7 @@ static void _opp_remove(struct device_opp *dev_opp, */ if (notify) srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_REMOVE, opp); + opp_debug_remove_one(opp); list_del_rcu(&opp->node); call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu); @@ -673,6 +682,7 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, { struct dev_pm_opp *opp; struct list_head *head = &dev_opp->opp_list; + int ret; /* * Insert new OPP in order of increasing frequency and discard if @@ -703,6 +713,11 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, new_opp->dev_opp = dev_opp; list_add_rcu(&new_opp->node, head); + ret = opp_debug_create_one(new_opp, dev_opp); + if (ret) + dev_err(dev, "%s: Failed to register opp to debugfs (%d)\n", + __func__, ret); + return 0; } @@ -889,12 +904,14 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) /* OPP to select on device suspend */ if (of_property_read_bool(np, "opp-suspend")) { - if (dev_opp->suspend_opp) + if (dev_opp->suspend_opp) { dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n", __func__, dev_opp->suspend_opp->rate, new_opp->rate); - else + } else { + new_opp->suspend = true; dev_opp->suspend_opp = new_opp; + } } if (new_opp->clock_latency_ns > dev_opp->clock_latency_ns_max) diff --git a/drivers/base/power/opp/debugfs.c b/drivers/base/power/opp/debugfs.c new file mode 100644 index 000000000000..ddfe4773e922 --- /dev/null +++ b/drivers/base/power/opp/debugfs.c @@ -0,0 +1,219 @@ +/* + * Generic OPP debugfs interface + * + * Copyright (C) 2015-2016 Viresh Kumar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +#include "opp.h" + +static struct dentry *rootdir; + +static void opp_set_dev_name(const struct device *dev, char *name) +{ + if (dev->parent) + snprintf(name, NAME_MAX, "%s-%s", dev_name(dev->parent), + dev_name(dev)); + else + snprintf(name, NAME_MAX, "%s", dev_name(dev)); +} + +void opp_debug_remove_one(struct dev_pm_opp *opp) +{ + debugfs_remove_recursive(opp->dentry); +} + +int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp) +{ + struct dentry *pdentry = dev_opp->dentry; + struct dentry *d; + char name[25]; /* 20 chars for 64 bit value + 5 (opp:\0) */ + + /* Rate is unique to each OPP, use it to give opp-name */ + snprintf(name, sizeof(name), "opp:%lu", opp->rate); + + /* Create per-opp directory */ + d = debugfs_create_dir(name, pdentry); + if (!d) + return -ENOMEM; + + if (!debugfs_create_bool("available", S_IRUGO, d, &opp->available)) + return -ENOMEM; + + if (!debugfs_create_bool("dynamic", S_IRUGO, d, &opp->dynamic)) + return -ENOMEM; + + if (!debugfs_create_bool("turbo", S_IRUGO, d, &opp->turbo)) + return -ENOMEM; + + if (!debugfs_create_bool("suspend", S_IRUGO, d, &opp->suspend)) + return -ENOMEM; + + if (!debugfs_create_ulong("rate_hz", S_IRUGO, d, &opp->rate)) + return -ENOMEM; + + if (!debugfs_create_ulong("u_volt_target", S_IRUGO, d, &opp->u_volt)) + return -ENOMEM; + + if (!debugfs_create_ulong("u_volt_min", S_IRUGO, d, &opp->u_volt_min)) + return -ENOMEM; + + if (!debugfs_create_ulong("u_volt_max", S_IRUGO, d, &opp->u_volt_max)) + return -ENOMEM; + + if (!debugfs_create_ulong("u_amp", S_IRUGO, d, &opp->u_amp)) + return -ENOMEM; + + if (!debugfs_create_ulong("clock_latency_ns", S_IRUGO, d, + &opp->clock_latency_ns)) + return -ENOMEM; + + opp->dentry = d; + return 0; +} + +static int device_opp_debug_create_dir(struct device_list_opp *list_dev, + struct device_opp *dev_opp) +{ + const struct device *dev = list_dev->dev; + struct dentry *d; + + opp_set_dev_name(dev, dev_opp->dentry_name); + + /* Create device specific directory */ + d = debugfs_create_dir(dev_opp->dentry_name, rootdir); + if (!d) { + dev_err(dev, "%s: Failed to create debugfs dir\n", __func__); + return -ENOMEM; + } + + list_dev->dentry = d; + dev_opp->dentry = d; + + return 0; +} + +static int device_opp_debug_create_link(struct device_list_opp *list_dev, + struct device_opp *dev_opp) +{ + const struct device *dev = list_dev->dev; + char name[NAME_MAX]; + struct dentry *d; + + opp_set_dev_name(list_dev->dev, name); + + /* Create device specific directory link */ + d = debugfs_create_symlink(name, rootdir, dev_opp->dentry_name); + if (!d) { + dev_err(dev, "%s: Failed to create link\n", __func__); + return -ENOMEM; + } + + list_dev->dentry = d; + + return 0; +} + +/** + * opp_debug_register - add a device opp node to the debugfs 'opp' directory + * @list_dev: list-dev pointer for device + * @dev_opp: the device-opp being added + * + * Dynamically adds device specific directory in debugfs 'opp' directory. If the + * device-opp is shared with other devices, then links will be created for all + * devices except the first. + * + * Return: 0 on success, otherwise negative error. + */ +int opp_debug_register(struct device_list_opp *list_dev, + struct device_opp *dev_opp) +{ + if (!rootdir) { + pr_debug("%s: Uninitialized rootdir\n", __func__); + return -EINVAL; + } + + if (dev_opp->dentry) + return device_opp_debug_create_link(list_dev, dev_opp); + + return device_opp_debug_create_dir(list_dev, dev_opp); +} + +static void opp_migrate_dentry(struct device_list_opp *list_dev, + struct device_opp *dev_opp) +{ + struct device_list_opp *new_dev; + const struct device *dev; + struct dentry *dentry; + + /* Look for next list-dev */ + list_for_each_entry(new_dev, &dev_opp->dev_list, node) + if (new_dev != list_dev) + break; + + /* new_dev is guaranteed to be valid here */ + dev = new_dev->dev; + debugfs_remove_recursive(new_dev->dentry); + + opp_set_dev_name(dev, dev_opp->dentry_name); + + dentry = debugfs_rename(rootdir, list_dev->dentry, rootdir, + dev_opp->dentry_name); + if (!dentry) { + dev_err(dev, "%s: Failed to rename link from: %s to %s\n", + __func__, dev_name(list_dev->dev), dev_name(dev)); + return; + } + + new_dev->dentry = dentry; + dev_opp->dentry = dentry; +} + +/** + * opp_debug_unregister - remove a device opp node from debugfs opp directory + * @list_dev: list-dev pointer for device + * @dev_opp: the device-opp being removed + * + * Dynamically removes device specific directory from debugfs 'opp' directory. + */ +void opp_debug_unregister(struct device_list_opp *list_dev, + struct device_opp *dev_opp) +{ + if (list_dev->dentry == dev_opp->dentry) { + /* Move the real dentry object under another device */ + if (!list_is_singular(&dev_opp->dev_list)) { + opp_migrate_dentry(list_dev, dev_opp); + goto out; + } + dev_opp->dentry = NULL; + } + + debugfs_remove_recursive(list_dev->dentry); + +out: + list_dev->dentry = NULL; +} + +static int __init opp_debug_init(void) +{ + /* Create /sys/kernel/debug/opp directory */ + rootdir = debugfs_create_dir("opp", NULL); + if (!rootdir) { + pr_err("%s: Failed to create root directory\n", __func__); + return -ENOMEM; + } + + return 0; +} +core_initcall(opp_debug_init); diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index 7366b2aa8997..a6bd8d2c2b47 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -53,6 +54,7 @@ extern struct mutex dev_opp_list_lock; * @dynamic: not-created from static DT entries. * @available: true/false - marks if this OPP as available or not * @turbo: true if turbo (boost) OPP + * @suspend: true if suspend OPP * @rate: Frequency in hertz * @u_volt: Target voltage in microvolts corresponding to this OPP * @u_volt_min: Minimum voltage in microvolts corresponding to this OPP @@ -63,6 +65,7 @@ extern struct mutex dev_opp_list_lock; * @dev_opp: points back to the device_opp struct this opp belongs to * @rcu_head: RCU callback head used for deferred freeing * @np: OPP's device node. + * @dentry: debugfs dentry pointer (per opp) * * This structure stores the OPP information for a given device. */ @@ -72,6 +75,7 @@ struct dev_pm_opp { bool available; bool dynamic; bool turbo; + bool suspend; unsigned long rate; unsigned long u_volt; @@ -84,6 +88,10 @@ struct dev_pm_opp { struct rcu_head rcu_head; struct device_node *np; + +#ifdef CONFIG_DEBUG_FS + struct dentry *dentry; +#endif }; /** @@ -91,6 +99,7 @@ struct dev_pm_opp { * @node: list node * @dev: device to which the struct object belongs * @rcu_head: RCU callback head used for deferred freeing + * @dentry: debugfs dentry pointer (per device) * * This is an internal data structure maintaining the list of devices that are * managed by 'struct device_opp'. @@ -99,6 +108,10 @@ struct device_list_opp { struct list_head node; const struct device *dev; struct rcu_head rcu_head; + +#ifdef CONFIG_DEBUG_FS + struct dentry *dentry; +#endif }; /** @@ -114,6 +127,8 @@ struct device_list_opp { * @opp_list: list of opps * @np: struct device_node pointer for opp's DT node. * @shared_opp: OPP is shared between multiple devices. + * @dentry: debugfs dentry pointer of the real device directory (not links). + * @dentry_name: Name of the real dentry. * * This is an internal data structure maintaining the link to opps attached to * a device. This structure is not meant to be shared to users as it is @@ -135,6 +150,11 @@ struct device_opp { unsigned long clock_latency_ns_max; bool shared_opp; struct dev_pm_opp *suspend_opp; + +#ifdef CONFIG_DEBUG_FS + struct dentry *dentry; + char dentry_name[NAME_MAX]; +#endif }; /* Routines internal to opp core */ @@ -143,4 +163,26 @@ struct device_list_opp *_add_list_dev(const struct device *dev, struct device_opp *dev_opp); struct device_node *_of_get_opp_desc_node(struct device *dev); +#ifdef CONFIG_DEBUG_FS +void opp_debug_remove_one(struct dev_pm_opp *opp); +int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp); +int opp_debug_register(struct device_list_opp *list_dev, + struct device_opp *dev_opp); +void opp_debug_unregister(struct device_list_opp *list_dev, + struct device_opp *dev_opp); +#else +static inline void opp_debug_remove_one(struct dev_pm_opp *opp) {} + +static inline int opp_debug_create_one(struct dev_pm_opp *opp, + struct device_opp *dev_opp) +{ return 0; } +static inline int opp_debug_register(struct device_list_opp *list_dev, + struct device_opp *dev_opp) +{ return 0; } + +static inline void opp_debug_unregister(struct device_list_opp *list_dev, + struct device_opp *dev_opp) +{ } +#endif /* DEBUG_FS */ + #endif /* __DRIVER_OPP_H__ */ From 10f4691214f81fad8e5ed2273ac72c1b9a7e22cf Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 11 Nov 2015 08:10:54 +0530 Subject: [PATCH 066/797] PM / OPP: Add "opp-supported-hw" binding We may want to enable only a subset of OPPs, from the bigger list of OPPs, based on what version of the hardware we are running on. This would enable us to not duplicate OPP tables for every version of the hardware we support. To enable that, this patch defines a new property 'opp-supported-hw'. It can support any number of hierarchy levels of the versions the hardware follows. And based on the selected hardware versions, we can pick only the relevant OPPs at runtime. Reviewed-by: Stephen Boyd Acked-by: Rob Herring Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki (cherry picked from commit 1c4d12de2719dfdf27c6dab31e7a5641ee293c94) Signed-off-by: Alex Shi --- Documentation/devicetree/bindings/opp/opp.txt | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index 0cb44dc21f97..d072fa0ffbd4 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -123,6 +123,26 @@ Optional properties: - opp-suspend: Marks the OPP to be used during device suspend. Only one OPP in the table should have this. +- opp-supported-hw: This enables us to select only a subset of OPPs from the + larger OPP table, based on what version of the hardware we are running on. We + still can't have multiple nodes with the same opp-hz value in OPP table. + + It's an user defined array containing a hierarchy of hardware version numbers, + supported by the OPP. For example: a platform with hierarchy of three levels + of versions (A, B and C), this field should be like , where X + corresponds to Version hierarchy A, Y corresponds to version hierarchy B and Z + corresponds to version hierarchy C. + + Each level of hierarchy is represented by a 32 bit value, and so there can be + only 32 different supported version per hierarchy. i.e. 1 bit per version. A + value of 0xFFFFFFFF will enable the OPP for all versions for that hierarchy + level. And a value of 0x00000000 will disable the OPP completely, and so we + never want that to happen. + + If 32 values aren't sufficient for a version hierarchy, than that version + hierarchy can be contained in multiple 32 bit values. i.e. in the + above example, Z1 & Z2 refer to the version hierarchy Z. + - status: Marks the node enabled/disabled. Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together. @@ -463,3 +483,48 @@ Example 5: Multiple OPP tables }; }; }; + +Example 6: opp-supported-hw +(example: three level hierarchy of versions: cuts, substrate and process) + +/ { + cpus { + cpu@0 { + compatible = "arm,cortex-a7"; + ... + + cpu-supply = <&cpu_supply> + operating-points-v2 = <&cpu0_opp_table_slow>; + }; + }; + + opp_table { + compatible = "operating-points-v2"; + status = "okay"; + opp-shared; + + opp00 { + /* + * Supports all substrate and process versions for 0xF + * cuts, i.e. only first four cuts. + */ + opp-supported-hw = <0xF 0xFFFFFFFF 0xFFFFFFFF> + opp-hz = /bits/ 64 <600000000>; + opp-microvolt = <900000 915000 925000>; + ... + }; + + opp01 { + /* + * Supports: + * - cuts: only one, 6th cut (represented by 6th bit). + * - substrate: supports 16 different substrate versions + * - process: supports 9 different process versions + */ + opp-supported-hw = <0x20 0xff0000ff 0x0000f4f0> + opp-hz = /bits/ 64 <800000000>; + opp-microvolt = <900000 915000 925000>; + ... + }; + }; +}; From 2470ae172511e862b67f3a3eaa5586f16da30741 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 11 Nov 2015 08:10:55 +0530 Subject: [PATCH 067/797] PM / OPP: Add {opp-microvolt|opp-microamp}- binding Depending on the version of hardware or its properties, which are only known at runtime, various properties of the OPP can change. For example, an OPP with frequency 1.2 GHz, may have different voltage/current requirements based on the version of the hardware it is running on. In order to not replicate the same OPP tables for varying values of all such fields, this commit introduces the concept of opp-property-. The can be chosen by the platform at runtime, and OPPs will be initialized depending on that name string. Currently support is extended for the following properties: - opp-microvolt- - opp-microamp- If the name string isn't provided by the platform, or if it is provided but doesn't match the properties present in the OPP node, we will fall back to the original properties without the - string, if they are available. Reviewed-by: Stephen Boyd Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki (cherry picked from commit ffdb8cc7a27c89175e541e68e2a73f1f63ab8c6b) Signed-off-by: Alex Shi --- Documentation/devicetree/bindings/opp/opp.txt | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index d072fa0ffbd4..a3e7f0d5e1fb 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -100,6 +100,14 @@ Optional properties: Entries for multiple regulators must be present in the same order as regulators are specified in device's DT node. +- opp-microvolt-: Named opp-microvolt property. This is exactly similar to + the above opp-microvolt property, but allows multiple voltage ranges to be + provided for the same OPP. At runtime, the platform can pick a and + matching opp-microvolt- property will be enabled for all OPPs. If the + platform doesn't pick a specific or the doesn't match with any + opp-microvolt- properties, then opp-microvolt property shall be used, if + present. + - opp-microamp: The maximum current drawn by the device in microamperes considering system specific parameters (such as transients, process, aging, maximum operating temperature range etc.) as necessary. This may be used to @@ -112,6 +120,9 @@ Optional properties: for few regulators, then this should be marked as zero for them. If it isn't required for any regulator, then this property need not be present. +- opp-microamp-: Named opp-microamp property. Similar to + opp-microvolt- property, but for microamp instead. + - clock-latency-ns: Specifies the maximum possible transition latency (in nanoseconds) for switching to this OPP from any other OPP. @@ -528,3 +539,39 @@ Example 6: opp-supported-hw }; }; }; + +Example 7: opp-microvolt-, opp-microamp-: +(example: device with two possible microvolt ranges: slow and fast) + +/ { + cpus { + cpu@0 { + compatible = "arm,cortex-a7"; + ... + + operating-points-v2 = <&cpu0_opp_table>; + }; + }; + + cpu0_opp_table: opp_table0 { + compatible = "operating-points-v2"; + opp-shared; + + opp00 { + opp-hz = /bits/ 64 <1000000000>; + opp-microvolt-slow = <900000 915000 925000>; + opp-microvolt-fast = <970000 975000 985000>; + opp-microamp-slow = <70000>; + opp-microamp-fast = <71000>; + }; + + opp01 { + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt-slow = <900000 915000 925000>, /* Supply vcc0 */ + <910000 925000 935000>; /* Supply vcc1 */ + opp-microvolt-fast = <970000 975000 985000>, /* Supply vcc0 */ + <960000 965000 975000>; /* Supply vcc1 */ + opp-microamp = <70000>; /* Will be used for both slow/fast */ + }; + }; +}; From 2e86d07d460dbb7949fceffdac7845395d22b4d6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 11 Nov 2015 08:10:56 +0530 Subject: [PATCH 068/797] PM / OPP: Remove 'operating-points-names' binding These aren't used until now by any DT files and wouldn't be used now as we have a better scheme in place now, i.e. opp-property- properties. Remove the (useless) binding without breaking ABI. Reviewed-by: Stephen Boyd Acked-by: Rob Herring Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki (cherry picked from commit af87a39a5f7cf6ef252b1aec3e2e6508a40e51f1) Signed-off-by: Alex Shi --- Documentation/devicetree/bindings/opp/opp.txt | 62 +------------------ 1 file changed, 2 insertions(+), 60 deletions(-) diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index a3e7f0d5e1fb..24eac9a97749 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -45,21 +45,10 @@ Devices supporting OPPs must set their "operating-points-v2" property with phandle to a OPP table in their DT node. The OPP core will use this phandle to find the operating points for the device. -Devices may want to choose OPP tables at runtime and so can provide a list of -phandles here. But only *one* of them should be chosen at runtime. This must be -accompanied by a corresponding "operating-points-names" property, to uniquely -identify the OPP tables. - If required, this can be extended for SoC vendor specfic bindings. Such bindings should be documented as Documentation/devicetree/bindings/power/-opp.txt and should have a compatible description like: "operating-points-v2-". -Optional properties: -- operating-points-names: Names of OPP tables (required if multiple OPP - tables are present), to uniquely identify them. The same list must be present - for all the CPUs which are sharing clock/voltage rails and hence the OPP - tables. - * OPP Table Node This describes the OPPs belonging to a device. This node can have following @@ -448,54 +437,7 @@ Example 4: Handling multiple regulators }; }; -Example 5: Multiple OPP tables - -/ { - cpus { - cpu@0 { - compatible = "arm,cortex-a7"; - ... - - cpu-supply = <&cpu_supply> - operating-points-v2 = <&cpu0_opp_table_slow>, <&cpu0_opp_table_fast>; - operating-points-names = "slow", "fast"; - }; - }; - - cpu0_opp_table_slow: opp_table_slow { - compatible = "operating-points-v2"; - status = "okay"; - opp-shared; - - opp00 { - opp-hz = /bits/ 64 <600000000>; - ... - }; - - opp01 { - opp-hz = /bits/ 64 <800000000>; - ... - }; - }; - - cpu0_opp_table_fast: opp_table_fast { - compatible = "operating-points-v2"; - status = "okay"; - opp-shared; - - opp10 { - opp-hz = /bits/ 64 <1000000000>; - ... - }; - - opp11 { - opp-hz = /bits/ 64 <1100000000>; - ... - }; - }; -}; - -Example 6: opp-supported-hw +Example 5: opp-supported-hw (example: three level hierarchy of versions: cuts, substrate and process) / { @@ -540,7 +482,7 @@ Example 6: opp-supported-hw }; }; -Example 7: opp-microvolt-, opp-microamp-: +Example 6: opp-microvolt-, opp-microamp-: (example: device with two possible microvolt ranges: slow and fast) / { From 068d12e6dda9460ac0be82d25b9ec2dccbf7f197 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 11 Nov 2015 08:10:57 +0530 Subject: [PATCH 069/797] PM / OPP: Rename OPP nodes as opp@ It would be better to name OPP nodes as opp@ as that will ensure that multiple DT nodes don't contain the same frequency. Of course we expect the writer to name the node with its opp-hz frequency and not any other frequency. And that will let the compile error out if multiple nodes are using the same opp-hz frequency. Suggested-by: Stephen Boyd Reviewed-by: Stephen Boyd Acked-by: Rob Herring Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki (cherry picked from commit 754dcf35f34698661801ae1d391efa02affe83a7) Signed-off-by: Alex Shi --- Documentation/devicetree/bindings/opp/opp.txt | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index 24eac9a97749..601256fe8c0d 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -177,20 +177,20 @@ Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together. compatible = "operating-points-v2"; opp-shared; - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000 975000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; - opp01 { + opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; opp-microvolt = <980000 1000000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; - opp02 { + opp@1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1025000>; clock-latency-ns = <290000>; @@ -256,20 +256,20 @@ independently. * independently. */ - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000 975000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; - opp01 { + opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; opp-microvolt = <980000 1000000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; - opp02 { + opp@1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1025000>; opp-microamp = <90000; @@ -332,20 +332,20 @@ DVFS state together. compatible = "operating-points-v2"; opp-shared; - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000 975000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; - opp01 { + opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; opp-microvolt = <980000 1000000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; - opp02 { + opp@1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1025000>; opp-microamp = <90000>; @@ -358,20 +358,20 @@ DVFS state together. compatible = "operating-points-v2"; opp-shared; - opp10 { + opp@1300000000 { opp-hz = /bits/ 64 <1300000000>; opp-microvolt = <1045000 1050000 1055000>; opp-microamp = <95000>; clock-latency-ns = <400000>; opp-suspend; }; - opp11 { + opp@1400000000 { opp-hz = /bits/ 64 <1400000000>; opp-microvolt = <1075000>; opp-microamp = <100000>; clock-latency-ns = <400000>; }; - opp12 { + opp@1500000000 { opp-hz = /bits/ 64 <1500000000>; opp-microvolt = <1010000 1100000 1110000>; opp-microamp = <95000>; @@ -398,7 +398,7 @@ Example 4: Handling multiple regulators compatible = "operating-points-v2"; opp-shared; - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000>, /* Supply 0 */ <960000>, /* Supply 1 */ @@ -411,7 +411,7 @@ Example 4: Handling multiple regulators /* OR */ - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000 975000 985000>, /* Supply 0 */ <960000 965000 975000>, /* Supply 1 */ @@ -424,7 +424,7 @@ Example 4: Handling multiple regulators /* OR */ - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000 975000 985000>, /* Supply 0 */ <960000 965000 975000>, /* Supply 1 */ @@ -456,7 +456,7 @@ Example 5: opp-supported-hw status = "okay"; opp-shared; - opp00 { + opp@600000000 { /* * Supports all substrate and process versions for 0xF * cuts, i.e. only first four cuts. @@ -467,7 +467,7 @@ Example 5: opp-supported-hw ... }; - opp01 { + opp@800000000 { /* * Supports: * - cuts: only one, 6th cut (represented by 6th bit). @@ -499,7 +499,7 @@ Example 6: opp-microvolt-, opp-microamp-: compatible = "operating-points-v2"; opp-shared; - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt-slow = <900000 915000 925000>; opp-microvolt-fast = <970000 975000 985000>; @@ -507,7 +507,7 @@ Example 6: opp-microvolt-, opp-microamp-: opp-microamp-fast = <71000>; }; - opp01 { + opp@1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt-slow = <900000 915000 925000>, /* Supply vcc0 */ <910000 925000 935000>; /* Supply vcc1 */ From d03b18f790c87a0ab9385ba33ba4f706630cc198 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 19 Nov 2015 09:13:56 +0530 Subject: [PATCH 070/797] PM / OPP: Add missing doc comments Few doc-style comments were missing, add them. Rearrange another one to match the sequence within the structure. Signed-off-by: Viresh Kumar Acked-by: Pavel Machek Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit dc4e7b1fa20a840d2317fcfdaa1064fc09d2afcb) Signed-off-by: Alex Shi --- drivers/base/power/opp/opp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index a6bd8d2c2b47..b8880c7f8be1 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -51,8 +51,8 @@ extern struct mutex dev_opp_list_lock; * are protected by the dev_opp_list_lock for integrity. * IMPORTANT: the opp nodes should be maintained in increasing * order. - * @dynamic: not-created from static DT entries. * @available: true/false - marks if this OPP as available or not + * @dynamic: not-created from static DT entries. * @turbo: true if turbo (boost) OPP * @suspend: true if suspend OPP * @rate: Frequency in hertz @@ -126,7 +126,9 @@ struct device_list_opp { * @dev_list: list of devices that share these OPPs * @opp_list: list of opps * @np: struct device_node pointer for opp's DT node. + * @clock_latency_ns_max: Max clock latency in nanoseconds. * @shared_opp: OPP is shared between multiple devices. + * @suspend_opp: Pointer to OPP to be used during device suspend. * @dentry: debugfs dentry pointer of the real device directory (not links). * @dentry_name: Name of the real dentry. * From ae6a227802225358803c9749a703d11741229c86 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 9 Dec 2015 08:01:46 +0530 Subject: [PATCH 071/797] PM / OPP: Parse 'opp-supported-hw' binding OPP bindings allow a platform to enable OPPs based on the version of the hardware they are used for. Add support to the OPP-core to parse these bindings, by introducing dev_pm_opp_{set|put}_supported_hw() APIs. Signed-off-by: Viresh Kumar Tested-by: Lee Jones Signed-off-by: Rafael J. Wysocki (cherry picked from commit 7de36b0aa51a5a59e28fb2da768fa3ab07de0674) Signed-off-by: Alex Shi --- drivers/base/power/opp/core.c | 148 ++++++++++++++++++++++++++++++++++ drivers/base/power/opp/opp.h | 5 ++ include/linux/pm_opp.h | 13 +++ 3 files changed, 166 insertions(+) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 6aa172be6e8e..55cf1a99b532 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -559,6 +559,9 @@ static void _remove_device_opp(struct device_opp *dev_opp) if (!list_empty(&dev_opp->opp_list)) return; + if (dev_opp->supported_hw) + return; + list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp, node); @@ -833,6 +836,145 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev) return 0; } +/** + * dev_pm_opp_set_supported_hw() - Set supported platforms + * @dev: Device for which supported-hw has to be set. + * @versions: Array of hierarchy of versions to match. + * @count: Number of elements in the array. + * + * This is required only for the V2 bindings, and it enables a platform to + * specify the hierarchy of versions it supports. OPP layer will then enable + * OPPs, which are available for those versions, based on its 'opp-supported-hw' + * property. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks + * to keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex cannot be locked. + */ +int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, + unsigned int count) +{ + struct device_opp *dev_opp; + int ret = 0; + + /* Hold our list modification lock here */ + mutex_lock(&dev_opp_list_lock); + + dev_opp = _add_device_opp(dev); + if (!dev_opp) { + ret = -ENOMEM; + goto unlock; + } + + /* Make sure there are no concurrent readers while updating dev_opp */ + WARN_ON(!list_empty(&dev_opp->opp_list)); + + /* Do we already have a version hierarchy associated with dev_opp? */ + if (dev_opp->supported_hw) { + dev_err(dev, "%s: Already have supported hardware list\n", + __func__); + ret = -EBUSY; + goto err; + } + + dev_opp->supported_hw = kmemdup(versions, count * sizeof(*versions), + GFP_KERNEL); + if (!dev_opp->supported_hw) { + ret = -ENOMEM; + goto err; + } + + dev_opp->supported_hw_count = count; + mutex_unlock(&dev_opp_list_lock); + return 0; + +err: + _remove_device_opp(dev_opp); +unlock: + mutex_unlock(&dev_opp_list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw); + +/** + * dev_pm_opp_put_supported_hw() - Releases resources blocked for supported hw + * @dev: Device for which supported-hw has to be set. + * + * This is required only for the V2 bindings, and is called for a matching + * dev_pm_opp_set_supported_hw(). Until this is called, the device_opp structure + * will not be freed. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks + * to keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex cannot be locked. + */ +void dev_pm_opp_put_supported_hw(struct device *dev) +{ + struct device_opp *dev_opp; + + /* Hold our list modification lock here */ + mutex_lock(&dev_opp_list_lock); + + /* Check for existing list for 'dev' first */ + dev_opp = _find_device_opp(dev); + if (IS_ERR(dev_opp)) { + dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp)); + goto unlock; + } + + /* Make sure there are no concurrent readers while updating dev_opp */ + WARN_ON(!list_empty(&dev_opp->opp_list)); + + if (!dev_opp->supported_hw) { + dev_err(dev, "%s: Doesn't have supported hardware list\n", + __func__); + goto unlock; + } + + kfree(dev_opp->supported_hw); + dev_opp->supported_hw = NULL; + dev_opp->supported_hw_count = 0; + + /* Try freeing device_opp if this was the last blocking resource */ + _remove_device_opp(dev_opp); + +unlock: + mutex_unlock(&dev_opp_list_lock); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw); + +static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp, + struct device_node *np) +{ + unsigned int count = dev_opp->supported_hw_count; + u32 version; + int ret; + + if (!dev_opp->supported_hw) + return true; + + while (count--) { + ret = of_property_read_u32_index(np, "opp-supported-hw", count, + &version); + if (ret) { + dev_warn(dev, "%s: failed to read opp-supported-hw property at index %d: %d\n", + __func__, count, ret); + return false; + } + + /* Both of these are bitwise masks of the versions */ + if (!(version & dev_opp->supported_hw[count])) + return false; + } + + return true; +} + /** * _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings) * @dev: device for which we do this operation @@ -879,6 +1021,12 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) goto free_opp; } + /* Check if the OPP supports hardware's hierarchy of versions or not */ + if (!_opp_is_supported(dev, dev_opp, np)) { + dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate); + goto free_opp; + } + /* * Rate is defined as an unsigned long in clk API, and so casting * explicitly to its type. Must be fixed once rate is 64 bit diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index b8880c7f8be1..70f4564a6ab9 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -129,6 +129,8 @@ struct device_list_opp { * @clock_latency_ns_max: Max clock latency in nanoseconds. * @shared_opp: OPP is shared between multiple devices. * @suspend_opp: Pointer to OPP to be used during device suspend. + * @supported_hw: Array of version number to support. + * @supported_hw_count: Number of elements in supported_hw array. * @dentry: debugfs dentry pointer of the real device directory (not links). * @dentry_name: Name of the real dentry. * @@ -153,6 +155,9 @@ struct device_opp { bool shared_opp; struct dev_pm_opp *suspend_opp; + unsigned int *supported_hw; + unsigned int supported_hw_count; + #ifdef CONFIG_DEBUG_FS struct dentry *dentry; char dentry_name[NAME_MAX]; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 9a2e50337af9..3a85110242f0 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -55,6 +55,9 @@ int dev_pm_opp_enable(struct device *dev, unsigned long freq); int dev_pm_opp_disable(struct device *dev, unsigned long freq); struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev); +int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, + unsigned int count); +void dev_pm_opp_put_supported_hw(struct device *dev); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -129,6 +132,16 @@ static inline struct srcu_notifier_head *dev_pm_opp_get_notifier( { return ERR_PTR(-EINVAL); } + +static inline int dev_pm_opp_set_supported_hw(struct device *dev, + const u32 *versions, + unsigned int count) +{ + return -EINVAL; +} + +static inline void dev_pm_opp_put_supported_hw(struct device *dev) {} + #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) From 6a0f8e913de021196bded343c6742fa0db34a1a5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 9 Dec 2015 08:01:47 +0530 Subject: [PATCH 072/797] PM / OPP: Parse 'opp--' bindings OPP bindings (for few properties) allow a platform to choose a value/range among a set of available options. The options are present as opp--, where the platform needs to supply the string. The OPP properties which allow such an option are: opp-microvolt and opp-microamp. Add support to the OPP-core to parse these bindings, by introducing dev_pm_opp_{set|put}_prop_name() APIs. Signed-off-by: Viresh Kumar Tested-by: Lee Jones Signed-off-by: Rafael J. Wysocki (cherry picked from commit 01fb4d3c39d35b725441e8a9a26b3f3ad67793ed) Signed-off-by: Alex Shi --- drivers/base/power/opp/core.c | 165 ++++++++++++++++++++++++++++++---- drivers/base/power/opp/opp.h | 2 + include/linux/pm_opp.h | 9 ++ 3 files changed, 161 insertions(+), 15 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 55cf1a99b532..5c01fec1ed14 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -562,6 +562,9 @@ static void _remove_device_opp(struct device_opp *dev_opp) if (dev_opp->supported_hw) return; + if (dev_opp->prop_name) + return; + list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp, node); @@ -794,35 +797,48 @@ static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt, } /* TODO: Support multiple regulators */ -static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev) +static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, + struct device_opp *dev_opp) { u32 microvolt[3] = {0}; u32 val; int count, ret; + struct property *prop = NULL; + char name[NAME_MAX]; - /* Missing property isn't a problem, but an invalid entry is */ - if (!of_find_property(opp->np, "opp-microvolt", NULL)) - return 0; + /* Search for "opp-microvolt-" */ + if (dev_opp->prop_name) { + sprintf(name, "opp-microvolt-%s", dev_opp->prop_name); + prop = of_find_property(opp->np, name, NULL); + } - count = of_property_count_u32_elems(opp->np, "opp-microvolt"); + if (!prop) { + /* Search for "opp-microvolt" */ + name[13] = '\0'; + prop = of_find_property(opp->np, name, NULL); + + /* Missing property isn't a problem, but an invalid entry is */ + if (!prop) + return 0; + } + + count = of_property_count_u32_elems(opp->np, name); if (count < 0) { - dev_err(dev, "%s: Invalid opp-microvolt property (%d)\n", - __func__, count); + dev_err(dev, "%s: Invalid %s property (%d)\n", + __func__, name, count); return count; } /* There can be one or three elements here */ if (count != 1 && count != 3) { - dev_err(dev, "%s: Invalid number of elements in opp-microvolt property (%d)\n", - __func__, count); + dev_err(dev, "%s: Invalid number of elements in %s property (%d)\n", + __func__, name, count); return -EINVAL; } - ret = of_property_read_u32_array(opp->np, "opp-microvolt", microvolt, - count); + ret = of_property_read_u32_array(opp->np, name, microvolt, count); if (ret) { - dev_err(dev, "%s: error parsing opp-microvolt: %d\n", __func__, - ret); + dev_err(dev, "%s: error parsing %s: %d\n", __func__, name, ret); return -EINVAL; } @@ -830,7 +846,20 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev) opp->u_volt_min = microvolt[1]; opp->u_volt_max = microvolt[2]; - if (!of_property_read_u32(opp->np, "opp-microamp", &val)) + /* Search for "opp-microamp-" */ + prop = NULL; + if (dev_opp->prop_name) { + sprintf(name, "opp-microamp-%s", dev_opp->prop_name); + prop = of_find_property(opp->np, name, NULL); + } + + if (!prop) { + /* Search for "opp-microamp" */ + name[12] = '\0'; + prop = of_find_property(opp->np, name, NULL); + } + + if (prop && !of_property_read_u32(opp->np, name, &val)) opp->u_amp = val; return 0; @@ -948,6 +977,112 @@ void dev_pm_opp_put_supported_hw(struct device *dev) } EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw); +/** + * dev_pm_opp_set_prop_name() - Set prop-extn name + * @dev: Device for which the regulator has to be set. + * @name: name to postfix to properties. + * + * This is required only for the V2 bindings, and it enables a platform to + * specify the extn to be used for certain property names. The properties to + * which the extension will apply are opp-microvolt and opp-microamp. OPP core + * should postfix the property name with - while looking for them. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks + * to keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex cannot be locked. + */ +int dev_pm_opp_set_prop_name(struct device *dev, const char *name) +{ + struct device_opp *dev_opp; + int ret = 0; + + /* Hold our list modification lock here */ + mutex_lock(&dev_opp_list_lock); + + dev_opp = _add_device_opp(dev); + if (!dev_opp) { + ret = -ENOMEM; + goto unlock; + } + + /* Make sure there are no concurrent readers while updating dev_opp */ + WARN_ON(!list_empty(&dev_opp->opp_list)); + + /* Do we already have a prop-name associated with dev_opp? */ + if (dev_opp->prop_name) { + dev_err(dev, "%s: Already have prop-name %s\n", __func__, + dev_opp->prop_name); + ret = -EBUSY; + goto err; + } + + dev_opp->prop_name = kstrdup(name, GFP_KERNEL); + if (!dev_opp->prop_name) { + ret = -ENOMEM; + goto err; + } + + mutex_unlock(&dev_opp_list_lock); + return 0; + +err: + _remove_device_opp(dev_opp); +unlock: + mutex_unlock(&dev_opp_list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name); + +/** + * dev_pm_opp_put_prop_name() - Releases resources blocked for prop-name + * @dev: Device for which the regulator has to be set. + * + * This is required only for the V2 bindings, and is called for a matching + * dev_pm_opp_set_prop_name(). Until this is called, the device_opp structure + * will not be freed. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks + * to keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex cannot be locked. + */ +void dev_pm_opp_put_prop_name(struct device *dev) +{ + struct device_opp *dev_opp; + + /* Hold our list modification lock here */ + mutex_lock(&dev_opp_list_lock); + + /* Check for existing list for 'dev' first */ + dev_opp = _find_device_opp(dev); + if (IS_ERR(dev_opp)) { + dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp)); + goto unlock; + } + + /* Make sure there are no concurrent readers while updating dev_opp */ + WARN_ON(!list_empty(&dev_opp->opp_list)); + + if (!dev_opp->prop_name) { + dev_err(dev, "%s: Doesn't have a prop-name\n", __func__); + goto unlock; + } + + kfree(dev_opp->prop_name); + dev_opp->prop_name = NULL; + + /* Try freeing device_opp if this was the last blocking resource */ + _remove_device_opp(dev_opp); + +unlock: + mutex_unlock(&dev_opp_list_lock); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name); + static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp, struct device_node *np) { @@ -1042,7 +1177,7 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) if (!of_property_read_u32(np, "clock-latency-ns", &val)) new_opp->clock_latency_ns = val; - ret = opp_parse_supplies(new_opp, dev); + ret = opp_parse_supplies(new_opp, dev, dev_opp); if (ret) goto free_opp; diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index 70f4564a6ab9..690638ef36ee 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -131,6 +131,7 @@ struct device_list_opp { * @suspend_opp: Pointer to OPP to be used during device suspend. * @supported_hw: Array of version number to support. * @supported_hw_count: Number of elements in supported_hw array. + * @prop_name: A name to postfix to many DT properties, while parsing them. * @dentry: debugfs dentry pointer of the real device directory (not links). * @dentry_name: Name of the real dentry. * @@ -157,6 +158,7 @@ struct device_opp { unsigned int *supported_hw; unsigned int supported_hw_count; + const char *prop_name; #ifdef CONFIG_DEBUG_FS struct dentry *dentry; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 3a85110242f0..95403d2ccaf5 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -58,6 +58,8 @@ struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev); int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count); void dev_pm_opp_put_supported_hw(struct device *dev); +int dev_pm_opp_set_prop_name(struct device *dev, const char *name); +void dev_pm_opp_put_prop_name(struct device *dev); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -142,6 +144,13 @@ static inline int dev_pm_opp_set_supported_hw(struct device *dev, static inline void dev_pm_opp_put_supported_hw(struct device *dev) {} +static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name) +{ + return -EINVAL; +} + +static inline void dev_pm_opp_put_prop_name(struct device *dev) {} + #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) From ff7d038f3c5def395c4b06c156cca9947d4b5a02 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 17 Dec 2015 19:04:38 +0100 Subject: [PATCH 073/797] PM / OPP: Fix parsing of opp-microvolt and opp-microamp properties Commit 01fb4d3c39d3 ("PM / OPP: Parse 'opp--' bindings") broke support for parsing standard opp-microvolt and opp-microamp properties. Fix it by setting 'name' string to proper value for !prop cases. Fixes: 01fb4d3c39d3 ("PM / OPP: Parse 'opp-- 'bindings") Signed-off-by: Bartlomiej Zolnierkiewicz Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki (cherry picked from commit fd8d8e63467c922be9ae4452cca2980d473477d9) Signed-off-by: Alex Shi --- drivers/base/power/opp/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 5c01fec1ed14..cd230c63aee6 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -814,7 +814,7 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, if (!prop) { /* Search for "opp-microvolt" */ - name[13] = '\0'; + sprintf(name, "opp-microvolt"); prop = of_find_property(opp->np, name, NULL); /* Missing property isn't a problem, but an invalid entry is */ @@ -855,7 +855,7 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, if (!prop) { /* Search for "opp-microamp" */ - name[12] = '\0'; + sprintf(name, "opp-microamp"); prop = of_find_property(opp->np, name, NULL); } From cb8acf6df06ef9b4a708d34e169b420e52d0c941 Mon Sep 17 00:00:00 2001 From: Pi-Cheng Chen Date: Mon, 28 Dec 2015 21:06:17 +0800 Subject: [PATCH 074/797] PM / OPP: Set cpu_dev->id in cpumask first Set cpu_dev->id in cpumask first when setting up cpumask for CPUs that share the same OPP table. This might be helpful when handling cpumask without the original CPU bitfield set. Signed-off-by: Pi-Cheng Chen Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki (cherry picked from commit d9de19b1cc013433ad293365b5b3902ec73dfd60) Signed-off-by: Alex Shi --- drivers/base/power/opp/cpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c index 7b445e88a0d5..9f0c15570f64 100644 --- a/drivers/base/power/opp/cpu.c +++ b/drivers/base/power/opp/cpu.c @@ -214,7 +214,6 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table); /* * Works only for OPP v2 bindings. * - * cpumask should be already set to mask of cpu_dev->id. * Returns -ENOENT if operating-points-v2 bindings aren't supported. */ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) @@ -230,6 +229,8 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask return -ENOENT; } + cpumask_set_cpu(cpu_dev->id, cpumask); + /* OPPs are shared ? */ if (!of_property_read_bool(np, "opp-shared")) goto put_cpu_node; From 59dfbb981e645f30e53a32222dd3bd478b242496 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 5 Jan 2016 16:15:54 +0530 Subject: [PATCH 075/797] PM / OPP: Use snprintf() instead of sprintf() sprintf() can access memory outside of the range of the character array, and is risky in some situations. The driver specified prop_name string can be longer than NAME_MAX here (only an attacker will do that though) and so blindly copying it into the character array of size NAME_MAX isn't safe. Instead we must use snprintf() here. Reported-by: Geert Uytterhoeven Signed-off-by: Viresh Kumar Acked-by: Geert Uytterhoeven Acked-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit 5ff24d601092b222340b28466e263b1c4559407e) Signed-off-by: Alex Shi --- drivers/base/power/opp/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index cd230c63aee6..cf351d3dab1c 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -808,7 +808,8 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, /* Search for "opp-microvolt-" */ if (dev_opp->prop_name) { - sprintf(name, "opp-microvolt-%s", dev_opp->prop_name); + snprintf(name, sizeof(name), "opp-microvolt-%s", + dev_opp->prop_name); prop = of_find_property(opp->np, name, NULL); } @@ -849,7 +850,8 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, /* Search for "opp-microamp-" */ prop = NULL; if (dev_opp->prop_name) { - sprintf(name, "opp-microamp-%s", dev_opp->prop_name); + snprintf(name, sizeof(name), "opp-microamp-%s", + dev_opp->prop_name); prop = of_find_property(opp->np, name, NULL); } From 5b9202b77c8d670f23581ddebd36ce12a619d9f2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Jan 2016 16:45:48 +0100 Subject: [PATCH 076/797] cpufreq: cpufreq-dt: avoid uninitialized variable warnings: gcc warns quite a bit about values returned from allocate_resources() in cpufreq-dt.c: cpufreq-dt.c: In function 'cpufreq_init': cpufreq-dt.c:327:6: error: 'cpu_dev' may be used uninitialized in this function [-Werror=maybe-uninitialized] cpufreq-dt.c:197:17: note: 'cpu_dev' was declared here cpufreq-dt.c:376:2: error: 'cpu_clk' may be used uninitialized in this function [-Werror=maybe-uninitialized] cpufreq-dt.c:199:14: note: 'cpu_clk' was declared here cpufreq-dt.c: In function 'dt_cpufreq_probe': cpufreq-dt.c:461:2: error: 'cpu_clk' may be used uninitialized in this function [-Werror=maybe-uninitialized] cpufreq-dt.c:447:14: note: 'cpu_clk' was declared here The problem is that it's slightly hard for gcc to follow return codes across PTR_ERR() calls. This patch uses explicit assignments to the "ret" variable to make it easier for gcc to verify that the code is actually correct, without the need to add a bogus initialization. Signed-off-by: Arnd Bergmann Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki (cherry picked from commit b331bc20d9281213f7fb67912638e0fb5baeb324) Signed-off-by: Alex Shi --- drivers/cpufreq/cpufreq-dt.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 90d64081ddb3..68232fef54c9 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -141,15 +141,16 @@ static int allocate_resources(int cpu, struct device **cdev, try_again: cpu_reg = regulator_get_optional(cpu_dev, reg); - if (IS_ERR(cpu_reg)) { + ret = PTR_ERR_OR_ZERO(cpu_reg); + if (ret) { /* * If cpu's regulator supply node is present, but regulator is * not yet registered, we should try defering probe. */ - if (PTR_ERR(cpu_reg) == -EPROBE_DEFER) { + if (ret == -EPROBE_DEFER) { dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n", cpu); - return -EPROBE_DEFER; + return ret; } /* Try with "cpu-supply" */ @@ -158,18 +159,16 @@ static int allocate_resources(int cpu, struct device **cdev, goto try_again; } - dev_dbg(cpu_dev, "no regulator for cpu%d: %ld\n", - cpu, PTR_ERR(cpu_reg)); + dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret); } cpu_clk = clk_get(cpu_dev, NULL); - if (IS_ERR(cpu_clk)) { + ret = PTR_ERR_OR_ZERO(cpu_clk); + if (ret) { /* put regulator */ if (!IS_ERR(cpu_reg)) regulator_put(cpu_reg); - ret = PTR_ERR(cpu_clk); - /* * If cpu's clk node is present, but clock is not yet * registered, we should try defering probe. From c070696dd69afe9d5432fde66270d29e36da8da2 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:33 +0530 Subject: [PATCH 077/797] PM / OPP: get/put regulators from OPP core This allows the OPP core to request/free the regulator resource, attached to a device OPP. The regulator device is fetched using the name provided by the driver, while calling: dev_pm_opp_set_regulator(). This will work for both OPP-v1 and v2 bindings. This is a preliminary step for moving the OPP switching logic into the OPP core. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit 9f8ea969d5cfdd4353d2adb004e8e2286b984369) Signed-off-by: Alex Shi --- drivers/base/power/opp/core.c | 111 ++++++++++++++++++++++++++++++++++ drivers/base/power/opp/opp.h | 4 ++ include/linux/pm_opp.h | 9 +++ 3 files changed, 124 insertions(+) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index cf351d3dab1c..1e22b71abf1e 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "opp.h" @@ -565,6 +566,9 @@ static void _remove_device_opp(struct device_opp *dev_opp) if (dev_opp->prop_name) return; + if (!IS_ERR_OR_NULL(dev_opp->regulator)) + return; + list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp, node); @@ -1085,6 +1089,113 @@ void dev_pm_opp_put_prop_name(struct device *dev) } EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name); +/** + * dev_pm_opp_set_regulator() - Set regulator name for the device + * @dev: Device for which regulator name is being set. + * @name: Name of the regulator. + * + * In order to support OPP switching, OPP layer needs to know the name of the + * device's regulator, as the core would be required to switch voltages as well. + * + * This must be called before any OPPs are initialized for the device. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks + * to keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex cannot be locked. + */ +int dev_pm_opp_set_regulator(struct device *dev, const char *name) +{ + struct device_opp *dev_opp; + struct regulator *reg; + int ret; + + mutex_lock(&dev_opp_list_lock); + + dev_opp = _add_device_opp(dev); + if (!dev_opp) { + ret = -ENOMEM; + goto unlock; + } + + /* This should be called before OPPs are initialized */ + if (WARN_ON(!list_empty(&dev_opp->opp_list))) { + ret = -EBUSY; + goto err; + } + + /* Already have a regulator set */ + if (WARN_ON(!IS_ERR_OR_NULL(dev_opp->regulator))) { + ret = -EBUSY; + goto err; + } + /* Allocate the regulator */ + reg = regulator_get_optional(dev, name); + if (IS_ERR(reg)) { + ret = PTR_ERR(reg); + if (ret != -EPROBE_DEFER) + dev_err(dev, "%s: no regulator (%s) found: %d\n", + __func__, name, ret); + goto err; + } + + dev_opp->regulator = reg; + + mutex_unlock(&dev_opp_list_lock); + return 0; + +err: + _remove_device_opp(dev_opp); +unlock: + mutex_unlock(&dev_opp_list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator); + +/** + * dev_pm_opp_put_regulator() - Releases resources blocked for regulator + * @dev: Device for which regulator was set. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks + * to keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex cannot be locked. + */ +void dev_pm_opp_put_regulator(struct device *dev) +{ + struct device_opp *dev_opp; + + mutex_lock(&dev_opp_list_lock); + + /* Check for existing list for 'dev' first */ + dev_opp = _find_device_opp(dev); + if (IS_ERR(dev_opp)) { + dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp)); + goto unlock; + } + + if (IS_ERR_OR_NULL(dev_opp->regulator)) { + dev_err(dev, "%s: Doesn't have regulator set\n", __func__); + goto unlock; + } + + /* Make sure there are no concurrent readers while updating dev_opp */ + WARN_ON(!list_empty(&dev_opp->opp_list)); + + regulator_put(dev_opp->regulator); + dev_opp->regulator = ERR_PTR(-EINVAL); + + /* Try freeing device_opp if this was the last blocking resource */ + _remove_device_opp(dev_opp); + +unlock: + mutex_unlock(&dev_opp_list_lock); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulator); + static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp, struct device_node *np) { diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index 690638ef36ee..416293b7da23 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -22,6 +22,8 @@ #include #include +struct regulator; + /* Lock to allow exclusive modification to the device and opp lists */ extern struct mutex dev_opp_list_lock; @@ -132,6 +134,7 @@ struct device_list_opp { * @supported_hw: Array of version number to support. * @supported_hw_count: Number of elements in supported_hw array. * @prop_name: A name to postfix to many DT properties, while parsing them. + * @regulator: Supply regulator * @dentry: debugfs dentry pointer of the real device directory (not links). * @dentry_name: Name of the real dentry. * @@ -159,6 +162,7 @@ struct device_opp { unsigned int *supported_hw; unsigned int supported_hw_count; const char *prop_name; + struct regulator *regulator; #ifdef CONFIG_DEBUG_FS struct dentry *dentry; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 95403d2ccaf5..c70a18ac9c8a 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -60,6 +60,8 @@ int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, void dev_pm_opp_put_supported_hw(struct device *dev); int dev_pm_opp_set_prop_name(struct device *dev, const char *name); void dev_pm_opp_put_prop_name(struct device *dev); +int dev_pm_opp_set_regulator(struct device *dev, const char *name); +void dev_pm_opp_put_regulator(struct device *dev); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -151,6 +153,13 @@ static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name) static inline void dev_pm_opp_put_prop_name(struct device *dev) {} +static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name) +{ + return -EINVAL; +} + +static inline void dev_pm_opp_put_regulator(struct device *dev) {} + #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) From c3fae6d33a5d32c439938410cbb1e89d8a70d918 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:34 +0530 Subject: [PATCH 078/797] PM / OPP: Disable OPPs that aren't supported by the regulator Disable any OPPs where the connected regulator isn't able to provide the specified voltage. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit 7d34d56ef3349cd5f29cf7aab6650f3414fa81b9) Signed-off-by: Alex Shi --- drivers/base/power/opp/core.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 1e22b71abf1e..71545becfca1 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -687,6 +687,22 @@ static struct dev_pm_opp *_allocate_opp(struct device *dev, return opp; } +static bool _opp_supported_by_regulators(struct dev_pm_opp *opp, + struct device_opp *dev_opp) +{ + struct regulator *reg = dev_opp->regulator; + + if (!IS_ERR(reg) && + !regulator_is_supported_voltage(reg, opp->u_volt_min, + opp->u_volt_max)) { + pr_warn("%s: OPP minuV: %lu maxuV: %lu, not supported by regulator\n", + __func__, opp->u_volt_min, opp->u_volt_max); + return false; + } + + return true; +} + static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct device_opp *dev_opp) { @@ -728,6 +744,12 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, dev_err(dev, "%s: Failed to register opp to debugfs (%d)\n", __func__, ret); + if (!_opp_supported_by_regulators(new_opp, dev_opp)) { + new_opp->available = false; + dev_warn(dev, "%s: OPP not supported by regulators (%lu)\n", + __func__, new_opp->rate); + } + return 0; } From 525680c1a93d13d236e1eaecbdf360c32bfc6917 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:35 +0530 Subject: [PATCH 079/797] PM / OPP: Introduce dev_pm_opp_get_max_volt_latency() In few use cases (like: cpufreq), it is desired to get the maximum voltage latency for changing OPPs. Add support for that. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit 655c9df961751ce21466f6e97e8033932c27a675) Signed-off-by: Alex Shi --- drivers/base/power/opp/core.c | 59 +++++++++++++++++++++++++++++++++++ include/linux/pm_opp.h | 6 ++++ 2 files changed, 65 insertions(+) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 71545becfca1..ffe2406af882 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -230,6 +230,65 @@ unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev) } EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency); +/** + * dev_pm_opp_get_max_volt_latency() - Get max voltage latency in nanoseconds + * @dev: device for which we do this operation + * + * Return: This function returns the max voltage latency in nanoseconds. + * + * Locking: This function takes rcu_read_lock(). + */ +unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) +{ + struct device_opp *dev_opp; + struct dev_pm_opp *opp; + struct regulator *reg; + unsigned long latency_ns = 0; + unsigned long min_uV = ~0, max_uV = 0; + int ret; + + rcu_read_lock(); + + dev_opp = _find_device_opp(dev); + if (IS_ERR(dev_opp)) { + rcu_read_unlock(); + return 0; + } + + reg = dev_opp->regulator; + if (IS_ERR_OR_NULL(reg)) { + /* Regulator may not be required for device */ + if (reg) + dev_err(dev, "%s: Invalid regulator (%ld)\n", __func__, + PTR_ERR(reg)); + rcu_read_unlock(); + return 0; + } + + list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) { + if (!opp->available) + continue; + + if (opp->u_volt_min < min_uV) + min_uV = opp->u_volt_min; + if (opp->u_volt_max > max_uV) + max_uV = opp->u_volt_max; + } + + rcu_read_unlock(); + + /* + * The caller needs to ensure that dev_opp (and hence the regulator) + * isn't freed, while we are executing this routine. + */ + ret = regulator_set_voltage_time(reg, min_uV, max_uV); + if (ret > 0) + latency_ns = ret * 1000; + + return latency_ns; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_volt_latency); + /** * dev_pm_opp_get_suspend_opp() - Get suspend opp * @dev: device for which we do this operation diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index c70a18ac9c8a..5daa43058ac1 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -34,6 +34,7 @@ bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp); int dev_pm_opp_get_opp_count(struct device *dev); unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev); +unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev); struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev); struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, @@ -88,6 +89,11 @@ static inline unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev) return 0; } +static inline unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) +{ + return 0; +} + static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) { return NULL; From 2f5f3fb48d3e36e6855fb658f25ce088ac6fe277 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:36 +0530 Subject: [PATCH 080/797] PM / OPP: Introduce dev_pm_opp_get_max_transition_latency() In few use cases (like: cpufreq), it is desired to get the maximum latency for changing OPPs. Add support for that. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit 2174344765f472895c076d703c9cdc58215e1393) Signed-off-by: Alex Shi --- drivers/base/power/opp/core.c | 17 +++++++++++++++++ include/linux/pm_opp.h | 6 ++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index ffe2406af882..b0f5c72f0fc3 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -289,6 +289,23 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) } EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_volt_latency); +/** + * dev_pm_opp_get_max_transition_latency() - Get max transition latency in + * nanoseconds + * @dev: device for which we do this operation + * + * Return: This function returns the max transition latency, in nanoseconds, to + * switch from one OPP to other. + * + * Locking: This function takes rcu_read_lock(). + */ +unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev) +{ + return dev_pm_opp_get_max_volt_latency(dev) + + dev_pm_opp_get_max_clock_latency(dev); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_transition_latency); + /** * dev_pm_opp_get_suspend_opp() - Get suspend opp * @dev: device for which we do this operation diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 5daa43058ac1..59da3d9e11ea 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -35,6 +35,7 @@ bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp); int dev_pm_opp_get_opp_count(struct device *dev); unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev); unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev); +unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev); struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev); struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, @@ -94,6 +95,11 @@ static inline unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) return 0; } +static inline unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev) +{ + return 0; +} + static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) { return NULL; From df8ba0ff4a9638731dbe1355b4b9fbf1b26fa5a9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:37 +0530 Subject: [PATCH 081/797] PM / OPP: Parse clock-latency and voltage-tolerance for v1 bindings V2 bindings have better support for clock-latency and voltage-tolerance and doesn't need special care. To use callbacks, like dev_pm_opp_get_max_{transition|volt}_latency(), irrespective of the bindings, the core needs to know clock-latency/voltage-tolerance for the earlier bindings. This patch reads clock-latency/voltage-tolerance from the device node, irrespective of the bindings (to keep it simple) and use them only for V1 bindings. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit 50f8cfbd5897ca182d43f4caf19937153f17a604) Signed-off-by: Alex Shi --- drivers/base/power/opp/core.c | 20 ++++++++++++++++++++ drivers/base/power/opp/opp.h | 6 ++++++ 2 files changed, 26 insertions(+) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index b0f5c72f0fc3..4fafa733a1c7 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -582,6 +582,7 @@ static struct device_opp *_add_device_opp(struct device *dev) { struct device_opp *dev_opp; struct device_list_opp *list_dev; + struct device_node *np; /* Check for existing list for 'dev' first */ dev_opp = _find_device_opp(dev); @@ -604,6 +605,21 @@ static struct device_opp *_add_device_opp(struct device *dev) return NULL; } + /* + * Only required for backward compatibility with v1 bindings, but isn't + * harmful for other cases. And so we do it unconditionally. + */ + np = of_node_get(dev->of_node); + if (np) { + u32 val; + + if (!of_property_read_u32(np, "clock-latency", &val)) + dev_opp->clock_latency_ns_max = val; + of_property_read_u32(np, "voltage-tolerance", + &dev_opp->voltage_tolerance_v1); + of_node_put(np); + } + srcu_init_notifier_head(&dev_opp->srcu_head); INIT_LIST_HEAD(&dev_opp->opp_list); @@ -861,6 +877,7 @@ static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt, { struct device_opp *dev_opp; struct dev_pm_opp *new_opp; + unsigned long tol; int ret; /* Hold our list modification lock here */ @@ -874,7 +891,10 @@ static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt, /* populate the opp table */ new_opp->rate = freq; + tol = u_volt * dev_opp->voltage_tolerance_v1 / 100; new_opp->u_volt = u_volt; + new_opp->u_volt_min = u_volt - tol; + new_opp->u_volt_max = u_volt + tol; new_opp->available = true; new_opp->dynamic = dynamic; diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index 416293b7da23..fe44beb404ba 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -138,6 +138,8 @@ struct device_list_opp { * @dentry: debugfs dentry pointer of the real device directory (not links). * @dentry_name: Name of the real dentry. * + * @voltage_tolerance_v1: In percentage, for v1 bindings only. + * * This is an internal data structure maintaining the link to opps attached to * a device. This structure is not meant to be shared to users as it is * meant for book keeping and private to OPP library. @@ -156,6 +158,10 @@ struct device_opp { struct device_node *np; unsigned long clock_latency_ns_max; + + /* For backward compatibility with v1 bindings */ + unsigned int voltage_tolerance_v1; + bool shared_opp; struct dev_pm_opp *suspend_opp; From b7737cf7aaeb35319e61acdc0166a3a06e25bd82 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:38 +0530 Subject: [PATCH 082/797] PM / OPP: Manage device clk OPP core has got almost everything now to manage device's OPP transitions, the only thing left is device's clk. Get that as well. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit d54974c2513f487e9e70fbdc79c5da51c53e23da) Signed-off-by: Alex Shi --- drivers/base/power/opp/core.c | 15 +++++++++++++++ drivers/base/power/opp/opp.h | 3 +++ 2 files changed, 18 insertions(+) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 4fafa733a1c7..7d7749ce1ce4 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -13,6 +13,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -583,6 +584,7 @@ static struct device_opp *_add_device_opp(struct device *dev) struct device_opp *dev_opp; struct device_list_opp *list_dev; struct device_node *np; + int ret; /* Check for existing list for 'dev' first */ dev_opp = _find_device_opp(dev); @@ -620,6 +622,15 @@ static struct device_opp *_add_device_opp(struct device *dev) of_node_put(np); } + /* Find clk for the device */ + dev_opp->clk = clk_get(dev, NULL); + if (IS_ERR(dev_opp->clk)) { + ret = PTR_ERR(dev_opp->clk); + if (ret != -EPROBE_DEFER) + dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__, + ret); + } + srcu_init_notifier_head(&dev_opp->srcu_head); INIT_LIST_HEAD(&dev_opp->opp_list); @@ -661,6 +672,10 @@ static void _remove_device_opp(struct device_opp *dev_opp) if (!IS_ERR_OR_NULL(dev_opp->regulator)) return; + /* Release clk */ + if (!IS_ERR(dev_opp->clk)) + clk_put(dev_opp->clk); + list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp, node); diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index fe44beb404ba..4f1bdfc7da03 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -22,6 +22,7 @@ #include #include +struct clk; struct regulator; /* Lock to allow exclusive modification to the device and opp lists */ @@ -134,6 +135,7 @@ struct device_list_opp { * @supported_hw: Array of version number to support. * @supported_hw_count: Number of elements in supported_hw array. * @prop_name: A name to postfix to many DT properties, while parsing them. + * @clk: Device's clock handle * @regulator: Supply regulator * @dentry: debugfs dentry pointer of the real device directory (not links). * @dentry_name: Name of the real dentry. @@ -168,6 +170,7 @@ struct device_opp { unsigned int *supported_hw; unsigned int supported_hw_count; const char *prop_name; + struct clk *clk; struct regulator *regulator; #ifdef CONFIG_DEBUG_FS From 6229dc0db2699e56996cb114a85eb59988e1163a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:39 +0530 Subject: [PATCH 083/797] PM / OPP: Add dev_pm_opp_set_rate() This adds a routine, dev_pm_opp_set_rate(), responsible for configuring power-supply and clock source for an OPP. The OPP is found by matching against the target_freq passed to the routine. This shall replace similar code present in most of the OPP users and help simplify them a lot. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit 6a0712f6f199e737aa5913d28ec4bd3a25de9660) Signed-off-by: Alex Shi --- drivers/base/power/opp/core.c | 176 ++++++++++++++++++++++++++++++++++ include/linux/pm_opp.h | 6 ++ 2 files changed, 182 insertions(+) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 7d7749ce1ce4..ab711c2c3e00 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -529,6 +529,182 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor); +/* + * The caller needs to ensure that device_opp (and hence the clk) isn't freed, + * while clk returned here is used. + */ +static struct clk *_get_opp_clk(struct device *dev) +{ + struct device_opp *dev_opp; + struct clk *clk; + + rcu_read_lock(); + + dev_opp = _find_device_opp(dev); + if (IS_ERR(dev_opp)) { + dev_err(dev, "%s: device opp doesn't exist\n", __func__); + clk = ERR_CAST(dev_opp); + goto unlock; + } + + clk = dev_opp->clk; + if (IS_ERR(clk)) + dev_err(dev, "%s: No clock available for the device\n", + __func__); + +unlock: + rcu_read_unlock(); + return clk; +} + +static int _set_opp_voltage(struct device *dev, struct regulator *reg, + unsigned long u_volt, unsigned long u_volt_min, + unsigned long u_volt_max) +{ + int ret; + + /* Regulator not available for device */ + if (IS_ERR(reg)) { + dev_dbg(dev, "%s: regulator not available: %ld\n", __func__, + PTR_ERR(reg)); + return 0; + } + + dev_dbg(dev, "%s: voltages (mV): %lu %lu %lu\n", __func__, u_volt_min, + u_volt, u_volt_max); + + ret = regulator_set_voltage_triplet(reg, u_volt_min, u_volt, + u_volt_max); + if (ret) + dev_err(dev, "%s: failed to set voltage (%lu %lu %lu mV): %d\n", + __func__, u_volt_min, u_volt, u_volt_max, ret); + + return ret; +} + +/** + * dev_pm_opp_set_rate() - Configure new OPP based on frequency + * @dev: device for which we do this operation + * @target_freq: frequency to achieve + * + * This configures the power-supplies and clock source to the levels specified + * by the OPP corresponding to the target_freq. + * + * Locking: This function takes rcu_read_lock(). + */ +int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) +{ + struct device_opp *dev_opp; + struct dev_pm_opp *old_opp, *opp; + struct regulator *reg; + struct clk *clk; + unsigned long freq, old_freq; + unsigned long u_volt, u_volt_min, u_volt_max; + unsigned long ou_volt, ou_volt_min, ou_volt_max; + int ret; + + if (unlikely(!target_freq)) { + dev_err(dev, "%s: Invalid target frequency %lu\n", __func__, + target_freq); + return -EINVAL; + } + + clk = _get_opp_clk(dev); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + freq = clk_round_rate(clk, target_freq); + if ((long)freq <= 0) + freq = target_freq; + + old_freq = clk_get_rate(clk); + + /* Return early if nothing to do */ + if (old_freq == freq) { + dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n", + __func__, freq); + return 0; + } + + rcu_read_lock(); + + dev_opp = _find_device_opp(dev); + if (IS_ERR(dev_opp)) { + dev_err(dev, "%s: device opp doesn't exist\n", __func__); + rcu_read_unlock(); + return PTR_ERR(dev_opp); + } + + old_opp = dev_pm_opp_find_freq_ceil(dev, &old_freq); + if (!IS_ERR(old_opp)) { + ou_volt = old_opp->u_volt; + ou_volt_min = old_opp->u_volt_min; + ou_volt_max = old_opp->u_volt_max; + } else { + dev_err(dev, "%s: failed to find current OPP for freq %lu (%ld)\n", + __func__, old_freq, PTR_ERR(old_opp)); + } + + opp = dev_pm_opp_find_freq_ceil(dev, &freq); + if (IS_ERR(opp)) { + ret = PTR_ERR(opp); + dev_err(dev, "%s: failed to find OPP for freq %lu (%d)\n", + __func__, freq, ret); + rcu_read_unlock(); + return ret; + } + + u_volt = opp->u_volt; + u_volt_min = opp->u_volt_min; + u_volt_max = opp->u_volt_max; + + reg = dev_opp->regulator; + + rcu_read_unlock(); + + /* Scaling up? Scale voltage before frequency */ + if (freq > old_freq) { + ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min, + u_volt_max); + if (ret) + goto restore_voltage; + } + + /* Change frequency */ + + dev_dbg(dev, "%s: switching OPP: %lu Hz --> %lu Hz\n", + __func__, old_freq, freq); + + ret = clk_set_rate(clk, freq); + if (ret) { + dev_err(dev, "%s: failed to set clock rate: %d\n", __func__, + ret); + goto restore_voltage; + } + + /* Scaling down? Scale voltage after frequency */ + if (freq < old_freq) { + ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min, + u_volt_max); + if (ret) + goto restore_freq; + } + + return 0; + +restore_freq: + if (clk_set_rate(clk, old_freq)) + dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n", + __func__, old_freq); +restore_voltage: + /* This shouldn't harm even if the voltages weren't updated earlier */ + if (!IS_ERR(old_opp)) + _set_opp_voltage(dev, reg, ou_volt, ou_volt_min, ou_volt_max); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate); + /* List-dev Helpers */ static void _kfree_list_dev_rcu(struct rcu_head *head) { diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 59da3d9e11ea..cccaf4a29e9f 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -64,6 +64,7 @@ int dev_pm_opp_set_prop_name(struct device *dev, const char *name); void dev_pm_opp_put_prop_name(struct device *dev); int dev_pm_opp_set_regulator(struct device *dev, const char *name); void dev_pm_opp_put_regulator(struct device *dev); +int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -172,6 +173,11 @@ static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name) static inline void dev_pm_opp_put_regulator(struct device *dev) {} +static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) +{ + return -EINVAL; +} + #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) From ed343155489aa67740862d959e71266992ddf1b5 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Thu, 11 Feb 2016 11:25:59 +0000 Subject: [PATCH 084/797] PM / OPP: Fix NULL pointer dereference crash when disabling OPPs Commit 7d34d56ef334 (PM / OPP: Disable OPPs that aren't supported by the regulator) causes a crash to happen on Tegra124 Jetson TK1 when using the DFLL clock source for the CPU. The DFLL manages the voltage itself and so there is no regulator specified for the OPPs and so we get a crash when we try to dereference the regulator pointer. Fix this by checking to see if the regulator IS_ERR_OR_NULL before dereferencing it. Fixes: 7d34d56ef334 (PM / OPP: Disable OPPs that aren't supported by the regulator) Signed-off-by: Jon Hunter Reported-by: Bartlomiej Zolnierkiewicz Acked-by: Viresh Kumar [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki (cherry picked from commit 78ecc56247f0ec2bc0cf6f2f2af69e98d99767bc) Signed-off-by: Alex Shi --- drivers/base/power/opp/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index ab711c2c3e00..d7cd4e265766 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -975,7 +975,7 @@ static bool _opp_supported_by_regulators(struct dev_pm_opp *opp, { struct regulator *reg = dev_opp->regulator; - if (!IS_ERR(reg) && + if (!IS_ERR_OR_NULL(reg) && !regulator_is_supported_voltage(reg, opp->u_volt_min, opp->u_volt_max)) { pr_warn("%s: OPP minuV: %lu maxuV: %lu, not supported by regulator\n", From 0ff7577f4fb24d42a34d33a0f16719b05328aa97 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 15 Feb 2016 10:21:53 +0530 Subject: [PATCH 085/797] PM / OPP: Initialize u_volt_min/max to a valid value We kept u_volt_min/max initialized to 0, when only the target voltage is present in DT, instead of the target/min/max triplet. This didn't go well with the regulator framework, as on few calls the min voltage was set to target and max was set to 0 and so resulted in a kernel crash like below: kernel BUG at ../drivers/regulator/core.c:216! [] (regulator_check_voltage) from [] (regulator_set_voltage_unlocked+0x58/0x230) [] (regulator_set_voltage_unlocked) from [] (regulator_set_voltage+0x28/0x54) [] (regulator_set_voltage) from [] (_set_opp_voltage+0x30/0x98) [] (_set_opp_voltage) from [] (dev_pm_opp_set_rate+0xf0/0x28c) [] (dev_pm_opp_set_rate) from [] (__cpufreq_driver_target+0x184/0x2b4) [] (__cpufreq_driver_target) from [] (dbs_check_cpu+0x1b0/0x1f4) [] (dbs_check_cpu) from [] (cpufreq_governor_dbs+0x324/0x5c4) [] (cpufreq_governor_dbs) from [] (__cpufreq_governor+0xe4/0x1ec) [] (__cpufreq_governor) from [] (cpufreq_init_policy+0x64/0x8c) [] (cpufreq_init_policy) from [] (cpufreq_online+0x2fc/0x708) [] (cpufreq_online) from [] (subsys_interface_register+0x94/0xd8) [] (subsys_interface_register) from [] (cpufreq_register_driver+0x14c/0x19c) [] (cpufreq_register_driver) from [] (dt_cpufreq_probe+0x70/0xec) [] (dt_cpufreq_probe) from [] (platform_drv_probe+0x4c/0xb0) [] (platform_drv_probe) from [] (driver_probe_device+0x214/0x2c0) [] (driver_probe_device) from [] (__driver_attach+0x8c/0x90) [] (__driver_attach) from [] (bus_for_each_dev+0x68/0x9c) [] (bus_for_each_dev) from [] (bus_add_driver+0x1a0/0x218) [] (bus_add_driver) from [] (driver_register+0x78/0xf8) [] (driver_register) from [] (do_one_initcall+0x90/0x1d8) [] (do_one_initcall) from [] (kernel_init_freeable+0x15c/0x1fc) [] (kernel_init_freeable) from [] (kernel_init+0x8/0xf0) [] (kernel_init) from [] (ret_from_fork+0x14/0x3c) Code: e1550004 baffffeb e3a00000 e8bd8070 (e7f001f2) Fix that by initializing u_volt_min/max to the target voltage in such cases. Reported-and-tested-by: Krzysztof Kozlowski Fixes: 274659029c9d (PM / OPP: Add support to parse "operating-points-v2" bindings) Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki (cherry picked from commit c88c395f4a6485f23f81e385c79945d68bcd5c5d) Signed-off-by: Alex Shi --- drivers/base/power/opp/core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index d7cd4e265766..19fd7e7a3969 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -1157,8 +1157,14 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, } opp->u_volt = microvolt[0]; - opp->u_volt_min = microvolt[1]; - opp->u_volt_max = microvolt[2]; + + if (count == 1) { + opp->u_volt_min = opp->u_volt; + opp->u_volt_max = opp->u_volt; + } else { + opp->u_volt_min = microvolt[1]; + opp->u_volt_max = microvolt[2]; + } /* Search for "opp-microamp-" */ prop = NULL; From da978b6ea868b3b2e15025349be34282940039d9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 15 Feb 2016 21:56:42 +0530 Subject: [PATCH 086/797] PM / OPP: Initialize regulator pointer to an error value We are currently required to do two checks for regulator pointer: IS_ERR() and IS_NULL(). And multiple instances are reported, about both of these not being used consistently and so resulting in crashes. Fix that by initializing regulator pointer with an error value and checking it only against an error. This makes code more consistent and more efficient. Fixes: 7d34d56ef334 (PM / OPP: Disable OPPs that aren't supported by the regulator) Reported-and-tested-by: Jon Hunter Reported-and-tested-by: Tony Lindgren Reported-and-tested-by: Guenter Roeck Signed-off-by: Viresh Kumar [ rjw: Initialize to -ENXIO ] Signed-off-by: Rafael J. Wysocki (cherry picked from commit 0c717d0f9cb46259dce5272705adce64a2d646d9) Signed-off-by: Alex Shi --- drivers/base/power/opp/core.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 19fd7e7a3969..5fb2f061129e 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -257,7 +257,7 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) } reg = dev_opp->regulator; - if (IS_ERR_OR_NULL(reg)) { + if (IS_ERR(reg)) { /* Regulator may not be required for device */ if (reg) dev_err(dev, "%s: Invalid regulator (%ld)\n", __func__, @@ -798,6 +798,9 @@ static struct device_opp *_add_device_opp(struct device *dev) of_node_put(np); } + /* Set regulator to a non-NULL error value */ + dev_opp->regulator = ERR_PTR(-ENXIO); + /* Find clk for the device */ dev_opp->clk = clk_get(dev, NULL); if (IS_ERR(dev_opp->clk)) { @@ -845,7 +848,7 @@ static void _remove_device_opp(struct device_opp *dev_opp) if (dev_opp->prop_name) return; - if (!IS_ERR_OR_NULL(dev_opp->regulator)) + if (!IS_ERR(dev_opp->regulator)) return; /* Release clk */ @@ -975,7 +978,7 @@ static bool _opp_supported_by_regulators(struct dev_pm_opp *opp, { struct regulator *reg = dev_opp->regulator; - if (!IS_ERR_OR_NULL(reg) && + if (!IS_ERR(reg) && !regulator_is_supported_voltage(reg, opp->u_volt_min, opp->u_volt_max)) { pr_warn("%s: OPP minuV: %lu maxuV: %lu, not supported by regulator\n", @@ -1441,7 +1444,7 @@ int dev_pm_opp_set_regulator(struct device *dev, const char *name) } /* Already have a regulator set */ - if (WARN_ON(!IS_ERR_OR_NULL(dev_opp->regulator))) { + if (WARN_ON(!IS_ERR(dev_opp->regulator))) { ret = -EBUSY; goto err; } @@ -1492,7 +1495,7 @@ void dev_pm_opp_put_regulator(struct device *dev) goto unlock; } - if (IS_ERR_OR_NULL(dev_opp->regulator)) { + if (IS_ERR(dev_opp->regulator)) { dev_err(dev, "%s: Doesn't have regulator set\n", __func__); goto unlock; } @@ -1501,7 +1504,7 @@ void dev_pm_opp_put_regulator(struct device *dev) WARN_ON(!list_empty(&dev_opp->opp_list)); regulator_put(dev_opp->regulator); - dev_opp->regulator = ERR_PTR(-EINVAL); + dev_opp->regulator = ERR_PTR(-ENXIO); /* Try freeing device_opp if this was the last blocking resource */ _remove_device_opp(dev_opp); From 753a14dbcc37565faa1caf17873fda776002648d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 16 Feb 2016 14:17:52 +0530 Subject: [PATCH 087/797] PM / OPP: Fix incorrect comments Some comments were just copy/pasted from other sections and don't match to the routines they were added for. Fix them. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit a5da64477ee79efa748df256928ec8840a2a7986) Signed-off-by: Alex Shi --- drivers/base/power/opp/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 5fb2f061129e..bdae09c1d8eb 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -1254,7 +1254,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw); /** * dev_pm_opp_put_supported_hw() - Releases resources blocked for supported hw - * @dev: Device for which supported-hw has to be set. + * @dev: Device for which supported-hw has to be put. * * This is required only for the V2 bindings, and is called for a matching * dev_pm_opp_set_supported_hw(). Until this is called, the device_opp structure @@ -1303,7 +1303,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw); /** * dev_pm_opp_set_prop_name() - Set prop-extn name - * @dev: Device for which the regulator has to be set. + * @dev: Device for which the prop-name has to be set. * @name: name to postfix to properties. * * This is required only for the V2 bindings, and it enables a platform to @@ -1362,7 +1362,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name); /** * dev_pm_opp_put_prop_name() - Releases resources blocked for prop-name - * @dev: Device for which the regulator has to be set. + * @dev: Device for which the prop-name has to be put. * * This is required only for the V2 bindings, and is called for a matching * dev_pm_opp_set_prop_name(). Until this is called, the device_opp structure From 811cd7442b982402c898e6a55798cecd76be0272 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 16 Feb 2016 14:17:53 +0530 Subject: [PATCH 088/797] PM / OPP: Rename structures for clarity Stephen pointed out recently, that few structures always confuse him as they aren't named properly. And this patch tries to address that: Names are updated as: - device_opp or dev_opp -> opp_table - dev_opp_list -> opp_tables - dev_opp_list_lock -> opp_table_lock - device_list_opp -> opp_device (it was never a list, but a structure) - list_dev -> opp_dev - And similar changes in comments and function names as well. This also fixes checkpatch warnings that were generated with this patch. No functional changes. Suggested-by: Stephen Boyd Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit 2c2709dc6921c5d246b686521f932c73a20f428f) Signed-off-by: Alex Shi --- drivers/base/power/opp/core.c | 752 ++++++++++++++++--------------- drivers/base/power/opp/cpu.c | 22 +- drivers/base/power/opp/debugfs.c | 85 ++-- drivers/base/power/opp/opp.h | 61 ++- 4 files changed, 461 insertions(+), 459 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index bdae09c1d8eb..433b60092972 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -25,40 +25,40 @@ #include "opp.h" /* - * The root of the list of all devices. All device_opp structures branch off - * from here, with each device_opp containing the list of opp it supports in + * The root of the list of all opp-tables. All opp_table structures branch off + * from here, with each opp_table containing the list of opps it supports in * various states of availability. */ -static LIST_HEAD(dev_opp_list); +static LIST_HEAD(opp_tables); /* Lock to allow exclusive modification to the device and opp lists */ -DEFINE_MUTEX(dev_opp_list_lock); +DEFINE_MUTEX(opp_table_lock); #define opp_rcu_lockdep_assert() \ do { \ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ - !lockdep_is_held(&dev_opp_list_lock), \ - "Missing rcu_read_lock() or " \ - "dev_opp_list_lock protection"); \ + !lockdep_is_held(&opp_table_lock), \ + "Missing rcu_read_lock() or " \ + "opp_table_lock protection"); \ } while (0) -static struct device_list_opp *_find_list_dev(const struct device *dev, - struct device_opp *dev_opp) +static struct opp_device *_find_opp_dev(const struct device *dev, + struct opp_table *opp_table) { - struct device_list_opp *list_dev; + struct opp_device *opp_dev; - list_for_each_entry(list_dev, &dev_opp->dev_list, node) - if (list_dev->dev == dev) - return list_dev; + list_for_each_entry(opp_dev, &opp_table->dev_list, node) + if (opp_dev->dev == dev) + return opp_dev; return NULL; } -static struct device_opp *_managed_opp(const struct device_node *np) +static struct opp_table *_managed_opp(const struct device_node *np) { - struct device_opp *dev_opp; + struct opp_table *opp_table; - list_for_each_entry_rcu(dev_opp, &dev_opp_list, node) { - if (dev_opp->np == np) { + list_for_each_entry_rcu(opp_table, &opp_tables, node) { + if (opp_table->np == np) { /* * Multiple devices can point to the same OPP table and * so will have same node-pointer, np. @@ -66,7 +66,7 @@ static struct device_opp *_managed_opp(const struct device_node *np) * But the OPPs will be considered as shared only if the * OPP table contains a "opp-shared" property. */ - return dev_opp->shared_opp ? dev_opp : NULL; + return opp_table->shared_opp ? opp_table : NULL; } } @@ -74,24 +74,24 @@ static struct device_opp *_managed_opp(const struct device_node *np) } /** - * _find_device_opp() - find device_opp struct using device pointer - * @dev: device pointer used to lookup device OPPs + * _find_opp_table() - find opp_table struct using device pointer + * @dev: device pointer used to lookup OPP table * - * Search list of device OPPs for one containing matching device. Does a RCU - * reader operation to grab the pointer needed. + * Search OPP table for one containing matching device. Does a RCU reader + * operation to grab the pointer needed. * - * Return: pointer to 'struct device_opp' if found, otherwise -ENODEV or + * Return: pointer to 'struct opp_table' if found, otherwise -ENODEV or * -EINVAL based on type of error. * * Locking: For readers, this function must be called under rcu_read_lock(). - * device_opp is a RCU protected pointer, which means that device_opp is valid + * opp_table is a RCU protected pointer, which means that opp_table is valid * as long as we are under RCU lock. * - * For Writers, this function must be called with dev_opp_list_lock held. + * For Writers, this function must be called with opp_table_lock held. */ -struct device_opp *_find_device_opp(struct device *dev) +struct opp_table *_find_opp_table(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; opp_rcu_lockdep_assert(); @@ -100,9 +100,9 @@ struct device_opp *_find_device_opp(struct device *dev) return ERR_PTR(-EINVAL); } - list_for_each_entry_rcu(dev_opp, &dev_opp_list, node) - if (_find_list_dev(dev, dev_opp)) - return dev_opp; + list_for_each_entry_rcu(opp_table, &opp_tables, node) + if (_find_opp_dev(dev, opp_table)) + return opp_table; return ERR_PTR(-ENODEV); } @@ -215,16 +215,16 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_is_turbo); */ unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; unsigned long clock_latency_ns; rcu_read_lock(); - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) clock_latency_ns = 0; else - clock_latency_ns = dev_opp->clock_latency_ns_max; + clock_latency_ns = opp_table->clock_latency_ns_max; rcu_read_unlock(); return clock_latency_ns; @@ -241,7 +241,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency); */ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *opp; struct regulator *reg; unsigned long latency_ns = 0; @@ -250,13 +250,13 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) rcu_read_lock(); - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) { + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { rcu_read_unlock(); return 0; } - reg = dev_opp->regulator; + reg = opp_table->regulator; if (IS_ERR(reg)) { /* Regulator may not be required for device */ if (reg) @@ -266,7 +266,7 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) return 0; } - list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) { + list_for_each_entry_rcu(opp, &opp_table->opp_list, node) { if (!opp->available) continue; @@ -279,7 +279,7 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) rcu_read_unlock(); /* - * The caller needs to ensure that dev_opp (and hence the regulator) + * The caller needs to ensure that opp_table (and hence the regulator) * isn't freed, while we are executing this routine. */ ret = regulator_set_voltage_time(reg, min_uV, max_uV); @@ -322,21 +322,21 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_transition_latency); */ struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; opp_rcu_lockdep_assert(); - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp) || !dev_opp->suspend_opp || - !dev_opp->suspend_opp->available) + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table) || !opp_table->suspend_opp || + !opp_table->suspend_opp->available) return NULL; - return dev_opp->suspend_opp; + return opp_table->suspend_opp; } EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp); /** - * dev_pm_opp_get_opp_count() - Get number of opps available in the opp list + * dev_pm_opp_get_opp_count() - Get number of opps available in the opp table * @dev: device for which we do this operation * * Return: This function returns the number of available opps if there are any, @@ -346,21 +346,21 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp); */ int dev_pm_opp_get_opp_count(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *temp_opp; int count = 0; rcu_read_lock(); - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) { - count = PTR_ERR(dev_opp); - dev_err(dev, "%s: device OPP not found (%d)\n", + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + count = PTR_ERR(opp_table); + dev_err(dev, "%s: OPP table not found (%d)\n", __func__, count); goto out_unlock; } - list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { + list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) { if (temp_opp->available) count++; } @@ -377,7 +377,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_count); * @freq: frequency to search for * @available: true/false - match for available opp * - * Return: Searches for exact match in the opp list and returns pointer to the + * Return: Searches for exact match in the opp table and returns pointer to the * matching opp if found, else returns ERR_PTR in case of error and should * be handled using IS_ERR. Error return values can be: * EINVAL: for bad pointer @@ -401,19 +401,20 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); opp_rcu_lockdep_assert(); - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) { - int r = PTR_ERR(dev_opp); - dev_err(dev, "%s: device OPP not found (%d)\n", __func__, r); + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + int r = PTR_ERR(opp_table); + + dev_err(dev, "%s: OPP table not found (%d)\n", __func__, r); return ERR_PTR(r); } - list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { + list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) { if (temp_opp->available == available && temp_opp->rate == freq) { opp = temp_opp; @@ -449,7 +450,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact); struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); opp_rcu_lockdep_assert(); @@ -459,11 +460,11 @@ struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, return ERR_PTR(-EINVAL); } - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) - return ERR_CAST(dev_opp); + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) + return ERR_CAST(opp_table); - list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { + list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) { if (temp_opp->available && temp_opp->rate >= *freq) { opp = temp_opp; *freq = opp->rate; @@ -499,7 +500,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil); struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); opp_rcu_lockdep_assert(); @@ -509,11 +510,11 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, return ERR_PTR(-EINVAL); } - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) - return ERR_CAST(dev_opp); + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) + return ERR_CAST(opp_table); - list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { + list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) { if (temp_opp->available) { /* go to the next node, before choosing prev */ if (temp_opp->rate > *freq) @@ -530,24 +531,24 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor); /* - * The caller needs to ensure that device_opp (and hence the clk) isn't freed, + * The caller needs to ensure that opp_table (and hence the clk) isn't freed, * while clk returned here is used. */ static struct clk *_get_opp_clk(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct clk *clk; rcu_read_lock(); - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) { + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { dev_err(dev, "%s: device opp doesn't exist\n", __func__); - clk = ERR_CAST(dev_opp); + clk = ERR_CAST(opp_table); goto unlock; } - clk = dev_opp->clk; + clk = opp_table->clk; if (IS_ERR(clk)) dev_err(dev, "%s: No clock available for the device\n", __func__); @@ -594,7 +595,7 @@ static int _set_opp_voltage(struct device *dev, struct regulator *reg, */ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *old_opp, *opp; struct regulator *reg; struct clk *clk; @@ -628,11 +629,11 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) rcu_read_lock(); - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) { + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { dev_err(dev, "%s: device opp doesn't exist\n", __func__); rcu_read_unlock(); - return PTR_ERR(dev_opp); + return PTR_ERR(opp_table); } old_opp = dev_pm_opp_find_freq_ceil(dev, &old_freq); @@ -658,7 +659,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) u_volt_min = opp->u_volt_min; u_volt_max = opp->u_volt_max; - reg = dev_opp->regulator; + reg = opp_table->regulator; rcu_read_unlock(); @@ -705,81 +706,81 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) } EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate); -/* List-dev Helpers */ -static void _kfree_list_dev_rcu(struct rcu_head *head) +/* OPP-dev Helpers */ +static void _kfree_opp_dev_rcu(struct rcu_head *head) { - struct device_list_opp *list_dev; + struct opp_device *opp_dev; - list_dev = container_of(head, struct device_list_opp, rcu_head); - kfree_rcu(list_dev, rcu_head); + opp_dev = container_of(head, struct opp_device, rcu_head); + kfree_rcu(opp_dev, rcu_head); } -static void _remove_list_dev(struct device_list_opp *list_dev, - struct device_opp *dev_opp) +static void _remove_opp_dev(struct opp_device *opp_dev, + struct opp_table *opp_table) { - opp_debug_unregister(list_dev, dev_opp); - list_del(&list_dev->node); - call_srcu(&dev_opp->srcu_head.srcu, &list_dev->rcu_head, - _kfree_list_dev_rcu); + opp_debug_unregister(opp_dev, opp_table); + list_del(&opp_dev->node); + call_srcu(&opp_table->srcu_head.srcu, &opp_dev->rcu_head, + _kfree_opp_dev_rcu); } -struct device_list_opp *_add_list_dev(const struct device *dev, - struct device_opp *dev_opp) +struct opp_device *_add_opp_dev(const struct device *dev, + struct opp_table *opp_table) { - struct device_list_opp *list_dev; + struct opp_device *opp_dev; int ret; - list_dev = kzalloc(sizeof(*list_dev), GFP_KERNEL); - if (!list_dev) + opp_dev = kzalloc(sizeof(*opp_dev), GFP_KERNEL); + if (!opp_dev) return NULL; - /* Initialize list-dev */ - list_dev->dev = dev; - list_add_rcu(&list_dev->node, &dev_opp->dev_list); + /* Initialize opp-dev */ + opp_dev->dev = dev; + list_add_rcu(&opp_dev->node, &opp_table->dev_list); - /* Create debugfs entries for the dev_opp */ - ret = opp_debug_register(list_dev, dev_opp); + /* Create debugfs entries for the opp_table */ + ret = opp_debug_register(opp_dev, opp_table); if (ret) dev_err(dev, "%s: Failed to register opp debugfs (%d)\n", __func__, ret); - return list_dev; + return opp_dev; } /** - * _add_device_opp() - Find device OPP table or allocate a new one + * _add_opp_table() - Find OPP table or allocate a new one * @dev: device for which we do this operation * * It tries to find an existing table first, if it couldn't find one, it * allocates a new OPP table and returns that. * - * Return: valid device_opp pointer if success, else NULL. + * Return: valid opp_table pointer if success, else NULL. */ -static struct device_opp *_add_device_opp(struct device *dev) +static struct opp_table *_add_opp_table(struct device *dev) { - struct device_opp *dev_opp; - struct device_list_opp *list_dev; + struct opp_table *opp_table; + struct opp_device *opp_dev; struct device_node *np; int ret; - /* Check for existing list for 'dev' first */ - dev_opp = _find_device_opp(dev); - if (!IS_ERR(dev_opp)) - return dev_opp; + /* Check for existing table for 'dev' first */ + opp_table = _find_opp_table(dev); + if (!IS_ERR(opp_table)) + return opp_table; /* - * Allocate a new device OPP table. In the infrequent case where a new + * Allocate a new OPP table. In the infrequent case where a new * device is needed to be added, we pay this penalty. */ - dev_opp = kzalloc(sizeof(*dev_opp), GFP_KERNEL); - if (!dev_opp) + opp_table = kzalloc(sizeof(*opp_table), GFP_KERNEL); + if (!opp_table) return NULL; - INIT_LIST_HEAD(&dev_opp->dev_list); + INIT_LIST_HEAD(&opp_table->dev_list); - list_dev = _add_list_dev(dev, dev_opp); - if (!list_dev) { - kfree(dev_opp); + opp_dev = _add_opp_dev(dev, opp_table); + if (!opp_dev) { + kfree(opp_table); return NULL; } @@ -792,79 +793,80 @@ static struct device_opp *_add_device_opp(struct device *dev) u32 val; if (!of_property_read_u32(np, "clock-latency", &val)) - dev_opp->clock_latency_ns_max = val; + opp_table->clock_latency_ns_max = val; of_property_read_u32(np, "voltage-tolerance", - &dev_opp->voltage_tolerance_v1); + &opp_table->voltage_tolerance_v1); of_node_put(np); } /* Set regulator to a non-NULL error value */ - dev_opp->regulator = ERR_PTR(-ENXIO); + opp_table->regulator = ERR_PTR(-ENXIO); /* Find clk for the device */ - dev_opp->clk = clk_get(dev, NULL); - if (IS_ERR(dev_opp->clk)) { - ret = PTR_ERR(dev_opp->clk); + opp_table->clk = clk_get(dev, NULL); + if (IS_ERR(opp_table->clk)) { + ret = PTR_ERR(opp_table->clk); if (ret != -EPROBE_DEFER) dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__, ret); } - srcu_init_notifier_head(&dev_opp->srcu_head); - INIT_LIST_HEAD(&dev_opp->opp_list); + srcu_init_notifier_head(&opp_table->srcu_head); + INIT_LIST_HEAD(&opp_table->opp_list); - /* Secure the device list modification */ - list_add_rcu(&dev_opp->node, &dev_opp_list); - return dev_opp; + /* Secure the device table modification */ + list_add_rcu(&opp_table->node, &opp_tables); + return opp_table; } /** - * _kfree_device_rcu() - Free device_opp RCU handler + * _kfree_device_rcu() - Free opp_table RCU handler * @head: RCU head */ static void _kfree_device_rcu(struct rcu_head *head) { - struct device_opp *device_opp = container_of(head, struct device_opp, rcu_head); + struct opp_table *opp_table = container_of(head, struct opp_table, + rcu_head); - kfree_rcu(device_opp, rcu_head); + kfree_rcu(opp_table, rcu_head); } /** - * _remove_device_opp() - Removes a device OPP table - * @dev_opp: device OPP table to be removed. + * _remove_opp_table() - Removes a OPP table + * @opp_table: OPP table to be removed. * - * Removes/frees device OPP table it it doesn't contain any OPPs. + * Removes/frees OPP table if it doesn't contain any OPPs. */ -static void _remove_device_opp(struct device_opp *dev_opp) +static void _remove_opp_table(struct opp_table *opp_table) { - struct device_list_opp *list_dev; + struct opp_device *opp_dev; - if (!list_empty(&dev_opp->opp_list)) + if (!list_empty(&opp_table->opp_list)) return; - if (dev_opp->supported_hw) + if (opp_table->supported_hw) return; - if (dev_opp->prop_name) + if (opp_table->prop_name) return; - if (!IS_ERR(dev_opp->regulator)) + if (!IS_ERR(opp_table->regulator)) return; /* Release clk */ - if (!IS_ERR(dev_opp->clk)) - clk_put(dev_opp->clk); + if (!IS_ERR(opp_table->clk)) + clk_put(opp_table->clk); - list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp, - node); + opp_dev = list_first_entry(&opp_table->dev_list, struct opp_device, + node); - _remove_list_dev(list_dev, dev_opp); + _remove_opp_dev(opp_dev, opp_table); /* dev_list must be empty now */ - WARN_ON(!list_empty(&dev_opp->dev_list)); + WARN_ON(!list_empty(&opp_table->dev_list)); - list_del_rcu(&dev_opp->node); - call_srcu(&dev_opp->srcu_head.srcu, &dev_opp->rcu_head, + list_del_rcu(&opp_table->node); + call_srcu(&opp_table->srcu_head.srcu, &opp_table->rcu_head, _kfree_device_rcu); } @@ -881,17 +883,17 @@ static void _kfree_opp_rcu(struct rcu_head *head) /** * _opp_remove() - Remove an OPP from a table definition - * @dev_opp: points back to the device_opp struct this opp belongs to + * @opp_table: points back to the opp_table struct this opp belongs to * @opp: pointer to the OPP to remove * @notify: OPP_EVENT_REMOVE notification should be sent or not * - * This function removes an opp definition from the opp list. + * This function removes an opp definition from the opp table. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * It is assumed that the caller holds required mutex for an RCU updater * strategy. */ -static void _opp_remove(struct device_opp *dev_opp, +static void _opp_remove(struct opp_table *opp_table, struct dev_pm_opp *opp, bool notify) { /* @@ -899,22 +901,23 @@ static void _opp_remove(struct device_opp *dev_opp, * frequency/voltage list. */ if (notify) - srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_REMOVE, opp); + srcu_notifier_call_chain(&opp_table->srcu_head, + OPP_EVENT_REMOVE, opp); opp_debug_remove_one(opp); list_del_rcu(&opp->node); - call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu); + call_srcu(&opp_table->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu); - _remove_device_opp(dev_opp); + _remove_opp_table(opp_table); } /** - * dev_pm_opp_remove() - Remove an OPP from OPP list + * dev_pm_opp_remove() - Remove an OPP from OPP table * @dev: device for which we do this operation * @freq: OPP to remove with matching 'freq' * - * This function removes an opp from the opp list. + * This function removes an opp from the opp table. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -923,17 +926,17 @@ static void _opp_remove(struct device_opp *dev_opp, void dev_pm_opp_remove(struct device *dev, unsigned long freq) { struct dev_pm_opp *opp; - struct device_opp *dev_opp; + struct opp_table *opp_table; bool found = false; - /* Hold our list modification lock here */ - mutex_lock(&dev_opp_list_lock); + /* Hold our table modification lock here */ + mutex_lock(&opp_table_lock); - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) goto unlock; - list_for_each_entry(opp, &dev_opp->opp_list, node) { + list_for_each_entry(opp, &opp_table->opp_list, node) { if (opp->rate == freq) { found = true; break; @@ -946,14 +949,14 @@ void dev_pm_opp_remove(struct device *dev, unsigned long freq) goto unlock; } - _opp_remove(dev_opp, opp, true); + _opp_remove(opp_table, opp, true); unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); } EXPORT_SYMBOL_GPL(dev_pm_opp_remove); static struct dev_pm_opp *_allocate_opp(struct device *dev, - struct device_opp **dev_opp) + struct opp_table **opp_table) { struct dev_pm_opp *opp; @@ -964,8 +967,8 @@ static struct dev_pm_opp *_allocate_opp(struct device *dev, INIT_LIST_HEAD(&opp->node); - *dev_opp = _add_device_opp(dev); - if (!*dev_opp) { + *opp_table = _add_opp_table(dev); + if (!*opp_table) { kfree(opp); return NULL; } @@ -974,9 +977,9 @@ static struct dev_pm_opp *_allocate_opp(struct device *dev, } static bool _opp_supported_by_regulators(struct dev_pm_opp *opp, - struct device_opp *dev_opp) + struct opp_table *opp_table) { - struct regulator *reg = dev_opp->regulator; + struct regulator *reg = opp_table->regulator; if (!IS_ERR(reg) && !regulator_is_supported_voltage(reg, opp->u_volt_min, @@ -990,21 +993,21 @@ static bool _opp_supported_by_regulators(struct dev_pm_opp *opp, } static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, - struct device_opp *dev_opp) + struct opp_table *opp_table) { struct dev_pm_opp *opp; - struct list_head *head = &dev_opp->opp_list; + struct list_head *head = &opp_table->opp_list; int ret; /* * Insert new OPP in order of increasing frequency and discard if * already present. * - * Need to use &dev_opp->opp_list in the condition part of the 'for' + * Need to use &opp_table->opp_list in the condition part of the 'for' * loop, don't replace it with head otherwise it will become an infinite * loop. */ - list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) { + list_for_each_entry_rcu(opp, &opp_table->opp_list, node) { if (new_opp->rate > opp->rate) { head = &opp->node; continue; @@ -1022,15 +1025,15 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, 0 : -EEXIST; } - new_opp->dev_opp = dev_opp; + new_opp->opp_table = opp_table; list_add_rcu(&new_opp->node, head); - ret = opp_debug_create_one(new_opp, dev_opp); + ret = opp_debug_create_one(new_opp, opp_table); if (ret) dev_err(dev, "%s: Failed to register opp to debugfs (%d)\n", __func__, ret); - if (!_opp_supported_by_regulators(new_opp, dev_opp)) { + if (!_opp_supported_by_regulators(new_opp, opp_table)) { new_opp->available = false; dev_warn(dev, "%s: OPP not supported by regulators (%lu)\n", __func__, new_opp->rate); @@ -1046,14 +1049,14 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, * @u_volt: Voltage in uVolts for this OPP * @dynamic: Dynamically added OPPs. * - * This function adds an opp definition to the opp list and returns status. + * This function adds an opp definition to the opp table and returns status. * The opp is made available by default and it can be controlled using * dev_pm_opp_enable/disable functions and may be removed by dev_pm_opp_remove. * * NOTE: "dynamic" parameter impacts OPPs added by the dev_pm_opp_of_add_table * and freed by dev_pm_opp_of_remove_table. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -1069,15 +1072,15 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt, bool dynamic) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *new_opp; unsigned long tol; int ret; - /* Hold our list modification lock here */ - mutex_lock(&dev_opp_list_lock); + /* Hold our table modification lock here */ + mutex_lock(&opp_table_lock); - new_opp = _allocate_opp(dev, &dev_opp); + new_opp = _allocate_opp(dev, &opp_table); if (!new_opp) { ret = -ENOMEM; goto unlock; @@ -1085,36 +1088,36 @@ static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt, /* populate the opp table */ new_opp->rate = freq; - tol = u_volt * dev_opp->voltage_tolerance_v1 / 100; + tol = u_volt * opp_table->voltage_tolerance_v1 / 100; new_opp->u_volt = u_volt; new_opp->u_volt_min = u_volt - tol; new_opp->u_volt_max = u_volt + tol; new_opp->available = true; new_opp->dynamic = dynamic; - ret = _opp_add(dev, new_opp, dev_opp); + ret = _opp_add(dev, new_opp, opp_table); if (ret) goto free_opp; - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); /* * Notify the changes in the availability of the operable * frequency/voltage list. */ - srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp); + srcu_notifier_call_chain(&opp_table->srcu_head, OPP_EVENT_ADD, new_opp); return 0; free_opp: - _opp_remove(dev_opp, new_opp, false); + _opp_remove(opp_table, new_opp, false); unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return ret; } /* TODO: Support multiple regulators */ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, - struct device_opp *dev_opp) + struct opp_table *opp_table) { u32 microvolt[3] = {0}; u32 val; @@ -1123,9 +1126,9 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, char name[NAME_MAX]; /* Search for "opp-microvolt-" */ - if (dev_opp->prop_name) { + if (opp_table->prop_name) { snprintf(name, sizeof(name), "opp-microvolt-%s", - dev_opp->prop_name); + opp_table->prop_name); prop = of_find_property(opp->np, name, NULL); } @@ -1171,9 +1174,9 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, /* Search for "opp-microamp-" */ prop = NULL; - if (dev_opp->prop_name) { + if (opp_table->prop_name) { snprintf(name, sizeof(name), "opp-microamp-%s", - dev_opp->prop_name); + opp_table->prop_name); prop = of_find_property(opp->np, name, NULL); } @@ -1200,7 +1203,7 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, * OPPs, which are available for those versions, based on its 'opp-supported-hw' * property. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -1209,44 +1212,44 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count) { - struct device_opp *dev_opp; + struct opp_table *opp_table; int ret = 0; - /* Hold our list modification lock here */ - mutex_lock(&dev_opp_list_lock); + /* Hold our table modification lock here */ + mutex_lock(&opp_table_lock); - dev_opp = _add_device_opp(dev); - if (!dev_opp) { + opp_table = _add_opp_table(dev); + if (!opp_table) { ret = -ENOMEM; goto unlock; } - /* Make sure there are no concurrent readers while updating dev_opp */ - WARN_ON(!list_empty(&dev_opp->opp_list)); + /* Make sure there are no concurrent readers while updating opp_table */ + WARN_ON(!list_empty(&opp_table->opp_list)); - /* Do we already have a version hierarchy associated with dev_opp? */ - if (dev_opp->supported_hw) { + /* Do we already have a version hierarchy associated with opp_table? */ + if (opp_table->supported_hw) { dev_err(dev, "%s: Already have supported hardware list\n", __func__); ret = -EBUSY; goto err; } - dev_opp->supported_hw = kmemdup(versions, count * sizeof(*versions), + opp_table->supported_hw = kmemdup(versions, count * sizeof(*versions), GFP_KERNEL); - if (!dev_opp->supported_hw) { + if (!opp_table->supported_hw) { ret = -ENOMEM; goto err; } - dev_opp->supported_hw_count = count; - mutex_unlock(&dev_opp_list_lock); + opp_table->supported_hw_count = count; + mutex_unlock(&opp_table_lock); return 0; err: - _remove_device_opp(dev_opp); + _remove_opp_table(opp_table); unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return ret; } @@ -1257,10 +1260,10 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw); * @dev: Device for which supported-hw has to be put. * * This is required only for the V2 bindings, and is called for a matching - * dev_pm_opp_set_supported_hw(). Until this is called, the device_opp structure + * dev_pm_opp_set_supported_hw(). Until this is called, the opp_table structure * will not be freed. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -1268,36 +1271,37 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw); */ void dev_pm_opp_put_supported_hw(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; - /* Hold our list modification lock here */ - mutex_lock(&dev_opp_list_lock); + /* Hold our table modification lock here */ + mutex_lock(&opp_table_lock); - /* Check for existing list for 'dev' first */ - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) { - dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp)); + /* Check for existing table for 'dev' first */ + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + dev_err(dev, "Failed to find opp_table: %ld\n", + PTR_ERR(opp_table)); goto unlock; } - /* Make sure there are no concurrent readers while updating dev_opp */ - WARN_ON(!list_empty(&dev_opp->opp_list)); + /* Make sure there are no concurrent readers while updating opp_table */ + WARN_ON(!list_empty(&opp_table->opp_list)); - if (!dev_opp->supported_hw) { + if (!opp_table->supported_hw) { dev_err(dev, "%s: Doesn't have supported hardware list\n", __func__); goto unlock; } - kfree(dev_opp->supported_hw); - dev_opp->supported_hw = NULL; - dev_opp->supported_hw_count = 0; + kfree(opp_table->supported_hw); + opp_table->supported_hw = NULL; + opp_table->supported_hw_count = 0; - /* Try freeing device_opp if this was the last blocking resource */ - _remove_device_opp(dev_opp); + /* Try freeing opp_table if this was the last blocking resource */ + _remove_opp_table(opp_table); unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); } EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw); @@ -1311,7 +1315,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw); * which the extension will apply are opp-microvolt and opp-microamp. OPP core * should postfix the property name with - while looking for them. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -1319,42 +1323,42 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw); */ int dev_pm_opp_set_prop_name(struct device *dev, const char *name) { - struct device_opp *dev_opp; + struct opp_table *opp_table; int ret = 0; - /* Hold our list modification lock here */ - mutex_lock(&dev_opp_list_lock); + /* Hold our table modification lock here */ + mutex_lock(&opp_table_lock); - dev_opp = _add_device_opp(dev); - if (!dev_opp) { + opp_table = _add_opp_table(dev); + if (!opp_table) { ret = -ENOMEM; goto unlock; } - /* Make sure there are no concurrent readers while updating dev_opp */ - WARN_ON(!list_empty(&dev_opp->opp_list)); + /* Make sure there are no concurrent readers while updating opp_table */ + WARN_ON(!list_empty(&opp_table->opp_list)); - /* Do we already have a prop-name associated with dev_opp? */ - if (dev_opp->prop_name) { + /* Do we already have a prop-name associated with opp_table? */ + if (opp_table->prop_name) { dev_err(dev, "%s: Already have prop-name %s\n", __func__, - dev_opp->prop_name); + opp_table->prop_name); ret = -EBUSY; goto err; } - dev_opp->prop_name = kstrdup(name, GFP_KERNEL); - if (!dev_opp->prop_name) { + opp_table->prop_name = kstrdup(name, GFP_KERNEL); + if (!opp_table->prop_name) { ret = -ENOMEM; goto err; } - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return 0; err: - _remove_device_opp(dev_opp); + _remove_opp_table(opp_table); unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return ret; } @@ -1365,10 +1369,10 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name); * @dev: Device for which the prop-name has to be put. * * This is required only for the V2 bindings, and is called for a matching - * dev_pm_opp_set_prop_name(). Until this is called, the device_opp structure + * dev_pm_opp_set_prop_name(). Until this is called, the opp_table structure * will not be freed. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -1376,34 +1380,35 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name); */ void dev_pm_opp_put_prop_name(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; - /* Hold our list modification lock here */ - mutex_lock(&dev_opp_list_lock); + /* Hold our table modification lock here */ + mutex_lock(&opp_table_lock); - /* Check for existing list for 'dev' first */ - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) { - dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp)); + /* Check for existing table for 'dev' first */ + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + dev_err(dev, "Failed to find opp_table: %ld\n", + PTR_ERR(opp_table)); goto unlock; } - /* Make sure there are no concurrent readers while updating dev_opp */ - WARN_ON(!list_empty(&dev_opp->opp_list)); + /* Make sure there are no concurrent readers while updating opp_table */ + WARN_ON(!list_empty(&opp_table->opp_list)); - if (!dev_opp->prop_name) { + if (!opp_table->prop_name) { dev_err(dev, "%s: Doesn't have a prop-name\n", __func__); goto unlock; } - kfree(dev_opp->prop_name); - dev_opp->prop_name = NULL; + kfree(opp_table->prop_name); + opp_table->prop_name = NULL; - /* Try freeing device_opp if this was the last blocking resource */ - _remove_device_opp(dev_opp); + /* Try freeing opp_table if this was the last blocking resource */ + _remove_opp_table(opp_table); unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); } EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name); @@ -1417,7 +1422,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name); * * This must be called before any OPPs are initialized for the device. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -1425,26 +1430,26 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name); */ int dev_pm_opp_set_regulator(struct device *dev, const char *name) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct regulator *reg; int ret; - mutex_lock(&dev_opp_list_lock); + mutex_lock(&opp_table_lock); - dev_opp = _add_device_opp(dev); - if (!dev_opp) { + opp_table = _add_opp_table(dev); + if (!opp_table) { ret = -ENOMEM; goto unlock; } /* This should be called before OPPs are initialized */ - if (WARN_ON(!list_empty(&dev_opp->opp_list))) { + if (WARN_ON(!list_empty(&opp_table->opp_list))) { ret = -EBUSY; goto err; } /* Already have a regulator set */ - if (WARN_ON(!IS_ERR(dev_opp->regulator))) { + if (WARN_ON(!IS_ERR(opp_table->regulator))) { ret = -EBUSY; goto err; } @@ -1458,15 +1463,15 @@ int dev_pm_opp_set_regulator(struct device *dev, const char *name) goto err; } - dev_opp->regulator = reg; + opp_table->regulator = reg; - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return 0; err: - _remove_device_opp(dev_opp); + _remove_opp_table(opp_table); unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return ret; } @@ -1476,7 +1481,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator); * dev_pm_opp_put_regulator() - Releases resources blocked for regulator * @dev: Device for which regulator was set. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -1484,44 +1489,45 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator); */ void dev_pm_opp_put_regulator(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; - mutex_lock(&dev_opp_list_lock); + mutex_lock(&opp_table_lock); - /* Check for existing list for 'dev' first */ - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) { - dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp)); + /* Check for existing table for 'dev' first */ + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + dev_err(dev, "Failed to find opp_table: %ld\n", + PTR_ERR(opp_table)); goto unlock; } - if (IS_ERR(dev_opp->regulator)) { + if (IS_ERR(opp_table->regulator)) { dev_err(dev, "%s: Doesn't have regulator set\n", __func__); goto unlock; } - /* Make sure there are no concurrent readers while updating dev_opp */ - WARN_ON(!list_empty(&dev_opp->opp_list)); + /* Make sure there are no concurrent readers while updating opp_table */ + WARN_ON(!list_empty(&opp_table->opp_list)); - regulator_put(dev_opp->regulator); - dev_opp->regulator = ERR_PTR(-ENXIO); + regulator_put(opp_table->regulator); + opp_table->regulator = ERR_PTR(-ENXIO); - /* Try freeing device_opp if this was the last blocking resource */ - _remove_device_opp(dev_opp); + /* Try freeing opp_table if this was the last blocking resource */ + _remove_opp_table(opp_table); unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); } EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulator); -static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp, +static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table, struct device_node *np) { - unsigned int count = dev_opp->supported_hw_count; + unsigned int count = opp_table->supported_hw_count; u32 version; int ret; - if (!dev_opp->supported_hw) + if (!opp_table->supported_hw) return true; while (count--) { @@ -1534,7 +1540,7 @@ static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp, } /* Both of these are bitwise masks of the versions */ - if (!(version & dev_opp->supported_hw[count])) + if (!(version & opp_table->supported_hw[count])) return false; } @@ -1546,11 +1552,11 @@ static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp, * @dev: device for which we do this operation * @np: device node * - * This function adds an opp definition to the opp list and returns status. The + * This function adds an opp definition to the opp table and returns status. The * opp can be controlled using dev_pm_opp_enable/disable functions and may be * removed by dev_pm_opp_remove. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -1566,16 +1572,16 @@ static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp, */ static int _opp_add_static_v2(struct device *dev, struct device_node *np) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *new_opp; u64 rate; u32 val; int ret; - /* Hold our list modification lock here */ - mutex_lock(&dev_opp_list_lock); + /* Hold our table modification lock here */ + mutex_lock(&opp_table_lock); - new_opp = _allocate_opp(dev, &dev_opp); + new_opp = _allocate_opp(dev, &opp_table); if (!new_opp) { ret = -ENOMEM; goto unlock; @@ -1588,7 +1594,7 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) } /* Check if the OPP supports hardware's hierarchy of versions or not */ - if (!_opp_is_supported(dev, dev_opp, np)) { + if (!_opp_is_supported(dev, opp_table, np)) { dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate); goto free_opp; } @@ -1608,30 +1614,30 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) if (!of_property_read_u32(np, "clock-latency-ns", &val)) new_opp->clock_latency_ns = val; - ret = opp_parse_supplies(new_opp, dev, dev_opp); + ret = opp_parse_supplies(new_opp, dev, opp_table); if (ret) goto free_opp; - ret = _opp_add(dev, new_opp, dev_opp); + ret = _opp_add(dev, new_opp, opp_table); if (ret) goto free_opp; /* OPP to select on device suspend */ if (of_property_read_bool(np, "opp-suspend")) { - if (dev_opp->suspend_opp) { + if (opp_table->suspend_opp) { dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n", - __func__, dev_opp->suspend_opp->rate, + __func__, opp_table->suspend_opp->rate, new_opp->rate); } else { new_opp->suspend = true; - dev_opp->suspend_opp = new_opp; + opp_table->suspend_opp = new_opp; } } - if (new_opp->clock_latency_ns > dev_opp->clock_latency_ns_max) - dev_opp->clock_latency_ns_max = new_opp->clock_latency_ns; + if (new_opp->clock_latency_ns > opp_table->clock_latency_ns_max) + opp_table->clock_latency_ns_max = new_opp->clock_latency_ns; - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n", __func__, new_opp->turbo, new_opp->rate, new_opp->u_volt, @@ -1642,13 +1648,13 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) * Notify the changes in the availability of the operable * frequency/voltage list. */ - srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp); + srcu_notifier_call_chain(&opp_table->srcu_head, OPP_EVENT_ADD, new_opp); return 0; free_opp: - _opp_remove(dev_opp, new_opp, false); + _opp_remove(opp_table, new_opp, false); unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return ret; } @@ -1658,11 +1664,11 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) * @freq: Frequency in Hz for this OPP * @u_volt: Voltage in uVolts for this OPP * - * This function adds an opp definition to the opp list and returns status. + * This function adds an opp definition to the opp table and returns status. * The opp is made available by default and it can be controlled using * dev_pm_opp_enable/disable functions. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -1694,7 +1700,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_add); * copy operation, returns 0 if no modification was done OR modification was * successful. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks to * keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -1703,7 +1709,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_add); static int _opp_set_availability(struct device *dev, unsigned long freq, bool availability_req) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV); int r = 0; @@ -1712,18 +1718,18 @@ static int _opp_set_availability(struct device *dev, unsigned long freq, if (!new_opp) return -ENOMEM; - mutex_lock(&dev_opp_list_lock); + mutex_lock(&opp_table_lock); - /* Find the device_opp */ - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) { - r = PTR_ERR(dev_opp); + /* Find the opp_table */ + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + r = PTR_ERR(opp_table); dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r); goto unlock; } /* Do we have the frequency? */ - list_for_each_entry(tmp_opp, &dev_opp->opp_list, node) { + list_for_each_entry(tmp_opp, &opp_table->opp_list, node) { if (tmp_opp->rate == freq) { opp = tmp_opp; break; @@ -1744,21 +1750,21 @@ static int _opp_set_availability(struct device *dev, unsigned long freq, new_opp->available = availability_req; list_replace_rcu(&opp->node, &new_opp->node); - mutex_unlock(&dev_opp_list_lock); - call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu); + mutex_unlock(&opp_table_lock); + call_srcu(&opp_table->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu); /* Notify the change of the OPP availability */ if (availability_req) - srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ENABLE, - new_opp); + srcu_notifier_call_chain(&opp_table->srcu_head, + OPP_EVENT_ENABLE, new_opp); else - srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_DISABLE, - new_opp); + srcu_notifier_call_chain(&opp_table->srcu_head, + OPP_EVENT_DISABLE, new_opp); return 0; unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); kfree(new_opp); return r; } @@ -1772,7 +1778,7 @@ static int _opp_set_availability(struct device *dev, unsigned long freq, * corresponding error value. It is meant to be used for users an OPP available * after being temporarily made unavailable with dev_pm_opp_disable. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function indirectly uses RCU and mutex locks to keep the * integrity of the internal data structures. Callers should ensure that * this function is *NOT* called under RCU protection or in contexts where @@ -1798,7 +1804,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_enable); * control by users to make this OPP not available until the circumstances are * right to make it available again (with a call to dev_pm_opp_enable). * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function indirectly uses RCU and mutex locks to keep the * integrity of the internal data structures. Callers should ensure that * this function is *NOT* called under RCU protection or in contexts where @@ -1816,26 +1822,26 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_disable); /** * dev_pm_opp_get_notifier() - find notifier_head of the device with opp - * @dev: device pointer used to lookup device OPPs. + * @dev: device pointer used to lookup OPP table. * * Return: pointer to notifier head if found, otherwise -ENODEV or * -EINVAL based on type of error casted as pointer. value must be checked * with IS_ERR to determine valid pointer or error result. * - * Locking: This function must be called under rcu_read_lock(). dev_opp is a RCU - * protected pointer. The reason for the same is that the opp pointer which is - * returned will remain valid for use with opp_get_{voltage, freq} only while + * Locking: This function must be called under rcu_read_lock(). opp_table is a + * RCU protected pointer. The reason for the same is that the opp pointer which + * is returned will remain valid for use with opp_get_{voltage, freq} only while * under the locked area. The pointer returned must be used prior to unlocking * with rcu_read_unlock() to maintain the integrity of the pointer. */ struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev) { - struct device_opp *dev_opp = _find_device_opp(dev); + struct opp_table *opp_table = _find_opp_table(dev); - if (IS_ERR(dev_opp)) - return ERR_CAST(dev_opp); /* matching type */ + if (IS_ERR(opp_table)) + return ERR_CAST(opp_table); /* matching type */ - return &dev_opp->srcu_head; + return &opp_table->srcu_head; } EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier); @@ -1843,11 +1849,11 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier); /** * dev_pm_opp_of_remove_table() - Free OPP table entries created from static DT * entries - * @dev: device pointer used to lookup device OPPs. + * @dev: device pointer used to lookup OPP table. * * Free OPPs created using static entries present in DT. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function indirectly uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -1855,38 +1861,38 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier); */ void dev_pm_opp_of_remove_table(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *opp, *tmp; - /* Hold our list modification lock here */ - mutex_lock(&dev_opp_list_lock); + /* Hold our table modification lock here */ + mutex_lock(&opp_table_lock); - /* Check for existing list for 'dev' */ - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) { - int error = PTR_ERR(dev_opp); + /* Check for existing table for 'dev' */ + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + int error = PTR_ERR(opp_table); if (error != -ENODEV) - WARN(1, "%s: dev_opp: %d\n", + WARN(1, "%s: opp_table: %d\n", IS_ERR_OR_NULL(dev) ? "Invalid device" : dev_name(dev), error); goto unlock; } - /* Find if dev_opp manages a single device */ - if (list_is_singular(&dev_opp->dev_list)) { + /* Find if opp_table manages a single device */ + if (list_is_singular(&opp_table->dev_list)) { /* Free static OPPs */ - list_for_each_entry_safe(opp, tmp, &dev_opp->opp_list, node) { + list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) { if (!opp->dynamic) - _opp_remove(dev_opp, opp, true); + _opp_remove(opp_table, opp, true); } } else { - _remove_list_dev(_find_list_dev(dev, dev_opp), dev_opp); + _remove_opp_dev(_find_opp_dev(dev, opp_table), opp_table); } unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); } EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table); @@ -1907,22 +1913,22 @@ struct device_node *_of_get_opp_desc_node(struct device *dev) static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) { struct device_node *np; - struct device_opp *dev_opp; + struct opp_table *opp_table; int ret = 0, count = 0; - mutex_lock(&dev_opp_list_lock); + mutex_lock(&opp_table_lock); - dev_opp = _managed_opp(opp_np); - if (dev_opp) { + opp_table = _managed_opp(opp_np); + if (opp_table) { /* OPPs are already managed */ - if (!_add_list_dev(dev, dev_opp)) + if (!_add_opp_dev(dev, opp_table)) ret = -ENOMEM; - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return ret; } - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); - /* We have opp-list node now, iterate over it and add OPPs */ + /* We have opp-table node now, iterate over it and add OPPs */ for_each_available_child_of_node(opp_np, np) { count++; @@ -1938,19 +1944,19 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) if (WARN_ON(!count)) return -ENOENT; - mutex_lock(&dev_opp_list_lock); + mutex_lock(&opp_table_lock); - dev_opp = _find_device_opp(dev); - if (WARN_ON(IS_ERR(dev_opp))) { - ret = PTR_ERR(dev_opp); - mutex_unlock(&dev_opp_list_lock); + opp_table = _find_opp_table(dev); + if (WARN_ON(IS_ERR(opp_table))) { + ret = PTR_ERR(opp_table); + mutex_unlock(&opp_table_lock); goto free_table; } - dev_opp->np = opp_np; - dev_opp->shared_opp = of_property_read_bool(opp_np, "opp-shared"); + opp_table->np = opp_np; + opp_table->shared_opp = of_property_read_bool(opp_np, "opp-shared"); - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return 0; @@ -1979,7 +1985,7 @@ static int _of_add_opp_table_v1(struct device *dev) */ nr = prop->length / sizeof(u32); if (nr % 2) { - dev_err(dev, "%s: Invalid OPP list\n", __func__); + dev_err(dev, "%s: Invalid OPP table\n", __func__); return -EINVAL; } @@ -1999,11 +2005,11 @@ static int _of_add_opp_table_v1(struct device *dev) /** * dev_pm_opp_of_add_table() - Initialize opp table from device tree - * @dev: device pointer used to lookup device OPPs. + * @dev: device pointer used to lookup OPP table. * * Register the initial OPP table with the OPP library for given device. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function indirectly uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c index 9f0c15570f64..ba2bdbd932ef 100644 --- a/drivers/base/power/opp/cpu.c +++ b/drivers/base/power/opp/cpu.c @@ -31,7 +31,7 @@ * @table: Cpufreq table returned back to caller * * Generate a cpufreq table for a provided device- this assumes that the - * opp list is already initialized and ready for usage. + * opp table is already initialized and ready for usage. * * This function allocates required memory for the cpufreq table. It is * expected that the caller does the required maintenance such as freeing @@ -44,7 +44,7 @@ * WARNING: It is important for the callers to ensure refreshing their copy of * the table if any of the mentioned functions have been invoked in the interim. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Since we just use the regular accessor functions to access the internal data * structures, we use RCU read lock inside this function. As a result, users of * this function DONOT need to use explicit locks for invoking. @@ -122,15 +122,15 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table); /* Required only for V1 bindings, as v2 can manage it from DT itself */ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) { - struct device_list_opp *list_dev; - struct device_opp *dev_opp; + struct opp_device *opp_dev; + struct opp_table *opp_table; struct device *dev; int cpu, ret = 0; - mutex_lock(&dev_opp_list_lock); + mutex_lock(&opp_table_lock); - dev_opp = _find_device_opp(cpu_dev); - if (IS_ERR(dev_opp)) { + opp_table = _find_opp_table(cpu_dev); + if (IS_ERR(opp_table)) { ret = -EINVAL; goto unlock; } @@ -146,15 +146,15 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) continue; } - list_dev = _add_list_dev(dev, dev_opp); - if (!list_dev) { - dev_err(dev, "%s: failed to add list-dev for cpu%d device\n", + opp_dev = _add_opp_dev(dev, opp_table); + if (!opp_dev) { + dev_err(dev, "%s: failed to add opp-dev for cpu%d device\n", __func__, cpu); continue; } } unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return ret; } diff --git a/drivers/base/power/opp/debugfs.c b/drivers/base/power/opp/debugfs.c index ddfe4773e922..ef1ae6b52042 100644 --- a/drivers/base/power/opp/debugfs.c +++ b/drivers/base/power/opp/debugfs.c @@ -34,9 +34,9 @@ void opp_debug_remove_one(struct dev_pm_opp *opp) debugfs_remove_recursive(opp->dentry); } -int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp) +int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table) { - struct dentry *pdentry = dev_opp->dentry; + struct dentry *pdentry = opp_table->dentry; struct dentry *d; char name[25]; /* 20 chars for 64 bit value + 5 (opp:\0) */ @@ -83,52 +83,52 @@ int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp) return 0; } -static int device_opp_debug_create_dir(struct device_list_opp *list_dev, - struct device_opp *dev_opp) +static int opp_list_debug_create_dir(struct opp_device *opp_dev, + struct opp_table *opp_table) { - const struct device *dev = list_dev->dev; + const struct device *dev = opp_dev->dev; struct dentry *d; - opp_set_dev_name(dev, dev_opp->dentry_name); + opp_set_dev_name(dev, opp_table->dentry_name); /* Create device specific directory */ - d = debugfs_create_dir(dev_opp->dentry_name, rootdir); + d = debugfs_create_dir(opp_table->dentry_name, rootdir); if (!d) { dev_err(dev, "%s: Failed to create debugfs dir\n", __func__); return -ENOMEM; } - list_dev->dentry = d; - dev_opp->dentry = d; + opp_dev->dentry = d; + opp_table->dentry = d; return 0; } -static int device_opp_debug_create_link(struct device_list_opp *list_dev, - struct device_opp *dev_opp) +static int opp_list_debug_create_link(struct opp_device *opp_dev, + struct opp_table *opp_table) { - const struct device *dev = list_dev->dev; + const struct device *dev = opp_dev->dev; char name[NAME_MAX]; struct dentry *d; - opp_set_dev_name(list_dev->dev, name); + opp_set_dev_name(opp_dev->dev, name); /* Create device specific directory link */ - d = debugfs_create_symlink(name, rootdir, dev_opp->dentry_name); + d = debugfs_create_symlink(name, rootdir, opp_table->dentry_name); if (!d) { dev_err(dev, "%s: Failed to create link\n", __func__); return -ENOMEM; } - list_dev->dentry = d; + opp_dev->dentry = d; return 0; } /** * opp_debug_register - add a device opp node to the debugfs 'opp' directory - * @list_dev: list-dev pointer for device - * @dev_opp: the device-opp being added + * @opp_dev: opp-dev pointer for device + * @opp_table: the device-opp being added * * Dynamically adds device specific directory in debugfs 'opp' directory. If the * device-opp is shared with other devices, then links will be created for all @@ -136,73 +136,72 @@ static int device_opp_debug_create_link(struct device_list_opp *list_dev, * * Return: 0 on success, otherwise negative error. */ -int opp_debug_register(struct device_list_opp *list_dev, - struct device_opp *dev_opp) +int opp_debug_register(struct opp_device *opp_dev, struct opp_table *opp_table) { if (!rootdir) { pr_debug("%s: Uninitialized rootdir\n", __func__); return -EINVAL; } - if (dev_opp->dentry) - return device_opp_debug_create_link(list_dev, dev_opp); + if (opp_table->dentry) + return opp_list_debug_create_link(opp_dev, opp_table); - return device_opp_debug_create_dir(list_dev, dev_opp); + return opp_list_debug_create_dir(opp_dev, opp_table); } -static void opp_migrate_dentry(struct device_list_opp *list_dev, - struct device_opp *dev_opp) +static void opp_migrate_dentry(struct opp_device *opp_dev, + struct opp_table *opp_table) { - struct device_list_opp *new_dev; + struct opp_device *new_dev; const struct device *dev; struct dentry *dentry; - /* Look for next list-dev */ - list_for_each_entry(new_dev, &dev_opp->dev_list, node) - if (new_dev != list_dev) + /* Look for next opp-dev */ + list_for_each_entry(new_dev, &opp_table->dev_list, node) + if (new_dev != opp_dev) break; /* new_dev is guaranteed to be valid here */ dev = new_dev->dev; debugfs_remove_recursive(new_dev->dentry); - opp_set_dev_name(dev, dev_opp->dentry_name); + opp_set_dev_name(dev, opp_table->dentry_name); - dentry = debugfs_rename(rootdir, list_dev->dentry, rootdir, - dev_opp->dentry_name); + dentry = debugfs_rename(rootdir, opp_dev->dentry, rootdir, + opp_table->dentry_name); if (!dentry) { dev_err(dev, "%s: Failed to rename link from: %s to %s\n", - __func__, dev_name(list_dev->dev), dev_name(dev)); + __func__, dev_name(opp_dev->dev), dev_name(dev)); return; } new_dev->dentry = dentry; - dev_opp->dentry = dentry; + opp_table->dentry = dentry; } /** * opp_debug_unregister - remove a device opp node from debugfs opp directory - * @list_dev: list-dev pointer for device - * @dev_opp: the device-opp being removed + * @opp_dev: opp-dev pointer for device + * @opp_table: the device-opp being removed * * Dynamically removes device specific directory from debugfs 'opp' directory. */ -void opp_debug_unregister(struct device_list_opp *list_dev, - struct device_opp *dev_opp) +void opp_debug_unregister(struct opp_device *opp_dev, + struct opp_table *opp_table) { - if (list_dev->dentry == dev_opp->dentry) { + if (opp_dev->dentry == opp_table->dentry) { /* Move the real dentry object under another device */ - if (!list_is_singular(&dev_opp->dev_list)) { - opp_migrate_dentry(list_dev, dev_opp); + if (!list_is_singular(&opp_table->dev_list)) { + opp_migrate_dentry(opp_dev, opp_table); goto out; } - dev_opp->dentry = NULL; + opp_table->dentry = NULL; } - debugfs_remove_recursive(list_dev->dentry); + debugfs_remove_recursive(opp_dev->dentry); out: - list_dev->dentry = NULL; + opp_dev->dentry = NULL; } static int __init opp_debug_init(void) diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index 4f1bdfc7da03..f67f806fcf3a 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -26,12 +26,12 @@ struct clk; struct regulator; /* Lock to allow exclusive modification to the device and opp lists */ -extern struct mutex dev_opp_list_lock; +extern struct mutex opp_table_lock; /* * Internal data structure organization with the OPP layer library is as * follows: - * dev_opp_list (root) + * opp_tables (root) * |- device 1 (represents voltage domain 1) * | |- opp 1 (availability, freq, voltage) * | |- opp 2 .. @@ -40,18 +40,18 @@ extern struct mutex dev_opp_list_lock; * |- device 2 (represents the next voltage domain) * ... * `- device m (represents mth voltage domain) - * device 1, 2.. are represented by dev_opp structure while each opp + * device 1, 2.. are represented by opp_table structure while each opp * is represented by the opp structure. */ /** * struct dev_pm_opp - Generic OPP description structure - * @node: opp list node. The nodes are maintained throughout the lifetime + * @node: opp table node. The nodes are maintained throughout the lifetime * of boot. It is expected only an optimal set of OPPs are * added to the library by the SoC framework. - * RCU usage: opp list is traversed with RCU locks. node + * RCU usage: opp table is traversed with RCU locks. node * modification is possible realtime, hence the modifications - * are protected by the dev_opp_list_lock for integrity. + * are protected by the opp_table_lock for integrity. * IMPORTANT: the opp nodes should be maintained in increasing * order. * @available: true/false - marks if this OPP as available or not @@ -65,7 +65,7 @@ extern struct mutex dev_opp_list_lock; * @u_amp: Maximum current drawn by the device in microamperes * @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's * frequency from any other OPP's frequency. - * @dev_opp: points back to the device_opp struct this opp belongs to + * @opp_table: points back to the opp_table struct this opp belongs to * @rcu_head: RCU callback head used for deferred freeing * @np: OPP's device node. * @dentry: debugfs dentry pointer (per opp) @@ -87,7 +87,7 @@ struct dev_pm_opp { unsigned long u_amp; unsigned long clock_latency_ns; - struct device_opp *dev_opp; + struct opp_table *opp_table; struct rcu_head rcu_head; struct device_node *np; @@ -98,16 +98,16 @@ struct dev_pm_opp { }; /** - * struct device_list_opp - devices managed by 'struct device_opp' + * struct opp_device - devices managed by 'struct opp_table' * @node: list node * @dev: device to which the struct object belongs * @rcu_head: RCU callback head used for deferred freeing * @dentry: debugfs dentry pointer (per device) * - * This is an internal data structure maintaining the list of devices that are - * managed by 'struct device_opp'. + * This is an internal data structure maintaining the devices that are managed + * by 'struct opp_table'. */ -struct device_list_opp { +struct opp_device { struct list_head node; const struct device *dev; struct rcu_head rcu_head; @@ -118,16 +118,16 @@ struct device_list_opp { }; /** - * struct device_opp - Device opp structure - * @node: list node - contains the devices with OPPs that + * struct opp_table - Device opp structure + * @node: table node - contains the devices with OPPs that * have been registered. Nodes once added are not modified in this - * list. - * RCU usage: nodes are not modified in the list of device_opp, - * however addition is possible and is secured by dev_opp_list_lock + * table. + * RCU usage: nodes are not modified in the table of opp_table, + * however addition is possible and is secured by opp_table_lock * @srcu_head: notifier head to notify the OPP availability changes. * @rcu_head: RCU callback head used for deferred freeing * @dev_list: list of devices that share these OPPs - * @opp_list: list of opps + * @opp_list: table of opps * @np: struct device_node pointer for opp's DT node. * @clock_latency_ns_max: Max clock latency in nanoseconds. * @shared_opp: OPP is shared between multiple devices. @@ -150,7 +150,7 @@ struct device_list_opp { * need to wait for the grace period of both of them before freeing any * resources. And so we have used kfree_rcu() from within call_srcu() handlers. */ -struct device_opp { +struct opp_table { struct list_head node; struct srcu_notifier_head srcu_head; @@ -180,30 +180,27 @@ struct device_opp { }; /* Routines internal to opp core */ -struct device_opp *_find_device_opp(struct device *dev); -struct device_list_opp *_add_list_dev(const struct device *dev, - struct device_opp *dev_opp); +struct opp_table *_find_opp_table(struct device *dev); +struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table); struct device_node *_of_get_opp_desc_node(struct device *dev); #ifdef CONFIG_DEBUG_FS void opp_debug_remove_one(struct dev_pm_opp *opp); -int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp); -int opp_debug_register(struct device_list_opp *list_dev, - struct device_opp *dev_opp); -void opp_debug_unregister(struct device_list_opp *list_dev, - struct device_opp *dev_opp); +int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table); +int opp_debug_register(struct opp_device *opp_dev, struct opp_table *opp_table); +void opp_debug_unregister(struct opp_device *opp_dev, struct opp_table *opp_table); #else static inline void opp_debug_remove_one(struct dev_pm_opp *opp) {} static inline int opp_debug_create_one(struct dev_pm_opp *opp, - struct device_opp *dev_opp) + struct opp_table *opp_table) { return 0; } -static inline int opp_debug_register(struct device_list_opp *list_dev, - struct device_opp *dev_opp) +static inline int opp_debug_register(struct opp_device *opp_dev, + struct opp_table *opp_table) { return 0; } -static inline void opp_debug_unregister(struct device_list_opp *list_dev, - struct device_opp *dev_opp) +static inline void opp_debug_unregister(struct opp_device *opp_dev, + struct opp_table *opp_table) { } #endif /* DEBUG_FS */ From 5b1a8eb56e49d9bbf9075551804432a7b3b635a7 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 17 Nov 2015 12:06:22 +0000 Subject: [PATCH 089/797] cpufreq-dt: Supply power coefficient when registering cooling devices Support registering cooling devices with dynamic power coefficient where provided by the device tree. This allows OF registered cooling devices driver to be used with the power_allocator thermal governor. Signed-off-by: Punit Agrawal Acked-by: Viresh Kumar Reviewed-by: Javi Merino Signed-off-by: Rafael J. Wysocki (cherry picked from commit f8fa8ae06b8c2c25d81c99766f9226adc5c3e073) Signed-off-by: Alex Shi --- drivers/cpufreq/cpufreq-dt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 68232fef54c9..fc61cab00131 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -406,8 +406,13 @@ static void cpufreq_ready(struct cpufreq_policy *policy) * thermal DT code takes care of matching them. */ if (of_find_property(np, "#cooling-cells", NULL)) { - priv->cdev = of_cpufreq_cooling_register(np, - policy->related_cpus); + u32 power_coefficient = 0; + + of_property_read_u32(np, "dynamic-power-coefficient", + &power_coefficient); + + priv->cdev = of_cpufreq_power_cooling_register(np, + policy->related_cpus, power_coefficient, NULL); if (IS_ERR(priv->cdev)) { dev_err(priv->cpu_dev, "running cpufreq without cooling device: %ld\n", From 2c7296a879c7957ef8bfda8b21c3da432eeedf5d Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Wed, 30 Dec 2015 12:18:42 +0100 Subject: [PATCH 090/797] cpufreq-dt: fix handling regulator_get_voltage() result The function can return negative values so it should be assigned to signed type. The problem has been detected using proposed semantic patch scripts/coccinelle/tests/unsigned_lesser_than_zero.cocci. Link: http://permalink.gmane.org/gmane.linux.kernel/2038576 Signed-off-by: Andrzej Hajda Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki (cherry picked from commit 929ca89c305a6ed7a4149115be99af6d73c36918) Signed-off-by: Alex Shi --- drivers/cpufreq/cpufreq-dt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index fc61cab00131..0ca74d070058 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -50,7 +50,8 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) struct private_data *priv = policy->driver_data; struct device *cpu_dev = priv->cpu_dev; struct regulator *cpu_reg = priv->cpu_reg; - unsigned long volt = 0, volt_old = 0, tol = 0; + unsigned long volt = 0, tol = 0; + int volt_old = 0; unsigned int old_freq, new_freq; long freq_Hz, freq_exact; int ret; @@ -83,7 +84,7 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) opp_freq / 1000, volt); } - dev_dbg(cpu_dev, "%u MHz, %ld mV --> %u MHz, %ld mV\n", + dev_dbg(cpu_dev, "%u MHz, %d mV --> %u MHz, %ld mV\n", old_freq / 1000, (volt_old > 0) ? volt_old / 1000 : -1, new_freq / 1000, volt ? volt / 1000 : -1); From abd485677e17532c9edfa11ec1f46bf2f2dff037 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:40 +0530 Subject: [PATCH 091/797] cpufreq: dt: Convert few pr_debug/err() calls to dev_dbg/err() We have the device structure available now, lets use it for better print messages. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit 896d6a4c0f41a93809b83f9e58aad73874a89d99) Signed-off-by: Alex Shi --- drivers/cpufreq/cpufreq-dt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 0ca74d070058..ace0168274d4 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -246,7 +246,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) */ ret = dev_pm_opp_get_opp_count(cpu_dev); if (ret <= 0) { - pr_debug("OPP table is not ready, deferring probe\n"); + dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n"); ret = -EPROBE_DEFER; goto out_free_opp; } @@ -325,7 +325,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { - pr_err("failed to init cpufreq table: %d\n", ret); + dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); goto out_free_priv; } From c56192201536f960a81bbf845c4667a9c79a2439 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:41 +0530 Subject: [PATCH 092/797] cpufreq: dt: Rename 'need_update' to 'opp_v1' That's the real purpose of this field, i.e. to take special care of old OPP V1 bindings. Lets name it accordingly, so that it can be used elsewhere. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit 457e99e60a8f5a40b7da204c0bfc8a86ad2161b9) Signed-off-by: Alex Shi --- drivers/cpufreq/cpufreq-dt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index ace0168274d4..0047d20803db 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -199,7 +199,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) struct dev_pm_opp *suspend_opp; unsigned long min_uV = ~0, max_uV = 0; unsigned int transition_latency; - bool need_update = false; + bool opp_v1 = false; int ret; ret = allocate_resources(policy->cpu, &cpu_dev, &cpu_reg, &cpu_clk); @@ -223,7 +223,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) * finding shared-OPPs for backward compatibility. */ if (ret == -ENOENT) - need_update = true; + opp_v1 = true; else goto out_node_put; } @@ -251,7 +251,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) goto out_free_opp; } - if (need_update) { + if (opp_v1) { struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data(); if (!pd || !pd->independent_clocks) From c9be30497a25decc31658f82603c4647c59b5e08 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:42 +0530 Subject: [PATCH 093/797] cpufreq: dt: OPP layers handles clock-latency for V1 bindings as well "clock-latency" is handled by OPP layer for all bindings and so there is no need to make special calls for V1 bindings. Use dev_pm_opp_get_max_clock_latency() for both the cases. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit 391d9aef8145204e0a5d67be3bd1fc45c5396dae) Signed-off-by: Alex Shi --- drivers/cpufreq/cpufreq-dt.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 0047d20803db..4c9f8a828f6f 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -265,10 +265,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) if (ret) dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", __func__, ret); - - of_property_read_u32(np, "clock-latency", &transition_latency); - } else { - transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev); } priv = kzalloc(sizeof(*priv), GFP_KERNEL); @@ -279,6 +275,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance); + transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev); if (!transition_latency) transition_latency = CPUFREQ_ETERNAL; From 99753a20c8e9cbdfcca7431a0026506c01a0bbe1 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:43 +0530 Subject: [PATCH 094/797] cpufreq: dt: Pass regulator name to the OPP core OPP core can handle the regulators by itself, and but it needs to know the name of the regulator to fetch. Add support for that. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit 050794aaebbb9f2c2c50b340b6998273e7c64189) Signed-off-by: Alex Shi --- drivers/cpufreq/cpufreq-dt.c | 46 ++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 4c9f8a828f6f..2af75f8088bb 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -34,6 +34,7 @@ struct private_data { struct regulator *cpu_reg; struct thermal_cooling_device *cdev; unsigned int voltage_tolerance; /* in percentage */ + const char *reg_name; }; static struct freq_attr *cpufreq_dt_attr[] = { @@ -119,6 +120,30 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) return ret; } +/* + * An earlier version of opp-v1 bindings used to name the regulator + * "cpu0-supply", we still need to handle that for backwards compatibility. + */ +static const char *find_supply_name(struct device *dev, struct device_node *np) +{ + struct property *pp; + int cpu = dev->id; + + /* Try "cpu0" for older DTs */ + if (!cpu) { + pp = of_find_property(np, "cpu0-supply", NULL); + if (pp) + return "cpu0"; + } + + pp = of_find_property(np, "cpu-supply", NULL); + if (pp) + return "cpu"; + + dev_dbg(dev, "no regulator for cpu%d\n", cpu); + return NULL; +} + static int allocate_resources(int cpu, struct device **cdev, struct regulator **creg, struct clk **cclk) { @@ -200,6 +225,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) unsigned long min_uV = ~0, max_uV = 0; unsigned int transition_latency; bool opp_v1 = false; + const char *name; int ret; ret = allocate_resources(policy->cpu, &cpu_dev, &cpu_reg, &cpu_clk); @@ -228,6 +254,20 @@ static int cpufreq_init(struct cpufreq_policy *policy) goto out_node_put; } + /* + * OPP layer will be taking care of regulators now, but it needs to know + * the name of the regulator first. + */ + name = find_supply_name(cpu_dev, np); + if (name) { + ret = dev_pm_opp_set_regulator(cpu_dev, name); + if (ret) { + dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n", + policy->cpu, ret); + goto out_node_put; + } + } + /* * Initialize OPP tables for all policy->cpus. They will be shared by * all CPUs which have marked their CPUs shared with OPP bindings. @@ -273,6 +313,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) goto out_free_opp; } + priv->reg_name = name; of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance); transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev); @@ -366,6 +407,8 @@ static int cpufreq_init(struct cpufreq_policy *policy) kfree(priv); out_free_opp: dev_pm_opp_of_cpumask_remove_table(policy->cpus); + if (name) + dev_pm_opp_put_regulator(cpu_dev); out_node_put: of_node_put(np); out_put_reg_clk: @@ -383,6 +426,9 @@ static int cpufreq_exit(struct cpufreq_policy *policy) cpufreq_cooling_unregister(priv->cdev); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); + if (priv->reg_name) + dev_pm_opp_put_regulator(priv->cpu_dev); + clk_put(policy->clk); if (!IS_ERR(priv->cpu_reg)) regulator_put(priv->cpu_reg); From 70969a89d8212f3d77a4c67712fbbd22c465d50e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:44 +0530 Subject: [PATCH 095/797] cpufreq: dt: Unsupported OPPs are already disabled The core already have a valid regulator set for the device opp and the unsupported OPPs are already disabled by the core. There is no need to repeat that in the user drivers, get rid of it. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit 6def6ea75e6dea45f01a16ae3cfb5b5ce48dd5e9) Signed-off-by: Alex Shi --- drivers/cpufreq/cpufreq-dt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 2af75f8088bb..c3fe89461ff4 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -349,8 +349,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) min_uV = opp_uV; if (opp_uV > max_uV) max_uV = opp_uV; - } else { - dev_pm_opp_disable(cpu_dev, opp_freq); } opp_freq++; From ba7b484ca022afe4fc4b64b3f4ed359a57a93831 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:45 +0530 Subject: [PATCH 096/797] cpufreq: dt: Reuse dev_pm_opp_get_max_transition_latency() OPP layer has all the information now to calculate transition latency (clock_latency + voltage_latency). Lets reuse the OPP layer helper dev_pm_opp_get_max_transition_latency() instead of open coding the same in cpufreq-dt driver. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit 755b888ff098c9f762717a9fbda7e05b16619069) Signed-off-by: Alex Shi --- drivers/cpufreq/cpufreq-dt.c | 48 +++--------------------------------- 1 file changed, 4 insertions(+), 44 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index c3fe89461ff4..6f80ce56b4ec 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -222,7 +222,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) struct regulator *cpu_reg; struct clk *cpu_clk; struct dev_pm_opp *suspend_opp; - unsigned long min_uV = ~0, max_uV = 0; unsigned int transition_latency; bool opp_v1 = false; const char *name; @@ -316,49 +315,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) priv->reg_name = name; of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance); - transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev); - if (!transition_latency) - transition_latency = CPUFREQ_ETERNAL; - - if (!IS_ERR(cpu_reg)) { - unsigned long opp_freq = 0; - - /* - * Disable any OPPs where the connected regulator isn't able to - * provide the specified voltage and record minimum and maximum - * voltage levels. - */ - while (1) { - struct dev_pm_opp *opp; - unsigned long opp_uV, tol_uV; - - rcu_read_lock(); - opp = dev_pm_opp_find_freq_ceil(cpu_dev, &opp_freq); - if (IS_ERR(opp)) { - rcu_read_unlock(); - break; - } - opp_uV = dev_pm_opp_get_voltage(opp); - rcu_read_unlock(); - - tol_uV = opp_uV * priv->voltage_tolerance / 100; - if (regulator_is_supported_voltage(cpu_reg, - opp_uV - tol_uV, - opp_uV + tol_uV)) { - if (opp_uV < min_uV) - min_uV = opp_uV; - if (opp_uV > max_uV) - max_uV = opp_uV; - } - - opp_freq++; - } - - ret = regulator_set_voltage_time(cpu_reg, min_uV, max_uV); - if (ret > 0) - transition_latency += ret * 1000; - } - ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); @@ -393,6 +349,10 @@ static int cpufreq_init(struct cpufreq_policy *policy) cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; } + transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev); + if (!transition_latency) + transition_latency = CPUFREQ_ETERNAL; + policy->cpuinfo.transition_latency = transition_latency; of_node_put(np); From 5da10c86b41732e6bc4444cf037494d3968baa1a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:46 +0530 Subject: [PATCH 097/797] cpufreq: dt: Use dev_pm_opp_set_rate() to switch frequency OPP core supports frequency/voltage changes based on the target frequency now, use that instead of open coding the same in cpufreq-dt driver. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit 78c3ba5df96c875b1668e1cd3ee0a69e62454f32) Signed-off-by: Alex Shi --- drivers/cpufreq/cpufreq-dt.c | 73 +----------------------------------- 1 file changed, 2 insertions(+), 71 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 6f80ce56b4ec..150a172c7d0a 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -45,79 +45,10 @@ static struct freq_attr *cpufreq_dt_attr[] = { static int set_target(struct cpufreq_policy *policy, unsigned int index) { - struct dev_pm_opp *opp; - struct cpufreq_frequency_table *freq_table = policy->freq_table; - struct clk *cpu_clk = policy->clk; struct private_data *priv = policy->driver_data; - struct device *cpu_dev = priv->cpu_dev; - struct regulator *cpu_reg = priv->cpu_reg; - unsigned long volt = 0, tol = 0; - int volt_old = 0; - unsigned int old_freq, new_freq; - long freq_Hz, freq_exact; - int ret; - freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000); - if (freq_Hz <= 0) - freq_Hz = freq_table[index].frequency * 1000; - - freq_exact = freq_Hz; - new_freq = freq_Hz / 1000; - old_freq = clk_get_rate(cpu_clk) / 1000; - - if (!IS_ERR(cpu_reg)) { - unsigned long opp_freq; - - rcu_read_lock(); - opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_Hz); - if (IS_ERR(opp)) { - rcu_read_unlock(); - dev_err(cpu_dev, "failed to find OPP for %ld\n", - freq_Hz); - return PTR_ERR(opp); - } - volt = dev_pm_opp_get_voltage(opp); - opp_freq = dev_pm_opp_get_freq(opp); - rcu_read_unlock(); - tol = volt * priv->voltage_tolerance / 100; - volt_old = regulator_get_voltage(cpu_reg); - dev_dbg(cpu_dev, "Found OPP: %ld kHz, %ld uV\n", - opp_freq / 1000, volt); - } - - dev_dbg(cpu_dev, "%u MHz, %d mV --> %u MHz, %ld mV\n", - old_freq / 1000, (volt_old > 0) ? volt_old / 1000 : -1, - new_freq / 1000, volt ? volt / 1000 : -1); - - /* scaling up? scale voltage before frequency */ - if (!IS_ERR(cpu_reg) && new_freq > old_freq) { - ret = regulator_set_voltage_tol(cpu_reg, volt, tol); - if (ret) { - dev_err(cpu_dev, "failed to scale voltage up: %d\n", - ret); - return ret; - } - } - - ret = clk_set_rate(cpu_clk, freq_exact); - if (ret) { - dev_err(cpu_dev, "failed to set clock rate: %d\n", ret); - if (!IS_ERR(cpu_reg) && volt_old > 0) - regulator_set_voltage_tol(cpu_reg, volt_old, tol); - return ret; - } - - /* scaling down? scale voltage after frequency */ - if (!IS_ERR(cpu_reg) && new_freq < old_freq) { - ret = regulator_set_voltage_tol(cpu_reg, volt, tol); - if (ret) { - dev_err(cpu_dev, "failed to scale voltage down: %d\n", - ret); - clk_set_rate(cpu_clk, old_freq * 1000); - } - } - - return ret; + return dev_pm_opp_set_rate(priv->cpu_dev, + policy->freq_table[index].frequency * 1000); } /* From 4b2c9f2ab35e19c8de1a4e50d798a49d98e5c39d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:47 +0530 Subject: [PATCH 098/797] cpufreq: dt: No need to fetch voltage-tolerance Its already done by core and we don't need to get it anymore. And so, we don't need to get of node in cpufreq_init() anymore, move that to find_supply_name() instead. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit df2c8ec28e73d47392b8cb24828c15c54819da41) Signed-off-by: Alex Shi --- drivers/cpufreq/cpufreq-dt.c | 46 ++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 150a172c7d0a..bbafd7b63d1a 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -33,7 +33,6 @@ struct private_data { struct device *cpu_dev; struct regulator *cpu_reg; struct thermal_cooling_device *cdev; - unsigned int voltage_tolerance; /* in percentage */ const char *reg_name; }; @@ -55,24 +54,38 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) * An earlier version of opp-v1 bindings used to name the regulator * "cpu0-supply", we still need to handle that for backwards compatibility. */ -static const char *find_supply_name(struct device *dev, struct device_node *np) +static const char *find_supply_name(struct device *dev) { + struct device_node *np; struct property *pp; int cpu = dev->id; + const char *name = NULL; + + np = of_node_get(dev->of_node); + + /* This must be valid for sure */ + if (WARN_ON(!np)) + return NULL; /* Try "cpu0" for older DTs */ if (!cpu) { pp = of_find_property(np, "cpu0-supply", NULL); - if (pp) - return "cpu0"; + if (pp) { + name = "cpu0"; + goto node_put; + } } pp = of_find_property(np, "cpu-supply", NULL); - if (pp) - return "cpu"; + if (pp) { + name = "cpu"; + goto node_put; + } dev_dbg(dev, "no regulator for cpu%d\n", cpu); - return NULL; +node_put: + of_node_put(np); + return name; } static int allocate_resources(int cpu, struct device **cdev, @@ -147,7 +160,6 @@ static int allocate_resources(int cpu, struct device **cdev, static int cpufreq_init(struct cpufreq_policy *policy) { struct cpufreq_frequency_table *freq_table; - struct device_node *np; struct private_data *priv; struct device *cpu_dev; struct regulator *cpu_reg; @@ -164,13 +176,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) return ret; } - np = of_node_get(cpu_dev->of_node); - if (!np) { - dev_err(cpu_dev, "failed to find cpu%d node\n", policy->cpu); - ret = -ENOENT; - goto out_put_reg_clk; - } - /* Get OPP-sharing information from "operating-points-v2" bindings */ ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, policy->cpus); if (ret) { @@ -181,20 +186,20 @@ static int cpufreq_init(struct cpufreq_policy *policy) if (ret == -ENOENT) opp_v1 = true; else - goto out_node_put; + goto out_put_reg_clk; } /* * OPP layer will be taking care of regulators now, but it needs to know * the name of the regulator first. */ - name = find_supply_name(cpu_dev, np); + name = find_supply_name(cpu_dev); if (name) { ret = dev_pm_opp_set_regulator(cpu_dev, name); if (ret) { dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n", policy->cpu, ret); - goto out_node_put; + goto out_put_reg_clk; } } @@ -244,7 +249,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) } priv->reg_name = name; - of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance); ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { @@ -286,8 +290,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = transition_latency; - of_node_put(np); - return 0; out_free_cpufreq_table: @@ -298,8 +300,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) dev_pm_opp_of_cpumask_remove_table(policy->cpus); if (name) dev_pm_opp_put_regulator(cpu_dev); -out_node_put: - of_node_put(np); out_put_reg_clk: clk_put(cpu_clk); if (!IS_ERR(cpu_reg)) From 454f12fd0b3fae75d101765614e749e3f70a5078 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:48 +0530 Subject: [PATCH 099/797] cpufreq: dt: No need to allocate resources anymore OPP layer manages it now and cpufreq-dt driver doesn't need it. But, we still need to check for availability of resources for deferred probing. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki (cherry picked from commit dd02a3d920083b6cb0ee4f0eaf2c599b740bf5fe) Signed-off-by: Alex Shi --- drivers/cpufreq/cpufreq-dt.c | 116 ++++++++++++++--------------------- 1 file changed, 47 insertions(+), 69 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index bbafd7b63d1a..f951f911786e 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -31,7 +31,6 @@ struct private_data { struct device *cpu_dev; - struct regulator *cpu_reg; struct thermal_cooling_device *cdev; const char *reg_name; }; @@ -88,73 +87,59 @@ static const char *find_supply_name(struct device *dev) return name; } -static int allocate_resources(int cpu, struct device **cdev, - struct regulator **creg, struct clk **cclk) +static int resources_available(void) { struct device *cpu_dev; struct regulator *cpu_reg; struct clk *cpu_clk; int ret = 0; - char *reg_cpu0 = "cpu0", *reg_cpu = "cpu", *reg; + const char *name; - cpu_dev = get_cpu_device(cpu); + cpu_dev = get_cpu_device(0); if (!cpu_dev) { - pr_err("failed to get cpu%d device\n", cpu); + pr_err("failed to get cpu0 device\n"); return -ENODEV; } - /* Try "cpu0" for older DTs */ - if (!cpu) - reg = reg_cpu0; - else - reg = reg_cpu; + cpu_clk = clk_get(cpu_dev, NULL); + ret = PTR_ERR_OR_ZERO(cpu_clk); + if (ret) { + /* + * If cpu's clk node is present, but clock is not yet + * registered, we should try defering probe. + */ + if (ret == -EPROBE_DEFER) + dev_dbg(cpu_dev, "clock not ready, retry\n"); + else + dev_err(cpu_dev, "failed to get clock: %d\n", ret); -try_again: - cpu_reg = regulator_get_optional(cpu_dev, reg); + return ret; + } + + clk_put(cpu_clk); + + name = find_supply_name(cpu_dev); + /* Platform doesn't require regulator */ + if (!name) + return 0; + + cpu_reg = regulator_get_optional(cpu_dev, name); ret = PTR_ERR_OR_ZERO(cpu_reg); if (ret) { /* * If cpu's regulator supply node is present, but regulator is * not yet registered, we should try defering probe. */ - if (ret == -EPROBE_DEFER) { - dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n", - cpu); - return ret; - } - - /* Try with "cpu-supply" */ - if (reg == reg_cpu0) { - reg = reg_cpu; - goto try_again; - } - - dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret); - } - - cpu_clk = clk_get(cpu_dev, NULL); - ret = PTR_ERR_OR_ZERO(cpu_clk); - if (ret) { - /* put regulator */ - if (!IS_ERR(cpu_reg)) - regulator_put(cpu_reg); - - /* - * If cpu's clk node is present, but clock is not yet - * registered, we should try defering probe. - */ if (ret == -EPROBE_DEFER) - dev_dbg(cpu_dev, "cpu%d clock not ready, retry\n", cpu); + dev_dbg(cpu_dev, "cpu0 regulator not ready, retry\n"); else - dev_err(cpu_dev, "failed to get cpu%d clock: %d\n", cpu, - ret); - } else { - *cdev = cpu_dev; - *creg = cpu_reg; - *cclk = cpu_clk; + dev_dbg(cpu_dev, "no regulator for cpu0: %d\n", ret); + + return ret; } - return ret; + regulator_put(cpu_reg); + return 0; } static int cpufreq_init(struct cpufreq_policy *policy) @@ -162,7 +147,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) struct cpufreq_frequency_table *freq_table; struct private_data *priv; struct device *cpu_dev; - struct regulator *cpu_reg; struct clk *cpu_clk; struct dev_pm_opp *suspend_opp; unsigned int transition_latency; @@ -170,9 +154,16 @@ static int cpufreq_init(struct cpufreq_policy *policy) const char *name; int ret; - ret = allocate_resources(policy->cpu, &cpu_dev, &cpu_reg, &cpu_clk); - if (ret) { - pr_err("%s: Failed to allocate resources: %d\n", __func__, ret); + cpu_dev = get_cpu_device(policy->cpu); + if (!cpu_dev) { + pr_err("failed to get cpu%d device\n", policy->cpu); + return -ENODEV; + } + + cpu_clk = clk_get(cpu_dev, NULL); + if (IS_ERR(cpu_clk)) { + ret = PTR_ERR(cpu_clk); + dev_err(cpu_dev, "%s: failed to get clk: %d\n", __func__, ret); return ret; } @@ -186,7 +177,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) if (ret == -ENOENT) opp_v1 = true; else - goto out_put_reg_clk; + goto out_put_clk; } /* @@ -199,7 +190,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) if (ret) { dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n", policy->cpu, ret); - goto out_put_reg_clk; + goto out_put_clk; } } @@ -257,9 +248,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) } priv->cpu_dev = cpu_dev; - priv->cpu_reg = cpu_reg; policy->driver_data = priv; - policy->clk = cpu_clk; rcu_read_lock(); @@ -300,10 +289,8 @@ static int cpufreq_init(struct cpufreq_policy *policy) dev_pm_opp_of_cpumask_remove_table(policy->cpus); if (name) dev_pm_opp_put_regulator(cpu_dev); -out_put_reg_clk: +out_put_clk: clk_put(cpu_clk); - if (!IS_ERR(cpu_reg)) - regulator_put(cpu_reg); return ret; } @@ -319,8 +306,6 @@ static int cpufreq_exit(struct cpufreq_policy *policy) dev_pm_opp_put_regulator(priv->cpu_dev); clk_put(policy->clk); - if (!IS_ERR(priv->cpu_reg)) - regulator_put(priv->cpu_reg); kfree(priv); return 0; @@ -373,9 +358,6 @@ static struct cpufreq_driver dt_cpufreq_driver = { static int dt_cpufreq_probe(struct platform_device *pdev) { - struct device *cpu_dev; - struct regulator *cpu_reg; - struct clk *cpu_clk; int ret; /* @@ -385,19 +367,15 @@ static int dt_cpufreq_probe(struct platform_device *pdev) * * FIXME: Is checking this only for CPU0 sufficient ? */ - ret = allocate_resources(0, &cpu_dev, &cpu_reg, &cpu_clk); + ret = resources_available(); if (ret) return ret; - clk_put(cpu_clk); - if (!IS_ERR(cpu_reg)) - regulator_put(cpu_reg); - dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev); ret = cpufreq_register_driver(&dt_cpufreq_driver); if (ret) - dev_err(cpu_dev, "failed register driver: %d\n", ret); + dev_err(&pdev->dev, "failed register driver: %d\n", ret); return ret; } From 84cf91ad330f5c9ba51f761a37e58b9aa99bf470 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 29 Feb 2016 15:59:18 +0100 Subject: [PATCH 100/797] s390/cpumf: Fix lpp detection commit 7a76aa95f6f6682db5629449d763251d1c9f8c4e upstream. we have to check bit 40 of the facility list before issuing LPP and not bit 48. Otherwise a guest running on a system with "The decimal-floating-point zoned-conversion facility" and without the "The set-program-parameters facility" might crash on an lpp instruction. Signed-off-by: Christian Borntraeger Fixes: e22cf8ca6f75 ("s390/cpumf: rework program parameter setting to detect guest samples") Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/head64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 58b719fa8067..1ad2407c7f75 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -16,7 +16,7 @@ __HEAD ENTRY(startup_continue) - tm __LC_STFL_FAC_LIST+6,0x80 # LPP available ? + tm __LC_STFL_FAC_LIST+5,0x80 # LPP available ? jz 0f xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid mvi __LC_LPP,0x80 # and set LPP_MAGIC From 4c8fe4f52755d4690a745f4e56b543c51add86fe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Nov 2015 15:24:39 +0100 Subject: [PATCH 101/797] regulator: core: avoid unused variable warning commit fa731ac7ea04a7d3a5c6d2f568132478c02a83b3 upstream. The second argument of the mutex_lock_nested() helper is only evaluated if CONFIG_DEBUG_LOCK_ALLOC is set. Otherwise we get this build warning for the new regulator_lock_supply function: drivers/regulator/core.c: In function 'regulator_lock_supply': drivers/regulator/core.c:142:6: warning: unused variable 'i' [-Wunused-variable] To avoid the warning, this restructures the code to make it both simpler and to move the 'i++' outside of the mutex_lock_nested call, where it is now always used and the variable is not flagged as unused. We had some discussion about changing mutex_lock_nested to an inline function, which would make the code do the right thing here, but in the end decided against it, in order to guarantee that mutex_lock_nested() does not introduced overhead without CONFIG_DEBUG_LOCK_ALLOC. Signed-off-by: Arnd Bergmann Fixes: 9f01cd4a915 ("regulator: core: introduce function to lock regulators and its supplies") Link: http://permalink.gmane.org/gmane.linux.kernel/2068900 Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 73b7683355cd..c70017d5f74b 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -138,18 +138,10 @@ static bool have_full_constraints(void) */ static void regulator_lock_supply(struct regulator_dev *rdev) { - struct regulator *supply; - int i = 0; + int i; - while (1) { - mutex_lock_nested(&rdev->mutex, i++); - supply = rdev->supply; - - if (!rdev->supply) - return; - - rdev = supply->rdev; - } + for (i = 0; rdev->supply; rdev = rdev->supply->rdev, i++) + mutex_lock_nested(&rdev->mutex, i); } /** From b1999fa6e8145305a6c8bda30ea20783717708e6 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 2 Dec 2015 16:54:50 +0100 Subject: [PATCH 102/797] regulator: core: Fix nested locking of supplies commit 70a7fb80e85ae7f78f8e90cec3fbd862ea6a4d4b upstream. Commit fa731ac7ea04 ("regulator: core: avoid unused variable warning") introduced a subtle change in how supplies are locked. Where previously code was always locking the regulator of the current iteration, the new implementation only locks the regulator if it has a supply. For any given power tree that means that the root will never get locked. On the other hand the regulator_unlock_supply() will still release all the locks, which in turn causes the lock debugging code to warn about a mutex being unlocked which wasn't locked. Cc: Mark Brown Cc: Arnd Bergmann Fixes: Fixes: fa731ac7ea04 ("regulator: core: avoid unused variable warning") Signed-off-by: Thierry Reding Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index c70017d5f74b..7b94b8ee087c 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -132,6 +132,14 @@ static bool have_full_constraints(void) return has_full_constraints || of_have_populated_dt(); } +static inline struct regulator_dev *rdev_get_supply(struct regulator_dev *rdev) +{ + if (rdev && rdev->supply) + return rdev->supply->rdev; + + return NULL; +} + /** * regulator_lock_supply - lock a regulator and its supplies * @rdev: regulator source @@ -140,7 +148,7 @@ static void regulator_lock_supply(struct regulator_dev *rdev) { int i; - for (i = 0; rdev->supply; rdev = rdev->supply->rdev, i++) + for (i = 0; rdev->supply; rdev = rdev_get_supply(rdev), i++) mutex_lock_nested(&rdev->mutex, i); } From e08f9a7c0e0242c2fae664d252e25c9bf93db522 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Nov 2015 15:25:23 +0100 Subject: [PATCH 103/797] ASoC: samsung: pass DMA channels as pointers commit b9a1a743818ea3265abf98f9431623afa8c50c86 upstream. ARM64 allmodconfig produces a bunch of warnings when building the samsung ASoC code: sound/soc/samsung/dmaengine.c: In function 'samsung_asoc_init_dma_data': sound/soc/samsung/dmaengine.c:53:32: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] playback_data->filter_data = (void *)playback->channel; sound/soc/samsung/dmaengine.c:60:31: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] capture_data->filter_data = (void *)capture->channel; We could easily shut up the warning by adding an intermediate cast, but there is a bigger underlying problem: The use of IORESOURCE_DMA to pass data from platform code to device drivers is dubious to start with, as what we really want is a pointer that can be passed into a filter function. Note that on s3c64xx, the pl08x DMA data is already a pointer, but gets cast to resource_size_t so we can pass it as a resource, and it then gets converted back to a pointer. In contrast, the data we pass for s3c24xx is an index into a device specific table, and we artificially convert that into a pointer for the filter function. Signed-off-by: Arnd Bergmann Reviewed-by: Krzysztof Kozlowski Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-s3c64xx/dev-audio.c | 41 +++++++++++-------- arch/arm/mach-s3c64xx/include/mach/dma.h | 52 ++++++++++++------------ arch/arm/plat-samsung/devs.c | 11 +++-- include/linux/platform_data/asoc-s3c.h | 4 ++ sound/soc/samsung/ac97.c | 26 ++---------- sound/soc/samsung/dma.h | 2 +- sound/soc/samsung/dmaengine.c | 4 +- sound/soc/samsung/i2s.c | 26 +++--------- sound/soc/samsung/pcm.c | 20 +++------ sound/soc/samsung/s3c2412-i2s.c | 4 +- sound/soc/samsung/s3c24xx-i2s.c | 4 +- sound/soc/samsung/spdif.c | 10 +---- 12 files changed, 84 insertions(+), 120 deletions(-) diff --git a/arch/arm/mach-s3c64xx/dev-audio.c b/arch/arm/mach-s3c64xx/dev-audio.c index ff780a8d8366..9a42736ef4ac 100644 --- a/arch/arm/mach-s3c64xx/dev-audio.c +++ b/arch/arm/mach-s3c64xx/dev-audio.c @@ -54,12 +54,12 @@ static int s3c64xx_i2s_cfg_gpio(struct platform_device *pdev) static struct resource s3c64xx_iis0_resource[] = { [0] = DEFINE_RES_MEM(S3C64XX_PA_IIS0, SZ_256), - [1] = DEFINE_RES_DMA(DMACH_I2S0_OUT), - [2] = DEFINE_RES_DMA(DMACH_I2S0_IN), }; -static struct s3c_audio_pdata i2sv3_pdata = { +static struct s3c_audio_pdata i2s0_pdata = { .cfg_gpio = s3c64xx_i2s_cfg_gpio, + .dma_playback = DMACH_I2S0_OUT, + .dma_capture = DMACH_I2S0_IN, }; struct platform_device s3c64xx_device_iis0 = { @@ -68,15 +68,19 @@ struct platform_device s3c64xx_device_iis0 = { .num_resources = ARRAY_SIZE(s3c64xx_iis0_resource), .resource = s3c64xx_iis0_resource, .dev = { - .platform_data = &i2sv3_pdata, + .platform_data = &i2s0_pdata, }, }; EXPORT_SYMBOL(s3c64xx_device_iis0); static struct resource s3c64xx_iis1_resource[] = { [0] = DEFINE_RES_MEM(S3C64XX_PA_IIS1, SZ_256), - [1] = DEFINE_RES_DMA(DMACH_I2S1_OUT), - [2] = DEFINE_RES_DMA(DMACH_I2S1_IN), +}; + +static struct s3c_audio_pdata i2s1_pdata = { + .cfg_gpio = s3c64xx_i2s_cfg_gpio, + .dma_playback = DMACH_I2S1_OUT, + .dma_capture = DMACH_I2S1_IN, }; struct platform_device s3c64xx_device_iis1 = { @@ -85,19 +89,19 @@ struct platform_device s3c64xx_device_iis1 = { .num_resources = ARRAY_SIZE(s3c64xx_iis1_resource), .resource = s3c64xx_iis1_resource, .dev = { - .platform_data = &i2sv3_pdata, + .platform_data = &i2s1_pdata, }, }; EXPORT_SYMBOL(s3c64xx_device_iis1); static struct resource s3c64xx_iisv4_resource[] = { [0] = DEFINE_RES_MEM(S3C64XX_PA_IISV4, SZ_256), - [1] = DEFINE_RES_DMA(DMACH_HSI_I2SV40_TX), - [2] = DEFINE_RES_DMA(DMACH_HSI_I2SV40_RX), }; static struct s3c_audio_pdata i2sv4_pdata = { .cfg_gpio = s3c64xx_i2s_cfg_gpio, + .dma_playback = DMACH_HSI_I2SV40_TX, + .dma_capture = DMACH_HSI_I2SV40_RX, .type = { .i2s = { .quirks = QUIRK_PRI_6CHAN, @@ -142,12 +146,12 @@ static int s3c64xx_pcm_cfg_gpio(struct platform_device *pdev) static struct resource s3c64xx_pcm0_resource[] = { [0] = DEFINE_RES_MEM(S3C64XX_PA_PCM0, SZ_256), - [1] = DEFINE_RES_DMA(DMACH_PCM0_TX), - [2] = DEFINE_RES_DMA(DMACH_PCM0_RX), }; static struct s3c_audio_pdata s3c_pcm0_pdata = { .cfg_gpio = s3c64xx_pcm_cfg_gpio, + .dma_capture = DMACH_PCM0_RX, + .dma_playback = DMACH_PCM0_TX, }; struct platform_device s3c64xx_device_pcm0 = { @@ -163,12 +167,12 @@ EXPORT_SYMBOL(s3c64xx_device_pcm0); static struct resource s3c64xx_pcm1_resource[] = { [0] = DEFINE_RES_MEM(S3C64XX_PA_PCM1, SZ_256), - [1] = DEFINE_RES_DMA(DMACH_PCM1_TX), - [2] = DEFINE_RES_DMA(DMACH_PCM1_RX), }; static struct s3c_audio_pdata s3c_pcm1_pdata = { .cfg_gpio = s3c64xx_pcm_cfg_gpio, + .dma_playback = DMACH_PCM1_TX, + .dma_capture = DMACH_PCM1_RX, }; struct platform_device s3c64xx_device_pcm1 = { @@ -196,13 +200,14 @@ static int s3c64xx_ac97_cfg_gpe(struct platform_device *pdev) static struct resource s3c64xx_ac97_resource[] = { [0] = DEFINE_RES_MEM(S3C64XX_PA_AC97, SZ_256), - [1] = DEFINE_RES_DMA(DMACH_AC97_PCMOUT), - [2] = DEFINE_RES_DMA(DMACH_AC97_PCMIN), - [3] = DEFINE_RES_DMA(DMACH_AC97_MICIN), - [4] = DEFINE_RES_IRQ(IRQ_AC97), + [1] = DEFINE_RES_IRQ(IRQ_AC97), }; -static struct s3c_audio_pdata s3c_ac97_pdata; +static struct s3c_audio_pdata s3c_ac97_pdata = { + .dma_playback = DMACH_AC97_PCMOUT, + .dma_capture = DMACH_AC97_PCMIN, + .dma_capture_mic = DMACH_AC97_MICIN, +}; static u64 s3c64xx_ac97_dmamask = DMA_BIT_MASK(32); diff --git a/arch/arm/mach-s3c64xx/include/mach/dma.h b/arch/arm/mach-s3c64xx/include/mach/dma.h index 096e14073bd9..9c739eafe95c 100644 --- a/arch/arm/mach-s3c64xx/include/mach/dma.h +++ b/arch/arm/mach-s3c64xx/include/mach/dma.h @@ -14,38 +14,38 @@ #define S3C64XX_DMA_CHAN(name) ((unsigned long)(name)) /* DMA0/SDMA0 */ -#define DMACH_UART0 S3C64XX_DMA_CHAN("uart0_tx") -#define DMACH_UART0_SRC2 S3C64XX_DMA_CHAN("uart0_rx") -#define DMACH_UART1 S3C64XX_DMA_CHAN("uart1_tx") -#define DMACH_UART1_SRC2 S3C64XX_DMA_CHAN("uart1_rx") -#define DMACH_UART2 S3C64XX_DMA_CHAN("uart2_tx") -#define DMACH_UART2_SRC2 S3C64XX_DMA_CHAN("uart2_rx") -#define DMACH_UART3 S3C64XX_DMA_CHAN("uart3_tx") -#define DMACH_UART3_SRC2 S3C64XX_DMA_CHAN("uart3_rx") -#define DMACH_PCM0_TX S3C64XX_DMA_CHAN("pcm0_tx") -#define DMACH_PCM0_RX S3C64XX_DMA_CHAN("pcm0_rx") -#define DMACH_I2S0_OUT S3C64XX_DMA_CHAN("i2s0_tx") -#define DMACH_I2S0_IN S3C64XX_DMA_CHAN("i2s0_rx") +#define DMACH_UART0 "uart0_tx" +#define DMACH_UART0_SRC2 "uart0_rx" +#define DMACH_UART1 "uart1_tx" +#define DMACH_UART1_SRC2 "uart1_rx" +#define DMACH_UART2 "uart2_tx" +#define DMACH_UART2_SRC2 "uart2_rx" +#define DMACH_UART3 "uart3_tx" +#define DMACH_UART3_SRC2 "uart3_rx" +#define DMACH_PCM0_TX "pcm0_tx" +#define DMACH_PCM0_RX "pcm0_rx" +#define DMACH_I2S0_OUT "i2s0_tx" +#define DMACH_I2S0_IN "i2s0_rx" #define DMACH_SPI0_TX S3C64XX_DMA_CHAN("spi0_tx") #define DMACH_SPI0_RX S3C64XX_DMA_CHAN("spi0_rx") -#define DMACH_HSI_I2SV40_TX S3C64XX_DMA_CHAN("i2s2_tx") -#define DMACH_HSI_I2SV40_RX S3C64XX_DMA_CHAN("i2s2_rx") +#define DMACH_HSI_I2SV40_TX "i2s2_tx" +#define DMACH_HSI_I2SV40_RX "i2s2_rx" /* DMA1/SDMA1 */ -#define DMACH_PCM1_TX S3C64XX_DMA_CHAN("pcm1_tx") -#define DMACH_PCM1_RX S3C64XX_DMA_CHAN("pcm1_rx") -#define DMACH_I2S1_OUT S3C64XX_DMA_CHAN("i2s1_tx") -#define DMACH_I2S1_IN S3C64XX_DMA_CHAN("i2s1_rx") +#define DMACH_PCM1_TX "pcm1_tx" +#define DMACH_PCM1_RX "pcm1_rx" +#define DMACH_I2S1_OUT "i2s1_tx" +#define DMACH_I2S1_IN "i2s1_rx" #define DMACH_SPI1_TX S3C64XX_DMA_CHAN("spi1_tx") #define DMACH_SPI1_RX S3C64XX_DMA_CHAN("spi1_rx") -#define DMACH_AC97_PCMOUT S3C64XX_DMA_CHAN("ac97_out") -#define DMACH_AC97_PCMIN S3C64XX_DMA_CHAN("ac97_in") -#define DMACH_AC97_MICIN S3C64XX_DMA_CHAN("ac97_mic") -#define DMACH_PWM S3C64XX_DMA_CHAN("pwm") -#define DMACH_IRDA S3C64XX_DMA_CHAN("irda") -#define DMACH_EXTERNAL S3C64XX_DMA_CHAN("external") -#define DMACH_SECURITY_RX S3C64XX_DMA_CHAN("sec_rx") -#define DMACH_SECURITY_TX S3C64XX_DMA_CHAN("sec_tx") +#define DMACH_AC97_PCMOUT "ac97_out" +#define DMACH_AC97_PCMIN "ac97_in" +#define DMACH_AC97_MICIN "ac97_mic" +#define DMACH_PWM "pwm" +#define DMACH_IRDA "irda" +#define DMACH_EXTERNAL "external" +#define DMACH_SECURITY_RX "sec_rx" +#define DMACH_SECURITY_TX "sec_tx" enum dma_ch { DMACH_MAX = 32 diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c index 82074625de5c..e212f9d804bd 100644 --- a/arch/arm/plat-samsung/devs.c +++ b/arch/arm/plat-samsung/devs.c @@ -65,6 +65,7 @@ #include #include #include +#include #include static u64 samsung_device_dma_mask = DMA_BIT_MASK(32); @@ -74,9 +75,12 @@ static u64 samsung_device_dma_mask = DMA_BIT_MASK(32); static struct resource s3c_ac97_resource[] = { [0] = DEFINE_RES_MEM(S3C2440_PA_AC97, S3C2440_SZ_AC97), [1] = DEFINE_RES_IRQ(IRQ_S3C244X_AC97), - [2] = DEFINE_RES_DMA_NAMED(DMACH_PCM_OUT, "PCM out"), - [3] = DEFINE_RES_DMA_NAMED(DMACH_PCM_IN, "PCM in"), - [4] = DEFINE_RES_DMA_NAMED(DMACH_MIC_IN, "Mic in"), +}; + +static struct s3c_audio_pdata s3c_ac97_pdata = { + .dma_playback = (void *)DMACH_PCM_OUT, + .dma_capture = (void *)DMACH_PCM_IN, + .dma_capture_mic = (void *)DMACH_MIC_IN, }; struct platform_device s3c_device_ac97 = { @@ -87,6 +91,7 @@ struct platform_device s3c_device_ac97 = { .dev = { .dma_mask = &samsung_device_dma_mask, .coherent_dma_mask = DMA_BIT_MASK(32), + .platform_data = &s3c_ac97_pdata, } }; #endif /* CONFIG_CPU_S3C2440 */ diff --git a/include/linux/platform_data/asoc-s3c.h b/include/linux/platform_data/asoc-s3c.h index 5e0bc779e6c5..33f88b4479e4 100644 --- a/include/linux/platform_data/asoc-s3c.h +++ b/include/linux/platform_data/asoc-s3c.h @@ -39,6 +39,10 @@ struct samsung_i2s { */ struct s3c_audio_pdata { int (*cfg_gpio)(struct platform_device *); + void *dma_playback; + void *dma_capture; + void *dma_play_sec; + void *dma_capture_mic; union { struct samsung_i2s i2s; } type; diff --git a/sound/soc/samsung/ac97.c b/sound/soc/samsung/ac97.c index e4145509d63c..9c5219392460 100644 --- a/sound/soc/samsung/ac97.c +++ b/sound/soc/samsung/ac97.c @@ -324,7 +324,7 @@ static const struct snd_soc_component_driver s3c_ac97_component = { static int s3c_ac97_probe(struct platform_device *pdev) { - struct resource *mem_res, *dmatx_res, *dmarx_res, *dmamic_res, *irq_res; + struct resource *mem_res, *irq_res; struct s3c_audio_pdata *ac97_pdata; int ret; @@ -335,24 +335,6 @@ static int s3c_ac97_probe(struct platform_device *pdev) } /* Check for availability of necessary resource */ - dmatx_res = platform_get_resource(pdev, IORESOURCE_DMA, 0); - if (!dmatx_res) { - dev_err(&pdev->dev, "Unable to get AC97-TX dma resource\n"); - return -ENXIO; - } - - dmarx_res = platform_get_resource(pdev, IORESOURCE_DMA, 1); - if (!dmarx_res) { - dev_err(&pdev->dev, "Unable to get AC97-RX dma resource\n"); - return -ENXIO; - } - - dmamic_res = platform_get_resource(pdev, IORESOURCE_DMA, 2); - if (!dmamic_res) { - dev_err(&pdev->dev, "Unable to get AC97-MIC dma resource\n"); - return -ENXIO; - } - irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (!irq_res) { dev_err(&pdev->dev, "AC97 IRQ not provided!\n"); @@ -364,11 +346,11 @@ static int s3c_ac97_probe(struct platform_device *pdev) if (IS_ERR(s3c_ac97.regs)) return PTR_ERR(s3c_ac97.regs); - s3c_ac97_pcm_out.channel = dmatx_res->start; + s3c_ac97_pcm_out.slave = ac97_pdata->dma_playback; s3c_ac97_pcm_out.dma_addr = mem_res->start + S3C_AC97_PCM_DATA; - s3c_ac97_pcm_in.channel = dmarx_res->start; + s3c_ac97_pcm_in.slave = ac97_pdata->dma_capture; s3c_ac97_pcm_in.dma_addr = mem_res->start + S3C_AC97_PCM_DATA; - s3c_ac97_mic_in.channel = dmamic_res->start; + s3c_ac97_mic_in.slave = ac97_pdata->dma_capture_mic; s3c_ac97_mic_in.dma_addr = mem_res->start + S3C_AC97_MIC_DATA; init_completion(&s3c_ac97.done); diff --git a/sound/soc/samsung/dma.h b/sound/soc/samsung/dma.h index 0e85dcfec023..085ef30f5ca2 100644 --- a/sound/soc/samsung/dma.h +++ b/sound/soc/samsung/dma.h @@ -15,7 +15,7 @@ #include struct s3c_dma_params { - int channel; /* Channel ID */ + void *slave; /* Channel ID */ dma_addr_t dma_addr; int dma_size; /* Size of the DMA transfer */ char *ch_name; diff --git a/sound/soc/samsung/dmaengine.c b/sound/soc/samsung/dmaengine.c index 506f5bf6d082..727008d57d14 100644 --- a/sound/soc/samsung/dmaengine.c +++ b/sound/soc/samsung/dmaengine.c @@ -50,14 +50,14 @@ void samsung_asoc_init_dma_data(struct snd_soc_dai *dai, if (playback) { playback_data = &playback->dma_data; - playback_data->filter_data = (void *)playback->channel; + playback_data->filter_data = playback->slave; playback_data->chan_name = playback->ch_name; playback_data->addr = playback->dma_addr; playback_data->addr_width = playback->dma_size; } if (capture) { capture_data = &capture->dma_data; - capture_data->filter_data = (void *)capture->channel; + capture_data->filter_data = capture->slave; capture_data->chan_name = capture->ch_name; capture_data->addr = capture->dma_addr; capture_data->addr_width = capture->dma_size; diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index 7dbf899b2af2..e163b0148c4b 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -1260,27 +1260,14 @@ static int samsung_i2s_probe(struct platform_device *pdev) pri_dai->lock = &pri_dai->spinlock; if (!np) { - res = platform_get_resource(pdev, IORESOURCE_DMA, 0); - if (!res) { - dev_err(&pdev->dev, - "Unable to get I2S-TX dma resource\n"); - return -ENXIO; - } - pri_dai->dma_playback.channel = res->start; - - res = platform_get_resource(pdev, IORESOURCE_DMA, 1); - if (!res) { - dev_err(&pdev->dev, - "Unable to get I2S-RX dma resource\n"); - return -ENXIO; - } - pri_dai->dma_capture.channel = res->start; - if (i2s_pdata == NULL) { dev_err(&pdev->dev, "Can't work without s3c_audio_pdata\n"); return -EINVAL; } + pri_dai->dma_playback.slave = i2s_pdata->dma_playback; + pri_dai->dma_capture.slave = i2s_pdata->dma_capture; + if (&i2s_pdata->type) i2s_cfg = &i2s_pdata->type.i2s; @@ -1341,11 +1328,8 @@ static int samsung_i2s_probe(struct platform_device *pdev) sec_dai->dma_playback.dma_addr = regs_base + I2STXDS; sec_dai->dma_playback.ch_name = "tx-sec"; - if (!np) { - res = platform_get_resource(pdev, IORESOURCE_DMA, 2); - if (res) - sec_dai->dma_playback.channel = res->start; - } + if (!np) + sec_dai->dma_playback.slave = i2s_pdata->dma_play_sec; sec_dai->dma_playback.dma_size = 4; sec_dai->addr = pri_dai->addr; diff --git a/sound/soc/samsung/pcm.c b/sound/soc/samsung/pcm.c index b320a9d3fbf8..c77f324e0bb8 100644 --- a/sound/soc/samsung/pcm.c +++ b/sound/soc/samsung/pcm.c @@ -486,7 +486,7 @@ static const struct snd_soc_component_driver s3c_pcm_component = { static int s3c_pcm_dev_probe(struct platform_device *pdev) { struct s3c_pcm_info *pcm; - struct resource *mem_res, *dmatx_res, *dmarx_res; + struct resource *mem_res; struct s3c_audio_pdata *pcm_pdata; int ret; @@ -499,18 +499,6 @@ static int s3c_pcm_dev_probe(struct platform_device *pdev) pcm_pdata = pdev->dev.platform_data; /* Check for availability of necessary resource */ - dmatx_res = platform_get_resource(pdev, IORESOURCE_DMA, 0); - if (!dmatx_res) { - dev_err(&pdev->dev, "Unable to get PCM-TX dma resource\n"); - return -ENXIO; - } - - dmarx_res = platform_get_resource(pdev, IORESOURCE_DMA, 1); - if (!dmarx_res) { - dev_err(&pdev->dev, "Unable to get PCM-RX dma resource\n"); - return -ENXIO; - } - mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!mem_res) { dev_err(&pdev->dev, "Unable to get register resource\n"); @@ -568,8 +556,10 @@ static int s3c_pcm_dev_probe(struct platform_device *pdev) s3c_pcm_stereo_out[pdev->id].dma_addr = mem_res->start + S3C_PCM_TXFIFO; - s3c_pcm_stereo_in[pdev->id].channel = dmarx_res->start; - s3c_pcm_stereo_out[pdev->id].channel = dmatx_res->start; + if (pcm_pdata) { + s3c_pcm_stereo_in[pdev->id].slave = pcm_pdata->dma_capture; + s3c_pcm_stereo_out[pdev->id].slave = pcm_pdata->dma_playback; + } pcm->dma_capture = &s3c_pcm_stereo_in[pdev->id]; pcm->dma_playback = &s3c_pcm_stereo_out[pdev->id]; diff --git a/sound/soc/samsung/s3c2412-i2s.c b/sound/soc/samsung/s3c2412-i2s.c index 2b766d212ce0..77d27c85a32a 100644 --- a/sound/soc/samsung/s3c2412-i2s.c +++ b/sound/soc/samsung/s3c2412-i2s.c @@ -34,13 +34,13 @@ #include "s3c2412-i2s.h" static struct s3c_dma_params s3c2412_i2s_pcm_stereo_out = { - .channel = DMACH_I2S_OUT, + .slave = (void *)(uintptr_t)DMACH_I2S_OUT, .ch_name = "tx", .dma_size = 4, }; static struct s3c_dma_params s3c2412_i2s_pcm_stereo_in = { - .channel = DMACH_I2S_IN, + .slave = (void *)(uintptr_t)DMACH_I2S_IN, .ch_name = "rx", .dma_size = 4, }; diff --git a/sound/soc/samsung/s3c24xx-i2s.c b/sound/soc/samsung/s3c24xx-i2s.c index 5bf723689692..9da3a77ea2c7 100644 --- a/sound/soc/samsung/s3c24xx-i2s.c +++ b/sound/soc/samsung/s3c24xx-i2s.c @@ -32,13 +32,13 @@ #include "s3c24xx-i2s.h" static struct s3c_dma_params s3c24xx_i2s_pcm_stereo_out = { - .channel = DMACH_I2S_OUT, + .slave = (void *)(uintptr_t)DMACH_I2S_OUT, .ch_name = "tx", .dma_size = 2, }; static struct s3c_dma_params s3c24xx_i2s_pcm_stereo_in = { - .channel = DMACH_I2S_IN, + .slave = (void *)(uintptr_t)DMACH_I2S_IN, .ch_name = "rx", .dma_size = 2, }; diff --git a/sound/soc/samsung/spdif.c b/sound/soc/samsung/spdif.c index 36dbc0e96004..9dd7ee6d03ff 100644 --- a/sound/soc/samsung/spdif.c +++ b/sound/soc/samsung/spdif.c @@ -359,7 +359,7 @@ static const struct snd_soc_component_driver samsung_spdif_component = { static int spdif_probe(struct platform_device *pdev) { struct s3c_audio_pdata *spdif_pdata; - struct resource *mem_res, *dma_res; + struct resource *mem_res; struct samsung_spdif_info *spdif; int ret; @@ -367,12 +367,6 @@ static int spdif_probe(struct platform_device *pdev) dev_dbg(&pdev->dev, "Entered %s\n", __func__); - dma_res = platform_get_resource(pdev, IORESOURCE_DMA, 0); - if (!dma_res) { - dev_err(&pdev->dev, "Unable to get dma resource.\n"); - return -ENXIO; - } - mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!mem_res) { dev_err(&pdev->dev, "Unable to get register resource.\n"); @@ -432,7 +426,7 @@ static int spdif_probe(struct platform_device *pdev) spdif_stereo_out.dma_size = 2; spdif_stereo_out.dma_addr = mem_res->start + DATA_OUTBUF; - spdif_stereo_out.channel = dma_res->start; + spdif_stereo_out.slave = spdif_pdata ? spdif_pdata->dma_playback : NULL; spdif->dma_playback = &spdif_stereo_out; From c045105c641ccbeb6e94e87980cc8db870aa3961 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Nov 2015 17:08:41 +0100 Subject: [PATCH 104/797] mmc: sh_mmcif: rework dma channel handling commit 27cbd7e815a8e223ff7c4fe56daca724101288ac upstream. When compiling the sh_mmcif driver for ARM64, we currently get a harmless build warning: ../drivers/mmc/host/sh_mmcif.c: In function 'sh_mmcif_request_dma_one': ../drivers/mmc/host/sh_mmcif.c:417:4: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] (void *)pdata->slave_id_tx : ^ ../drivers/mmc/host/sh_mmcif.c:418:4: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] (void *)pdata->slave_id_rx; This could be worked around by adding another cast to uintptr_t, but I decided to simplify the code a little more to avoid that. This splits out the platform data using code into a separate function and builds that only for CONFIG_SUPERH. This part still has a typecast but does not need a second one. The SH platform code could be further modified to pass a pointer directly as we do on other architectures when we have a filter function. The normal case is simplified further and now just calls dma_request_slave_channel() directly without going through the compat handling. Signed-off-by: Arnd Bergmann Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sh_mmcif.c | 88 +++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 48 deletions(-) diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index ad9ffea7d659..1ca8a1359cbc 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -397,38 +397,26 @@ static void sh_mmcif_start_dma_tx(struct sh_mmcif_host *host) } static struct dma_chan * -sh_mmcif_request_dma_one(struct sh_mmcif_host *host, - struct sh_mmcif_plat_data *pdata, - enum dma_transfer_direction direction) +sh_mmcif_request_dma_pdata(struct sh_mmcif_host *host, uintptr_t slave_id) { - struct dma_slave_config cfg = { 0, }; - struct dma_chan *chan; - void *slave_data = NULL; - struct resource *res; - struct device *dev = sh_mmcif_host_to_dev(host); dma_cap_mask_t mask; - int ret; dma_cap_zero(mask); dma_cap_set(DMA_SLAVE, mask); - - if (pdata) - slave_data = direction == DMA_MEM_TO_DEV ? - (void *)pdata->slave_id_tx : - (void *)pdata->slave_id_rx; - - chan = dma_request_slave_channel_compat(mask, shdma_chan_filter, - slave_data, dev, - direction == DMA_MEM_TO_DEV ? "tx" : "rx"); - - dev_dbg(dev, "%s: %s: got channel %p\n", __func__, - direction == DMA_MEM_TO_DEV ? "TX" : "RX", chan); - - if (!chan) + if (slave_id <= 0) return NULL; - res = platform_get_resource(host->pd, IORESOURCE_MEM, 0); + return dma_request_channel(mask, shdma_chan_filter, (void *)slave_id); +} +static int sh_mmcif_dma_slave_config(struct sh_mmcif_host *host, + struct dma_chan *chan, + enum dma_transfer_direction direction) +{ + struct resource *res; + struct dma_slave_config cfg = { 0, }; + + res = platform_get_resource(host->pd, IORESOURCE_MEM, 0); cfg.direction = direction; if (direction == DMA_DEV_TO_MEM) { @@ -439,38 +427,42 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host, cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; } - ret = dmaengine_slave_config(chan, &cfg); - if (ret < 0) { - dma_release_channel(chan); - return NULL; - } - - return chan; + return dmaengine_slave_config(chan, &cfg); } -static void sh_mmcif_request_dma(struct sh_mmcif_host *host, - struct sh_mmcif_plat_data *pdata) +static void sh_mmcif_request_dma(struct sh_mmcif_host *host) { struct device *dev = sh_mmcif_host_to_dev(host); host->dma_active = false; - if (pdata) { - if (pdata->slave_id_tx <= 0 || pdata->slave_id_rx <= 0) - return; - } else if (!dev->of_node) { - return; - } - /* We can only either use DMA for both Tx and Rx or not use it at all */ - host->chan_tx = sh_mmcif_request_dma_one(host, pdata, DMA_MEM_TO_DEV); - if (!host->chan_tx) - return; + if (IS_ENABLED(CONFIG_SUPERH) && dev->platform_data) { + struct sh_mmcif_plat_data *pdata = dev->platform_data; - host->chan_rx = sh_mmcif_request_dma_one(host, pdata, DMA_DEV_TO_MEM); - if (!host->chan_rx) { - dma_release_channel(host->chan_tx); - host->chan_tx = NULL; + host->chan_tx = sh_mmcif_request_dma_pdata(host, + pdata->slave_id_tx); + host->chan_rx = sh_mmcif_request_dma_pdata(host, + pdata->slave_id_rx); + } else { + host->chan_tx = dma_request_slave_channel(dev, "tx"); + host->chan_tx = dma_request_slave_channel(dev, "rx"); } + dev_dbg(dev, "%s: got channel TX %p RX %p\n", __func__, host->chan_tx, + host->chan_rx); + + if (!host->chan_tx || !host->chan_rx || + sh_mmcif_dma_slave_config(host, host->chan_tx, DMA_MEM_TO_DEV) || + sh_mmcif_dma_slave_config(host, host->chan_rx, DMA_DEV_TO_MEM)) + goto error; + + return; + +error: + if (host->chan_tx) + dma_release_channel(host->chan_tx); + if (host->chan_rx) + dma_release_channel(host->chan_rx); + host->chan_tx = host->chan_rx = NULL; } static void sh_mmcif_release_dma(struct sh_mmcif_host *host) @@ -1102,7 +1094,7 @@ static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) if (ios->power_mode == MMC_POWER_UP) { if (!host->card_present) { /* See if we also get DMA */ - sh_mmcif_request_dma(host, dev->platform_data); + sh_mmcif_request_dma(host); host->card_present = true; } sh_mmcif_set_power(host, ios); From 19e0783ae96837e30e94acdb0cc4ae935338a969 Mon Sep 17 00:00:00 2001 From: Chris Paterson Date: Wed, 10 Feb 2016 14:07:01 +0000 Subject: [PATCH 105/797] mmc: sh_mmcif: Correct TX DMA channel allocation commit a32ef81c9889c9554a3c4b465c4ee7b2d26c6b10 upstream. Commit 27cbd7e815a8 ("mmc: sh_mmcif: rework dma channel handling") introduced a typo causing the TX DMA channel allocation to be overwritten by the requested RX DMA channel. Fixes: 27cbd7e815a8 ("mmc: sh_mmcif: rework dma channel handling") Signed-off-by: Chris Paterson Acked-by: Laurent Pinchart Acked-by: Arnd Bergmann Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sh_mmcif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index 1ca8a1359cbc..6234eab38ff3 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -445,7 +445,7 @@ static void sh_mmcif_request_dma(struct sh_mmcif_host *host) pdata->slave_id_rx); } else { host->chan_tx = dma_request_slave_channel(dev, "tx"); - host->chan_tx = dma_request_slave_channel(dev, "rx"); + host->chan_rx = dma_request_slave_channel(dev, "rx"); } dev_dbg(dev, "%s: got channel TX %p RX %p\n", __func__, host->chan_tx, host->chan_rx); From e8c28e096a07b7bdf6fae534c8ff5f372b25ea82 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 3 Feb 2016 12:33:30 +0100 Subject: [PATCH 106/797] x86/microcode/intel: Make early loader look for builtin microcode too commit 264285ac01673e70557c43ecee338ce97c4c0672 upstream. Set the initrd @start depending on the presence of an initrd. Otherwise, builtin microcode loading doesn't work as the start is wrong and we're using it to compute offset to the microcode blobs. Tested-by: Thomas Voegtle Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1454499225-21544-3-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/intel.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index ce47402eb2f9..5e3a310d54f3 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -555,10 +555,14 @@ scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, cd.data = NULL; cd.size = 0; - cd = find_cpio_data(p, (void *)start, size, &offset); - if (!cd.data) { + /* try built-in microcode if no initrd */ + if (!size) { if (!load_builtin_intel_microcode(&cd)) return UCODE_ERROR; + } else { + cd = find_cpio_data(p, (void *)start, size, &offset); + if (!cd.data) + return UCODE_ERROR; } return get_matching_model_microcode(0, start, cd.data, cd.size, @@ -732,16 +736,20 @@ void __init load_ucode_intel_bsp(void) struct boot_params *p; p = (struct boot_params *)__pa_nodebug(&boot_params); - start = p->hdr.ramdisk_image; size = p->hdr.ramdisk_size; - _load_ucode_intel_bsp( - (struct mc_saved_data *)__pa_nodebug(&mc_saved_data), - (unsigned long *)__pa_nodebug(&mc_saved_in_initrd), - start, size); + /* + * Set start only if we have an initrd image. We cannot use initrd_start + * because it is not set that early yet. + */ + start = (size ? p->hdr.ramdisk_image : 0); + + _load_ucode_intel_bsp((struct mc_saved_data *)__pa_nodebug(&mc_saved_data), + (unsigned long *)__pa_nodebug(&mc_saved_in_initrd), + start, size); #else - start = boot_params.hdr.ramdisk_image + PAGE_OFFSET; size = boot_params.hdr.ramdisk_size; + start = (size ? boot_params.hdr.ramdisk_image + PAGE_OFFSET : 0); _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size); #endif From 5aeaf8bd6c6a3f29a1ab55987c10741ef984ea52 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 3 Feb 2016 12:33:29 +0100 Subject: [PATCH 107/797] x86/microcode: Untangle from BLK_DEV_INITRD commit 5f9c01aa7c49a2d74474d6d879a797b8badf29e6 upstream. Thomas Voegtle reported that doing oldconfig with a .config which has CONFIG_MICROCODE enabled but BLK_DEV_INITRD disabled prevents the microcode loading mechanism from being built. So untangle it from the BLK_DEV_INITRD dependency so that oldconfig doesn't turn it off and add an explanatory text to its Kconfig help what the supported methods for supplying microcode are. Reported-by: Thomas Voegtle Tested-by: Thomas Voegtle Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1454499225-21544-2-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 23 ++++++++++++----------- arch/x86/include/asm/microcode.h | 26 ++++++++++++++++++++++++++ arch/x86/kernel/cpu/microcode/intel.c | 14 ++++---------- 3 files changed, 42 insertions(+), 21 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index db3622f22b61..436639a31624 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1126,22 +1126,23 @@ config MICROCODE bool "CPU microcode loading support" default y depends on CPU_SUP_AMD || CPU_SUP_INTEL - depends on BLK_DEV_INITRD select FW_LOADER ---help--- - If you say Y here, you will be able to update the microcode on - certain Intel and AMD processors. The Intel support is for the - IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, - Xeon etc. The AMD support is for families 0x10 and later. You will - obviously need the actual microcode binary data itself which is not - shipped with the Linux kernel. + Intel and AMD processors. The Intel support is for the IA32 family, + e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The + AMD support is for families 0x10 and later. You will obviously need + the actual microcode binary data itself which is not shipped with + the Linux kernel. - This option selects the general module only, you need to select - at least one vendor specific module as well. + The preferred method to load microcode from a detached initrd is described + in Documentation/x86/early-microcode.txt. For that you need to enable + CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the + initrd for microcode blobs. - To compile this driver as a module, choose M here: the module - will be called microcode. + In addition, you can build-in the microcode into the kernel. For that you + need to enable FIRMWARE_IN_KERNEL and add the vendor-supplied microcode + to the CONFIG_EXTRA_FIRMWARE config option. config MICROCODE_INTEL bool "Intel microcode loading support" diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 34e62b1dcfce..712b24ed3a64 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -2,6 +2,7 @@ #define _ASM_X86_MICROCODE_H #include +#include #define native_rdmsr(msr, val1, val2) \ do { \ @@ -168,4 +169,29 @@ static inline void reload_early_microcode(void) { } static inline bool get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; } #endif + +static inline unsigned long get_initrd_start(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + return initrd_start; +#else + return 0; +#endif +} + +static inline unsigned long get_initrd_start_addr(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD +#ifdef CONFIG_X86_32 + unsigned long *initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start); + + return (unsigned long)__pa_nodebug(*initrd_start_p); +#else + return get_initrd_start(); +#endif +#else /* CONFIG_BLK_DEV_INITRD */ + return 0; +#endif +} + #endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 5e3a310d54f3..ac8975a65280 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -698,7 +698,7 @@ int __init save_microcode_in_initrd_intel(void) if (count == 0) return ret; - copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count); + copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, get_initrd_start(), count); ret = save_microcode(&mc_saved_data, mc_saved, count); if (ret) pr_err("Cannot save microcode patches from initrd.\n"); @@ -760,20 +760,14 @@ void load_ucode_intel_ap(void) struct mc_saved_data *mc_saved_data_p; struct ucode_cpu_info uci; unsigned long *mc_saved_in_initrd_p; - unsigned long initrd_start_addr; enum ucode_state ret; #ifdef CONFIG_X86_32 - unsigned long *initrd_start_p; - mc_saved_in_initrd_p = - (unsigned long *)__pa_nodebug(mc_saved_in_initrd); + mc_saved_in_initrd_p = (unsigned long *)__pa_nodebug(mc_saved_in_initrd); mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); - initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start); - initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p); #else - mc_saved_data_p = &mc_saved_data; mc_saved_in_initrd_p = mc_saved_in_initrd; - initrd_start_addr = initrd_start; + mc_saved_data_p = &mc_saved_data; #endif /* @@ -785,7 +779,7 @@ void load_ucode_intel_ap(void) collect_cpu_info_early(&uci); ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p, - initrd_start_addr, &uci); + get_initrd_start_addr(), &uci); if (ret != UCODE_OK) return; From 7657a398c105500d1614444a26c1fc9166813fb8 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 10 Feb 2016 14:15:27 -0800 Subject: [PATCH 108/797] x86/entry/compat: Keep TS_COMPAT set during signal delivery commit 4e79e182b419172e35936a47f098509092d69817 upstream. Signal delivery needs to know the sign of an interrupted syscall's return value in order to detect -ERESTART variants. Normally this works independently of bitness because syscalls internally return long. Under ptrace, however, this can break, and syscall_get_error is supposed to sign-extend regs->ax if needed. We were clearing TS_COMPAT too early, though, and this prevented sign extension, which subtly broke syscall restart under ptrace. Reported-by: Robert O'Callahan Signed-off-by: Andy Lutomirski Cc: Al Viro Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Shuah Khan Cc: Thomas Gleixner Fixes: c5c46f59e4e7 ("x86/entry: Add new, comprehensible entry and exit handlers written in C") Link: http://lkml.kernel.org/r/cbce3cf545522f64eb37f5478cb59746230db3b5.1455142412.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/common.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 03663740c866..1a4477cedc49 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -268,6 +268,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) /* Called with IRQs disabled. */ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) { + struct thread_info *ti = pt_regs_to_thread_info(regs); u32 cached_flags; if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) @@ -275,12 +276,22 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) lockdep_sys_exit(); - cached_flags = - READ_ONCE(pt_regs_to_thread_info(regs)->flags); + cached_flags = READ_ONCE(ti->flags); if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) exit_to_usermode_loop(regs, cached_flags); +#ifdef CONFIG_COMPAT + /* + * Compat syscalls set TS_COMPAT. Make sure we clear it before + * returning to user mode. We need to clear it *after* signal + * handling, because syscall restart has a fixup for compat + * syscalls. The fixup is exercised by the ptrace_syscall_32 + * selftest. + */ + ti->status &= ~TS_COMPAT; +#endif + user_enter(); } @@ -332,14 +343,6 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs) if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS)) syscall_slow_exit_work(regs, cached_flags); -#ifdef CONFIG_COMPAT - /* - * Compat syscalls set TS_COMPAT. Make sure we clear it before - * returning to user mode. - */ - ti->status &= ~TS_COMPAT; -#endif - local_irq_disable(); prepare_exit_to_usermode(regs); } From f6724209df88ec1a760d036b71b8f871b1556785 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 3 Mar 2016 20:50:40 +0100 Subject: [PATCH 109/797] perf/x86/intel: Add definition for PT PMI bit commit 5690ae28e472d25e330ad0c637a5cea3fc39fb32 upstream. This patch adds a definition for GLOBAL_OVFL_STATUS bit 55 which is used with the Processor Trace (PT) feature. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: adrian.hunter@intel.com Cc: kan.liang@intel.com Cc: namhyung@kernel.org Link: http://lkml.kernel.org/r/1457034642-21837-2-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/perf_event.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 7bcb861a04e5..5a2ed3ed2f26 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -165,6 +165,7 @@ struct x86_pmu_capability { #define GLOBAL_STATUS_ASIF BIT_ULL(60) #define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) #define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58) +#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) /* * IBS cpuid feature detection From 54fda475686cfb9a90bb558c0fa590e644503432 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 26 Feb 2016 09:15:11 -0600 Subject: [PATCH 110/797] x86/PCI: Mark Broadwell-EP Home Agent & PCU as having non-compliant BARs commit b894157145e4ac7598d7062bc93320898a5e059e upstream. The Home Agent and PCU PCI devices in Broadwell-EP have a non-BAR register where a BAR should be. We don't know what the side effects of sizing the "BAR" would be, and we don't know what address space the "BAR" might appear to describe. Mark these devices as having non-compliant BARs so the PCI core doesn't touch them. Signed-off-by: Bjorn Helgaas Tested-by: Andi Kleen Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/fixup.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index e58565556703..0ae7e9fa348d 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -540,3 +540,10 @@ static void twinhead_reserve_killing_zone(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone); + +static void pci_bdwep_bar(struct pci_dev *dev) +{ + dev->non_compliant_bars = 1; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_bdwep_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_bdwep_bar); From 0f6e5e26e68f1171b7cf8bebb7ce86c95e506639 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 10 Feb 2016 17:50:23 +0100 Subject: [PATCH 111/797] KVM: x86: fix missed hardware breakpoints commit 4e422bdd2f849d98fffccbc3295c2f0996097fb3 upstream. Sometimes when setting a breakpoint a process doesn't stop on it. This is because the debug registers are not loaded correctly on VCPU load. The following simple reproducer from Oleg Nesterov tries using debug registers in both the host and the guest, for example by running "./bp 0 1" on the host and "./bp 14 15" under QEMU. #include #include #include #include #include #include #include #include #include #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) unsigned long encode_dr7(int drnum, int enable, unsigned int type, unsigned int len) { unsigned long dr7; dr7 = ((len | type) & 0xf) << (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); if (enable) dr7 |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); return dr7; } int write_dr(int pid, int dr, unsigned long val) { return ptrace(PTRACE_POKEUSER, pid, offsetof (struct user, u_debugreg[dr]), val); } void set_bp(pid_t pid, void *addr) { unsigned long dr7; assert(write_dr(pid, 0, (long)addr) == 0); dr7 = encode_dr7(0, 1, DR_RW_EXECUTE, DR_LEN_1); assert(write_dr(pid, 7, dr7) == 0); } void *get_rip(int pid) { return (void*)ptrace(PTRACE_PEEKUSER, pid, offsetof(struct user, regs.rip), 0); } void test(int nr) { void *bp_addr = &&label + nr, *bp_hit; int pid; printf("test bp %d\n", nr); assert(nr < 16); // see 16 asm nops below pid = fork(); if (!pid) { assert(ptrace(PTRACE_TRACEME, 0,0,0) == 0); kill(getpid(), SIGSTOP); for (;;) { label: asm ( "nop; nop; nop; nop;" "nop; nop; nop; nop;" "nop; nop; nop; nop;" "nop; nop; nop; nop;" ); } } assert(pid == wait(NULL)); set_bp(pid, bp_addr); for (;;) { assert(ptrace(PTRACE_CONT, pid, 0, 0) == 0); assert(pid == wait(NULL)); bp_hit = get_rip(pid); if (bp_hit != bp_addr) fprintf(stderr, "ERR!! hit wrong bp %ld != %d\n", bp_hit - &&label, nr); } } int main(int argc, const char *argv[]) { while (--argc) { int nr = atoi(*++argv); if (!fork()) test(nr); } while (wait(NULL) > 0) ; return 0; } Suggested-by: Nadadv Amit Reported-by: Andrey Wagin Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d2945024ed33..8bfc5fc6a39b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2736,6 +2736,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); + vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) From 49f0cbfc3e73108c319c3d3cc5fe04587d96b654 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 2 Mar 2016 22:56:38 +0100 Subject: [PATCH 112/797] KVM: i8254: change PIT discard tick policy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7dd0fdff145c5be7146d0ac06732ae3613412ac1 upstream. Discard policy uses ack_notifiers to prevent injection of PIT interrupts before EOI from the last one. This patch changes the policy to always try to deliver the interrupt, which makes a difference when its vector is in ISR. Old implementation would drop the interrupt, but proposed one injects to IRR, like real hardware would. The old policy breaks legacy NMI watchdogs, where PIT is used through virtual wire (LVT0): PIT never sends an interrupt before receiving EOI, thus a guest deadlock with disabled interrupts will stop NMIs. Note that NMI doesn't do EOI, so PIT also had to send a normal interrupt through IOAPIC. (KVM's PIT is deeply rotten and luckily not used much in modern systems.) Even though there is a chance of regressions, I think we can fix the LVT0 NMI bug without introducing a new tick policy. Reported-by: Yuki Shibuya Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/i8254.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index b0ea42b78ccd..ab5318727579 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -245,7 +245,7 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) * PIC is being reset. Handle it gracefully here */ atomic_inc(&ps->pending); - else if (value > 0) + else if (value > 0 && ps->reinject) /* in this case, we had multiple outstanding pit interrupts * that we needed to inject. Reinject */ @@ -288,7 +288,9 @@ static void pit_do_work(struct kthread_work *work) * last one has been acked. */ spin_lock(&ps->inject_lock); - if (ps->irq_ack) { + if (!ps->reinject) + inject = 1; + else if (ps->irq_ack) { ps->irq_ack = 0; inject = 1; } @@ -317,10 +319,10 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer); struct kvm_pit *pt = ps->kvm->arch.vpit; - if (ps->reinject || !atomic_read(&ps->pending)) { + if (ps->reinject) atomic_inc(&ps->pending); - queue_kthread_work(&pt->worker, &pt->expired); - } + + queue_kthread_work(&pt->worker, &pt->expired); if (ps->is_periodic) { hrtimer_add_expires_ns(&ps->timer, ps->period); From 4e2fa4bbbac1c2e198e4c980d451c9ec568ae798 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 21 Mar 2016 10:15:25 +0100 Subject: [PATCH 113/797] KVM: fix spin_lock_init order on x86 commit e9ad4ec8379ad1ba6f68b8ca1c26b50b5ae0a327 upstream. Moving the initialization earlier is needed in 4.6 because kvm_arch_init_vm is now using mmu_lock, causing lockdep to complain: [ 284.440294] INFO: trying to register non-static key. [ 284.445259] the code is fine but needs lockdep annotation. [ 284.450736] turning off the locking correctness validator. ... [ 284.528318] [] lock_acquire+0xd3/0x240 [ 284.533733] [] ? kvm_page_track_register_notifier+0x20/0x60 [kvm] [ 284.541467] [] _raw_spin_lock+0x41/0x80 [ 284.546960] [] ? kvm_page_track_register_notifier+0x20/0x60 [kvm] [ 284.554707] [] kvm_page_track_register_notifier+0x20/0x60 [kvm] [ 284.562281] [] kvm_mmu_init_vm+0x20/0x30 [kvm] [ 284.568381] [] kvm_arch_init_vm+0x1ea/0x200 [kvm] [ 284.574740] [] kvm_dev_ioctl+0xbf/0x4d0 [kvm] However, it also helps fixing a preexisting problem, which is why this patch is also good for stable kernels: kvm_create_vm was incrementing current->mm->mm_count but not decrementing it at the out_err label (in case kvm_init_mmu_notifier failed). The new initialization order makes it possible to add the required mmdrop without adding a new error label. Reported-by: Borislav Petkov Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/kvm_main.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7338e30421d8..fefbf2d148ef 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -547,6 +547,16 @@ static struct kvm *kvm_create_vm(unsigned long type) if (!kvm) return ERR_PTR(-ENOMEM); + spin_lock_init(&kvm->mmu_lock); + atomic_inc(¤t->mm->mm_count); + kvm->mm = current->mm; + kvm_eventfd_init(kvm); + mutex_init(&kvm->lock); + mutex_init(&kvm->irq_lock); + mutex_init(&kvm->slots_lock); + atomic_set(&kvm->users_count, 1); + INIT_LIST_HEAD(&kvm->devices); + r = kvm_arch_init_vm(kvm, type); if (r) goto out_err_no_disable; @@ -579,16 +589,6 @@ static struct kvm *kvm_create_vm(unsigned long type) goto out_err; } - spin_lock_init(&kvm->mmu_lock); - kvm->mm = current->mm; - atomic_inc(&kvm->mm->mm_count); - kvm_eventfd_init(kvm); - mutex_init(&kvm->lock); - mutex_init(&kvm->irq_lock); - mutex_init(&kvm->slots_lock); - atomic_set(&kvm->users_count, 1); - INIT_LIST_HEAD(&kvm->devices); - r = kvm_init_mmu_notifier(kvm); if (r) goto out_err; @@ -613,6 +613,7 @@ static struct kvm *kvm_create_vm(unsigned long type) for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) kvm_free_memslots(kvm, kvm->memslots[i]); kvm_arch_free_vm(kvm); + mmdrop(current->mm); return ERR_PTR(r); } From c44b175bf03cd74e517f3c98b2cb4896e04202ae Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 18 Mar 2016 16:53:29 +0100 Subject: [PATCH 114/797] KVM: VMX: avoid guest hang on invalid invept instruction commit 2849eb4f99d54925c543db12917127f88b3c38ff upstream. A guest executing an invalid invept instruction would hang because the instruction pointer was not updated. Fixes: bfd0a56b90005f8c8a004baf407ad90045c2b11e Reviewed-by: David Matlack Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0958fa2b7cb7..89d5e02b14ae 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7340,6 +7340,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) if (!(types & (1UL << type))) { nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); + skip_emulated_instruction(vcpu); return 1; } From f9153f95f2b5a1a90b81c746342bed9d40dc9ae0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 18 Mar 2016 16:53:42 +0100 Subject: [PATCH 115/797] KVM: VMX: avoid guest hang on invalid invvpid instruction commit f6870ee9e53430f2a318ccf0dd5e66bb46194e43 upstream. A guest executing an invalid invvpid instruction would hang because the instruction pointer was not updated. Reported-by: jmontleo@redhat.com Tested-by: jmontleo@redhat.com Fixes: 99b83ac893b84ed1a62ad6d1f2b6cc32026b9e85 Reviewed-by: David Matlack Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 89d5e02b14ae..75d5d5b75e1f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7399,6 +7399,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) if (!(types & (1UL << type))) { nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); + skip_emulated_instruction(vcpu); return 1; } From 6a84dfcbf56eab7955b607e22696cb145e019f20 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 18 Mar 2016 16:58:38 +0100 Subject: [PATCH 116/797] KVM: VMX: fix nested vpid for old KVM guests commit ef697a712a6165aea7779c295604b099e8bfae2e upstream. Old KVM guests invoke single-context invvpid without actually checking whether it is supported. This was fixed by commit 518c8ae ("KVM: VMX: Make sure single type invvpid is supported before issuing invvpid instruction", 2010-08-01) and the patch after, but pre-2.6.36 kernels lack it including RHEL 6. Reported-by: jmontleo@redhat.com Tested-by: jmontleo@redhat.com Fixes: 99b83ac893b84ed1a62ad6d1f2b6cc32026b9e85 Reviewed-by: David Matlack Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 75d5d5b75e1f..f34ab71dfd57 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2637,8 +2637,15 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) } else vmx->nested.nested_vmx_ept_caps = 0; + /* + * Old versions of KVM use the single-context version without + * checking for support, so declare that it is supported even + * though it is treated as global context. The alternative is + * not failing the single-context invvpid, and it is worse. + */ if (enable_vpid) vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | + VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; else vmx->nested.nested_vmx_vpid_caps = 0; @@ -7416,12 +7423,17 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) } switch (type) { + case VMX_VPID_EXTENT_SINGLE_CONTEXT: + /* + * Old versions of KVM use the single-context version so we + * have to support it; just treat it the same as all-context. + */ case VMX_VPID_EXTENT_ALL_CONTEXT: __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02); nested_vmx_succeed(vcpu); break; default: - /* Trap single context invalidation invvpid calls */ + /* Trap individual address invalidation invvpid calls */ BUG_ON(1); break; } From 37014e0c5c90e250892da8aba8533cd43bacb4eb Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 2 Mar 2016 13:24:14 +0200 Subject: [PATCH 117/797] perf/core: Fix perf_sched_count derailment commit 927a5570855836e5d5859a80ce7e91e963545e8f upstream. The error path in perf_event_open() is such that asking for a sampling event on a PMU that doesn't generate interrupts will end up in dropping the perf_sched_count even though it hasn't been incremented for this event yet. Given a sufficient amount of these calls, we'll end up disabling scheduler's jump label even though we'd still have active events in the system, thereby facilitating the arrival of the infernal regions upon us. I'm fixing this by moving account_event() inside perf_event_alloc(). Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/1456917854-29427-1-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 1087bbeb152b..faf2067fc8e2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7979,6 +7979,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, } } + /* symmetric to unaccount_event() in _free_event() */ + account_event(event); + return event; err_per_task: @@ -8342,8 +8345,6 @@ SYSCALL_DEFINE5(perf_event_open, } } - account_event(event); - /* * Special case software events and allow them to be part of * any hardware group. @@ -8626,8 +8627,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, /* Mark owner so we could distinguish it from user events. */ event->owner = EVENT_OWNER_KERNEL; - account_event(event); - ctx = find_get_context(event->pmu, task, event); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); From 2dfe91df33715503b253563d5e6d9a816758485c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 17 Feb 2016 14:44:55 -0800 Subject: [PATCH 118/797] perf tools: Dont stop PMU parsing on alias parse error commit 940db6dcd3f4659303fdf6befe7416adc4d24118 upstream. When an error happens during alias parsing currently the complete parsing of all attributes of the PMU is stopped. This is breaks old perf on a newer kernel that may have not-yet-know alias attributes (such as .scale or .per-pkg). Continue when some attribute is unparseable. This is IMHO a stable candidate and should be backported to older versions to avoid problems with newer kernels. v2: Print warnings when something goes wrong. v3: Change warning to debug output Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1455749095-18358-1-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/pmu.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index e4b173dec4b9..6f2a0279476c 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -283,13 +283,12 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) { struct dirent *evt_ent; DIR *event_dir; - int ret = 0; event_dir = opendir(dir); if (!event_dir) return -EINVAL; - while (!ret && (evt_ent = readdir(event_dir))) { + while ((evt_ent = readdir(event_dir))) { char path[PATH_MAX]; char *name = evt_ent->d_name; FILE *file; @@ -305,17 +304,19 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) snprintf(path, PATH_MAX, "%s/%s", dir, name); - ret = -EINVAL; file = fopen(path, "r"); - if (!file) - break; + if (!file) { + pr_debug("Cannot open %s\n", path); + continue; + } - ret = perf_pmu__new_alias(head, dir, name, file); + if (perf_pmu__new_alias(head, dir, name, file) < 0) + pr_debug("Cannot set up %s\n", name); fclose(file); } closedir(event_dir); - return ret; + return 0; } /* From 6be7771fe7242d7d4399545e0c9e2f6f3cd76725 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 19 Feb 2016 11:43:52 +0000 Subject: [PATCH 119/797] perf tools: Fix checking asprintf return value commit 26dee028d365fbc0e3326606a8520260b4462381 upstream. According to man pages, asprintf returns -1 when failure. This patch fixes two incorrect return value checker. Signed-off-by: Wang Nan Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Jiri Olsa Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Fixes: ffeb883e5662 ("perf tools: Show proper error message for wrong terms of hw/sw events") Link: http://lkml.kernel.org/r/1455882283-79592-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/parse-events.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b48e87693aa5..a35db828bd0d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2101,11 +2101,11 @@ char *parse_events_formats_error_string(char *additional_terms) /* valid terms */ if (additional_terms) { - if (!asprintf(&str, "valid terms: %s,%s", - additional_terms, static_terms)) + if (asprintf(&str, "valid terms: %s,%s", + additional_terms, static_terms) < 0) goto fail; } else { - if (!asprintf(&str, "valid terms: %s", static_terms)) + if (asprintf(&str, "valid terms: %s", static_terms) < 0) goto fail; } return str; From 64bf6d9705aa8724a17a137e76dc6f5b58309026 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 27 Feb 2016 21:21:12 +0100 Subject: [PATCH 120/797] perf tools: Fix python extension build commit 67d5268908283c187e0a460048a423256c2fb288 upstream. The util/python-ext-sources file contains source files required to build the python extension relative to $(srctree)/tools/perf, Such a file path $(FILE).c is handed over to the python extension build system, which builds the final object in the $(PYTHON_EXTBUILD)/tmp/$(FILE).o path. After the build is done all files from $(PYTHON_EXTBUILD)lib/ are carried as the result binaries. Above system fails when we add source file relative to ../lib, which we do for: ../lib/bitmap.c ../lib/find_bit.c ../lib/hweight.c ../lib/rbtree.c All above objects will be built like: $(PYTHON_EXTBUILD)/tmp/../lib/bitmap.c $(PYTHON_EXTBUILD)/tmp/../lib/find_bit.c $(PYTHON_EXTBUILD)/tmp/../lib/hweight.c $(PYTHON_EXTBUILD)/tmp/../lib/rbtree.c which accidentally happens to be final library path: $(PYTHON_EXTBUILD)/lib/ Changing setup.py to pass full paths of source files to Extension build class and thus keep all built objects under $(PYTHON_EXTBUILD)tmp directory. Reported-by: Jeff Bastian Signed-off-by: Jiri Olsa Tested-by: Josh Boyer Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20160227201350.GB28494@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/setup.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 1833103768cb..c8680984d2d6 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -22,6 +22,7 @@ cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ] +src_perf = getenv('srctree') + '/tools/perf' build_lib = getenv('PYTHON_EXTBUILD_LIB') build_tmp = getenv('PYTHON_EXTBUILD_TMP') libtraceevent = getenv('LIBTRACEEVENT') @@ -30,6 +31,9 @@ libapikfs = getenv('LIBAPI') ext_sources = [f.strip() for f in file('util/python-ext-sources') if len(f.strip()) > 0 and f[0] != '#'] +# use full paths with source files +ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , ext_sources) + perf = Extension('perf', sources = ext_sources, include_dirs = ['util/include'], From d75936f3f968b98243b9380b0c05d90b5292245d Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 18 Mar 2016 10:03:24 +0800 Subject: [PATCH 121/797] Thermal: Ignore invalid trip points commit 81ad4276b505e987dd8ebbdf63605f92cd172b52 upstream. In some cases, platform thermal driver may report invalid trip points, thermal core should not take any action for these trip points. This fixed a regression that bogus trip point starts to screw up thermal control on some Lenovo laptops, after commit bb431ba26c5cd0a17c941ca6c3a195a3a6d5d461 Author: Zhang Rui Date: Fri Oct 30 16:31:47 2015 +0800 Thermal: initialize thermal zone device correctly After thermal zone device registered, as we have not read any temperature before, thus tz->temperature should not be 0, which actually means 0C, and thermal trend is not available. In this case, we need specially handling for the first thermal_zone_device_update(). Both thermal core framework and step_wise governor is enhanced to handle this. And since the step_wise governor is the only one that uses trends, so it's the only thermal governor that needs to be updated. Tested-by: Manuel Krause Tested-by: szegad Tested-by: prash Tested-by: amish Tested-by: Matthias Reviewed-by: Javi Merino Signed-off-by: Zhang Rui Signed-off-by: Chen Yu Link: https://bugzilla.redhat.com/show_bug.cgi?id=1317190 Link: https://bugzilla.kernel.org/show_bug.cgi?id=114551 Signed-off-by: Zhang Rui Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/thermal_core.c | 13 ++++++++++++- include/linux/thermal.h | 2 ++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index ba08b5521382..3d5f8f432b5b 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -454,6 +454,10 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip) { enum thermal_trip_type type; + /* Ignore disabled trip points */ + if (test_bit(trip, &tz->trips_disabled)) + return; + tz->ops->get_trip_type(tz, trip, &type); if (type == THERMAL_TRIP_CRITICAL || type == THERMAL_TRIP_HOT) @@ -1796,6 +1800,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, { struct thermal_zone_device *tz; enum thermal_trip_type trip_type; + int trip_temp; int result; int count; int passive = 0; @@ -1867,9 +1872,15 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, goto unregister; for (count = 0; count < trips; count++) { - tz->ops->get_trip_type(tz, count, &trip_type); + if (tz->ops->get_trip_type(tz, count, &trip_type)) + set_bit(count, &tz->trips_disabled); if (trip_type == THERMAL_TRIP_PASSIVE) passive = 1; + if (tz->ops->get_trip_temp(tz, count, &trip_temp)) + set_bit(count, &tz->trips_disabled); + /* Check for bogus trip points */ + if (trip_temp == 0) + set_bit(count, &tz->trips_disabled); } if (!passive) { diff --git a/include/linux/thermal.h b/include/linux/thermal.h index e13a1ace50e9..4a849f19e6c9 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -156,6 +156,7 @@ struct thermal_attr { * @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis * @devdata: private pointer for device private data * @trips: number of trip points the thermal zone supports + * @trips_disabled; bitmap for disabled trips * @passive_delay: number of milliseconds to wait between polls when * performing passive cooling. * @polling_delay: number of milliseconds to wait between polls when @@ -191,6 +192,7 @@ struct thermal_zone_device { struct thermal_attr *trip_hyst_attrs; void *devdata; int trips; + unsigned long trips_disabled; /* bitmap for disabled trips */ int passive_delay; int polling_delay; int temperature; From af080e5802224176001a987c3ab77ba7490ef49e Mon Sep 17 00:00:00 2001 From: Chris Friesen Date: Sat, 5 Mar 2016 23:18:48 -0600 Subject: [PATCH 122/797] sched/cputime: Fix steal_account_process_tick() to always return jiffies commit f9c904b7613b8b4c85b10cd6b33ad41b2843fa9d upstream. The callers of steal_account_process_tick() expect it to return whether a jiffy should be considered stolen or not. Currently the return value of steal_account_process_tick() is in units of cputime, which vary between either jiffies or nsecs depending on CONFIG_VIRT_CPU_ACCOUNTING_GEN. If cputime has nsecs granularity and there is a tiny amount of stolen time (a few nsecs, say) then we will consider the entire tick stolen and will not account the tick on user/system/idle, causing /proc/stats to show invalid data. The fix is to change steal_account_process_tick() to accumulate the stolen time and only account it once it's worth a jiffy. (Thanks to Frederic Weisbecker for suggestions to fix a bug in my first version of the patch.) Signed-off-by: Chris Friesen Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/56DBBDB8.40305@mail.usask.ca Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/cputime.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 05de80b48586..f74ea89e77a8 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -259,21 +259,21 @@ static __always_inline bool steal_account_process_tick(void) #ifdef CONFIG_PARAVIRT if (static_key_false(¶virt_steal_enabled)) { u64 steal; - cputime_t steal_ct; + unsigned long steal_jiffies; steal = paravirt_steal_clock(smp_processor_id()); steal -= this_rq()->prev_steal_time; /* - * cputime_t may be less precise than nsecs (eg: if it's - * based on jiffies). Lets cast the result to cputime + * steal is in nsecs but our caller is expecting steal + * time in jiffies. Lets cast the result to jiffies * granularity and account the rest on the next rounds. */ - steal_ct = nsecs_to_cputime(steal); - this_rq()->prev_steal_time += cputime_to_nsecs(steal_ct); + steal_jiffies = nsecs_to_jiffies(steal); + this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies); - account_steal_time(steal_ct); - return steal_ct; + account_steal_time(jiffies_to_cputime(steal_jiffies)); + return steal_jiffies; } #endif return false; From 10595c57f4682b096d6ac6dd2212ac889f08d9f4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 29 Feb 2016 09:19:24 +0100 Subject: [PATCH 123/797] sched/preempt, sh: kmap_coherent relies on disabled preemption commit b15d53d009558d14c4f394a6d1fa2039c7f45c43 upstream. kmap_coherent needs disabled preemption to not schedule in the critical section, just like kmap_coherent on mips and kmap_atomic in general. Fixes: 8222dbe21e79 "sched/preempt, mm/fault: Decouple preemption from the page fault logic" Reported-by: Hans Verkuil Signed-off-by: David Hildenbrand Tested-by: Hans Verkuil Signed-off-by: Rich Felker Signed-off-by: Greg Kroah-Hartman --- arch/sh/mm/kmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/sh/mm/kmap.c b/arch/sh/mm/kmap.c index ec29e14ec5a8..bf25d7c79a2d 100644 --- a/arch/sh/mm/kmap.c +++ b/arch/sh/mm/kmap.c @@ -36,6 +36,7 @@ void *kmap_coherent(struct page *page, unsigned long addr) BUG_ON(!test_bit(PG_dcache_clean, &page->flags)); + preempt_disable(); pagefault_disable(); idx = FIX_CMAP_END - @@ -64,4 +65,5 @@ void kunmap_coherent(void *kvaddr) } pagefault_enable(); + preempt_enable(); } From dff87fa52ddf26df67526d303d08226e7168560b Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 9 Mar 2016 16:40:48 -0800 Subject: [PATCH 124/797] EDAC/sb_edac: Fix computation of channel address commit eb1af3b71f9d83e45f2fd2fd649356e98e1c582c upstream. Large memory Haswell-EX systems with multiple DIMMs per channel were sometimes reporting the wrong DIMM. Found three problems: 1) Debug printouts for socket and channel interleave were not interpreting the register fields correctly. The socket interleave field is a 2^X value (0=1, 1=2, 2=4, 3=8). The channel interleave is X+1 (0=1, 1=2, 2=3. 3=4). 2) Actual use of the socket interleave value didn't interpret as 2^X 3) Conversion of address to channel address was complicated, and wrong. Signed-off-by: Tony Luck Acked-by: Aristeu Rozanski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Mauro Carvalho Chehab Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-edac@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/edac/sb_edac.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 429309c62699..cbee3179ec08 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -1117,8 +1117,8 @@ static void get_memory_layout(const struct mem_ctl_info *mci) edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n", n_tads, gb, (mb*1000)/1024, ((u64)tmp_mb) << 20L, - (u32)TAD_SOCK(reg), - (u32)TAD_CH(reg), + (u32)(1 << TAD_SOCK(reg)), + (u32)TAD_CH(reg) + 1, (u32)TAD_TGT0(reg), (u32)TAD_TGT1(reg), (u32)TAD_TGT2(reg), @@ -1396,7 +1396,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, } ch_way = TAD_CH(reg) + 1; - sck_way = TAD_SOCK(reg) + 1; + sck_way = 1 << TAD_SOCK(reg); if (ch_way == 3) idx = addr >> 6; @@ -1453,7 +1453,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, n_tads, addr, limit, - (u32)TAD_SOCK(reg), + sck_way, ch_way, offset, idx, @@ -1468,18 +1468,12 @@ static int get_memory_error_data(struct mem_ctl_info *mci, offset, addr); return -EINVAL; } - addr -= offset; - /* Store the low bits [0:6] of the addr */ - ch_addr = addr & 0x7f; - /* Remove socket wayness and remove 6 bits */ - addr >>= 6; - addr = div_u64(addr, sck_xch); -#if 0 - /* Divide by channel way */ - addr = addr / ch_way; -#endif - /* Recover the last 6 bits */ - ch_addr |= addr << 6; + + ch_addr = addr - offset; + ch_addr >>= (6 + shiftup); + ch_addr /= ch_way * sck_way; + ch_addr <<= (6 + shiftup); + ch_addr |= addr & ((1 << (6 + shiftup)) - 1); /* * Step 3) Decode rank From 608377369dcebfa0dc9506a4f17d008bc93bb5b9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 20 Jan 2016 12:54:51 +0300 Subject: [PATCH 125/797] EDAC, amd64_edac: Shift wrapping issue in f1x_get_norm_dct_addr() commit 6f3508f61c814ee852c199988a62bd954c50dfc1 upstream. dct_sel_base_off is declared as a u64 but we're only using the lower 32 bits because of a shift wrapping bug. This can possibly truncate the upper 16 bits of DctSelBaseOffset[47:26], causing us to misdecode the CS row. Fixes: c8e518d5673d ('amd64_edac: Sanitize f10_get_base_addr_offset') Signed-off-by: Dan Carpenter Cc: Aravind Gopalakrishnan Cc: linux-edac Link: http://lkml.kernel.org/r/20160120095451.GB19898@mwanda Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- drivers/edac/amd64_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 9eee13ef83a5..d87a47547ba5 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1452,7 +1452,7 @@ static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range, u64 chan_off; u64 dram_base = get_dram_base(pvt, range); u64 hole_off = f10_dhar_offset(pvt); - u64 dct_sel_base_off = (pvt->dct_sel_hi & 0xFFFFFC00) << 16; + u64 dct_sel_base_off = (u64)(pvt->dct_sel_hi & 0xFFFFFC00) << 16; if (hi_rng) { /* From 1a4d9389206b787f620966dd0442ac0cd8df5525 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 10 Mar 2016 09:52:55 +0100 Subject: [PATCH 126/797] s390: fix floating pointer register corruption (again) commit e370e4769463a65dcf8806fa26d2874e0542ac41 upstream. There is a tricky interaction between the machine check handler and the critical sections of load_fpu_regs and save_fpu_regs functions. If the machine check interrupts one of the two functions the critical section cleanup will complete the function before the machine check handler s390_do_machine_check is called. Trouble is that the machine check handler needs to validate the floating point registers *before* and not *after* the completion of load_fpu_regs/save_fpu_regs. The simplest solution is to rewind the PSW to the start of the load_fpu_regs/save_fpu_regs and retry the function after the return from the machine check handler. Tested-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/entry.S | 106 +-------------------------------------- 1 file changed, 2 insertions(+), 104 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 857b6526d298..424e6809ad07 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1197,114 +1197,12 @@ cleanup_critical: .quad .Lpsw_idle_lpsw .Lcleanup_save_fpu_regs: - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - bor %r14 - clg %r9,BASED(.Lcleanup_save_fpu_regs_done) - jhe 5f - clg %r9,BASED(.Lcleanup_save_fpu_regs_fp) - jhe 4f - clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_high) - jhe 3f - clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_low) - jhe 2f - clg %r9,BASED(.Lcleanup_save_fpu_fpc_end) - jhe 1f - lg %r2,__LC_CURRENT - aghi %r2,__TASK_thread -0: # Store floating-point controls - stfpc __THREAD_FPU_fpc(%r2) -1: # Load register save area and check if VX is active - lg %r3,__THREAD_FPU_regs(%r2) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - jz 4f # no VX -> store FP regs -2: # Store vector registers (V0-V15) - VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3) -3: # Store vector registers (V16-V31) - VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3) - j 5f # -> done, set CIF_FPU flag -4: # Store floating-point registers - std 0,0(%r3) - std 1,8(%r3) - std 2,16(%r3) - std 3,24(%r3) - std 4,32(%r3) - std 5,40(%r3) - std 6,48(%r3) - std 7,56(%r3) - std 8,64(%r3) - std 9,72(%r3) - std 10,80(%r3) - std 11,88(%r3) - std 12,96(%r3) - std 13,104(%r3) - std 14,112(%r3) - std 15,120(%r3) -5: # Set CIF_FPU flag - oi __LC_CPU_FLAGS+7,_CIF_FPU - lg %r9,48(%r11) # return from save_fpu_regs + larl %r9,save_fpu_regs br %r14 -.Lcleanup_save_fpu_fpc_end: - .quad .Lsave_fpu_regs_fpc_end -.Lcleanup_save_fpu_regs_vx_low: - .quad .Lsave_fpu_regs_vx_low -.Lcleanup_save_fpu_regs_vx_high: - .quad .Lsave_fpu_regs_vx_high -.Lcleanup_save_fpu_regs_fp: - .quad .Lsave_fpu_regs_fp -.Lcleanup_save_fpu_regs_done: - .quad .Lsave_fpu_regs_done .Lcleanup_load_fpu_regs: - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - bnor %r14 - clg %r9,BASED(.Lcleanup_load_fpu_regs_done) - jhe 1f - clg %r9,BASED(.Lcleanup_load_fpu_regs_fp) - jhe 2f - clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_high) - jhe 3f - clg %r9,BASED(.Lcleanup_load_fpu_regs_vx) - jhe 4f - lg %r4,__LC_CURRENT - aghi %r4,__TASK_thread - lfpc __THREAD_FPU_fpc(%r4) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area - jz 2f # -> no VX, load FP regs -4: # Load V0 ..V15 registers - VLM %v0,%v15,0,%r4 -3: # Load V16..V31 registers - VLM %v16,%v31,256,%r4 - j 1f -2: # Load floating-point registers - ld 0,0(%r4) - ld 1,8(%r4) - ld 2,16(%r4) - ld 3,24(%r4) - ld 4,32(%r4) - ld 5,40(%r4) - ld 6,48(%r4) - ld 7,56(%r4) - ld 8,64(%r4) - ld 9,72(%r4) - ld 10,80(%r4) - ld 11,88(%r4) - ld 12,96(%r4) - ld 13,104(%r4) - ld 14,112(%r4) - ld 15,120(%r4) -1: # Clear CIF_FPU bit - ni __LC_CPU_FLAGS+7,255-_CIF_FPU - lg %r9,48(%r11) # return from load_fpu_regs + larl %r9,load_fpu_regs br %r14 -.Lcleanup_load_fpu_regs_vx: - .quad .Lload_fpu_regs_vx -.Lcleanup_load_fpu_regs_vx_high: - .quad .Lload_fpu_regs_vx_high -.Lcleanup_load_fpu_regs_fp: - .quad .Lload_fpu_regs_fp -.Lcleanup_load_fpu_regs_done: - .quad .Lload_fpu_regs_done /* * Integer constants From c024dcd3df1a0180993d4ce94b87e8ec271c6c2e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 10 Mar 2016 10:32:21 +0100 Subject: [PATCH 127/797] s390/cpumf: add missing lpp magic initialization commit 8f100bb1ff27873dd71f636da670e503b9ade3c6 upstream. Add the missing lpp magic initialization for cpu 0. Without this all samples on cpu 0 do not have the most significant bit set in the program parameter field, which we use to distinguish between guest and host samples if the pid is also 0. We did initialize the lpp magic in the absolute zero lowcore but forgot that when switching to the allocated lowcore on cpu 0 only. Reported-by: Shu Juan Zhang Acked-by: Christian Borntraeger Fixes: e22cf8ca6f75 ("s390/cpumf: rework program parameter setting to detect guest samples") Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c837bcacf218..1f581eb61bc2 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -329,6 +329,7 @@ static void __init setup_lowcore(void) + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->current_task = (unsigned long) init_thread_union.thread_info.task; lc->thread_info = (unsigned long) &init_thread_union; + lc->lpp = LPP_MAGIC; lc->machine_flags = S390_lowcore.machine_flags; lc->stfl_fac_list = S390_lowcore.stfl_fac_list; memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, From c1948606af0861bd181cb85ed1797ef02c20bbec Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 14 Mar 2016 15:47:23 +0100 Subject: [PATCH 128/797] s390/pci: enforce fmb page boundary rule commit 80c544ded25ac14d7cc3e555abb8ed2c2da99b84 upstream. The function measurement block must not cross a page boundary. Ensure that by raising the alignment requirement to the smallest power of 2 larger than the size of the fmb. Fixes: d0b088531 ("s390/pci: performance statistics and debug infrastructure") Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/pci.h | 2 +- arch/s390/pci/pci.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index c873e682b67f..2b2ced9dc00a 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -45,7 +45,7 @@ struct zpci_fmb { u64 rpcit_ops; u64 dma_rbytes; u64 dma_wbytes; -} __packed __aligned(16); +} __packed __aligned(64); enum zpci_state { ZPCI_FN_STATE_RESERVED, diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 7ef12a3ace3a..19442395f413 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -871,8 +871,11 @@ static inline int barsize(u8 size) static int zpci_mem_init(void) { + BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) || + __alignof__(struct zpci_fmb) < sizeof(struct zpci_fmb)); + zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb), - 16, 0, NULL); + __alignof__(struct zpci_fmb), 0, NULL); if (!zdev_fmb_cache) goto error_zdev; From 88c9954c5c898dfe2e581cba34417ab5abccdd0d Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Mon, 29 Feb 2016 17:30:08 -0800 Subject: [PATCH 129/797] pinctrl-bcm2835: Fix cut-and-paste error in "pull" parsing commit 2c7e3306d23864d49f686f22e56e180ff0fffb7f upstream. The DT bindings for pinctrl-bcm2835 allow both the function and pull to contain either one entry or one per pin. However, an error in the DT parsing can cause failures if the number of pulls differs from the number of functions. Signed-off-by: Eric Anholt Signed-off-by: Phil Elwell Reviewed-by: Stephen Warren Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/bcm/pinctrl-bcm2835.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c index 2e6ca69635aa..17dd8fe12b54 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c @@ -779,7 +779,7 @@ static int bcm2835_pctl_dt_node_to_map(struct pinctrl_dev *pctldev, } if (num_pulls) { err = of_property_read_u32_index(np, "brcm,pull", - (num_funcs > 1) ? i : 0, &pull); + (num_pulls > 1) ? i : 0, &pull); if (err) goto out; err = bcm2835_pctl_dt_node_to_map_pull(pc, np, pin, From 8cbac3c4f74d92bf04645a613e061ab4f9baa866 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 25 Feb 2016 14:35:57 -0600 Subject: [PATCH 130/797] PCI: Disable IO/MEM decoding for devices with non-compliant BARs commit b84106b4e2290c081cdab521fa832596cdfea246 upstream. The PCI config header (first 64 bytes of each device's config space) is defined by the PCI spec so generic software can identify the device and manage its usage of I/O, memory, and IRQ resources. Some non-spec-compliant devices put registers other than BARs where the BARs should be. When the PCI core sizes these "BARs", the reads and writes it does may have unwanted side effects, and the "BAR" may appear to describe non-sensical address space. Add a flag bit to mark non-compliant devices so we don't touch their BARs. Turn off IO/MEM decoding to prevent the devices from consuming address space, since we can't read the BARs to find out what that address space would be. Signed-off-by: Bjorn Helgaas Tested-by: Andi Kleen Signed-off-by: Greg Kroah-Hartman --- drivers/pci/probe.c | 14 ++++++++++++++ include/linux/pci.h | 1 + 2 files changed, 15 insertions(+) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index edb1984201e9..7aafb5fb9336 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -179,6 +179,9 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, u16 orig_cmd; struct pci_bus_region region, inverted_region; + if (dev->non_compliant_bars) + return 0; + mask = type ? PCI_ROM_ADDRESS_MASK : ~0; /* No printks while decoding is disabled! */ @@ -1174,6 +1177,7 @@ void pci_msi_setup_pci_dev(struct pci_dev *dev) int pci_setup_device(struct pci_dev *dev) { u32 class; + u16 cmd; u8 hdr_type; int pos = 0; struct pci_bus_region region; @@ -1219,6 +1223,16 @@ int pci_setup_device(struct pci_dev *dev) /* device class may be changed after fixup */ class = dev->class >> 8; + if (dev->non_compliant_bars) { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + if (cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { + dev_info(&dev->dev, "device has non-compliant BARs; disabling IO/MEM decoding\n"); + cmd &= ~PCI_COMMAND_IO; + cmd &= ~PCI_COMMAND_MEMORY; + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + } + switch (dev->hdr_type) { /* header type */ case PCI_HEADER_TYPE_NORMAL: /* standard header */ if (class == PCI_CLASS_BRIDGE_PCI) diff --git a/include/linux/pci.h b/include/linux/pci.h index 6ae25aae88fd..4e554bfff129 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -359,6 +359,7 @@ struct pci_dev { unsigned int io_window_1k:1; /* Intel P2P bridge 1K I/O windows */ unsigned int irq_managed:1; unsigned int has_secondary_link:1; + unsigned int non_compliant_bars:1; /* broken BARs; ignore them */ pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ From 2221620b0bce6df54e19eaf6065c368075e499e6 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 21 Mar 2016 11:12:55 +0000 Subject: [PATCH 131/797] PCI: ACPI: IA64: fix IO port generic range check commit 4a2e7aab4ffce1e0e79b303dc2f9a03aa9f3a332 upstream. The [0 - 64k] ACPI PCI IO port resource boundary check in: acpi_dev_ioresource_flags() is currently applied blindly in the ACPI resource parsing to all architectures, but only x86 suffers from that IO space limitation. On arches (ie IA64 and ARM64) where IO space is memory mapped, the PCI root bridges IO resource windows are firstly initialized from the _CRS (in acpi_decode_space()) and contain the CPU physical address at which a root bridge decodes IO space in the CPU physical address space with the offset value representing the offset required to translate the PCI bus address into the CPU physical address. The IO resource windows are then parsed and updated in arch code before creating and enumerating PCI buses (eg IA64 add_io_space()) to map in an arch specific way the obtained CPU physical address range to a slice of virtual address space reserved to map PCI IO space, ending up with PCI bridges resource windows containing IO resources like the following on a working IA64 configuration: PCI host bridge to bus 0000:00 pci_bus 0000:00: root bus resource [io 0x1000000-0x100ffff window] (bus address [0x0000-0xffff]) pci_bus 0000:00: root bus resource [mem 0x000a0000-0x000fffff window] pci_bus 0000:00: root bus resource [mem 0x80000000-0x8fffffff window] pci_bus 0000:00: root bus resource [mem 0x80004000000-0x800ffffffff window] pci_bus 0000:00: root bus resource [bus 00] This implies that the [0 - 64K] check in acpi_dev_ioresource_flags() leaves platforms with memory mapped IO space (ie IA64) broken (ie kernel can't claim IO resources since the host bridge IO resource is disabled and discarded by ACPI core code, see log on IA64 with missing root bridge IO resource, silently filtered by current [0 - 64k] check in acpi_dev_ioresource_flags()): PCI host bridge to bus 0000:00 pci_bus 0000:00: root bus resource [mem 0x000a0000-0x000fffff window] pci_bus 0000:00: root bus resource [mem 0x80000000-0x8fffffff window] pci_bus 0000:00: root bus resource [mem 0x80004000000-0x800ffffffff window] pci_bus 0000:00: root bus resource [bus 00] [...] pci 0000:00:03.0: [1002:515e] type 00 class 0x030000 pci 0000:00:03.0: reg 0x10: [mem 0x80000000-0x87ffffff pref] pci 0000:00:03.0: reg 0x14: [io 0x1000-0x10ff] pci 0000:00:03.0: reg 0x18: [mem 0x88020000-0x8802ffff] pci 0000:00:03.0: reg 0x30: [mem 0x88000000-0x8801ffff pref] pci 0000:00:03.0: supports D1 D2 pci 0000:00:03.0: can't claim BAR 1 [io 0x1000-0x10ff]: no compatible bridge window For this reason, the IO port resources boundaries check in generic ACPI parsing code should be guarded with a CONFIG_X86 guard so that more arches (ie ARM64) can benefit from the generic ACPI resources parsing interface without incurring in unexpected resource filtering, fixing at the same time current breakage on IA64. This patch factors out IO ports boundary [0 - 64k] check in generic ACPI code and makes the IO space check X86 specific to make sure that IO space resources are usable on other arches too. Fixes: 3772aea7d6f3 (ia64/PCI/ACPI: Use common ACPI resource parsing interface for host bridge) Signed-off-by: Lorenzo Pieralisi Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/resource.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index cdc5c2599beb..627f8fbb5e9a 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -26,8 +26,20 @@ #ifdef CONFIG_X86 #define valid_IRQ(i) (((i) != 0) && ((i) != 2)) +static inline bool acpi_iospace_resource_valid(struct resource *res) +{ + /* On X86 IO space is limited to the [0 - 64K] IO port range */ + return res->end < 0x10003; +} #else #define valid_IRQ(i) (true) +/* + * ACPI IO descriptors on arches other than X86 contain MMIO CPU physical + * addresses mapping IO space in CPU physical address space, IO space + * resources can be placed anywhere in the 64-bit physical address space. + */ +static inline bool +acpi_iospace_resource_valid(struct resource *res) { return true; } #endif static bool acpi_dev_resource_len_valid(u64 start, u64 end, u64 len, bool io) @@ -126,7 +138,7 @@ static void acpi_dev_ioresource_flags(struct resource *res, u64 len, if (!acpi_dev_resource_len_valid(res->start, res->end, len, true)) res->flags |= IORESOURCE_DISABLED | IORESOURCE_UNSET; - if (res->end >= 0x10003) + if (!acpi_iospace_resource_valid(res)) res->flags |= IORESOURCE_DISABLED | IORESOURCE_UNSET; if (io_decode == ACPI_DECODE_16) From dc1441612fdb4ca221e3a4aa32e39e74d020e386 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Mar 2016 09:40:46 +0100 Subject: [PATCH 132/797] x86/irq: Cure live lock in fixup_irqs() commit 551adc60573cb68e3d55cacca9ba1b7437313df7 upstream. Harry reported, that he's able to trigger a system freeze with cpu hot unplug. The freeze turned out to be a live lock caused by recent changes in irq_force_complete_move(). When fixup_irqs() and from there irq_force_complete_move() is called on the dying cpu, then all other cpus are in stop machine an wait for the dying cpu to complete the teardown. If there is a move of an interrupt pending then irq_force_complete_move() sends the cleanup IPI to the cpus in the old_domain mask and waits for them to clear the mask. That's obviously impossible as those cpus are firmly stuck in stop machine with interrupts disabled. I should have known that, but I completely overlooked it being concentrated on the locking issues around the vectors. And the existance of the call to __irq_complete_move() in the code, which actually sends the cleanup IPI made it reasonable to wait for that cleanup to complete. That call was bogus even before the recent changes as it was just a pointless distraction. We have to look at two cases: 1) The move_in_progress flag of the interrupt is set This means the ioapic has been updated with the new vector, but it has not fired yet. In theory there is a race: set_ioapic(new_vector) <-- Interrupt is raised before update is effective, i.e. it's raised on the old vector. So if the target cpu cannot handle that interrupt before the old vector is cleaned up, we get a spurious interrupt and in the worst case the ioapic irq line becomes stale, but my experiments so far have only resulted in spurious interrupts. But in case of cpu hotplug this should be a non issue because if the affinity update happens right before all cpus rendevouz in stop machine, there is no way that the interrupt can be blocked on the target cpu because all cpus loops first with interrupts enabled in stop machine, so the old vector is not yet cleaned up when the interrupt fires. So the only way to run into this issue is if the delivery of the interrupt on the apic/system bus would be delayed beyond the point where the target cpu disables interrupts in stop machine. I doubt that it can happen, but at least there is a theroretical chance. Virtualization might be able to expose this, but AFAICT the IOAPIC emulation is not as stupid as the real hardware. I've spent quite some time over the weekend to enforce that situation, though I was not able to trigger the delayed case. 2) The move_in_progress flag is not set and the old_domain cpu mask is not empty. That means, that an interrupt was delivered after the change and the cleanup IPI has been sent to the cpus in old_domain, but not all CPUs have responded to it yet. In both cases we can assume that the next interrupt will arrive on the new vector, so we can cleanup the old vectors on the cpus in the old_domain cpu mask. Fixes: 98229aa36caa "x86/irq: Plug vector cleanup race" Reported-by: Harry Junior Tested-by: Tony Luck Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Joe Lawrence Cc: Borislav Petkov Cc: Ben Hutchings Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1603140931430.3657@nanos Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/hw_irq.h | 1 + arch/x86/kernel/apic/vector.c | 92 +++++++++++++++++++++++++++-------- 2 files changed, 73 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 1e3408e88604..59caa55fb9b5 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -136,6 +136,7 @@ struct irq_alloc_info { struct irq_cfg { unsigned int dest_apicid; u8 vector; + u8 old_vector; }; extern struct irq_cfg *irq_cfg(unsigned int irq); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index a35f6b5473f4..7af2505f20c2 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -211,6 +211,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, */ cpumask_and(d->old_domain, d->old_domain, cpu_online_mask); d->move_in_progress = !cpumask_empty(d->old_domain); + d->cfg.old_vector = d->move_in_progress ? d->cfg.vector : 0; d->cfg.vector = vector; cpumask_copy(d->domain, vector_cpumask); success: @@ -653,46 +654,97 @@ void irq_complete_move(struct irq_cfg *cfg) } /* - * Called with @desc->lock held and interrupts disabled. + * Called from fixup_irqs() with @desc->lock held and interrupts disabled. */ void irq_force_complete_move(struct irq_desc *desc) { struct irq_data *irqdata = irq_desc_get_irq_data(desc); struct apic_chip_data *data = apic_chip_data(irqdata); struct irq_cfg *cfg = data ? &data->cfg : NULL; + unsigned int cpu; if (!cfg) return; - __irq_complete_move(cfg, cfg->vector); - /* * This is tricky. If the cleanup of @data->old_domain has not been * done yet, then the following setaffinity call will fail with * -EBUSY. This can leave the interrupt in a stale state. * - * The cleanup cannot make progress because we hold @desc->lock. So in - * case @data->old_domain is not yet cleaned up, we need to drop the - * lock and acquire it again. @desc cannot go away, because the - * hotplug code holds the sparse irq lock. + * All CPUs are stuck in stop machine with interrupts disabled so + * calling __irq_complete_move() would be completely pointless. */ raw_spin_lock(&vector_lock); - /* Clean out all offline cpus (including ourself) first. */ + /* + * Clean out all offline cpus (including the outgoing one) from the + * old_domain mask. + */ cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); - while (!cpumask_empty(data->old_domain)) { + + /* + * If move_in_progress is cleared and the old_domain mask is empty, + * then there is nothing to cleanup. fixup_irqs() will take care of + * the stale vectors on the outgoing cpu. + */ + if (!data->move_in_progress && cpumask_empty(data->old_domain)) { raw_spin_unlock(&vector_lock); - raw_spin_unlock(&desc->lock); - cpu_relax(); - raw_spin_lock(&desc->lock); - /* - * Reevaluate apic_chip_data. It might have been cleared after - * we dropped @desc->lock. - */ - data = apic_chip_data(irqdata); - if (!data) - return; - raw_spin_lock(&vector_lock); + return; } + + /* + * 1) The interrupt is in move_in_progress state. That means that we + * have not seen an interrupt since the io_apic was reprogrammed to + * the new vector. + * + * 2) The interrupt has fired on the new vector, but the cleanup IPIs + * have not been processed yet. + */ + if (data->move_in_progress) { + /* + * In theory there is a race: + * + * set_ioapic(new_vector) <-- Interrupt is raised before update + * is effective, i.e. it's raised on + * the old vector. + * + * So if the target cpu cannot handle that interrupt before + * the old vector is cleaned up, we get a spurious interrupt + * and in the worst case the ioapic irq line becomes stale. + * + * But in case of cpu hotplug this should be a non issue + * because if the affinity update happens right before all + * cpus rendevouz in stop machine, there is no way that the + * interrupt can be blocked on the target cpu because all cpus + * loops first with interrupts enabled in stop machine, so the + * old vector is not yet cleaned up when the interrupt fires. + * + * So the only way to run into this issue is if the delivery + * of the interrupt on the apic/system bus would be delayed + * beyond the point where the target cpu disables interrupts + * in stop machine. I doubt that it can happen, but at least + * there is a theroretical chance. Virtualization might be + * able to expose this, but AFAICT the IOAPIC emulation is not + * as stupid as the real hardware. + * + * Anyway, there is nothing we can do about that at this point + * w/o refactoring the whole fixup_irq() business completely. + * We print at least the irq number and the old vector number, + * so we have the necessary information when a problem in that + * area arises. + */ + pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n", + irqdata->irq, cfg->old_vector); + } + /* + * If old_domain is not empty, then other cpus still have the irq + * descriptor set in their vector array. Clean it up. + */ + for_each_cpu(cpu, data->old_domain) + per_cpu(vector_irq, cpu)[cfg->old_vector] = VECTOR_UNUSED; + + /* Cleanup the left overs of the (half finished) move */ + cpumask_clear(data->old_domain); + data->move_in_progress = 0; raw_spin_unlock(&vector_lock); } #endif From 1eeb3225856a914d199f92d2d492142783eb5740 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 14 Mar 2016 21:20:54 -0400 Subject: [PATCH 133/797] x86/apic: Fix suspicious RCU usage in smp_trace_call_function_interrupt() commit 7834c10313fb823e538f2772be78edcdeed2e6e3 upstream. Since 4.4, I've been able to trigger this occasionally: =============================== [ INFO: suspicious RCU usage. ] 4.5.0-rc7-think+ #3 Not tainted Cc: Andi Kleen Link: http://lkml.kernel.org/r/20160315012054.GA17765@codemonkey.org.uk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman ------------------------------- ./arch/x86/include/asm/msr-trace.h:47 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 1 RCU used illegally from extended quiescent state! no locks held by swapper/3/0. stack backtrace: CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.5.0-rc7-think+ #3 ffffffff92f821e0 1f3e5c340597d7fc ffff880468e07f10 ffffffff92560c2a ffff880462145280 0000000000000001 ffff880468e07f40 ffffffff921376a6 ffffffff93665ea0 0000cc7c876d28da 0000000000000005 ffffffff9383dd60 Call Trace: [] dump_stack+0x67/0x9d [] lockdep_rcu_suspicious+0xe6/0x100 [] do_trace_write_msr+0x127/0x1a0 [] native_apic_msr_eoi_write+0x23/0x30 [] smp_trace_call_function_interrupt+0x38/0x360 [] trace_call_function_interrupt+0x90/0xa0 [] ? cpuidle_enter_state+0x1b4/0x520 Move the entering_irq() call before ack_APIC_irq(), because entering_irq() tells the RCU susbstems to end the extended quiescent state, so that the following trace call in ack_APIC_irq() works correctly. Suggested-by: Andi Kleen Fixes: 4787c368a9bc "x86/tracing: Add irq_enter/exit() in smp_trace_reschedule_interrupt()" Signed-off-by: Dave Jones Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/apic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a30316bf801a..163769d82475 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -638,8 +638,8 @@ static inline void entering_irq(void) static inline void entering_ack_irq(void) { - ack_APIC_irq(); entering_irq(); + ack_APIC_irq(); } static inline void ipi_entering_ack_irq(void) From 0f63ab5873ed78838afa4b2f8bfd9d18f806cf40 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 16 Mar 2016 14:14:21 -0700 Subject: [PATCH 134/797] x86/iopl/64: Properly context-switch IOPL on Xen PV commit b7a584598aea7ca73140cb87b40319944dd3393f upstream. On Xen PV, regs->flags doesn't reliably reflect IOPL and the exit-to-userspace code doesn't change IOPL. We need to context switch it manually. I'm doing this without going through paravirt because this is specific to Xen PV. After the dust settles, we can merge this with the 32-bit code, tidy up the iopl syscall implementation, and remove the set_iopl pvop entirely. Fixes XSA-171. Reviewewd-by: Jan Beulich Signed-off-by: Andy Lutomirski Cc: Andrew Cooper Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Vrabel Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jan Beulich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/693c3bd7aeb4d3c27c92c622b7d0f554a458173c.1458162709.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/xen/hypervisor.h | 2 ++ arch/x86/kernel/process_64.c | 12 ++++++++++++ arch/x86/xen/enlighten.c | 2 +- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 8b2d4bea9962..39171b3646bb 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -62,4 +62,6 @@ void xen_arch_register_cpu(int num); void xen_arch_unregister_cpu(int num); #endif +extern void xen_set_iopl_mask(unsigned mask); + #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e835d263a33b..4cbb60fbff3e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -48,6 +48,7 @@ #include #include #include +#include asmlinkage extern void ret_from_fork(void); @@ -411,6 +412,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) __switch_to_xtra(prev_p, next_p, tss); +#ifdef CONFIG_XEN + /* + * On Xen PV, IOPL bits in pt_regs->flags have no effect, and + * current_pt_regs()->flags may not match the current task's + * intended IOPL. We need to switch it manually. + */ + if (unlikely(static_cpu_has(X86_FEATURE_XENPV) && + prev->iopl != next->iopl)) + xen_set_iopl_mask(next->iopl); +#endif + if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) { /* * AMD CPUs have a misfeature: SYSRET sets the SS selector but diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b7de78bdc09c..beab8c706ac9 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -961,7 +961,7 @@ static void xen_load_sp0(struct tss_struct *tss, tss->x86_tss.sp0 = thread->sp0; } -static void xen_set_iopl_mask(unsigned mask) +void xen_set_iopl_mask(unsigned mask) { struct physdev_set_iopl set_iopl; From f71e846236048ca5165b4ff5bc6f1745cabb6bd6 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 16 Mar 2016 14:14:22 -0700 Subject: [PATCH 135/797] x86/iopl: Fix iopl capability check on Xen PV commit c29016cf41fe9fa994a5ecca607cf5f1cd98801e upstream. iopl(3) is supposed to work if iopl is already 3, even if unprivileged. This didn't work right on Xen PV. Fix it. Reviewewd-by: Jan Beulich Signed-off-by: Andy Lutomirski Cc: Andrew Cooper Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Vrabel Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jan Beulich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/8ce12013e6e4c0a44a97e316be4a6faff31bd5ea.1458162709.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/ioport.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 37dae792dbbe..589b3193f102 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -96,9 +96,14 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) SYSCALL_DEFINE1(iopl, unsigned int, level) { struct pt_regs *regs = current_pt_regs(); - unsigned int old = (regs->flags >> 12) & 3; struct thread_struct *t = ¤t->thread; + /* + * Careful: the IOPL bits in regs->flags are undefined under Xen PV + * and changing them has no effect. + */ + unsigned int old = t->iopl >> X86_EFLAGS_IOPL_BIT; + if (level > 3) return -EINVAL; /* Trying to gain more privileges? */ @@ -106,8 +111,9 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) if (!capable(CAP_SYS_RAWIO)) return -EPERM; } - regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); - t->iopl = level << 12; + regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | + (level << X86_EFLAGS_IOPL_BIT); + t->iopl = level << X86_EFLAGS_IOPL_BIT; set_iopl_mask(t->iopl); return 0; From 8e8a1a17bcc016c59044b06776fc1ddbcc897bb3 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Fri, 1 Apr 2016 14:31:23 -0700 Subject: [PATCH 136/797] x86/mm: TLB_REMOTE_SEND_IPI should count pages commit 18c98243ddf05a1827ad2c359c5ac051101e7ff7 upstream. TLB_REMOTE_SEND_IPI was recently introduced, but it counts bytes instead of pages. In addition, it does not report correctly the case in which flush_tlb_page flushes a page. Fix it to be consistent with other TLB counters. Fixes: 5b74283ab251b9d ("x86, mm: trace when an IPI is about to be sent") Signed-off-by: Nadav Amit Cc: Mel Gorman Cc: Rik van Riel Cc: Dave Hansen Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/tlb.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 8f4cc3dfac32..5fb6adaaa796 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -106,8 +106,6 @@ static void flush_tlb_func(void *info) if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) return; - if (!f->flush_end) - f->flush_end = f->flush_start + PAGE_SIZE; count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { @@ -135,12 +133,20 @@ void native_flush_tlb_others(const struct cpumask *cpumask, unsigned long end) { struct flush_tlb_info info; + + if (end == 0) + end = start + PAGE_SIZE; info.flush_mm = mm; info.flush_start = start; info.flush_end = end; count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); - trace_tlb_flush(TLB_REMOTE_SEND_IPI, end - start); + if (end == TLB_FLUSH_ALL) + trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL); + else + trace_tlb_flush(TLB_REMOTE_SEND_IPI, + (end - start) >> PAGE_SHIFT); + if (is_uv_system()) { unsigned int cpu; From f5967a77df538ea60fb83f3957b9d7cfc4646ae2 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Thu, 3 Mar 2016 00:31:29 -0500 Subject: [PATCH 137/797] sg: fix dxferp in from_to case commit 5ecee0a3ee8d74b6950cb41e8989b0c2174568d4 upstream. One of the strange things that the original sg driver did was let the user provide both a data-out buffer (it followed the sg_header+cdb) _and_ specify a reply length greater than zero. What happened was that the user data-out buffer was copied into some kernel buffers and then the mid level was told a read type operation would take place with the data from the device overwriting the same kernel buffers. The user would then read those kernel buffers back into the user space. From what I can tell, the above action was broken by commit fad7f01e61bf ("sg: set dxferp to NULL for READ with the older SG interface") in 2008 and syzkaller found that out recently. Make sure that a user space pointer is passed through when data follows the sg_header structure and command. Fix the abnormal case when a non-zero reply_len is also given. Fixes: fad7f01e61bf737fe8a3740d803f000db57ecac6 Signed-off-by: Douglas Gilbert Reviewed-by: Ewan Milne Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 5e820674432c..ae7d9bdf409c 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -652,7 +652,8 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) else hp->dxfer_direction = (mxsize > 0) ? SG_DXFER_FROM_DEV : SG_DXFER_NONE; hp->dxfer_len = mxsize; - if (hp->dxfer_direction == SG_DXFER_TO_DEV) + if ((hp->dxfer_direction == SG_DXFER_TO_DEV) || + (hp->dxfer_direction == SG_DXFER_TO_FROM_DEV)) hp->dxferp = (char __user *)buf + cmd_size; else hp->dxferp = NULL; From 1624297ccc24486aa9e264f04f59743e5563a6b2 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Wed, 3 Feb 2016 15:06:00 -0800 Subject: [PATCH 138/797] aacraid: Fix RRQ overload commit 3f4ce057d51a9c0ed9b01ba693df685d230ffcae upstream. The driver utilizes an array of atomic variables to keep track of IO submissions to each vector. To submit an IO multiple threads iterate through the array to find a vector which has empty slots to send an IO. The reading and updating of the variable is not atomic, causing race conditions when a thread uses a full vector to submit an IO. Fixed by mapping each FIB to a vector, the submission path then uses said vector to submit IO thereby removing the possibly of a race condition.The vector assignment is started from 1 since vector 0 is reserved for the use of AIF management FIBS.If the number of MSIx vectors is 1 (MSI or INTx mode) then all the fibs are allocated to vector 0. Fixes: 495c0217 "aacraid: MSI-x support" Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Johannes Thumshirn Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/aacraid.h | 2 ++ drivers/scsi/aacraid/commsup.c | 28 ++++++++++++++++++++++++++++ drivers/scsi/aacraid/src.c | 30 +++++++----------------------- 3 files changed, 37 insertions(+), 23 deletions(-) diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 074878b55a0b..d044f3f273be 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -944,6 +944,7 @@ struct fib { */ struct list_head fiblink; void *data; + u32 vector_no; struct hw_fib *hw_fib_va; /* Actual shared object */ dma_addr_t hw_fib_pa; /* physical address of hw_fib*/ }; @@ -2113,6 +2114,7 @@ static inline unsigned int cap_to_cyls(sector_t capacity, unsigned divisor) int aac_acquire_irq(struct aac_dev *dev); void aac_free_irq(struct aac_dev *dev); const char *aac_driverinfo(struct Scsi_Host *); +void aac_fib_vector_assign(struct aac_dev *dev); struct fib *aac_fib_alloc(struct aac_dev *dev); int aac_fib_setup(struct aac_dev *dev); void aac_fib_map_free(struct aac_dev *dev); diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index a1f90fe849c9..e9b4c1119eb9 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -90,6 +90,28 @@ void aac_fib_map_free(struct aac_dev *dev) dev->hw_fib_pa = 0; } +void aac_fib_vector_assign(struct aac_dev *dev) +{ + u32 i = 0; + u32 vector = 1; + struct fib *fibptr = NULL; + + for (i = 0, fibptr = &dev->fibs[i]; + i < (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB); + i++, fibptr++) { + if ((dev->max_msix == 1) || + (i > ((dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB - 1) + - dev->vector_cap))) { + fibptr->vector_no = 0; + } else { + fibptr->vector_no = vector; + vector++; + if (vector == dev->max_msix) + vector = 1; + } + } +} + /** * aac_fib_setup - setup the fibs * @dev: Adapter to set up @@ -151,6 +173,12 @@ int aac_fib_setup(struct aac_dev * dev) hw_fib_pa = hw_fib_pa + dev->max_fib_size + sizeof(struct aac_fib_xporthdr); } + + /* + *Assign vector numbers to fibs + */ + aac_fib_vector_assign(dev); + /* * Add the fib chain to the free list */ diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 2aa34ea8ceb1..bc0203f3d243 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -156,8 +156,8 @@ static irqreturn_t aac_src_intr_message(int irq, void *dev_id) break; if (dev->msi_enabled && dev->max_msix > 1) atomic_dec(&dev->rrq_outstanding[vector_no]); - aac_intr_normal(dev, handle-1, 0, isFastResponse, NULL); dev->host_rrq[index++] = 0; + aac_intr_normal(dev, handle-1, 0, isFastResponse, NULL); if (index == (vector_no + 1) * dev->vector_cap) index = vector_no * dev->vector_cap; dev->host_rrq_idx[vector_no] = index; @@ -452,36 +452,20 @@ static int aac_src_deliver_message(struct fib *fib) #endif u16 hdr_size = le16_to_cpu(fib->hw_fib_va->header.Size); + u16 vector_no; atomic_inc(&q->numpending); if (dev->msi_enabled && fib->hw_fib_va->header.Command != AifRequest && dev->max_msix > 1) { - u_int16_t vector_no, first_choice = 0xffff; - - vector_no = dev->fibs_pushed_no % dev->max_msix; - do { - vector_no += 1; - if (vector_no == dev->max_msix) - vector_no = 1; - if (atomic_read(&dev->rrq_outstanding[vector_no]) < - dev->vector_cap) - break; - if (0xffff == first_choice) - first_choice = vector_no; - else if (vector_no == first_choice) - break; - } while (1); - if (vector_no == first_choice) - vector_no = 0; - atomic_inc(&dev->rrq_outstanding[vector_no]); - if (dev->fibs_pushed_no == 0xffffffff) - dev->fibs_pushed_no = 0; - else - dev->fibs_pushed_no++; + vector_no = fib->vector_no; fib->hw_fib_va->header.Handle += (vector_no << 16); + } else { + vector_no = 0; } + atomic_inc(&dev->rrq_outstanding[vector_no]); + if (dev->comm_interface == AAC_COMM_MESSAGE_TYPE2) { /* Calculate the amount to the fibsize bits */ fibsize = (hdr_size + 127) / 128 - 1; From f3a3019dfc5411743273b5a53f2d95a845bec736 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Wed, 3 Feb 2016 15:06:02 -0800 Subject: [PATCH 139/797] aacraid: Fix memory leak in aac_fib_map_free commit f88fa79a61726ce9434df9b4aede36961f709f17 upstream. aac_fib_map_free() calls pci_free_consistent() without checking that dev->hw_fib_va is not NULL and dev->max_fib_size is not zero.If they are indeed NULL/0, this will result in a hang as pci_free_consistent() will attempt to invalidate cache for the entire 64-bit address space (which would take a very long time). Fixed by adding a check to make sure that dev->hw_fib_va and dev->max_fib_size are not NULL and 0 respectively. Fixes: 9ad5204d6 - "[SCSI]aacraid: incorrect dma mapping mask during blinked recover or user initiated reset" Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Johannes Thumshirn Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/commsup.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index e9b4c1119eb9..4cbf54928640 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -83,9 +83,12 @@ static int fib_map_alloc(struct aac_dev *dev) void aac_fib_map_free(struct aac_dev *dev) { - pci_free_consistent(dev->pdev, - dev->max_fib_size * (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB), - dev->hw_fib_va, dev->hw_fib_pa); + if (dev->hw_fib_va && dev->max_fib_size) { + pci_free_consistent(dev->pdev, + (dev->max_fib_size * + (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB)), + dev->hw_fib_va, dev->hw_fib_pa); + } dev->hw_fib_va = NULL; dev->hw_fib_pa = 0; } From e468298bd4f80e9353b2fe1273ad036a4abaf6e6 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Wed, 3 Feb 2016 15:06:03 -0800 Subject: [PATCH 140/797] aacraid: Set correct msix count for EEH recovery commit ecc479e00db8eb110b200afe1effcb3df20ca7ae upstream. During EEH recovery number of online CPU's might change thereby changing the number of MSIx vectors. Since each fib is allocated to a vector, changes in the number of vectors causes fib to be sent thru invalid vectors.In addition the correct number of MSIx vectors is not updated in the INIT struct sent to the controller, when it is reinitialized. Fixed by reassigning vectors to fibs based on the updated number of MSIx vectors and updating the INIT structure before sending to controller. Fixes: MSI-X vector calculation for suspend/resume Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Shane Seymour Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/linit.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 3b6e5c67e853..aa6eccb8940b 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1404,8 +1404,18 @@ static int aac_acquire_resources(struct aac_dev *dev) aac_adapter_enable_int(dev); - if (!dev->sync_mode) + /*max msix may change after EEH + * Re-assign vectors to fibs + */ + aac_fib_vector_assign(dev); + + if (!dev->sync_mode) { + /* After EEH recovery or suspend resume, max_msix count + * may change, therfore updating in init as well. + */ aac_adapter_start(dev); + dev->init->Sa_MSIXVectors = cpu_to_le32(dev->max_msix); + } return 0; error_iounmap: From 67aa7e6dd927c17103b3c5acb7eb50efb2372dab Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Sat, 5 Mar 2016 17:52:02 -0500 Subject: [PATCH 141/797] sd: Fix discard granularity when LBPRZ=1 commit 6540a65da90c09590897310e31993b1f6e28485a upstream. Commit 397737223c59 ("sd: Make discard granularity match logical block size when LBPRZ=1") accidentally set the granularity to one byte instead of one logical block on devices that provide deterministic zeroes after UNMAP. Signed-off-by: Martin K. Petersen Reported-by: Mike Snitzer Reviewed-by: Ewan Milne Reviewed-by: Bart Van Assche Fixes: 397737223c59e89dca7305feb6528caef8fbef84 Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index bb669d32ccd0..cc84ea7d09cc 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -648,7 +648,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) */ if (sdkp->lbprz) { q->limits.discard_alignment = 0; - q->limits.discard_granularity = 1; + q->limits.discard_granularity = logical_block_size; } else { q->limits.discard_alignment = sdkp->unmap_alignment * logical_block_size; From c1f327046b17e6d9fac9bc61e23bcb1897c2d9b3 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 7 Mar 2016 11:59:44 +0100 Subject: [PATCH 142/797] scsi: storvsc: fix SRB_STATUS_ABORTED handling commit ff06c5ffbcb4ffa542fb80c897be977956fafecc upstream. Commit 3209f9d780d1 ("scsi: storvsc: Fix a bug in the handling of SRB status flags") filtered SRB_STATUS_AUTOSENSE_VALID out effectively making the (SRB_STATUS_ABORTED | SRB_STATUS_AUTOSENSE_VALID) case a dead code. The logic from this branch (e.g. storvsc_device_scan() call) is still required, fix the check. Fixes: 3209f9d780d1 ("scsi: storvsc: Fix a bug in the handling of SRB status flags") Signed-off-by: Vitaly Kuznetsov Acked-by: K. Y. Srinivasan Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/storvsc_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 3fba42ad9fb8..0f636cc4c809 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -889,8 +889,9 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, do_work = true; process_err_fn = storvsc_remove_lun; break; - case (SRB_STATUS_ABORTED | SRB_STATUS_AUTOSENSE_VALID): - if ((asc == 0x2a) && (ascq == 0x9)) { + case SRB_STATUS_ABORTED: + if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID && + (asc == 0x2a) && (ascq == 0x9)) { do_work = true; process_err_fn = storvsc_device_scan; /* From e760edfb7ba763e48b870025ab0b5edb4af85089 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 4 Mar 2016 10:41:49 +0100 Subject: [PATCH 143/797] be2iscsi: set the boot_kset pointer to NULL in case of failure commit 84bd64993f916bcf86270c67686ecf4cea7b8933 upstream. In beiscsi_setup_boot_info(), the boot_kset pointer should be set to NULL in case of failure otherwise an invalid pointer dereference may occur later. Signed-off-by: Maurizio Lombardi Reviewed-by: Johannes Thumshirn Reviewed-by: Jitendra Bhivare Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/be2iscsi/be_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index fe0c5143f8e6..758f76e88704 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -4470,6 +4470,7 @@ static int beiscsi_setup_boot_info(struct beiscsi_hba *phba) scsi_host_put(phba->shost); free_kset: iscsi_boot_destroy_kset(phba->boot_kset); + phba->boot_kset = NULL; return -ENOMEM; } From b9d26f81ae9fa23988a5b503455f300bcef292fb Mon Sep 17 00:00:00 2001 From: Alan Date: Mon, 15 Feb 2016 18:53:15 +0000 Subject: [PATCH 144/797] aic7xxx: Fix queue depth handling commit 5a51a7abca133860a6f4429655a9eda3c4afde32 upstream. We were setting the queue depth correctly, then setting it back to two. If you hit this as a bisection point then please send me an email as it would imply we've been hiding other bugs with this one. Signed-off-by: Alan Cox Reviewed-by: Hannes Reinicke Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aic7xxx/aic7xxx_osm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c index b846a4683562..fc6a83188c1e 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c @@ -1336,6 +1336,7 @@ ahc_platform_set_tags(struct ahc_softc *ahc, struct scsi_device *sdev, case AHC_DEV_Q_TAGGED: scsi_change_queue_depth(sdev, dev->openings + dev->active); + break; default: /* * We allow the OS to queue 2 untagged transactions to From 83250e67a9d65a662c5fdec6075f3c2dee0e79a0 Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Wed, 6 Jan 2016 16:03:41 -0700 Subject: [PATCH 145/797] libnvdimm: Fix security issue with DSM IOCTL. commit 07accfa9d1a8bac8262f6d24a94a54d2d1f35149 upstream. Code attempts to prevent certain IOCTL DSM from being called when device is opened read only. This security feature can be trivially overcome by changing the size portion of the ioctl_command which isn't used. Check only the _IOC_NR (i.e. the command). Signed-off-by: Jerry Hoemann Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/bus.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 7e2c43f701bc..496b9b662dc6 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -513,10 +513,10 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, /* fail write commands (when read-only) */ if (read_only) - switch (ioctl_cmd) { - case ND_IOCTL_VENDOR: - case ND_IOCTL_SET_CONFIG_DATA: - case ND_IOCTL_ARS_START: + switch (cmd) { + case ND_CMD_VENDOR: + case ND_CMD_SET_CONFIG_DATA: + case ND_CMD_ARS_START: dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n", nvdimm ? nvdimm_cmd_name(cmd) : nvdimm_bus_cmd_name(cmd)); From 15c3af026b6e66ca2d9566d862af7e2fd7943a40 Mon Sep 17 00:00:00 2001 From: DingXiang Date: Tue, 2 Feb 2016 12:29:18 +0800 Subject: [PATCH 146/797] dm snapshot: disallow the COW and origin devices from being identical commit 4df2bf466a9c9c92f40d27c4aa9120f4e8227bfc upstream. Otherwise loading a "snapshot" table using the same device for the origin and COW devices, e.g.: echo "0 20971520 snapshot 253:3 253:3 P 8" | dmsetup create snap will trigger: BUG: unable to handle kernel NULL pointer dereference at 0000000000000098 [ 1958.979934] IP: [] dm_exception_store_set_chunk_size+0x7a/0x110 [dm_snapshot] [ 1958.989655] PGD 0 [ 1958.991903] Oops: 0000 [#1] SMP ... [ 1959.059647] CPU: 9 PID: 3556 Comm: dmsetup Tainted: G IO 4.5.0-rc5.snitm+ #150 ... [ 1959.083517] task: ffff8800b9660c80 ti: ffff88032a954000 task.ti: ffff88032a954000 [ 1959.091865] RIP: 0010:[] [] dm_exception_store_set_chunk_size+0x7a/0x110 [dm_snapshot] [ 1959.104295] RSP: 0018:ffff88032a957b30 EFLAGS: 00010246 [ 1959.110219] RAX: 0000000000000000 RBX: 0000000000000008 RCX: 0000000000000001 [ 1959.118180] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff880329334a00 [ 1959.126141] RBP: ffff88032a957b50 R08: 0000000000000000 R09: 0000000000000001 [ 1959.134102] R10: 000000000000000a R11: f000000000000000 R12: ffff880330884d80 [ 1959.142061] R13: 0000000000000008 R14: ffffc90001c13088 R15: ffff880330884d80 [ 1959.150021] FS: 00007f8926ba3840(0000) GS:ffff880333440000(0000) knlGS:0000000000000000 [ 1959.159047] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1959.165456] CR2: 0000000000000098 CR3: 000000032f48b000 CR4: 00000000000006e0 [ 1959.173415] Stack: [ 1959.175656] ffffc90001c13040 ffff880329334a00 ffff880330884ed0 ffff88032a957bdc [ 1959.183946] ffff88032a957bb8 ffffffffa040f225 ffff880329334a30 ffff880300000000 [ 1959.192233] ffffffffa04133e0 ffff880329334b30 0000000830884d58 00000000569c58cf [ 1959.200521] Call Trace: [ 1959.203248] [] dm_exception_store_create+0x1d5/0x240 [dm_snapshot] [ 1959.211986] [] snapshot_ctr+0x140/0x630 [dm_snapshot] [ 1959.219469] [] ? dm_split_args+0x64/0x150 [dm_mod] [ 1959.226656] [] dm_table_add_target+0x177/0x440 [dm_mod] [ 1959.234328] [] table_load+0x143/0x370 [dm_mod] [ 1959.241129] [] ? retrieve_status+0x1b0/0x1b0 [dm_mod] [ 1959.248607] [] ctl_ioctl+0x255/0x4d0 [dm_mod] [ 1959.255307] [] ? memzero_explicit+0x12/0x20 [ 1959.261816] [] dm_ctl_ioctl+0x13/0x20 [dm_mod] [ 1959.268615] [] do_vfs_ioctl+0xa6/0x5c0 [ 1959.274637] [] ? __audit_syscall_entry+0xaf/0x100 [ 1959.281726] [] ? do_audit_syscall_entry+0x66/0x70 [ 1959.288814] [] SyS_ioctl+0x79/0x90 [ 1959.294450] [] entry_SYSCALL_64_fastpath+0x12/0x71 ... [ 1959.323277] RIP [] dm_exception_store_set_chunk_size+0x7a/0x110 [dm_snapshot] [ 1959.333090] RSP [ 1959.336978] CR2: 0000000000000098 [ 1959.344121] ---[ end trace b049991ccad1169e ]--- Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1195899 Signed-off-by: Ding Xiang Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-snap.c | 9 +++++++++ drivers/md/dm-table.c | 36 +++++++++++++++++++++++------------ include/linux/device-mapper.h | 2 ++ 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 61f184ad081c..e108deebbaaa 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1106,6 +1106,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) int i; int r = -EINVAL; char *origin_path, *cow_path; + dev_t origin_dev, cow_dev; unsigned args_used, num_flush_bios = 1; fmode_t origin_mode = FMODE_READ; @@ -1136,11 +1137,19 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->error = "Cannot get origin device"; goto bad_origin; } + origin_dev = s->origin->bdev->bd_dev; cow_path = argv[0]; argv++; argc--; + cow_dev = dm_get_dev_t(cow_path); + if (cow_dev && cow_dev == origin_dev) { + ti->error = "COW device cannot be the same as origin device"; + r = -EINVAL; + goto bad_cow; + } + r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow); if (r) { ti->error = "Cannot get COW device"; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 061152a43730..cb5d0daf53bb 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -364,6 +364,26 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, return 0; } +/* + * Convert the path to a device + */ +dev_t dm_get_dev_t(const char *path) +{ + dev_t uninitialized_var(dev); + struct block_device *bdev; + + bdev = lookup_bdev(path); + if (IS_ERR(bdev)) + dev = name_to_dev_t(path); + else { + dev = bdev->bd_dev; + bdput(bdev); + } + + return dev; +} +EXPORT_SYMBOL_GPL(dm_get_dev_t); + /* * Add a device to the list, or just increment the usage count if * it's already present. @@ -372,23 +392,15 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, struct dm_dev **result) { int r; - dev_t uninitialized_var(dev); + dev_t dev; struct dm_dev_internal *dd; struct dm_table *t = ti->table; - struct block_device *bdev; BUG_ON(!t); - /* convert the path to a device */ - bdev = lookup_bdev(path); - if (IS_ERR(bdev)) { - dev = name_to_dev_t(path); - if (!dev) - return -ENODEV; - } else { - dev = bdev->bd_dev; - bdput(bdev); - } + dev = dm_get_dev_t(path); + if (!dev) + return -ENODEV; dd = find_device(&t->devices, dev); if (!dd) { diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index ec1c61c87d89..899ab9f8549e 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -124,6 +124,8 @@ struct dm_dev { char name[16]; }; +dev_t dm_get_dev_t(const char *path); + /* * Constructors should call these functions to ensure destination devices * are opened/closed correctly. From 5504a47088034573d0839120751b1aec46204aab Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 5 Feb 2016 08:49:01 -0500 Subject: [PATCH 147/797] dm: fix excessive dm-mq context switching commit 6acfe68bac7e6f16dc312157b1fa6e2368985013 upstream. Request-based DM's blk-mq support (dm-mq) was reported to be 50% slower than if an underlying null_blk device were used directly. One of the reasons for this drop in performance is that blk_insert_clone_request() was calling blk_mq_insert_request() with @async=true. This forced the use of kblockd_schedule_delayed_work_on() to run the blk-mq hw queues which ushered in ping-ponging between process context (fio in this case) and kblockd's kworker to submit the cloned request. The ftrace function_graph tracer showed: kworker-2013 => fio-12190 fio-12190 => kworker-2013 ... kworker-2013 => fio-12190 fio-12190 => kworker-2013 ... Fixing blk_insert_clone_request()'s blk_mq_insert_request() call to _not_ use kblockd to submit the cloned requests isn't enough to eliminate the observed context switches. In addition to this dm-mq specific blk-core fix, there are 2 DM core fixes to dm-mq that (when paired with the blk-core fix) completely eliminate the observed context switching: 1) don't blk_mq_run_hw_queues in blk-mq request completion Motivated by desire to reduce overhead of dm-mq, punting to kblockd just increases context switches. In my testing against a really fast null_blk device there was no benefit to running blk_mq_run_hw_queues() on completion (and no other blk-mq driver does this). So hopefully this change doesn't induce the need for yet another revert like commit 621739b00e16ca2d ! 2) use blk_mq_complete_request() in dm_complete_request() blk_complete_request() doesn't offer the traditional q->mq_ops vs .request_fn branching pattern that other historic block interfaces do (e.g. blk_get_request). Using blk_mq_complete_request() for blk-mq requests is important for performance. It should be noted that, like blk_complete_request(), blk_mq_complete_request() doesn't natively handle partial completions -- but the request-based DM-multipath target does provide the required partial completion support by dm.c:end_clone_bio() triggering requeueing of the request via dm-mpath.c:multipath_end_io()'s return of DM_ENDIO_REQUEUE. dm-mq fix #2 is _much_ more important than #1 for eliminating the context switches. Before: cpu : usr=15.10%, sys=59.39%, ctx=7905181, majf=0, minf=475 After: cpu : usr=20.60%, sys=79.35%, ctx=2008, majf=0, minf=472 With these changes multithreaded async read IOPs improved from ~950K to ~1350K for this dm-mq stacked on null_blk test-case. The raw read IOPs of the underlying null_blk device for the same workload is ~1950K. Fixes: 7fb4898e0 ("block: add blk-mq support to blk_insert_cloned_request()") Fixes: bfebd1cdb ("dm: add full blk-mq support to request-based DM") Reported-by: Sagi Grimberg Signed-off-by: Mike Snitzer Acked-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 2 +- drivers/md/dm.c | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 33e2f62d5062..f8e64cac981a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2189,7 +2189,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) if (q->mq_ops) { if (blk_queue_io_stat(q)) blk_account_io_start(rq, true); - blk_mq_insert_request(rq, false, true, true); + blk_mq_insert_request(rq, false, true, false); return 0; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index dd834927bc66..887c6a11885b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1109,12 +1109,8 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) * back into ->request_fn() could deadlock attempting to grab the * queue lock again. */ - if (run_queue) { - if (md->queue->mq_ops) - blk_mq_run_hw_queues(md->queue, true); - else - blk_run_queue_async(md->queue); - } + if (!md->queue->mq_ops && run_queue) + blk_run_queue_async(md->queue); /* * dm_put() must be at the end of this function. See the comment above @@ -1336,7 +1332,10 @@ static void dm_complete_request(struct request *rq, int error) struct dm_rq_target_io *tio = tio_from_request(rq); tio->error = error; - blk_complete_request(rq); + if (!rq->q->mq_ops) + blk_complete_request(rq); + else + blk_mq_complete_request(rq, error); } /* From 291e2b3900da45dcc9c58e264f960fcb822bd07a Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 1 Mar 2016 10:58:44 +0000 Subject: [PATCH 148/797] dm thin metadata: don't issue prefetches if a transaction abort has failed commit 2eae9e4489b4cf83213fa3bd508b5afca3f01780 upstream. If a transaction abort has failed then we can no longer use the metadata device. Typically this happens if the superblock is unreadable. This fix addresses a crash seen during metadata device failure testing. Fixes: 8a01a6af75 ("dm thin: prefetch missing metadata pages") Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin-metadata.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index c219a053c7f6..911ada643364 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1943,5 +1943,8 @@ bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd) void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd) { - dm_tm_issue_prefetches(pmd->tm); + down_read(&pmd->root_lock); + if (!pmd->fail_io) + dm_tm_issue_prefetches(pmd->tm); + up_read(&pmd->root_lock); } From 7f47aea487df2dc281c7f64ff7430aff3b260af0 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 10 Mar 2016 16:20:58 +0000 Subject: [PATCH 149/797] dm cache: make sure every metadata function checks fail_io commit d14fcf3dd79c0b8a8d0ba469c44a6b04f3a1403b upstream. Otherwise operations may be attempted that will only ever go on to crash (since the metadata device is either missing or unreliable if 'fail_io' is set). Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-metadata.c | 98 ++++++++++++++++++++-------------- drivers/md/dm-cache-metadata.h | 4 +- drivers/md/dm-cache-target.c | 12 ++++- 3 files changed, 71 insertions(+), 43 deletions(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index f6543f3a970f..27f2ef300f8b 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -867,19 +867,40 @@ static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd, return 0; } -#define WRITE_LOCK(cmd) \ - if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) \ +#define WRITE_LOCK(cmd) \ + down_write(&cmd->root_lock); \ + if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \ + up_write(&cmd->root_lock); \ return -EINVAL; \ - down_write(&cmd->root_lock) + } #define WRITE_LOCK_VOID(cmd) \ - if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) \ + down_write(&cmd->root_lock); \ + if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \ + up_write(&cmd->root_lock); \ return; \ - down_write(&cmd->root_lock) + } #define WRITE_UNLOCK(cmd) \ up_write(&cmd->root_lock) +#define READ_LOCK(cmd) \ + down_read(&cmd->root_lock); \ + if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \ + up_read(&cmd->root_lock); \ + return -EINVAL; \ + } + +#define READ_LOCK_VOID(cmd) \ + down_read(&cmd->root_lock); \ + if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \ + up_read(&cmd->root_lock); \ + return; \ + } + +#define READ_UNLOCK(cmd) \ + up_read(&cmd->root_lock) + int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size) { int r; @@ -1015,22 +1036,20 @@ int dm_cache_load_discards(struct dm_cache_metadata *cmd, { int r; - down_read(&cmd->root_lock); + READ_LOCK(cmd); r = __load_discards(cmd, fn, context); - up_read(&cmd->root_lock); + READ_UNLOCK(cmd); return r; } -dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd) +int dm_cache_size(struct dm_cache_metadata *cmd, dm_cblock_t *result) { - dm_cblock_t r; + READ_LOCK(cmd); + *result = cmd->cache_blocks; + READ_UNLOCK(cmd); - down_read(&cmd->root_lock); - r = cmd->cache_blocks; - up_read(&cmd->root_lock); - - return r; + return 0; } static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock) @@ -1188,9 +1207,9 @@ int dm_cache_load_mappings(struct dm_cache_metadata *cmd, { int r; - down_read(&cmd->root_lock); + READ_LOCK(cmd); r = __load_mappings(cmd, policy, fn, context); - up_read(&cmd->root_lock); + READ_UNLOCK(cmd); return r; } @@ -1215,18 +1234,18 @@ static int __dump_mappings(struct dm_cache_metadata *cmd) void dm_cache_dump(struct dm_cache_metadata *cmd) { - down_read(&cmd->root_lock); + READ_LOCK_VOID(cmd); __dump_mappings(cmd); - up_read(&cmd->root_lock); + READ_UNLOCK(cmd); } int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd) { int r; - down_read(&cmd->root_lock); + READ_LOCK(cmd); r = cmd->changed; - up_read(&cmd->root_lock); + READ_UNLOCK(cmd); return r; } @@ -1276,9 +1295,9 @@ int dm_cache_set_dirty(struct dm_cache_metadata *cmd, void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd, struct dm_cache_statistics *stats) { - down_read(&cmd->root_lock); + READ_LOCK_VOID(cmd); *stats = cmd->stats; - up_read(&cmd->root_lock); + READ_UNLOCK(cmd); } void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd, @@ -1312,9 +1331,9 @@ int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd, { int r = -EINVAL; - down_read(&cmd->root_lock); + READ_LOCK(cmd); r = dm_sm_get_nr_free(cmd->metadata_sm, result); - up_read(&cmd->root_lock); + READ_UNLOCK(cmd); return r; } @@ -1324,9 +1343,9 @@ int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd, { int r = -EINVAL; - down_read(&cmd->root_lock); + READ_LOCK(cmd); r = dm_sm_get_nr_blocks(cmd->metadata_sm, result); - up_read(&cmd->root_lock); + READ_UNLOCK(cmd); return r; } @@ -1417,7 +1436,13 @@ int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy * int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result) { - return blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result); + int r; + + READ_LOCK(cmd); + r = blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result); + READ_UNLOCK(cmd); + + return r; } void dm_cache_metadata_set_read_only(struct dm_cache_metadata *cmd) @@ -1440,10 +1465,7 @@ int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd) struct dm_block *sblock; struct cache_disk_superblock *disk_super; - /* - * We ignore fail_io for this function. - */ - down_write(&cmd->root_lock); + WRITE_LOCK(cmd); set_bit(NEEDS_CHECK, &cmd->flags); r = superblock_lock(cmd, &sblock); @@ -1458,19 +1480,17 @@ int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd) dm_bm_unlock(sblock); out: - up_write(&cmd->root_lock); + WRITE_UNLOCK(cmd); return r; } -bool dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd) +int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result) { - bool needs_check; + READ_LOCK(cmd); + *result = !!test_bit(NEEDS_CHECK, &cmd->flags); + READ_UNLOCK(cmd); - down_read(&cmd->root_lock); - needs_check = !!test_bit(NEEDS_CHECK, &cmd->flags); - up_read(&cmd->root_lock); - - return needs_check; + return 0; } int dm_cache_metadata_abort(struct dm_cache_metadata *cmd) diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h index 2ffee21f318d..8528744195e5 100644 --- a/drivers/md/dm-cache-metadata.h +++ b/drivers/md/dm-cache-metadata.h @@ -66,7 +66,7 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd); * origin blocks to map to. */ int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size); -dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd); +int dm_cache_size(struct dm_cache_metadata *cmd, dm_cblock_t *result); int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, sector_t discard_block_size, @@ -137,7 +137,7 @@ int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy * */ int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result); -bool dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd); +int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result); int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd); void dm_cache_metadata_set_read_only(struct dm_cache_metadata *cmd); void dm_cache_metadata_set_read_write(struct dm_cache_metadata *cmd); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 2fd4c8296144..515f83e7d9ab 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -987,9 +987,14 @@ static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mod static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode) { - bool needs_check = dm_cache_metadata_needs_check(cache->cmd); + bool needs_check; enum cache_metadata_mode old_mode = get_cache_mode(cache); + if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) { + DMERR("unable to read needs_check flag, setting failure mode"); + new_mode = CM_FAIL; + } + if (new_mode == CM_WRITE && needs_check) { DMERR("%s: unable to switch cache to write mode until repaired.", cache_device_name(cache)); @@ -3513,6 +3518,7 @@ static void cache_status(struct dm_target *ti, status_type_t type, char buf[BDEVNAME_SIZE]; struct cache *cache = ti->private; dm_cblock_t residency; + bool needs_check; switch (type) { case STATUSTYPE_INFO: @@ -3586,7 +3592,9 @@ static void cache_status(struct dm_target *ti, status_type_t type, else DMEMIT("rw "); - if (dm_cache_metadata_needs_check(cache->cmd)) + r = dm_cache_metadata_needs_check(cache->cmd, &needs_check); + + if (r || needs_check) DMEMIT("needs_check "); else DMEMIT("- "); From 8907d8a6fd3f21992283efd67002aea719396f2b Mon Sep 17 00:00:00 2001 From: "Bryn M. Reeves" Date: Mon, 14 Mar 2016 17:04:34 -0400 Subject: [PATCH 150/797] dm: fix rq_end_stats() NULL pointer in dm_requeue_original_request() commit 98dbc9c6c61698792e3a66f32f3bf066201d42d7 upstream. An "old" (.request_fn) DM 'struct request' stores a pointer to the associated 'struct dm_rq_target_io' in rq->special. dm_requeue_original_request(), previously named dm_requeue_unmapped_original_request(), called dm_unprep_request() to reset rq->special to NULL. But rq_end_stats() would go on to hit a NULL pointer deference because its call to tio_from_request() returned NULL. Fix this by calling rq_end_stats() _before_ dm_unprep_request() Signed-off-by: Bryn M. Reeves Signed-off-by: Mike Snitzer Fixes: e262f34741 ("dm stats: add support for request-based DM devices") Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 887c6a11885b..c338aebb4ccd 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1210,9 +1210,9 @@ static void dm_requeue_original_request(struct mapped_device *md, { int rw = rq_data_dir(rq); + rq_end_stats(md, rq); dm_unprep_request(rq); - rq_end_stats(md, rq); if (!rq->q->mq_ops) old_requeue_request(rq); else { From 951822beba268f76af9463c69095284df20311f6 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 10 Feb 2016 11:33:18 +0100 Subject: [PATCH 151/797] usb: retry reset if a device times out commit 264904ccc33c604d4b3141bbd33808152dfac45b upstream. Some devices I got show an inability to operate right after power on if they are already connected. They are beyond recovery if the descriptors are requested multiple times. So in case of a timeout we rather bail early and reset again. But it must be done only on the first loop lest we get into a reset/time out spiral that can be overcome with a retry. This patch is a rework of a patch that fell through the cracks. http://www.spinics.net/lists/linux-usb/msg103263.html Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 1560f3f3e756..a44db86018c7 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4426,7 +4426,13 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, r = -EPROTO; break; } - if (r == 0) + /* + * Some devices time out if they are powered on + * when already connected. They need a second + * reset. But only on the first attempt, + * lest we get into a time out/reset loop + */ + if (r == 0 || (r == -ETIMEDOUT && j == 0)) break; } udev->descriptor.bMaxPacketSize0 = From aa563cf3bc7ef570f449501ff7ab12f3b7080ff0 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 17 Feb 2016 11:52:43 +0100 Subject: [PATCH 152/797] usb: hub: fix a typo in hub_port_init() leading to wrong logic commit 0d5ce778c43bf888328231bcdce05d5c860655aa upstream. A typo of j for i led to a logic bug. To rule out future confusion, the variable names are made meaningful. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a44db86018c7..2a274884c7ea 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4277,7 +4277,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, { struct usb_device *hdev = hub->hdev; struct usb_hcd *hcd = bus_to_hcd(hdev->bus); - int i, j, retval; + int retries, operations, retval, i; unsigned delay = HUB_SHORT_RESET_TIME; enum usb_device_speed oldspeed = udev->speed; const char *speed; @@ -4379,7 +4379,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, * first 8 bytes of the device descriptor to get the ep0 maxpacket * value. */ - for (i = 0; i < GET_DESCRIPTOR_TRIES; (++i, msleep(100))) { + for (retries = 0; retries < GET_DESCRIPTOR_TRIES; (++retries, msleep(100))) { bool did_new_scheme = false; if (use_new_scheme(udev, retry_counter)) { @@ -4406,7 +4406,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, * 255 is for WUSB devices, we actually need to use * 512 (WUSB1.0[4.8.1]). */ - for (j = 0; j < 3; ++j) { + for (operations = 0; operations < 3; ++operations) { buf->bMaxPacketSize0 = 0; r = usb_control_msg(udev, usb_rcvaddr0pipe(), USB_REQ_GET_DESCRIPTOR, USB_DIR_IN, @@ -4432,7 +4432,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, * reset. But only on the first attempt, * lest we get into a time out/reset loop */ - if (r == 0 || (r == -ETIMEDOUT && j == 0)) + if (r == 0 || (r == -ETIMEDOUT && retries == 0)) break; } udev->descriptor.bMaxPacketSize0 = @@ -4464,7 +4464,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, * authorization will assign the final address. */ if (udev->wusb == 0) { - for (j = 0; j < SET_ADDRESS_TRIES; ++j) { + for (operations = 0; operations < SET_ADDRESS_TRIES; ++operations) { retval = hub_set_address(udev, devnum); if (retval >= 0) break; From a85722c650265714c592d7ef99d277106dbb17bb Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 7 Mar 2016 20:11:52 +0100 Subject: [PATCH 153/797] USB: uas: Reduce can_queue to MAX_CMNDS commit 55ff8cfbc4e12a7d2187df523938cc671fbebdd1 upstream. The uas driver can never queue more then MAX_CMNDS (- 1) tags and tags are shared between luns, so there is no need to claim that we can_queue some random large number. Not claiming that we can_queue 65536 commands, fixes the uas driver failing to initialize while allocating the tag map with a "Page allocation failure (order 7)" error on systems which have been running for a while and thus have fragmented memory. Reported-and-tested-by: Yves-Alexis Perez Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 5c66d3f7a6d0..ce0cd6e20d4f 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -812,7 +812,7 @@ static struct scsi_host_template uas_host_template = { .slave_configure = uas_slave_configure, .eh_abort_handler = uas_eh_abort_handler, .eh_bus_reset_handler = uas_eh_bus_reset_handler, - .can_queue = 65536, /* Is there a limit on the _host_ ? */ + .can_queue = MAX_CMNDS, .this_id = -1, .sg_tablesize = SG_NONE, .skip_settle_delay = 1, From 1ea680abf7640c777396909102bc22915107cb5b Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 15 Mar 2016 10:14:04 +0100 Subject: [PATCH 154/797] USB: cdc-acm: more sanity checking commit 8835ba4a39cf53f705417b3b3a94eb067673f2c9 upstream. An attack has become available which pretends to be a quirky device circumventing normal sanity checks and crashes the kernel by an insufficient number of interfaces. This patch adds a check to the code path for quirky devices. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index fa4e23930614..d37fdcc3143c 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1114,6 +1114,9 @@ static int acm_probe(struct usb_interface *intf, if (quirks == NO_UNION_NORMAL) { data_interface = usb_ifnum_to_if(usb_dev, 1); control_interface = usb_ifnum_to_if(usb_dev, 0); + /* we would crash */ + if (!data_interface || !control_interface) + return -ENODEV; goto skip_normal_probe; } From 850631bedd3cb7f79cb32a456c9ad3a5f6e1d1f3 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Mon, 14 Mar 2016 10:42:38 -0400 Subject: [PATCH 155/797] USB: iowarrior: fix oops with malicious USB descriptors commit 4ec0ef3a82125efc36173062a50624550a900ae0 upstream. The iowarrior driver expects at least one valid endpoint. If given malicious descriptors that specify 0 for the number of endpoints, it will crash in the probe function. Ensure there is at least one endpoint on the interface before using it. The full report of this issue can be found here: http://seclists.org/bugtraq/2016/Mar/87 Reported-by: Ralf Spenneberg Signed-off-by: Josh Boyer Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index c6bfd13f6c92..1950e87b4219 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -787,6 +787,12 @@ static int iowarrior_probe(struct usb_interface *interface, iface_desc = interface->cur_altsetting; dev->product_id = le16_to_cpu(udev->descriptor.idProduct); + if (iface_desc->desc.bNumEndpoints < 1) { + dev_err(&interface->dev, "Invalid number of endpoints\n"); + retval = -EINVAL; + goto error; + } + /* set up the endpoint information */ for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; From b6c6426252e2653407811f46c883661955b9f5fa Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 16 Mar 2016 13:26:17 +0100 Subject: [PATCH 156/797] USB: usb_driver_claim_interface: add sanity checking commit 0b818e3956fc1ad976bee791eadcbb3b5fec5bfd upstream. Attacks that trick drivers into passing a NULL pointer to usb_driver_claim_interface() using forged descriptors are known. This thwarts them by sanity checking. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 56593a9a8726..2057d91d8336 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -502,11 +502,15 @@ static int usb_unbind_interface(struct device *dev) int usb_driver_claim_interface(struct usb_driver *driver, struct usb_interface *iface, void *priv) { - struct device *dev = &iface->dev; + struct device *dev; struct usb_device *udev; int retval = 0; int lpm_disable_error; + if (!iface) + return -ENODEV; + + dev = &iface->dev; if (dev->driver) return -EBUSY; From 9deac9454b7a5643a09829f4731276cea6697b72 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 31 Mar 2016 12:04:24 -0400 Subject: [PATCH 157/797] USB: mct_u232: add sanity checking in probe commit 4e9a0b05257f29cf4b75f3209243ed71614d062e upstream. An attack using the lack of sanity checking in probe is known. This patch checks for the existence of a second port. CVE-2016-3136 Signed-off-by: Oliver Neukum [johan: add error message ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mct_u232.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index fd707d6a10e2..89726f702202 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -376,14 +376,21 @@ static void mct_u232_msr_to_state(struct usb_serial_port *port, static int mct_u232_port_probe(struct usb_serial_port *port) { + struct usb_serial *serial = port->serial; struct mct_u232_private *priv; + /* check first to simplify error handling */ + if (!serial->port[1] || !serial->port[1]->interrupt_in_urb) { + dev_err(&port->dev, "expected endpoint missing\n"); + return -ENODEV; + } + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; /* Use second interrupt-in endpoint for reading. */ - priv->read_urb = port->serial->port[1]->interrupt_in_urb; + priv->read_urb = serial->port[1]->interrupt_in_urb; priv->read_urb->context = port; spin_lock_init(&priv->lock); From 4f6ad5b0d28c84030693fe21b308c0b711fa66f6 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 31 Mar 2016 12:04:26 -0400 Subject: [PATCH 158/797] USB: digi_acceleport: do sanity checking for the number of ports commit 5a07975ad0a36708c6b0a5b9fea1ff811d0b0c1f upstream. The driver can be crashed with devices that expose crafted descriptors with too few endpoints. See: http://seclists.org/bugtraq/2016/Mar/61 Signed-off-by: Oliver Neukum [johan: fix OOB endpoint check and add error messages ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/digi_acceleport.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index 12b0e67473ba..3df7b7ec178e 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -1251,8 +1251,27 @@ static int digi_port_init(struct usb_serial_port *port, unsigned port_num) static int digi_startup(struct usb_serial *serial) { + struct device *dev = &serial->interface->dev; struct digi_serial *serial_priv; int ret; + int i; + + /* check whether the device has the expected number of endpoints */ + if (serial->num_port_pointers < serial->type->num_ports + 1) { + dev_err(dev, "OOB endpoints missing\n"); + return -ENODEV; + } + + for (i = 0; i < serial->type->num_ports + 1 ; i++) { + if (!serial->port[i]->read_urb) { + dev_err(dev, "bulk-in endpoint missing\n"); + return -ENODEV; + } + if (!serial->port[i]->write_urb) { + dev_err(dev, "bulk-out endpoint missing\n"); + return -ENODEV; + } + } serial_priv = kzalloc(sizeof(*serial_priv), GFP_KERNEL); if (!serial_priv) From ca76906a7753052b00e491ba017393f9071b0406 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 31 Mar 2016 12:04:25 -0400 Subject: [PATCH 159/797] USB: cypress_m8: add endpoint sanity check commit c55aee1bf0e6b6feec8b2927b43f7a09a6d5f754 upstream. An attack using missing endpoints exists. CVE-2016-3137 Signed-off-by: Oliver Neukum Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cypress_m8.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c index 01bf53392819..244acb1299a9 100644 --- a/drivers/usb/serial/cypress_m8.c +++ b/drivers/usb/serial/cypress_m8.c @@ -447,6 +447,11 @@ static int cypress_generic_port_probe(struct usb_serial_port *port) struct usb_serial *serial = port->serial; struct cypress_private *priv; + if (!port->interrupt_out_urb || !port->interrupt_in_urb) { + dev_err(&port->dev, "required endpoint is missing\n"); + return -ENODEV; + } + priv = kzalloc(sizeof(struct cypress_private), GFP_KERNEL); if (!priv) return -ENOMEM; @@ -606,12 +611,6 @@ static int cypress_open(struct tty_struct *tty, struct usb_serial_port *port) cypress_set_termios(tty, port, &priv->tmp_termios); /* setup the port and start reading from the device */ - if (!port->interrupt_in_urb) { - dev_err(&port->dev, "%s - interrupt_in_urb is empty!\n", - __func__); - return -1; - } - usb_fill_int_urb(port->interrupt_in_urb, serial->dev, usb_rcvintpipe(serial->dev, port->interrupt_in_endpointAddress), port->interrupt_in_urb->transfer_buffer, From df3dddcc643832fc4fd133b8956be64b37841b67 Mon Sep 17 00:00:00 2001 From: Martyn Welch Date: Tue, 29 Mar 2016 17:47:29 +0100 Subject: [PATCH 160/797] USB: serial: cp210x: Adding GE Healthcare Device ID commit cddc9434e3dcc37a85c4412fb8e277d3a582e456 upstream. The CP2105 is used in the GE Healthcare Remote Alarm Box, with the Manufacturer ID of 0x1901 and Product ID of 0x0194. Signed-off-by: Martyn Welch Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 7a76fe4c2f9e..bdc0f2f24f19 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -164,6 +164,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */ { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */ { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */ + { USB_DEVICE(0x1901, 0x0194) }, /* GE Healthcare Remote Alarm Box */ { USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */ { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ { USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */ From b3e0983cb9fc0b1e876fd51f186f8fa887089261 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Thu, 10 Mar 2016 09:48:52 -0500 Subject: [PATCH 161/797] USB: serial: ftdi_sio: Add support for ICP DAS I-756xU devices commit ea6db90e750328068837bed34cb1302b7a177339 upstream. A Fedora user reports that the ftdi_sio driver works properly for the ICP DAS I-7561U device. Further, the user manual for these devices instructs users to load the driver and add the ids using the sysfs interface. Add support for these in the driver directly so that the devices work out of the box instead of needing manual configuration. Reported-by: Signed-off-by: Josh Boyer Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 4 ++++ drivers/usb/serial/ftdi_sio_ids.h | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 8c660ae401d8..b61f12160d37 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1004,6 +1004,10 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_DISPLAY_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_LITE_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ANALOG_PID) }, + /* ICP DAS I-756xU devices */ + { USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) }, + { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) }, + { USB_DEVICE(ICPDAS_VID, ICPDAS_I7563U_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index a84df2513994..c5d6c1e73e8e 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -871,6 +871,14 @@ #define NOVITUS_VID 0x1a28 #define NOVITUS_BONO_E_PID 0x6010 +/* + * ICPDAS I-756*U devices + */ +#define ICPDAS_VID 0x1b5c +#define ICPDAS_I7560U_PID 0x0103 +#define ICPDAS_I7561U_PID 0x0104 +#define ICPDAS_I7563U_PID 0x0105 + /* * RT Systems programming cables for various ham radios */ From 49102971470cc282f50f674b4aa5c4adabeb281d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 7 Apr 2016 12:09:17 +0200 Subject: [PATCH 162/797] USB: option: add "D-Link DWM-221 B1" device id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d48d5691ebf88a15d95ba96486917ffc79256536 upstream. Thomas reports: "Windows: 00 diagnostics 01 modem 02 at-port 03 nmea 04 nic Linux: T: Bus=02 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2001 ProdID=7e19 Rev=02.32 S: Manufacturer=Mobile Connect S: Product=Mobile Connect S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 5 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage" Reported-by: Thomas Schäfer Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 348e19834b83..c6f497f16526 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1818,6 +1818,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d02, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x00, 0x00) }, + { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e19, 0xff), /* D-Link DWM-221 B1 */ + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ From 5cede226daa83e26c4ef21773a75f535927b935d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 22 Jan 2016 08:53:55 -0200 Subject: [PATCH 163/797] pwc: Add USB id for Philips Spc880nc webcam commit 7445e45d19a09e5269dc85f17f9635be29d2f76c upstream. SPC 880NC PC camera discussions: http://www.pclinuxos.com/forum/index.php/topic,135688.0.html Reported-by: Kikim Signed-off-by: Hans de Goede Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/pwc/pwc-if.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/media/usb/pwc/pwc-if.c b/drivers/media/usb/pwc/pwc-if.c index b79c36fd8cd2..58f23bcfe94e 100644 --- a/drivers/media/usb/pwc/pwc-if.c +++ b/drivers/media/usb/pwc/pwc-if.c @@ -91,6 +91,7 @@ static const struct usb_device_id pwc_device_table [] = { { USB_DEVICE(0x0471, 0x0312) }, { USB_DEVICE(0x0471, 0x0313) }, /* the 'new' 720K */ { USB_DEVICE(0x0471, 0x0329) }, /* Philips SPC 900NC PC Camera */ + { USB_DEVICE(0x0471, 0x032C) }, /* Philips SPC 880NC PC Camera */ { USB_DEVICE(0x069A, 0x0001) }, /* Askey */ { USB_DEVICE(0x046D, 0x08B0) }, /* Logitech QuickCam Pro 3000 */ { USB_DEVICE(0x046D, 0x08B1) }, /* Logitech QuickCam Notebook Pro */ @@ -811,6 +812,11 @@ static int usb_pwc_probe(struct usb_interface *intf, const struct usb_device_id name = "Philips SPC 900NC webcam"; type_id = 740; break; + case 0x032C: + PWC_INFO("Philips SPC 880NC USB webcam detected.\n"); + name = "Philips SPC 880NC webcam"; + type_id = 740; + break; default: return -ENODEV; break; From fbd40d7beef0b17624bc1f838f4d44dfa4b0326b Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Mon, 14 Mar 2016 09:33:40 -0700 Subject: [PATCH 164/797] Input: powermate - fix oops with malicious USB descriptors commit 9c6ba456711687b794dcf285856fc14e2c76074f upstream. The powermate driver expects at least one valid USB endpoint in its probe function. If given malicious descriptors that specify 0 for the number of endpoints, it will crash. Validate the number of endpoints on the interface before using them. The full report for this issue can be found here: http://seclists.org/bugtraq/2016/Mar/85 Reported-by: Ralf Spenneberg Signed-off-by: Josh Boyer Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/powermate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/misc/powermate.c b/drivers/input/misc/powermate.c index 63b539d3daba..84909a12ff36 100644 --- a/drivers/input/misc/powermate.c +++ b/drivers/input/misc/powermate.c @@ -307,6 +307,9 @@ static int powermate_probe(struct usb_interface *intf, const struct usb_device_i int error = -ENOMEM; interface = intf->cur_altsetting; + if (interface->desc.bNumEndpoints < 1) + return -EINVAL; + endpoint = &interface->endpoint[0].desc; if (!usb_endpoint_is_int_in(endpoint)) return -EIO; From 57f6ad5f1580a5a06c573fb15ed6dcf701e037f6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 15 Mar 2016 12:09:10 +0100 Subject: [PATCH 165/797] ALSA: usb-audio: Fix NULL dereference in create_fixed_stream_quirk() commit 0f886ca12765d20124bd06291c82951fd49a33be upstream. create_fixed_stream_quirk() may cause a NULL-pointer dereference by accessing the non-existing endpoint when a USB device with a malformed USB descriptor is used. This patch avoids it simply by adding a sanity check of bNumEndpoints before the accesses. Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=971125 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index c458d60d5030..f2e4eebdf76d 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -180,6 +180,12 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, } alts = &iface->altsetting[fp->altset_idx]; altsd = get_iface_desc(alts); + if (altsd->bNumEndpoints < 1) { + kfree(fp); + kfree(rate_table); + return -EINVAL; + } + fp->protocol = altsd->bInterfaceProtocol; if (fp->datainterval == 0) From f9f026d3903957ffe515ab47ea5df421dafc47cb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 15 Mar 2016 15:20:58 +0100 Subject: [PATCH 166/797] ALSA: usb-audio: Add sanity checks for endpoint accesses commit 447d6275f0c21f6cc97a88b3a0c601436a4cdf2a upstream. Add some sanity check codes before actually accessing the endpoint via get_endpoint() in order to avoid the invalid access through a malformed USB descriptor. Mostly just checking bNumEndpoints, but in one place (snd_microii_spdif_default_get()), the validity of iface and altsetting index is checked as well. Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=971125 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/clock.c | 2 ++ sound/usb/endpoint.c | 3 +++ sound/usb/mixer_quirks.c | 4 ++++ sound/usb/pcm.c | 2 ++ 4 files changed, 11 insertions(+) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 2ed260b10f6d..7ccbcaf6a147 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -285,6 +285,8 @@ static int set_sample_rate_v1(struct snd_usb_audio *chip, int iface, unsigned char data[3]; int err, crate; + if (get_iface_desc(alts)->bNumEndpoints < 1) + return -EINVAL; ep = get_endpoint(alts, 0)->bEndpointAddress; /* if endpoint doesn't have sampling rate control, bail out */ diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 7b1cb365ffab..c07a7eda42a2 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -438,6 +438,9 @@ static void snd_complete_urb(struct urb *urb) * * New endpoints will be added to chip->ep_list and must be freed by * calling snd_usb_endpoint_free(). + * + * For SND_USB_ENDPOINT_TYPE_SYNC, the caller needs to guarantee that + * bNumEndpoints > 1 beforehand. */ struct snd_usb_endpoint *snd_usb_add_endpoint(struct snd_usb_audio *chip, struct usb_host_interface *alts, diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 279025650568..f6c3bf79af9a 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -1519,7 +1519,11 @@ static int snd_microii_spdif_default_get(struct snd_kcontrol *kcontrol, /* use known values for that card: interface#1 altsetting#1 */ iface = usb_ifnum_to_if(chip->dev, 1); + if (!iface || iface->num_altsetting < 2) + return -EINVAL; alts = &iface->altsetting[1]; + if (get_iface_desc(alts)->bNumEndpoints < 1) + return -EINVAL; ep = get_endpoint(alts, 0)->bEndpointAddress; err = snd_usb_ctl_msg(chip->dev, diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 9245f52d43bd..44d178ee9177 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -159,6 +159,8 @@ static int init_pitch_v1(struct snd_usb_audio *chip, int iface, unsigned char data[1]; int err; + if (get_iface_desc(alts)->bNumEndpoints < 1) + return -EINVAL; ep = get_endpoint(alts, 0)->bEndpointAddress; data[0] = 1; From 94bfaf24e6ba9a789fa0aa50fb4b7d228f8d3cff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Victor=20Cl=C3=A9ment?= Date: Sat, 19 Mar 2016 13:17:42 +0100 Subject: [PATCH 167/797] ALSA: usb-audio: add Microsoft HD-5001 to quirks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0ef21100ae912f76ed89f76ecd894f4ffb3689c1 upstream. The Microsoft HD-5001 webcam microphone does not support sample rate reading as the HD-5000 one. This results in dmesg errors and sound hanging with pulseaudio. Signed-off-by: Victor Clément Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index f2e4eebdf76d..2967242a5716 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1127,6 +1127,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) switch (chip->usb_id) { case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema */ case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */ + case USB_ID(0x045E, 0x076E): /* MS Lifecam HD-5001 */ case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */ case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */ case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */ From 4d073cfdf7f685628485d9692623c668b173bb60 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 15 Mar 2016 12:14:49 +0100 Subject: [PATCH 168/797] ALSA: usb-audio: Minor code cleanup in create_fixed_stream_quirk() commit 902eb7fd1e4af3ac69b9b30f8373f118c92b9729 upstream. Just a minor code cleanup: unify the error paths. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 2967242a5716..f9263de8b9a2 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -167,23 +167,18 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, stream = (fp->endpoint & USB_DIR_IN) ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; err = snd_usb_add_audio_stream(chip, stream, fp); - if (err < 0) { - kfree(fp); - kfree(rate_table); - return err; - } + if (err < 0) + goto error; if (fp->iface != get_iface_desc(&iface->altsetting[0])->bInterfaceNumber || fp->altset_idx >= iface->num_altsetting) { - kfree(fp); - kfree(rate_table); - return -EINVAL; + err = -EINVAL; + goto error; } alts = &iface->altsetting[fp->altset_idx]; altsd = get_iface_desc(alts); if (altsd->bNumEndpoints < 1) { - kfree(fp); - kfree(rate_table); - return -EINVAL; + err = -EINVAL; + goto error; } fp->protocol = altsd->bInterfaceProtocol; @@ -196,6 +191,11 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, snd_usb_init_pitch(chip, fp->iface, alts, fp); snd_usb_init_sample_rate(chip, fp->iface, alts, fp, fp->rate_max); return 0; + + error: + kfree(fp); + kfree(rate_table); + return err; } static int create_auto_pcm_quirk(struct snd_usb_audio *chip, From b7f03eeaaf7cdb9528c9710d648182af5a4db493 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Thu, 31 Mar 2016 12:05:43 -0400 Subject: [PATCH 169/797] ALSA: usb-audio: Fix double-free in error paths after snd_usb_add_audio_stream() call commit 836b34a935abc91e13e63053d0a83b24dfb5ea78 upstream. create_fixed_stream_quirk(), snd_usb_parse_audio_interface() and create_uaxx_quirk() functions allocate the audioformat object by themselves and free it upon error before returning. However, once the object is linked to a stream, it's freed again in snd_usb_audio_pcm_free(), thus it'll be double-freed, eventually resulting in a memory corruption. This patch fixes these failures in the error paths by unlinking the audioformat object before freeing it. Based on a patch by Takashi Iwai [Note for stable backports: this patch requires the commit 902eb7fd1e4a ('ALSA: usb-audio: Minor code cleanup in create_fixed_stream_quirk()')] Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1283358 Reported-by: Ralf Spenneberg Signed-off-by: Vladis Dronov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 4 ++++ sound/usb/stream.c | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index f9263de8b9a2..cd7eac28edee 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -150,6 +150,7 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, usb_audio_err(chip, "cannot memdup\n"); return -ENOMEM; } + INIT_LIST_HEAD(&fp->list); if (fp->nr_rates > MAX_NR_RATES) { kfree(fp); return -EINVAL; @@ -193,6 +194,7 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, return 0; error: + list_del(&fp->list); /* unlink for avoiding double-free */ kfree(fp); kfree(rate_table); return err; @@ -468,6 +470,7 @@ static int create_uaxx_quirk(struct snd_usb_audio *chip, fp->ep_attr = get_endpoint(alts, 0)->bmAttributes; fp->datainterval = 0; fp->maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize); + INIT_LIST_HEAD(&fp->list); switch (fp->maxpacksize) { case 0x120: @@ -491,6 +494,7 @@ static int create_uaxx_quirk(struct snd_usb_audio *chip, ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; err = snd_usb_add_audio_stream(chip, stream, fp); if (err < 0) { + list_del(&fp->list); /* unlink for avoiding double-free */ kfree(fp); return err; } diff --git a/sound/usb/stream.c b/sound/usb/stream.c index 8ee14f2365e7..3b23102230c0 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -316,7 +316,9 @@ static struct snd_pcm_chmap_elem *convert_chmap(int channels, unsigned int bits, /* * add this endpoint to the chip instance. * if a stream with the same endpoint already exists, append to it. - * if not, create a new pcm stream. + * if not, create a new pcm stream. note, fp is added to the substream + * fmt_list and will be freed on the chip instance release. do not free + * fp or do remove it from the substream fmt_list to avoid double-free. */ int snd_usb_add_audio_stream(struct snd_usb_audio *chip, int stream, @@ -677,6 +679,7 @@ int snd_usb_parse_audio_interface(struct snd_usb_audio *chip, int iface_no) * (fp->maxpacksize & 0x7ff); fp->attributes = parse_uac_endpoint_attributes(chip, alts, protocol, iface_no); fp->clock = clock; + INIT_LIST_HEAD(&fp->list); /* some quirks for attributes here */ @@ -725,6 +728,7 @@ int snd_usb_parse_audio_interface(struct snd_usb_audio *chip, int iface_no) dev_dbg(&dev->dev, "%u:%d: add audio endpoint %#x\n", iface_no, altno, fp->endpoint); err = snd_usb_add_audio_stream(chip, stream, fp); if (err < 0) { + list_del(&fp->list); /* unlink for avoiding double-free */ kfree(fp->rate_table); kfree(fp->chmap); kfree(fp); From fb243c3d81a5d8b5a275c4e118bea481df5d0510 Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Wed, 10 Feb 2016 15:33:17 +0300 Subject: [PATCH 170/797] Bluetooth: btusb: Add new AR3012 ID 13d3:3395 commit 609574eb46335cfac1421a07c0505627cbbab1f0 upstream. T: Bus=03 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=13d3 ProdID=3395 Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb BugLink: https://bugs.launchpad.net/bugs/1542564 Reported-and-tested-by: Christopher Simerly Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index fa893c3ec408..cb1cb9acc1b2 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -113,6 +113,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x13d3, 0x3362) }, { USB_DEVICE(0x13d3, 0x3375) }, { USB_DEVICE(0x13d3, 0x3393) }, + { USB_DEVICE(0x13d3, 0x3395) }, { USB_DEVICE(0x13d3, 0x3402) }, { USB_DEVICE(0x13d3, 0x3408) }, { USB_DEVICE(0x13d3, 0x3423) }, @@ -175,6 +176,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3395), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3402), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3408), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3423), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 968897108c76..d4888ea28b91 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -227,6 +227,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3395), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3402), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3408), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3423), .driver_info = BTUSB_ATH3012 }, From 36a7f23fc74cf3f3947ea837310b1140e9592f6d Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Sun, 28 Feb 2016 11:04:06 +0300 Subject: [PATCH 171/797] Bluetooth: btusb: Add a new AR3012 ID 04ca:3014 commit 81d90442eac779938217c3444b240aa51fd3db47 upstream. T: Bus=01 Lev=01 Prnt=01 Port=04 Cnt=03 Dev#= 5 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=04ca ProdID=3014 Rev=00.02 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb BugLink: https://bugs.launchpad.net/bugs/1546694 Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index cb1cb9acc1b2..3062a3ae1999 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -92,6 +92,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x04CA, 0x300d) }, { USB_DEVICE(0x04CA, 0x300f) }, { USB_DEVICE(0x04CA, 0x3010) }, + { USB_DEVICE(0x04CA, 0x3014) }, { USB_DEVICE(0x0930, 0x0219) }, { USB_DEVICE(0x0930, 0x021c) }, { USB_DEVICE(0x0930, 0x0220) }, @@ -155,6 +156,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x04ca, 0x300d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3014), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index d4888ea28b91..b4dd9400c4fc 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -206,6 +206,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x04ca, 0x300d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3014), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, From 28dee875257cf0985de5ddea4053d968fe365b44 Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Fri, 4 Mar 2016 01:32:19 +0300 Subject: [PATCH 172/797] Bluetooth: btusb: Add a new AR3012 ID 13d3:3472 commit 75c6aca4765dbe3d0c1507ab5052f2e373dc2331 upstream. T: Bus=01 Lev=01 Prnt=01 Port=04 Cnt=01 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=13d3 ProdID=3472 Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb BugLink: https://bugs.launchpad.net/bugs/1552925 Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 3062a3ae1999..0c4a748fef7a 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -119,6 +119,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x13d3, 0x3408) }, { USB_DEVICE(0x13d3, 0x3423) }, { USB_DEVICE(0x13d3, 0x3432) }, + { USB_DEVICE(0x13d3, 0x3472) }, { USB_DEVICE(0x13d3, 0x3474) }, /* Atheros AR5BBU12 with sflash firmware */ @@ -183,6 +184,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x13d3, 0x3408), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3423), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3432), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3472), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3474), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU22 with sflash firmware */ diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index b4dd9400c4fc..342ec8d203e3 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -233,6 +233,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x13d3, 0x3408), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3423), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3432), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3472), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3474), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU12 with sflash firmware */ From 120e2febfc11ea91e34bec1c92fe5d6475c89508 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 12 Jan 2016 11:17:38 -0600 Subject: [PATCH 173/797] crypto: ccp - Add hash state import and export support commit 952bce9792e6bf36fda09c2e5718abb5d9327369 upstream. Commit 8996eafdcbad ("crypto: ahash - ensure statesize is non-zero") added a check to prevent ahash algorithms from successfully registering if the import and export functions were not implemented. This prevents an oops in the hash_accept function of algif_hash. This commit causes the ccp-crypto module SHA support and AES CMAC support from successfully registering and causing the ccp-crypto module load to fail because the ahash import and export functions are not implemented. Update the CCP Crypto API support to provide import and export support for ahash algorithms. Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/ccp-crypto-aes-cmac.c | 23 +++++++++++++++++++++++ drivers/crypto/ccp/ccp-crypto-sha.c | 23 +++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c index d89f20c04266..00207cf5c79b 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c @@ -220,6 +220,26 @@ static int ccp_aes_cmac_digest(struct ahash_request *req) return ccp_aes_cmac_finup(req); } +static int ccp_aes_cmac_export(struct ahash_request *req, void *out) +{ + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_aes_cmac_req_ctx *state = out; + + *state = *rctx; + + return 0; +} + +static int ccp_aes_cmac_import(struct ahash_request *req, const void *in) +{ + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + const struct ccp_aes_cmac_req_ctx *state = in; + + *rctx = *state; + + return 0; +} + static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int key_len) { @@ -352,10 +372,13 @@ int ccp_register_aes_cmac_algs(struct list_head *head) alg->final = ccp_aes_cmac_final; alg->finup = ccp_aes_cmac_finup; alg->digest = ccp_aes_cmac_digest; + alg->export = ccp_aes_cmac_export; + alg->import = ccp_aes_cmac_import; alg->setkey = ccp_aes_cmac_setkey; halg = &alg->halg; halg->digestsize = AES_BLOCK_SIZE; + halg->statesize = sizeof(struct ccp_aes_cmac_req_ctx); base = &halg->base; snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "cmac(aes)"); diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index d14b3f28e010..3aae58def106 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -207,6 +207,26 @@ static int ccp_sha_digest(struct ahash_request *req) return ccp_sha_finup(req); } +static int ccp_sha_export(struct ahash_request *req, void *out) +{ + struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_sha_req_ctx *state = out; + + *state = *rctx; + + return 0; +} + +static int ccp_sha_import(struct ahash_request *req, const void *in) +{ + struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + const struct ccp_sha_req_ctx *state = in; + + *rctx = *state; + + return 0; +} + static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int key_len) { @@ -403,9 +423,12 @@ static int ccp_register_sha_alg(struct list_head *head, alg->final = ccp_sha_final; alg->finup = ccp_sha_finup; alg->digest = ccp_sha_digest; + alg->export = ccp_sha_export; + alg->import = ccp_sha_import; halg = &alg->halg; halg->digestsize = def->digest_size; + halg->statesize = sizeof(struct ccp_sha_req_ctx); base = &halg->base; snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); From 8c5156ad2da4493e6402bf7335d044445dca8c00 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 29 Jan 2016 12:45:14 -0600 Subject: [PATCH 174/797] crypto: ccp - Limit the amount of information exported commit d1662165ae612ec8b5f94a6b07e65ea58b6dce34 upstream. Since the exported information can be exposed to user-space, instead of exporting the entire request context only export the minimum information needed. Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/ccp-crypto-aes-cmac.c | 16 +++++++++++----- drivers/crypto/ccp/ccp-crypto-sha.c | 20 +++++++++++++++----- drivers/crypto/ccp/ccp-crypto.h | 22 ++++++++++++++++++++++ 3 files changed, 48 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c index 00207cf5c79b..6a2d836eb2d9 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c @@ -223,9 +223,12 @@ static int ccp_aes_cmac_digest(struct ahash_request *req) static int ccp_aes_cmac_export(struct ahash_request *req, void *out) { struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); - struct ccp_aes_cmac_req_ctx *state = out; + struct ccp_aes_cmac_exp_ctx *state = out; - *state = *rctx; + state->null_msg = rctx->null_msg; + memcpy(state->iv, rctx->iv, sizeof(state->iv)); + state->buf_count = rctx->buf_count; + memcpy(state->buf, rctx->buf, sizeof(state->buf)); return 0; } @@ -233,9 +236,12 @@ static int ccp_aes_cmac_export(struct ahash_request *req, void *out) static int ccp_aes_cmac_import(struct ahash_request *req, const void *in) { struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); - const struct ccp_aes_cmac_req_ctx *state = in; + const struct ccp_aes_cmac_exp_ctx *state = in; - *rctx = *state; + rctx->null_msg = state->null_msg; + memcpy(rctx->iv, state->iv, sizeof(rctx->iv)); + rctx->buf_count = state->buf_count; + memcpy(rctx->buf, state->buf, sizeof(rctx->buf)); return 0; } @@ -378,7 +384,7 @@ int ccp_register_aes_cmac_algs(struct list_head *head) halg = &alg->halg; halg->digestsize = AES_BLOCK_SIZE; - halg->statesize = sizeof(struct ccp_aes_cmac_req_ctx); + halg->statesize = sizeof(struct ccp_aes_cmac_exp_ctx); base = &halg->base; snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "cmac(aes)"); diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index 3aae58def106..a67128a7af23 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -210,9 +210,14 @@ static int ccp_sha_digest(struct ahash_request *req) static int ccp_sha_export(struct ahash_request *req, void *out) { struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); - struct ccp_sha_req_ctx *state = out; + struct ccp_sha_exp_ctx *state = out; - *state = *rctx; + state->type = rctx->type; + state->msg_bits = rctx->msg_bits; + state->first = rctx->first; + memcpy(state->ctx, rctx->ctx, sizeof(state->ctx)); + state->buf_count = rctx->buf_count; + memcpy(state->buf, rctx->buf, sizeof(state->buf)); return 0; } @@ -220,9 +225,14 @@ static int ccp_sha_export(struct ahash_request *req, void *out) static int ccp_sha_import(struct ahash_request *req, const void *in) { struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); - const struct ccp_sha_req_ctx *state = in; + const struct ccp_sha_exp_ctx *state = in; - *rctx = *state; + rctx->type = state->type; + rctx->msg_bits = state->msg_bits; + rctx->first = state->first; + memcpy(rctx->ctx, state->ctx, sizeof(rctx->ctx)); + rctx->buf_count = state->buf_count; + memcpy(rctx->buf, state->buf, sizeof(rctx->buf)); return 0; } @@ -428,7 +438,7 @@ static int ccp_register_sha_alg(struct list_head *head, halg = &alg->halg; halg->digestsize = def->digest_size; - halg->statesize = sizeof(struct ccp_sha_req_ctx); + halg->statesize = sizeof(struct ccp_sha_exp_ctx); base = &halg->base; snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h index 76a96f0f44c6..a326ec20bfa8 100644 --- a/drivers/crypto/ccp/ccp-crypto.h +++ b/drivers/crypto/ccp/ccp-crypto.h @@ -129,6 +129,15 @@ struct ccp_aes_cmac_req_ctx { struct ccp_cmd cmd; }; +struct ccp_aes_cmac_exp_ctx { + unsigned int null_msg; + + u8 iv[AES_BLOCK_SIZE]; + + unsigned int buf_count; + u8 buf[AES_BLOCK_SIZE]; +}; + /***** SHA related defines *****/ #define MAX_SHA_CONTEXT_SIZE SHA256_DIGEST_SIZE #define MAX_SHA_BLOCK_SIZE SHA256_BLOCK_SIZE @@ -171,6 +180,19 @@ struct ccp_sha_req_ctx { struct ccp_cmd cmd; }; +struct ccp_sha_exp_ctx { + enum ccp_sha_type type; + + u64 msg_bits; + + unsigned int first; + + u8 ctx[MAX_SHA_CONTEXT_SIZE]; + + unsigned int buf_count; + u8 buf[MAX_SHA_BLOCK_SIZE]; +}; + /***** Common Context Structure *****/ struct ccp_ctx { int (*complete)(struct crypto_async_request *req, int ret); From cc78d091bd92300de98b1b372da7d5ee9dcd8e63 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 2 Feb 2016 11:38:21 -0600 Subject: [PATCH 175/797] crypto: ccp - Don't assume export/import areas are aligned commit b31dde2a5cb1bf764282abf934266b7193c2bc7c upstream. Use a local variable for the exported and imported state so that alignment is not an issue. On export, set a local variable from the request context and then memcpy the contents of the local variable to the export memory area. On import, memcpy the import memory area into a local variable and then use the local variable to set the request context. Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/ccp-crypto-aes-cmac.c | 26 +++++++++++------- drivers/crypto/ccp/ccp-crypto-sha.c | 34 ++++++++++++++---------- 2 files changed, 36 insertions(+), 24 deletions(-) diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c index 6a2d836eb2d9..d095452b8828 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c @@ -223,12 +223,15 @@ static int ccp_aes_cmac_digest(struct ahash_request *req) static int ccp_aes_cmac_export(struct ahash_request *req, void *out) { struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); - struct ccp_aes_cmac_exp_ctx *state = out; + struct ccp_aes_cmac_exp_ctx state; - state->null_msg = rctx->null_msg; - memcpy(state->iv, rctx->iv, sizeof(state->iv)); - state->buf_count = rctx->buf_count; - memcpy(state->buf, rctx->buf, sizeof(state->buf)); + state.null_msg = rctx->null_msg; + memcpy(state.iv, rctx->iv, sizeof(state.iv)); + state.buf_count = rctx->buf_count; + memcpy(state.buf, rctx->buf, sizeof(state.buf)); + + /* 'out' may not be aligned so memcpy from local variable */ + memcpy(out, &state, sizeof(state)); return 0; } @@ -236,12 +239,15 @@ static int ccp_aes_cmac_export(struct ahash_request *req, void *out) static int ccp_aes_cmac_import(struct ahash_request *req, const void *in) { struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); - const struct ccp_aes_cmac_exp_ctx *state = in; + struct ccp_aes_cmac_exp_ctx state; - rctx->null_msg = state->null_msg; - memcpy(rctx->iv, state->iv, sizeof(rctx->iv)); - rctx->buf_count = state->buf_count; - memcpy(rctx->buf, state->buf, sizeof(rctx->buf)); + /* 'in' may not be aligned so memcpy to local variable */ + memcpy(&state, in, sizeof(state)); + + rctx->null_msg = state.null_msg; + memcpy(rctx->iv, state.iv, sizeof(rctx->iv)); + rctx->buf_count = state.buf_count; + memcpy(rctx->buf, state.buf, sizeof(rctx->buf)); return 0; } diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index a67128a7af23..7002c6b283e5 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -210,14 +210,17 @@ static int ccp_sha_digest(struct ahash_request *req) static int ccp_sha_export(struct ahash_request *req, void *out) { struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); - struct ccp_sha_exp_ctx *state = out; + struct ccp_sha_exp_ctx state; - state->type = rctx->type; - state->msg_bits = rctx->msg_bits; - state->first = rctx->first; - memcpy(state->ctx, rctx->ctx, sizeof(state->ctx)); - state->buf_count = rctx->buf_count; - memcpy(state->buf, rctx->buf, sizeof(state->buf)); + state.type = rctx->type; + state.msg_bits = rctx->msg_bits; + state.first = rctx->first; + memcpy(state.ctx, rctx->ctx, sizeof(state.ctx)); + state.buf_count = rctx->buf_count; + memcpy(state.buf, rctx->buf, sizeof(state.buf)); + + /* 'out' may not be aligned so memcpy from local variable */ + memcpy(out, &state, sizeof(state)); return 0; } @@ -225,14 +228,17 @@ static int ccp_sha_export(struct ahash_request *req, void *out) static int ccp_sha_import(struct ahash_request *req, const void *in) { struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); - const struct ccp_sha_exp_ctx *state = in; + struct ccp_sha_exp_ctx state; - rctx->type = state->type; - rctx->msg_bits = state->msg_bits; - rctx->first = state->first; - memcpy(rctx->ctx, state->ctx, sizeof(rctx->ctx)); - rctx->buf_count = state->buf_count; - memcpy(rctx->buf, state->buf, sizeof(rctx->buf)); + /* 'in' may not be aligned so memcpy to local variable */ + memcpy(&state, in, sizeof(state)); + + rctx->type = state.type; + rctx->msg_bits = state.msg_bits; + rctx->first = state.first; + memcpy(rctx->ctx, state.ctx, sizeof(rctx->ctx)); + rctx->buf_count = state.buf_count; + memcpy(rctx->buf, state.buf, sizeof(rctx->buf)); return 0; } From 0cdc91f539d77f4be720330f577b708a56cc9391 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 25 Feb 2016 16:48:13 -0600 Subject: [PATCH 176/797] crypto: ccp - memset request context to zero during import commit ce0ae266feaf35930394bd770c69778e4ef03ba9 upstream. Since a crypto_ahash_import() can be called against a request context that has not had a crypto_ahash_init() performed, the request context needs to be cleared to insure there is no random data present. If not, the random data can result in a kernel oops during crypto_ahash_update(). Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/ccp-crypto-aes-cmac.c | 1 + drivers/crypto/ccp/ccp-crypto-sha.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c index d095452b8828..3d9acc53d247 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c @@ -244,6 +244,7 @@ static int ccp_aes_cmac_import(struct ahash_request *req, const void *in) /* 'in' may not be aligned so memcpy to local variable */ memcpy(&state, in, sizeof(state)); + memset(rctx, 0, sizeof(*rctx)); rctx->null_msg = state.null_msg; memcpy(rctx->iv, state.iv, sizeof(rctx->iv)); rctx->buf_count = state.buf_count; diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index 7002c6b283e5..8ef06fad8b14 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -233,6 +233,7 @@ static int ccp_sha_import(struct ahash_request *req, const void *in) /* 'in' may not be aligned so memcpy to local variable */ memcpy(&state, in, sizeof(state)); + memset(rctx, 0, sizeof(*rctx)); rctx->type = state.type; rctx->msg_bits = state.msg_bits; rctx->first = state.first; From f69c1b51f6da629e6f03b336ffec8c31b56e6f8a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 30 Jan 2016 17:38:28 +0300 Subject: [PATCH 177/797] crypto: keywrap - memzero the correct memory commit 2b8b28fd232233c22fb61009dd8b0587390d2875 upstream. We're clearing the wrong memory. The memory corruption is likely harmless because we weren't going to use that stack memory again but not zeroing is a potential information leak. Fixes: e28facde3c39 ('crypto: keywrap - add key wrapping block chaining mode') Signed-off-by: Dan Carpenter Acked-by: Stephan Mueller Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/keywrap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/keywrap.c b/crypto/keywrap.c index b1d106ce55f3..72014f963ba7 100644 --- a/crypto/keywrap.c +++ b/crypto/keywrap.c @@ -212,7 +212,7 @@ static int crypto_kw_decrypt(struct blkcipher_desc *desc, SEMIBSIZE)) ret = -EBADMSG; - memzero_explicit(&block, sizeof(struct crypto_kw_block)); + memzero_explicit(block, sizeof(struct crypto_kw_block)); return ret; } @@ -297,7 +297,7 @@ static int crypto_kw_encrypt(struct blkcipher_desc *desc, /* establish the IV for the caller to pick up */ memcpy(desc->info, block->A, SEMIBSIZE); - memzero_explicit(&block, sizeof(struct crypto_kw_block)); + memzero_explicit(block, sizeof(struct crypto_kw_block)); return 0; } From 90933f3fb612574e326f9872c5bd2121f8e2da39 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Sun, 6 Mar 2016 03:21:52 +0200 Subject: [PATCH 178/797] crypto: atmel - fix checks of error code returned by devm_ioremap_resource() commit 9b52d55f4f0e2bb9a34abbcf99e05e17f1b3b281 upstream. The change fixes potential oops while accessing iomem on invalid address, if devm_ioremap_resource() fails due to some reason. The devm_ioremap_resource() function returns ERR_PTR() and never returns NULL, which makes useless a following check for NULL. Signed-off-by: Vladimir Zapolskiy Fixes: b0e8b3417a62 ("crypto: atmel - use devm_xxx() managed function") Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/atmel-aes.c | 4 ++-- drivers/crypto/atmel-sha.c | 4 ++-- drivers/crypto/atmel-tdes.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index fb16d812c8f5..1dffb13e5c2f 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -1396,9 +1396,9 @@ static int atmel_aes_probe(struct platform_device *pdev) } aes_dd->io_base = devm_ioremap_resource(&pdev->dev, aes_res); - if (!aes_dd->io_base) { + if (IS_ERR(aes_dd->io_base)) { dev_err(dev, "can't ioremap\n"); - err = -ENOMEM; + err = PTR_ERR(aes_dd->io_base); goto res_err; } diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 3178f84d2757..0dadb6332f0e 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -1405,9 +1405,9 @@ static int atmel_sha_probe(struct platform_device *pdev) } sha_dd->io_base = devm_ioremap_resource(&pdev->dev, sha_res); - if (!sha_dd->io_base) { + if (IS_ERR(sha_dd->io_base)) { dev_err(dev, "can't ioremap\n"); - err = -ENOMEM; + err = PTR_ERR(sha_dd->io_base); goto res_err; } diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index 2c7a628d0375..bf467d7be35c 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -1417,9 +1417,9 @@ static int atmel_tdes_probe(struct platform_device *pdev) } tdes_dd->io_base = devm_ioremap_resource(&pdev->dev, tdes_res); - if (!tdes_dd->io_base) { + if (IS_ERR(tdes_dd->io_base)) { dev_err(dev, "can't ioremap\n"); - err = -ENOMEM; + err = PTR_ERR(tdes_dd->io_base); goto res_err; } From 75efb5fe5ce23f72ef8641f593b5829152c6203c Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Sun, 6 Mar 2016 03:22:04 +0200 Subject: [PATCH 179/797] crypto: ux500 - fix checks of error code returned by devm_ioremap_resource() commit b62917a2622ebcb03a500ef20da47be80d8c8951 upstream. The change fixes potential oops while accessing iomem on invalid address, if devm_ioremap_resource() fails due to some reason. The devm_ioremap_resource() function returns ERR_PTR() and never returns NULL, which makes useless a following check for NULL. Signed-off-by: Vladimir Zapolskiy Fixes: 5a4eea2658c93 ("crypto: ux500 - Use devm_xxx() managed function") Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ux500/cryp/cryp_core.c | 4 ++-- drivers/crypto/ux500/hash/hash_core.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c index 4c243c1ffc7f..790f7cadc1ed 100644 --- a/drivers/crypto/ux500/cryp/cryp_core.c +++ b/drivers/crypto/ux500/cryp/cryp_core.c @@ -1440,9 +1440,9 @@ static int ux500_cryp_probe(struct platform_device *pdev) device_data->phybase = res->start; device_data->base = devm_ioremap_resource(dev, res); - if (!device_data->base) { + if (IS_ERR(device_data->base)) { dev_err(dev, "[%s]: ioremap failed!", __func__); - ret = -ENOMEM; + ret = PTR_ERR(device_data->base); goto out; } diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index f47d112041b2..66b1c3313e2e 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -1675,9 +1675,9 @@ static int ux500_hash_probe(struct platform_device *pdev) device_data->phybase = res->start; device_data->base = devm_ioremap_resource(dev, res); - if (!device_data->base) { + if (IS_ERR(device_data->base)) { dev_err(dev, "%s: ioremap() failed!\n", __func__); - ret = -ENOMEM; + ret = PTR_ERR(device_data->base); goto out; } spin_lock_init(&device_data->ctx_lock); From f08fc4eed81e135c687ac246531a6dbbd236eb14 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Thu, 17 Mar 2016 10:47:10 +0100 Subject: [PATCH 180/797] crypto: marvell/cesa - forward devm_ioremap_resource() error code commit dfe97ad30e8c038261663a18b9e04b8b5bc07bea upstream. Forward devm_ioremap_resource() error code instead of returning -ENOMEM. Signed-off-by: Boris Brezillon Reported-by: Russell King - ARM Linux Fixes: f63601fd616a ("crypto: marvell/cesa - add a new driver for Marvell's CESA") Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/marvell/cesa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index c0656e7f37b5..80239ae69527 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -420,7 +420,7 @@ static int mv_cesa_probe(struct platform_device *pdev) res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); cesa->regs = devm_ioremap_resource(dev, res); if (IS_ERR(cesa->regs)) - return -ENOMEM; + return PTR_ERR(cesa->regs); ret = mv_cesa_dev_dma_init(cesa); if (ret) From 499f9ff872f8792d3318b0bd5e6533bfe48abf0d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 24 Feb 2016 14:37:15 +0000 Subject: [PATCH 181/797] X.509: Fix leap year handling again commit ac4cbedfdf55455b4c447f17f0fa027dbf02b2a6 upstream. There are still a couple of minor issues in the X.509 leap year handling: (1) To avoid doing a modulus-by-400 in addition to a modulus-by-100 when determining whether the year is a leap year or not, I divided the year by 100 after doing the modulus-by-100, thereby letting the compiler do one instruction for both, and then did a modulus-by-4. Unfortunately, I then passed the now-modified year value to mktime64() to construct a time value. Since this isn't a fast path and since mktime64() does a bunch of divisions, just condense down to "% 400". It's also easier to read. (2) The default month length for any February where the year doesn't divide by four exactly is obtained from the month_length[] array where the value is 29, not 28. This is fixed by altering the table. Reported-by: Rudolf Polzer Signed-off-by: David Howells Acked-by: David Woodhouse Acked-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- crypto/asymmetric_keys/x509_cert_parser.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index 021d39c0ba75..13c4e5a5fe8c 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -494,7 +494,7 @@ int x509_decode_time(time64_t *_t, size_t hdrlen, unsigned char tag, const unsigned char *value, size_t vlen) { - static const unsigned char month_lengths[] = { 31, 29, 31, 30, 31, 30, + static const unsigned char month_lengths[] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; const unsigned char *p = value; unsigned year, mon, day, hour, min, sec, mon_len; @@ -540,9 +540,9 @@ int x509_decode_time(time64_t *_t, size_t hdrlen, if (year % 4 == 0) { mon_len = 29; if (year % 100 == 0) { - year /= 100; - if (year % 4 != 0) - mon_len = 28; + mon_len = 28; + if (year % 400 == 0) + mon_len = 29; } } } From fa0ae4f223ab29ab853959b08449b5c08c9cb7a0 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 7 Feb 2016 23:35:32 +0200 Subject: [PATCH 182/797] mei: bus: check if the device is enabled before data transfer commit 15c13dfcad883a1e76b714480fb27be96247fd82 upstream. The bus data transfer interface was missing the check if the device is in enabled state, this may lead to stack corruption during link reset. Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 0b05aa938799..1a173d0af694 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -53,6 +53,11 @@ ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, bus = cl->dev; mutex_lock(&bus->device_lock); + if (bus->dev_state != MEI_DEV_ENABLED) { + rets = -ENODEV; + goto out; + } + if (!mei_cl_is_connected(cl)) { rets = -ENODEV; goto out; @@ -109,6 +114,10 @@ ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length) bus = cl->dev; mutex_lock(&bus->device_lock); + if (bus->dev_state != MEI_DEV_ENABLED) { + rets = -ENODEV; + goto out; + } cb = mei_cl_read_cb(cl, NULL); if (cb) From 3e69549b48e39e72c7aad093e0c5e29ab547a9f1 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 29 Jan 2016 09:47:22 -0800 Subject: [PATCH 183/797] tpm: fix the rollback in tpm_chip_register() commit 72c91ce8523ae5828fe5e4417ae0aaab53707a08 upstream. Fixed the rollback and gave more self-documenting names for the functions. Fixes: d972b0523f ("tpm: fix call order in tpm-chip.c") Signed-off-by: Jarkko Sakkinen Reviewed-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-chip.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 45cc39aabeee..1a9dcee8da5a 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -140,7 +140,7 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, } EXPORT_SYMBOL_GPL(tpmm_chip_alloc); -static int tpm_dev_add_device(struct tpm_chip *chip) +static int tpm_add_char_device(struct tpm_chip *chip) { int rc; @@ -151,7 +151,6 @@ static int tpm_dev_add_device(struct tpm_chip *chip) chip->devname, MAJOR(chip->dev.devt), MINOR(chip->dev.devt), rc); - device_unregister(&chip->dev); return rc; } @@ -162,13 +161,14 @@ static int tpm_dev_add_device(struct tpm_chip *chip) chip->devname, MAJOR(chip->dev.devt), MINOR(chip->dev.devt), rc); + cdev_del(&chip->cdev); return rc; } return rc; } -static void tpm_dev_del_device(struct tpm_chip *chip) +static void tpm_del_char_device(struct tpm_chip *chip) { cdev_del(&chip->cdev); device_unregister(&chip->dev); @@ -222,7 +222,7 @@ int tpm_chip_register(struct tpm_chip *chip) tpm_add_ppi(chip); - rc = tpm_dev_add_device(chip); + rc = tpm_add_char_device(chip); if (rc) goto out_err; @@ -274,6 +274,6 @@ void tpm_chip_unregister(struct tpm_chip *chip) sysfs_remove_link(&chip->pdev->kobj, "ppi"); tpm1_chip_unregister(chip); - tpm_dev_del_device(chip); + tpm_del_char_device(chip); } EXPORT_SYMBOL_GPL(tpm_chip_unregister); From 160f50a3f40f3aa32ec7c7c9d18b3ffb5bdf12e2 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Thu, 18 Feb 2016 22:11:29 +0200 Subject: [PATCH 184/797] tpm_crb: tpm2_shutdown() must be called before tpm_chip_unregister() commit 99cda8cb4639de81cde785b5bab9bc52e916e594 upstream. Wrong call order. Reported-by: Jason Gunthorpe Fixes: 74d6b3ceaa17 Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_crb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index 4bb9727c1047..61e64293b765 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -310,11 +310,11 @@ static int crb_acpi_remove(struct acpi_device *device) struct device *dev = &device->dev; struct tpm_chip *chip = dev_get_drvdata(dev); - tpm_chip_unregister(chip); - if (chip->flags & TPM_CHIP_FLAG_TPM2) tpm2_shutdown(chip, TPM2_SU_CLEAR); + tpm_chip_unregister(chip); + return 0; } From 062c8a4ff40fedf82c1ec177a63b06c41801c2e9 Mon Sep 17 00:00:00 2001 From: Harald Hoyer Date: Sat, 6 Feb 2016 15:44:42 +0100 Subject: [PATCH 185/797] tpm_eventlog.c: fix binary_bios_measurements commit 186d124f07da193a8f47e491af85cb695d415f2f upstream. The commit 0cc698af36ff ("vTPM: support little endian guests") copied the event, but without the event data, did an endian conversion on the size and tried to output the event data from the copied version, which has only have one byte of the data, resulting in garbage event data. [jarkko.sakkinen@linux.intel.com: fixed minor coding style issues and renamed the local variable tempPtr as temp_ptr now that there is an excuse to do this.] Signed-off-by: Harald Hoyer Fixes: 0cc698af36ff ("vTPM: support little endian guests") Reviewed-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_eventlog.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/char/tpm/tpm_eventlog.c b/drivers/char/tpm/tpm_eventlog.c index bd72fb04225e..4e6940acf639 100644 --- a/drivers/char/tpm/tpm_eventlog.c +++ b/drivers/char/tpm/tpm_eventlog.c @@ -232,7 +232,7 @@ static int tpm_binary_bios_measurements_show(struct seq_file *m, void *v) { struct tcpa_event *event = v; struct tcpa_event temp_event; - char *tempPtr; + char *temp_ptr; int i; memcpy(&temp_event, event, sizeof(struct tcpa_event)); @@ -242,10 +242,16 @@ static int tpm_binary_bios_measurements_show(struct seq_file *m, void *v) temp_event.event_type = do_endian_conversion(event->event_type); temp_event.event_size = do_endian_conversion(event->event_size); - tempPtr = (char *)&temp_event; + temp_ptr = (char *) &temp_event; - for (i = 0; i < sizeof(struct tcpa_event) + temp_event.event_size; i++) - seq_putc(m, tempPtr[i]); + for (i = 0; i < (sizeof(struct tcpa_event) - 1) ; i++) + seq_putc(m, temp_ptr[i]); + + temp_ptr = (char *) v; + + for (i = (sizeof(struct tcpa_event) - 1); + i < (sizeof(struct tcpa_event) + temp_event.event_size); i++) + seq_putc(m, temp_ptr[i]); return 0; From 158c0029a8861591d42c6ad144d09cbd2c2dfe81 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 8 Feb 2016 22:31:08 +0200 Subject: [PATCH 186/797] tpm: fix the cleanup of struct tpm_chip commit 8e0ee3c9faed7ca68807ea45141775856c438ac0 upstream. If the initialization fails before tpm_chip_register(), put_device() will be not called, which causes release callback not to be called. This patch fixes the issue by adding put_device() to devres list of the parent device. Fixes: 313d21eeab ("tpm: device class for tpm") Signed-off-by: Jarkko Sakkinen Reviewed-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-chip.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 1a9dcee8da5a..252142524ff2 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -136,6 +136,8 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, chip->cdev.owner = chip->pdev->driver->owner; chip->cdev.kobj.parent = &chip->dev.kobj; + devm_add_action(dev, (void (*)(void *)) put_device, &chip->dev); + return chip; } EXPORT_SYMBOL_GPL(tpmm_chip_alloc); @@ -171,7 +173,7 @@ static int tpm_add_char_device(struct tpm_chip *chip) static void tpm_del_char_device(struct tpm_chip *chip) { cdev_del(&chip->cdev); - device_unregister(&chip->dev); + device_del(&chip->dev); } static int tpm1_chip_register(struct tpm_chip *chip) From 72c49c697bb63054c7c3a26b90d15fb3a95e30eb Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sat, 13 Feb 2016 22:41:51 +0200 Subject: [PATCH 187/797] HID: logitech: fix Dual Action gamepad support commit 5d74325a2201376a95520a4a38a1ce2c65761c49 upstream. The patch that added Logitech Dual Action gamepad support forgot to update the special driver list for the device. This caused the logitech driver not to probe unless kernel module load order was favorable. Update the special driver list to fix it. Thanks to Simon Wood for the idea. Cc: Vitaly Katraew Fixes: 56d0c8b7c8fb ("HID: add support for Logitech Dual Action gamepads") Signed-off-by: Grazvydas Ignotas Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index c6f7a694f67a..f16155f5b6e2 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1897,6 +1897,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_ELITE_KBD) }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_CORDLESS_DESKTOP_LX500) }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_EXTREME_3D) }, + { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DUAL_ACTION) }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WHEEL) }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD_CORD) }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD) }, From 773332f0e2b1b530079c812975833c8c2d59e4d7 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 14 Mar 2016 15:21:04 -0700 Subject: [PATCH 188/797] HID: i2c-hid: fix OOB write in i2c_hid_set_or_send_report() commit 3b654288b196ceaa156029d9457ccbded0489b98 upstream. Even though hid_hw_* checks that passed in data_len is less than HID_MAX_BUFFER_SIZE it is not enough, as i2c-hid does not necessarily allocate buffers of HID_MAX_BUFFER_SIZE but rather checks all device reports and select largest size. In-kernel users normally just send as much data as report needs, so there is no problem, but hidraw users can do whatever they please: BUG: KASAN: slab-out-of-bounds in memcpy+0x34/0x54 at addr ffffffc07135ea80 Write of size 4101 by task syz-executor/8747 CPU: 2 PID: 8747 Comm: syz-executor Tainted: G BU 3.18.0 #37 Hardware name: Google Tegra210 Smaug Rev 1,3+ (DT) Call trace: [] dump_backtrace+0x0/0x258 arch/arm64/kernel/traps.c:83 [] show_stack+0x1c/0x2c arch/arm64/kernel/traps.c:172 [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x90/0x140 lib/dump_stack.c:50 [< inline >] print_error_description mm/kasan/report.c:97 [< inline >] kasan_report_error mm/kasan/report.c:278 [] kasan_report+0x268/0x530 mm/kasan/report.c:305 [] __asan_storeN+0x20/0x150 mm/kasan/kasan.c:718 [] memcpy+0x30/0x54 mm/kasan/kasan.c:299 [] __i2c_hid_command+0x2b0/0x7b4 drivers/hid/i2c-hid/i2c-hid.c:178 [< inline >] i2c_hid_set_or_send_report drivers/hid/i2c-hid/i2c-hid.c:321 [] i2c_hid_output_raw_report.isra.2+0x3d4/0x4b8 drivers/hid/i2c-hid/i2c-hid.c:589 [] i2c_hid_output_report+0x54/0x68 drivers/hid/i2c-hid/i2c-hid.c:602 [< inline >] hid_hw_output_report include/linux/hid.h:1039 [] hidraw_send_report+0x400/0x414 drivers/hid/hidraw.c:154 [] hidraw_write+0x40/0x64 drivers/hid/hidraw.c:177 [] vfs_write+0x1d4/0x3cc fs/read_write.c:534 [< inline >] SYSC_pwrite64 fs/read_write.c:627 [] SyS_pwrite64+0xec/0x144 fs/read_write.c:614 Object at ffffffc07135ea80, in cache kmalloc-512 Object allocated with size 268 bytes. Let's check data length against the buffer size before attempting to copy data over. Reported-by: Alexander Potapenko Signed-off-by: Dmitry Torokhov Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/i2c-hid/i2c-hid.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 10bd8e6e4c9c..0b80633bae91 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -282,18 +282,22 @@ static int i2c_hid_set_or_send_report(struct i2c_client *client, u8 reportType, u16 dataRegister = le16_to_cpu(ihid->hdesc.wDataRegister); u16 outputRegister = le16_to_cpu(ihid->hdesc.wOutputRegister); u16 maxOutputLength = le16_to_cpu(ihid->hdesc.wMaxOutputLength); - - /* hid_hw_* already checked that data_len < HID_MAX_BUFFER_SIZE */ - u16 size = 2 /* size */ + - (reportID ? 1 : 0) /* reportID */ + - data_len /* buf */; - int args_len = (reportID >= 0x0F ? 1 : 0) /* optional third byte */ + - 2 /* dataRegister */ + - size /* args */; + u16 size; + int args_len; int index = 0; i2c_hid_dbg(ihid, "%s\n", __func__); + if (data_len > ihid->bufsize) + return -EINVAL; + + size = 2 /* size */ + + (reportID ? 1 : 0) /* reportID */ + + data_len /* buf */; + args_len = (reportID >= 0x0F ? 1 : 0) /* optional third byte */ + + 2 /* dataRegister */ + + size /* args */; + if (!use_data && maxOutputLength == 0) return -ENOSYS; From cca86656508d12f998e11ee7f71b8a40864af3b4 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 7 Mar 2016 11:02:38 +0100 Subject: [PATCH 189/797] HID: multitouch: force retrieving of Win8 signature blob commit 45c5c6828214605eaefa6755c47bd1a2c7eb203e upstream. The Synaptics 0x11e5 over I2C found in the Asus T100-CHI requires to fetch the signature blob to actually start sending events. With this patch, we should be close enough to the Windows driver which checks the content of the blob at plugin to validate or not the touchscreen. Link: https://bugzilla.kernel.org/show_bug.cgi?id=113481 Fixes: 6d4f5440 ("HID: multitouch: Fetch feature reports on demand for Win8 devices") Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-multitouch.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 2b8ff18d3713..c5ec4f915594 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -396,6 +396,11 @@ static void mt_feature_mapping(struct hid_device *hdev, td->is_buttonpad = true; break; + case 0xff0000c5: + /* Retrieve the Win8 blob once to enable some devices */ + if (usage->usage_index == 0) + mt_get_feature(hdev, field->report); + break; } } From b2fb06096e2538e131bd9551eb5a70ba42c1b3f7 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 12 Feb 2016 17:10:37 +0100 Subject: [PATCH 190/797] HID: fix hid_ignore_special_drivers module parameter commit 4392bf333388cabdad5afe5b1500002d7b9c318e upstream. hid_ignore_special_drivers works fine until hid_scan_report autodetects and reassign devices (for hid-multitouch, hid-microsoft and hid-rmi). Simplify the handling of the parameter: if it is there, use hid-generic, no matter what, and if not, scan the device or rely on the hid_have_special_driver table. This was detected while trying to disable hid-multitouch on a Surface Pro cover which prevented to use the keyboard. Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index f16155f5b6e2..ec791e169f8f 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2616,9 +2616,10 @@ int hid_add_device(struct hid_device *hdev) /* * Scan generic devices for group information */ - if (hid_ignore_special_drivers || - (!hdev->group && - !hid_match_id(hdev, hid_have_special_driver))) { + if (hid_ignore_special_drivers) { + hdev->group = HID_GROUP_GENERIC; + } else if (!hdev->group && + !hid_match_id(hdev, hid_have_special_driver)) { ret = hid_scan_report(hdev); if (ret) hid_warn(hdev, "bad device descriptor (%d)\n", ret); From a5e8deb7f07fb8f7d34c908dea303ba948752916 Mon Sep 17 00:00:00 2001 From: "Spencer E. Olson" Date: Tue, 12 Jan 2016 10:33:18 -0700 Subject: [PATCH 191/797] staging: comedi: ni_tiocmd: change mistaken use of start_src for start_arg commit 1fd24a4702d2af0ea4d5845126cf57d4d1796216 upstream. This fixes a bug in function ni_tio_input_inttrig(). The trigger number should be compared to cmd->start_arg, not cmd->start_src. Fixes: 6a760394d7eb ("staging: comedi: ni_tiocmd: clarify the cmd->start_arg validation and use") Signed-off-by: Spencer E. Olson Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_tiocmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/ni_tiocmd.c b/drivers/staging/comedi/drivers/ni_tiocmd.c index 437f723bb34d..823e47910004 100644 --- a/drivers/staging/comedi/drivers/ni_tiocmd.c +++ b/drivers/staging/comedi/drivers/ni_tiocmd.c @@ -92,7 +92,7 @@ static int ni_tio_input_inttrig(struct comedi_device *dev, unsigned long flags; int ret = 0; - if (trig_num != cmd->start_src) + if (trig_num != cmd->start_arg) return -EINVAL; spin_lock_irqsave(&counter->lock, flags); From 263b0af7cc419a6f5254269a30e9784b5476f433 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Wed, 23 Mar 2016 00:38:43 +0200 Subject: [PATCH 192/797] staging: android: ion_test: fix check of platform_device_register_simple() error code commit ccbc2a9e7878ff09bcaed4893c2a2d3adbb797e2 upstream. On error platform_device_register_simple() returns ERR_PTR() value, check for NULL always fails. The change corrects the check itself and propagates the returned error upwards. Fixes: 81fb0b901397 ("staging: android: ion_test: unregister the platform device") Signed-off-by: Vladimir Zapolskiy Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/android/ion/ion_test.c b/drivers/staging/android/ion/ion_test.c index b8dcf5a26cc4..58d46893e5ff 100644 --- a/drivers/staging/android/ion/ion_test.c +++ b/drivers/staging/android/ion/ion_test.c @@ -285,8 +285,8 @@ static int __init ion_test_init(void) { ion_test_pdev = platform_device_register_simple("ion-test", -1, NULL, 0); - if (!ion_test_pdev) - return -ENODEV; + if (IS_ERR(ion_test_pdev)) + return PTR_ERR(ion_test_pdev); return platform_driver_probe(&ion_test_platform_driver, ion_test_probe); } From 62fe263236e09a5b4ec64bc71a42d37caf943eca Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Tue, 22 Mar 2016 10:04:48 -0700 Subject: [PATCH 193/797] staging: comedi: ni_mio_common: fix the ni_write[blw]() functions commit bd3a3cd6c27b117fb9a43a38c8072c95332beecc upstream. Memory mapped io (dev->mmio) should not also be writing to the ioport (dev->iobase) registers. Add the missing 'else' to these functions. Fixes: 0953ee4acca0 ("staging: comedi: ni_mio_common: checkpatch.pl cleanup (else not useful)") Signed-off-by: H Hartley Sweeten Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_mio_common.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index 6cc304a4c59b..27fbf1a81097 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -246,24 +246,24 @@ static void ni_writel(struct comedi_device *dev, uint32_t data, int reg) { if (dev->mmio) writel(data, dev->mmio + reg); - - outl(data, dev->iobase + reg); + else + outl(data, dev->iobase + reg); } static void ni_writew(struct comedi_device *dev, uint16_t data, int reg) { if (dev->mmio) writew(data, dev->mmio + reg); - - outw(data, dev->iobase + reg); + else + outw(data, dev->iobase + reg); } static void ni_writeb(struct comedi_device *dev, uint8_t data, int reg) { if (dev->mmio) writeb(data, dev->mmio + reg); - - outb(data, dev->iobase + reg); + else + outb(data, dev->iobase + reg); } static uint32_t ni_readl(struct comedi_device *dev, int reg) From 583aacb1f69b733e6808625fbc44da8f499c9bfb Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 10 Jan 2016 20:36:12 -0800 Subject: [PATCH 194/797] tty: Fix GPF in flush_to_ldisc(), part 2 commit f33798deecbd59a2955f40ac0ae2bc7dff54c069 upstream. commit 9ce119f318ba ("tty: Fix GPF in flush_to_ldisc()") fixed a GPF caused by a line discipline which does not define a receive_buf() method. However, the vt driver (and speakup driver also) pushes selection data directly to the line discipline receive_buf() method via tty_ldisc_receive_buf(). Fix the same problem in tty_ldisc_receive_buf(). Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/tty.h b/include/linux/tty.h index 6b6e811f4575..3bf03b6b52e9 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -594,7 +594,7 @@ static inline int tty_ldisc_receive_buf(struct tty_ldisc *ld, unsigned char *p, count = ld->ops->receive_buf2(ld->tty, p, f, count); else { count = min_t(int, count, ld->tty->receive_room); - if (count) + if (count && ld->ops->receive_buf) ld->ops->receive_buf(ld->tty, p, f, count); } return count; From 456db805f77c45611caa217996dcb93f24afb2e5 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Jan 2016 17:48:45 -0800 Subject: [PATCH 195/797] net: irda: Fix use-after-free in irtty_open() commit 401879c57f01cbf2da204ad2e8db910525c6dbea upstream. The N_IRDA line discipline may access the previous line discipline's closed and already-fre private data on open [1]. The tty->disc_data field _never_ refers to valid data on entry to the line discipline's open() method. Rather, the ldisc is expected to initialize that field for its own use for the lifetime of the instance (ie. from open() to close() only). [1] ================================================================== BUG: KASAN: use-after-free in irtty_open+0x422/0x550 at addr ffff8800331dd068 Read of size 4 by task a.out/13960 ============================================================================= BUG kmalloc-512 (Tainted: G B ): kasan: bad access detected ----------------------------------------------------------------------------- ... Call Trace: [] __asan_report_load4_noabort+0x3e/0x40 mm/kasan/report.c:279 [] irtty_open+0x422/0x550 drivers/net/irda/irtty-sir.c:436 [] tty_ldisc_open.isra.2+0x60/0xa0 drivers/tty/tty_ldisc.c:447 [] tty_set_ldisc+0x1a0/0x940 drivers/tty/tty_ldisc.c:567 [< inline >] tiocsetd drivers/tty/tty_io.c:2650 [] tty_ioctl+0xace/0x1fd0 drivers/tty/tty_io.c:2883 [< inline >] vfs_ioctl fs/ioctl.c:43 [] do_vfs_ioctl+0x57c/0xe60 fs/ioctl.c:607 [< inline >] SYSC_ioctl fs/ioctl.c:622 [] SyS_ioctl+0x74/0x80 fs/ioctl.c:613 [] entry_SYSCALL_64_fastpath+0x16/0x7a Reported-and-tested-by: Dmitry Vyukov Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/net/irda/irtty-sir.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index 696852eb23c3..7a3f990c1935 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -430,16 +430,6 @@ static int irtty_open(struct tty_struct *tty) /* Module stuff handled via irda_ldisc.owner - Jean II */ - /* First make sure we're not already connected. */ - if (tty->disc_data != NULL) { - priv = tty->disc_data; - if (priv && priv->magic == IRTTY_MAGIC) { - ret = -EEXIST; - goto out; - } - tty->disc_data = NULL; /* ### */ - } - /* stop the underlying driver */ irtty_stop_receiver(tty, TRUE); if (tty->ops->stop) From fc0768092cebd0b70a08f5423263669ea3849ef9 Mon Sep 17 00:00:00 2001 From: Sebastian Frias Date: Fri, 18 Dec 2015 17:40:05 +0100 Subject: [PATCH 196/797] 8250: use callbacks to access UART_DLL/UART_DLM commit 0b41ce991052022c030fd868e03877700220b090 upstream. Some UART HW has a single register combining UART_DLL/UART_DLM (this was probably forgotten in the change that introduced the callbacks, commit b32b19b8ffc05cbd3bf91c65e205f6a912ca15d9) Fixes: b32b19b8ffc0 ("[SERIAL] 8250: set divisor register correctly ...") Signed-off-by: Sebastian Frias Reviewed-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 52d82d2ac726..56ccbcefdd85 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -713,22 +713,16 @@ static int size_fifo(struct uart_8250_port *up) */ static unsigned int autoconfig_read_divisor_id(struct uart_8250_port *p) { - unsigned char old_dll, old_dlm, old_lcr; - unsigned int id; + unsigned char old_lcr; + unsigned int id, old_dl; old_lcr = serial_in(p, UART_LCR); serial_out(p, UART_LCR, UART_LCR_CONF_MODE_A); + old_dl = serial_dl_read(p); + serial_dl_write(p, 0); + id = serial_dl_read(p); + serial_dl_write(p, old_dl); - old_dll = serial_in(p, UART_DLL); - old_dlm = serial_in(p, UART_DLM); - - serial_out(p, UART_DLL, 0); - serial_out(p, UART_DLM, 0); - - id = serial_in(p, UART_DLL) | serial_in(p, UART_DLM) << 8; - - serial_out(p, UART_DLL, old_dll); - serial_out(p, UART_DLM, old_dlm); serial_out(p, UART_LCR, old_lcr); return id; From 4fe401d63051c524e45d2a1c57bf1e34ecc19a7b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 14 Feb 2016 17:51:37 -0200 Subject: [PATCH 197/797] saa7134: Fix bytesperline not being set correctly for planar formats commit 3e71da19f9dc22e39a755d6ae9678661abb66adc upstream. bytesperline should be the bytesperline for the first plane for planar formats, not that of all planes combined. This fixes a crash in xawtv caused by the wrong bpl. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1305389 Reported-and-tested-by: Stas Sergeev Signed-off-by: Hans de Goede Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/saa7134/saa7134-video.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/media/pci/saa7134/saa7134-video.c b/drivers/media/pci/saa7134/saa7134-video.c index 518086c7aed5..15e56c07b217 100644 --- a/drivers/media/pci/saa7134/saa7134-video.c +++ b/drivers/media/pci/saa7134/saa7134-video.c @@ -1219,10 +1219,13 @@ static int saa7134_g_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.height = dev->height; f->fmt.pix.field = dev->field; f->fmt.pix.pixelformat = dev->fmt->fourcc; - f->fmt.pix.bytesperline = - (f->fmt.pix.width * dev->fmt->depth) >> 3; + if (dev->fmt->planar) + f->fmt.pix.bytesperline = f->fmt.pix.width; + else + f->fmt.pix.bytesperline = + (f->fmt.pix.width * dev->fmt->depth) / 8; f->fmt.pix.sizeimage = - f->fmt.pix.height * f->fmt.pix.bytesperline; + (f->fmt.pix.height * f->fmt.pix.width * dev->fmt->depth) / 8; f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; return 0; } @@ -1298,10 +1301,13 @@ static int saa7134_try_fmt_vid_cap(struct file *file, void *priv, if (f->fmt.pix.height > maxh) f->fmt.pix.height = maxh; f->fmt.pix.width &= ~0x03; - f->fmt.pix.bytesperline = - (f->fmt.pix.width * fmt->depth) >> 3; + if (fmt->planar) + f->fmt.pix.bytesperline = f->fmt.pix.width; + else + f->fmt.pix.bytesperline = + (f->fmt.pix.width * fmt->depth) / 8; f->fmt.pix.sizeimage = - f->fmt.pix.height * f->fmt.pix.bytesperline; + (f->fmt.pix.height * f->fmt.pix.width * fmt->depth) / 8; f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; return 0; From 454b8cbea62141cdaee224a118678c890c7aa186 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 10 Feb 2016 09:32:25 -0200 Subject: [PATCH 198/797] adv7511: TX_EDID_PRESENT is still 1 after a disconnect commit b339a72e04a62f0b1882c43492fc712f1176b3e6 upstream. The V4L2_CID_TX_EDID_PRESENT control reports if an EDID is present. The adv7511 however still reported the EDID present after disconnecting the HDMI cable. Fix the logic regarding this control. And when the EDID is disconnected also call ADV7511_EDID_DETECT to notify the bridge driver. This was also missing. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/adv7511.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/media/i2c/adv7511.c b/drivers/media/i2c/adv7511.c index e4900df1140b..c24839cfcc35 100644 --- a/drivers/media/i2c/adv7511.c +++ b/drivers/media/i2c/adv7511.c @@ -1161,12 +1161,23 @@ static void adv7511_dbg_dump_edid(int lvl, int debug, struct v4l2_subdev *sd, in } } +static void adv7511_notify_no_edid(struct v4l2_subdev *sd) +{ + struct adv7511_state *state = get_adv7511_state(sd); + struct adv7511_edid_detect ed; + + /* We failed to read the EDID, so send an event for this. */ + ed.present = false; + ed.segment = adv7511_rd(sd, 0xc4); + v4l2_subdev_notify(sd, ADV7511_EDID_DETECT, (void *)&ed); + v4l2_ctrl_s_ctrl(state->have_edid0_ctrl, 0x0); +} + static void adv7511_edid_handler(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct adv7511_state *state = container_of(dwork, struct adv7511_state, edid_handler); struct v4l2_subdev *sd = &state->sd; - struct adv7511_edid_detect ed; v4l2_dbg(1, debug, sd, "%s:\n", __func__); @@ -1191,9 +1202,7 @@ static void adv7511_edid_handler(struct work_struct *work) } /* We failed to read the EDID, so send an event for this. */ - ed.present = false; - ed.segment = adv7511_rd(sd, 0xc4); - v4l2_subdev_notify(sd, ADV7511_EDID_DETECT, (void *)&ed); + adv7511_notify_no_edid(sd); v4l2_dbg(1, debug, sd, "%s: no edid found\n", __func__); } @@ -1264,7 +1273,6 @@ static void adv7511_check_monitor_present_status(struct v4l2_subdev *sd) /* update read only ctrls */ v4l2_ctrl_s_ctrl(state->hotplug_ctrl, adv7511_have_hotplug(sd) ? 0x1 : 0x0); v4l2_ctrl_s_ctrl(state->rx_sense_ctrl, adv7511_have_rx_sense(sd) ? 0x1 : 0x0); - v4l2_ctrl_s_ctrl(state->have_edid0_ctrl, state->edid.segments ? 0x1 : 0x0); if ((status & MASK_ADV7511_HPD_DETECT) && ((status & MASK_ADV7511_MSEN_DETECT) || state->edid.segments)) { v4l2_dbg(1, debug, sd, "%s: hotplug and (rx-sense or edid)\n", __func__); @@ -1294,6 +1302,7 @@ static void adv7511_check_monitor_present_status(struct v4l2_subdev *sd) } adv7511_s_power(sd, false); memset(&state->edid, 0, sizeof(struct adv7511_state_edid)); + adv7511_notify_no_edid(sd); } } @@ -1370,6 +1379,7 @@ static bool adv7511_check_edid_status(struct v4l2_subdev *sd) } /* one more segment read ok */ state->edid.segments = segment + 1; + v4l2_ctrl_s_ctrl(state->have_edid0_ctrl, 0x1); if (((state->edid.data[0x7e] >> 1) + 1) > state->edid.segments) { /* Request next EDID segment */ v4l2_dbg(1, debug, sd, "%s: request segment %d\n", __func__, state->edid.segments); @@ -1389,7 +1399,6 @@ static bool adv7511_check_edid_status(struct v4l2_subdev *sd) ed.present = true; ed.segment = 0; state->edid_detect_counter++; - v4l2_ctrl_s_ctrl(state->have_edid0_ctrl, state->edid.segments ? 0x1 : 0x0); v4l2_subdev_notify(sd, ADV7511_EDID_DETECT, (void *)&ed); return ed.present; } From 3c26bcd82af503a92ae4e10223707f2b51ec3cd9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 7 Feb 2016 09:24:29 -0200 Subject: [PATCH 199/797] bttv: Width must be a multiple of 16 when capturing planar formats commit 5c915c68763889f0183a1cc61c84bb228b60124a upstream. On my bttv card "Hauppauge WinTV [card=10]" capturing in YV12 fmt at max size results in a solid green rectangle being captured (all colors 0 in YUV). This turns out to be caused by max-width (924) not being a multiple of 16. We've likely never hit this problem before since normally xawtv / tvtime, etc. will prefer packed pixel formats. But when using a video card which is using xf86-video-modesetting + glamor, only planar XVideo fmts are available, and xawtv will chose a matching capture format to avoid needing to do conversion, triggering the solid green window problem. Signed-off-by: Hans de Goede Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/bt8xx/bttv-driver.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/media/pci/bt8xx/bttv-driver.c b/drivers/media/pci/bt8xx/bttv-driver.c index 15a4ebc2844d..51dbef2f9a48 100644 --- a/drivers/media/pci/bt8xx/bttv-driver.c +++ b/drivers/media/pci/bt8xx/bttv-driver.c @@ -2334,6 +2334,19 @@ static int bttv_g_fmt_vid_overlay(struct file *file, void *priv, return 0; } +static void bttv_get_width_mask_vid_cap(const struct bttv_format *fmt, + unsigned int *width_mask, + unsigned int *width_bias) +{ + if (fmt->flags & FORMAT_FLAGS_PLANAR) { + *width_mask = ~15; /* width must be a multiple of 16 pixels */ + *width_bias = 8; /* nearest */ + } else { + *width_mask = ~3; /* width must be a multiple of 4 pixels */ + *width_bias = 2; /* nearest */ + } +} + static int bttv_try_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { @@ -2343,6 +2356,7 @@ static int bttv_try_fmt_vid_cap(struct file *file, void *priv, enum v4l2_field field; __s32 width, height; __s32 height2; + unsigned int width_mask, width_bias; int rc; fmt = format_by_fourcc(f->fmt.pix.pixelformat); @@ -2375,9 +2389,9 @@ static int bttv_try_fmt_vid_cap(struct file *file, void *priv, width = f->fmt.pix.width; height = f->fmt.pix.height; + bttv_get_width_mask_vid_cap(fmt, &width_mask, &width_bias); rc = limit_scaled_size_lock(fh, &width, &height, field, - /* width_mask: 4 pixels */ ~3, - /* width_bias: nearest */ 2, + width_mask, width_bias, /* adjust_size */ 1, /* adjust_crop */ 0); if (0 != rc) @@ -2410,6 +2424,7 @@ static int bttv_s_fmt_vid_cap(struct file *file, void *priv, struct bttv_fh *fh = priv; struct bttv *btv = fh->btv; __s32 width, height; + unsigned int width_mask, width_bias; enum v4l2_field field; retval = bttv_switch_type(fh, f->type); @@ -2424,9 +2439,10 @@ static int bttv_s_fmt_vid_cap(struct file *file, void *priv, height = f->fmt.pix.height; field = f->fmt.pix.field; + fmt = format_by_fourcc(f->fmt.pix.pixelformat); + bttv_get_width_mask_vid_cap(fmt, &width_mask, &width_bias); retval = limit_scaled_size_lock(fh, &width, &height, f->fmt.pix.field, - /* width_mask: 4 pixels */ ~3, - /* width_bias: nearest */ 2, + width_mask, width_bias, /* adjust_size */ 1, /* adjust_crop */ 1); if (0 != retval) @@ -2434,8 +2450,6 @@ static int bttv_s_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.field = field; - fmt = format_by_fourcc(f->fmt.pix.pixelformat); - /* update our state informations */ fh->fmt = fmt; fh->cap.field = f->fmt.pix.field; From 6aed423a0e54306003228d3b68196c542af6cd59 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 4 Jan 2016 17:30:09 -0200 Subject: [PATCH 200/797] coda: fix first encoded frame payload commit 74dc385cb450089b28c28be2c8a0baca296b95f9 upstream. During the recent vb2_buffer restructuring, the calculation of the buffer payload reported to userspace was accidentally broken for the first encoded frame, counting only the length of the headers. This patch re-adds the length of the actual frame data. Fixes: 2d7007153f0c ("[media] media: videobuf2: Restructure vb2_buffer") Reported-by: Michael Olbrich Signed-off-by: Philipp Zabel Tested-by: Jan Luebbe Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/coda/coda-bit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/coda/coda-bit.c b/drivers/media/platform/coda/coda-bit.c index 654e964f84a2..d76511c1c1e3 100644 --- a/drivers/media/platform/coda/coda-bit.c +++ b/drivers/media/platform/coda/coda-bit.c @@ -1342,7 +1342,7 @@ static void coda_finish_encode(struct coda_ctx *ctx) /* Calculate bytesused field */ if (dst_buf->sequence == 0) { - vb2_set_plane_payload(&dst_buf->vb2_buf, 0, + vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr + ctx->vpu_header_size[0] + ctx->vpu_header_size[1] + ctx->vpu_header_size[2]); From 8c1fa99764bd76be5707447b40aa0806acfc61bf Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Tue, 19 Jan 2016 05:56:50 -0200 Subject: [PATCH 201/797] media: v4l2-compat-ioctl32: fix missing length copy in put_v4l2_buffer32 commit 7df5ab8774aa383c6d2bff00688d004585d96dfd upstream. In v4l2-compliance utility, test QUERYBUF required correct length value to go through each planar to check planar's length in multi-planar buffer type Signed-off-by: Tiffany Lin Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 327e83ac2469..f38c076752ce 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -415,7 +415,8 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user get_user(kp->index, &up->index) || get_user(kp->type, &up->type) || get_user(kp->flags, &up->flags) || - get_user(kp->memory, &up->memory)) + get_user(kp->memory, &up->memory) || + get_user(kp->length, &up->length)) return -EFAULT; if (V4L2_TYPE_IS_OUTPUT(kp->type)) @@ -427,9 +428,6 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user return -EFAULT; if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { - if (get_user(kp->length, &up->length)) - return -EFAULT; - num_planes = kp->length; if (num_planes == 0) { kp->m.planes = NULL; @@ -462,16 +460,14 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user } else { switch (kp->memory) { case V4L2_MEMORY_MMAP: - if (get_user(kp->length, &up->length) || - get_user(kp->m.offset, &up->m.offset)) + if (get_user(kp->m.offset, &up->m.offset)) return -EFAULT; break; case V4L2_MEMORY_USERPTR: { compat_long_t tmp; - if (get_user(kp->length, &up->length) || - get_user(tmp, &up->m.userptr)) + if (get_user(tmp, &up->m.userptr)) return -EFAULT; kp->m.userptr = (unsigned long)compat_ptr(tmp); @@ -513,7 +509,8 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user copy_to_user(&up->timecode, &kp->timecode, sizeof(struct v4l2_timecode)) || put_user(kp->sequence, &up->sequence) || put_user(kp->reserved2, &up->reserved2) || - put_user(kp->reserved, &up->reserved)) + put_user(kp->reserved, &up->reserved) || + put_user(kp->length, &up->length)) return -EFAULT; if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { @@ -536,13 +533,11 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user } else { switch (kp->memory) { case V4L2_MEMORY_MMAP: - if (put_user(kp->length, &up->length) || - put_user(kp->m.offset, &up->m.offset)) + if (put_user(kp->m.offset, &up->m.offset)) return -EFAULT; break; case V4L2_MEMORY_USERPTR: - if (put_user(kp->length, &up->length) || - put_user(kp->m.userptr, &up->m.userptr)) + if (put_user(kp->m.userptr, &up->m.userptr)) return -EFAULT; break; case V4L2_MEMORY_OVERLAY: From ef87aef51d0de4220bf9c4de34de89f2716b3398 Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Wed, 24 Feb 2016 21:17:32 -0800 Subject: [PATCH 202/797] mtip32xx: Avoid issuing standby immediate cmd during FTL rebuild commit d8a18d2d8f5de55666c6011ed175939d22c8e3d8 upstream. Prevent standby immediate command from being issued in remove, suspend and shutdown paths, while drive is in FTL rebuild process. Signed-off-by: Selvan Mani Signed-off-by: Vignesh Gunasekaran Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/mtip32xx/mtip32xx.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 3457ac8c03e2..deb0c761d5db 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3270,20 +3270,25 @@ static int mtip_hw_init(struct driver_data *dd) return rv; } -static void mtip_standby_drive(struct driver_data *dd) +static int mtip_standby_drive(struct driver_data *dd) { - if (dd->sr) - return; + int rv = 0; + if (dd->sr || !dd->port) + return -ENODEV; /* * Send standby immediate (E0h) to the drive so that it * saves its state. */ if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags) && - !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag)) - if (mtip_standby_immediate(dd->port)) + !test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag) && + !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag)) { + rv = mtip_standby_immediate(dd->port); + if (rv) dev_warn(&dd->pdev->dev, "STANDBY IMMEDIATE failed\n"); + } + return rv; } /* @@ -3341,8 +3346,7 @@ static int mtip_hw_shutdown(struct driver_data *dd) * Send standby immediate (E0h) to the drive so that it * saves its state. */ - if (!dd->sr && dd->port) - mtip_standby_immediate(dd->port); + mtip_standby_drive(dd); return 0; } @@ -3365,7 +3369,7 @@ static int mtip_hw_suspend(struct driver_data *dd) * Send standby immediate (E0h) to the drive * so that it saves its state. */ - if (mtip_standby_immediate(dd->port) != 0) { + if (mtip_standby_drive(dd) != 0) { dev_err(&dd->pdev->dev, "Failed standby-immediate command\n"); return -EFAULT; From 1b899eb4833d3394f37272d38b4b1a26eac30feb Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Wed, 24 Feb 2016 21:16:00 -0800 Subject: [PATCH 203/797] mtip32xx: Fix broken service thread handling commit cfc05bd31384c4898bf2437a4de5557f3cf9803a upstream. Service thread does not detect the need for taskfile error hanlding. Fixed the flag condition to process taskfile error. Signed-off-by: Selvan Mani Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/mtip32xx/mtip32xx.c | 6 +++--- drivers/block/mtip32xx/mtip32xx.h | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index deb0c761d5db..de4d965139ed 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2924,9 +2924,7 @@ static int mtip_service_thread(void *data) * is in progress nor error handling is active */ wait_event_interruptible(port->svc_wait, (port->flags) && - !(port->flags & MTIP_PF_PAUSE_IO)); - - set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags); + (port->flags & MTIP_PF_SVC_THD_WORK)); if (kthread_should_stop() || test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags)) @@ -2936,6 +2934,8 @@ static int mtip_service_thread(void *data) &dd->dd_flag))) goto st_out; + set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags); + restart_eh: /* Demux bits: start with error handling */ if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags)) { diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 3274784008eb..8635239c521f 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -144,6 +144,11 @@ enum { MTIP_PF_REBUILD_BIT = 6, MTIP_PF_SVC_THD_STOP_BIT = 8, + MTIP_PF_SVC_THD_WORK = ((1 << MTIP_PF_EH_ACTIVE_BIT) | + (1 << MTIP_PF_ISSUE_CMDS_BIT) | + (1 << MTIP_PF_REBUILD_BIT) | + (1 << MTIP_PF_SVC_THD_STOP_BIT)), + /* below are bit numbers in 'dd_flag' defined in driver_data */ MTIP_DDF_SEC_LOCK_BIT = 0, MTIP_DDF_REMOVE_PENDING_BIT = 1, From 828e9f2e8aa20881e1fc46152590d09520161ef8 Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Wed, 24 Feb 2016 21:16:21 -0800 Subject: [PATCH 204/797] mtip32xx: Remove unwanted code from taskfile error handler commit e35b94738a2f7caa12017f69ef385cb6b8028965 upstream. Remove setting and clearing MTIP_PF_EH_ACTIVE_BIT flag in mtip_handle_tfe() as they are redundant. Also avoid waking up service thread from mtip_handle_tfe() because it is already woken up in case of taskfile error. Signed-off-by: Selvan Mani Signed-off-by: Rajesh Kumar Sambandam Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/mtip32xx/mtip32xx.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index de4d965139ed..a9cc83d6e185 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -618,8 +618,6 @@ static void mtip_handle_tfe(struct driver_data *dd) port = dd->port; - set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); - if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n"); @@ -628,7 +626,7 @@ static void mtip_handle_tfe(struct driver_data *dd) cmd->comp_func(port, MTIP_TAG_INTERNAL, cmd, PORT_IRQ_TF_ERR); } - goto handle_tfe_exit; + return; } /* clear the tag accumulator */ @@ -771,11 +769,6 @@ static void mtip_handle_tfe(struct driver_data *dd) } } print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt); - -handle_tfe_exit: - /* clear eh_active */ - clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); - wake_up_interruptible(&port->svc_wait); } /* From 3d58a02e4fdad9cb7ce0799363eaa54c012fcd33 Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Wed, 24 Feb 2016 21:16:38 -0800 Subject: [PATCH 205/797] mtip32xx: Print exact time when an internal command is interrupted commit 5b7e0a8ac85e2dfd83830dc9e0b3554d153a37e3 upstream. Print exact time when an internal command is interrupted. Signed-off-by: Selvan Mani Signed-off-by: Rajesh Kumar Sambandam Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/mtip32xx/mtip32xx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index a9cc83d6e185..27feff9a5604 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -1092,6 +1092,7 @@ static int mtip_exec_internal_command(struct mtip_port *port, struct mtip_cmd *int_cmd; struct driver_data *dd = port->dd; int rv = 0; + unsigned long start; /* Make sure the buffer is 8 byte aligned. This is asic specific. */ if (buffer & 0x00000007) { @@ -1155,6 +1156,8 @@ static int mtip_exec_internal_command(struct mtip_port *port, /* Populate the command header */ int_cmd->command_header->byte_count = 0; + start = jiffies; + /* Issue the command to the hardware */ mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL); @@ -1165,8 +1168,9 @@ static int mtip_exec_internal_command(struct mtip_port *port, msecs_to_jiffies(timeout))) <= 0) { if (rv == -ERESTARTSYS) { /* interrupted */ dev_err(&dd->pdev->dev, - "Internal command [%02X] was interrupted after %lu ms\n", - fis->command, timeout); + "Internal command [%02X] was interrupted after %u ms\n", + fis->command, + jiffies_to_msecs(jiffies - start)); rv = -EINTR; goto exec_ic_exit; } else if (rv == 0) /* timeout */ From e241a8dab98aff4d79d36e8cc71c4487c909bdd5 Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Wed, 24 Feb 2016 21:17:47 -0800 Subject: [PATCH 206/797] mtip32xx: Fix for rmmod crash when drive is in FTL rebuild commit 59cf70e236c96594d9f1e065755d8fce9df5356b upstream. When FTL rebuild is in progress, alloc_disk() initializes the disk but device node will be created by add_disk() only after successful completion of FTL rebuild. So, skip deletion of device node in removal path when FTL rebuild is in progress. Signed-off-by: Selvan Mani Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/mtip32xx/mtip32xx.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 27feff9a5604..47d96bdf5f9e 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2975,10 +2975,8 @@ static int mtip_service_thread(void *data) } if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) { - if (mtip_ftl_rebuild_poll(dd) < 0) - set_bit(MTIP_DDF_REBUILD_FAILED_BIT, - &dd->dd_flag); - clear_bit(MTIP_PF_REBUILD_BIT, &port->flags); + if (mtip_ftl_rebuild_poll(dd) == 0) + clear_bit(MTIP_PF_REBUILD_BIT, &port->flags); } } @@ -3858,7 +3856,6 @@ static int mtip_block_initialize(struct driver_data *dd) mtip_hw_debugfs_init(dd); -skip_create_disk: memset(&dd->tags, 0, sizeof(dd->tags)); dd->tags.ops = &mtip_mq_ops; dd->tags.nr_hw_queues = 1; @@ -3888,6 +3885,7 @@ static int mtip_block_initialize(struct driver_data *dd) dd->disk->queue = dd->queue; dd->queue->queuedata = dd; +skip_create_disk: /* Initialize the protocol layer. */ wait_for_rebuild = mtip_hw_get_identify(dd); if (wait_for_rebuild < 0) { @@ -4048,7 +4046,8 @@ static int mtip_block_remove(struct driver_data *dd) dd->bdev = NULL; } if (dd->disk) { - del_gendisk(dd->disk); + if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) + del_gendisk(dd->disk); if (dd->disk->queue) { blk_cleanup_queue(dd->queue); blk_mq_free_tag_set(&dd->tags); @@ -4089,7 +4088,8 @@ static int mtip_block_shutdown(struct driver_data *dd) dev_info(&dd->pdev->dev, "Shutting down %s ...\n", dd->disk->disk_name); - del_gendisk(dd->disk); + if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) + del_gendisk(dd->disk); if (dd->disk->queue) { blk_cleanup_queue(dd->queue); blk_mq_free_tag_set(&dd->tags); From e2e6e22464ac6fb53e60d74ddf02fb829959ab9c Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Wed, 24 Feb 2016 21:18:10 -0800 Subject: [PATCH 207/797] mtip32xx: Handle safe removal during IO commit 51c6570eb922146470c2fe660c34585414679bd6 upstream. Flush inflight IOs using fsync_bdev() when the device is safely removed. Also, block further IOs in device open function. Signed-off-by: Selvan Mani Signed-off-by: Rajesh Kumar Sambandam Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/mtip32xx/mtip32xx.c | 34 +++++++++++++++++++++++++++++-- drivers/block/mtip32xx/mtip32xx.h | 1 + 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 47d96bdf5f9e..cda3efecab8d 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3602,6 +3602,28 @@ static int mtip_block_getgeo(struct block_device *dev, return 0; } +static int mtip_block_open(struct block_device *dev, fmode_t mode) +{ + struct driver_data *dd; + + if (dev && dev->bd_disk) { + dd = (struct driver_data *) dev->bd_disk->private_data; + + if (dd) { + if (test_bit(MTIP_DDF_REMOVAL_BIT, + &dd->dd_flag)) { + return -ENODEV; + } + return 0; + } + } + return -ENODEV; +} + +void mtip_block_release(struct gendisk *disk, fmode_t mode) +{ +} + /* * Block device operation function. * @@ -3609,6 +3631,8 @@ static int mtip_block_getgeo(struct block_device *dev, * layer. */ static const struct block_device_operations mtip_block_ops = { + .open = mtip_block_open, + .release = mtip_block_release, .ioctl = mtip_block_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = mtip_block_compat_ioctl, @@ -4434,7 +4458,7 @@ static void mtip_pci_remove(struct pci_dev *pdev) struct driver_data *dd = pci_get_drvdata(pdev); unsigned long flags, to; - set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag); + set_bit(MTIP_DDF_REMOVAL_BIT, &dd->dd_flag); spin_lock_irqsave(&dev_lock, flags); list_del_init(&dd->online_list); @@ -4451,12 +4475,18 @@ static void mtip_pci_remove(struct pci_dev *pdev) } while (atomic_read(&dd->irq_workers_active) != 0 && time_before(jiffies, to)); + fsync_bdev(dd->bdev); + if (atomic_read(&dd->irq_workers_active) != 0) { dev_warn(&dd->pdev->dev, "Completion workers still active!\n"); } - blk_mq_stop_hw_queues(dd->queue); + if (dd->sr) + blk_mq_stop_hw_queues(dd->queue); + + set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag); + /* Clean up the block layer. */ mtip_block_remove(dd); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 8635239c521f..50af742421e2 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -158,6 +158,7 @@ enum { MTIP_DDF_RESUME_BIT = 6, MTIP_DDF_INIT_DONE_BIT = 7, MTIP_DDF_REBUILD_FAILED_BIT = 8, + MTIP_DDF_REMOVAL_BIT = 9, MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | (1 << MTIP_DDF_SEC_LOCK_BIT) | From f75d029f84a7c85a0d0875506c823ba50b10b3a1 Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Wed, 24 Feb 2016 21:18:20 -0800 Subject: [PATCH 208/797] mtip32xx: Handle FTL rebuild failure state during device initialization commit aae4a033868c496adae86fc6f9c3e0c405bbf360 upstream. Allow device initialization to finish gracefully when it is in FTL rebuild failure state. Also, recover device out of this state after successfully secure erasing it. Signed-off-by: Selvan Mani Signed-off-by: Vignesh Gunasekaran Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/mtip32xx/mtip32xx.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index cda3efecab8d..6bec93997948 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -699,7 +699,7 @@ static void mtip_handle_tfe(struct driver_data *dd) fail_reason = "thermal shutdown"; } if (buf[288] == 0xBF) { - set_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag); + set_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag); dev_info(&dd->pdev->dev, "Drive indicates rebuild has failed. Secure erase required.\n"); fail_all_ncq_cmds = 1; @@ -1000,6 +1000,7 @@ static bool mtip_pause_ncq(struct mtip_port *port, (fis->features == 0x27 || fis->features == 0x72 || fis->features == 0x62 || fis->features == 0x26))) { clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); + clear_bit(MTIP_DDF_REBUILD_FAILED_BIT, &port->dd->dd_flag); /* Com reset after secure erase or lowlevel format */ mtip_restart_port(port); clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); @@ -1166,6 +1167,7 @@ static int mtip_exec_internal_command(struct mtip_port *port, if ((rv = wait_for_completion_interruptible_timeout( &wait, msecs_to_jiffies(timeout))) <= 0) { + if (rv == -ERESTARTSYS) { /* interrupted */ dev_err(&dd->pdev->dev, "Internal command [%02X] was interrupted after %u ms\n", @@ -3091,7 +3093,7 @@ static int mtip_hw_get_identify(struct driver_data *dd) if (buf[288] == 0xBF) { dev_info(&dd->pdev->dev, "Drive indicates rebuild has failed.\n"); - /* TODO */ + set_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag); } } @@ -3694,10 +3696,9 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq) rq_data_dir(rq))) { return -ENODATA; } - if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))) + if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag) || + test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag))) return -ENODATA; - if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) - return -ENXIO; } if (rq->cmd_flags & REQ_DISCARD) { From d45d26e491c01c98934a7d941343df8477cad38f Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Wed, 24 Feb 2016 21:21:13 -0800 Subject: [PATCH 209/797] mtip32xx: Implement timeout handler commit abb0ccd185c9e31847709b86192e6c815d1f57ad upstream. Added timeout handler. Replaced blk_mq_end_request() with blk_mq_complete_request() to avoid double completion of a request. Signed-off-by: Selvan Mani Signed-off-by: Rajesh Kumar Sambandam Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/mtip32xx/mtip32xx.c | 95 ++++++++++++++++++++++++++++--- drivers/block/mtip32xx/mtip32xx.h | 7 ++- 2 files changed, 92 insertions(+), 10 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 6bec93997948..2a9001edce1d 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -233,15 +233,9 @@ static void mtip_async_complete(struct mtip_port *port, "Command tag %d failed due to TFE\n", tag); } - /* Unmap the DMA scatter list entries */ - dma_unmap_sg(&dd->pdev->dev, cmd->sg, cmd->scatter_ents, cmd->direction); - rq = mtip_rq_from_tag(dd, tag); - if (unlikely(cmd->unaligned)) - up(&port->cmd_slot_unal); - - blk_mq_end_request(rq, status ? -EIO : 0); + blk_mq_complete_request(rq, status); } /* @@ -2896,6 +2890,42 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd) return -EFAULT; } +static void mtip_softirq_done_fn(struct request *rq) +{ + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct driver_data *dd = rq->q->queuedata; + + /* Unmap the DMA scatter list entries */ + dma_unmap_sg(&dd->pdev->dev, cmd->sg, cmd->scatter_ents, + cmd->direction); + + if (unlikely(cmd->unaligned)) + up(&dd->port->cmd_slot_unal); + + blk_mq_end_request(rq, rq->errors); +} + +static void mtip_abort_cmd(struct request *req, void *data, + bool reserved) +{ + struct driver_data *dd = data; + + dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag); + + clear_bit(req->tag, dd->port->cmds_to_issue); + req->errors = -EIO; + mtip_softirq_done_fn(req); +} + +static void mtip_queue_cmd(struct request *req, void *data, + bool reserved) +{ + struct driver_data *dd = data; + + set_bit(req->tag, dd->port->cmds_to_issue); + blk_abort_request(req); +} + /* * service thread to issue queued commands * @@ -2908,7 +2938,7 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd) static int mtip_service_thread(void *data) { struct driver_data *dd = (struct driver_data *)data; - unsigned long slot, slot_start, slot_wrap; + unsigned long slot, slot_start, slot_wrap, to; unsigned int num_cmd_slots = dd->slot_groups * 32; struct mtip_port *port = dd->port; @@ -2945,6 +2975,32 @@ static int mtip_service_thread(void *data) if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags)) goto restart_eh; + if (test_bit(MTIP_PF_TO_ACTIVE_BIT, &port->flags)) { + to = jiffies + msecs_to_jiffies(5000); + + do { + mdelay(100); + } while (atomic_read(&dd->irq_workers_active) != 0 && + time_before(jiffies, to)); + + if (atomic_read(&dd->irq_workers_active) != 0) + dev_warn(&dd->pdev->dev, + "Completion workers still active!"); + + spin_lock(dd->queue->queue_lock); + blk_mq_all_tag_busy_iter(*dd->tags.tags, + mtip_queue_cmd, dd); + spin_unlock(dd->queue->queue_lock); + + set_bit(MTIP_PF_ISSUE_CMDS_BIT, &dd->port->flags); + + if (mtip_device_reset(dd)) + blk_mq_all_tag_busy_iter(*dd->tags.tags, + mtip_abort_cmd, dd); + + clear_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags); + } + if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) { slot = 1; /* used to restrict the loop to one iteration */ @@ -3810,11 +3866,33 @@ static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx, return 0; } +static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req, + bool reserved) +{ + struct driver_data *dd = req->q->queuedata; + int ret = BLK_EH_RESET_TIMER; + + if (reserved) + goto exit_handler; + + if (test_bit(req->tag, dd->port->cmds_to_issue)) + goto exit_handler; + + if (test_and_set_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags)) + goto exit_handler; + + wake_up_interruptible(&dd->port->svc_wait); +exit_handler: + return ret; +} + static struct blk_mq_ops mtip_mq_ops = { .queue_rq = mtip_queue_rq, .map_queue = blk_mq_map_queue, .init_request = mtip_init_cmd, .exit_request = mtip_free_cmd, + .complete = mtip_softirq_done_fn, + .timeout = mtip_cmd_timeout, }; /* @@ -3890,6 +3968,7 @@ static int mtip_block_initialize(struct driver_data *dd) dd->tags.numa_node = dd->numa_node; dd->tags.flags = BLK_MQ_F_SHOULD_MERGE; dd->tags.driver_data = dd; + dd->tags.timeout = MTIP_NCQ_CMD_TIMEOUT_MS; rv = blk_mq_alloc_tag_set(&dd->tags); if (rv) { diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 50af742421e2..7617888f7944 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -134,10 +134,12 @@ enum { MTIP_PF_EH_ACTIVE_BIT = 1, /* error handling */ MTIP_PF_SE_ACTIVE_BIT = 2, /* secure erase */ MTIP_PF_DM_ACTIVE_BIT = 3, /* download microcde */ + MTIP_PF_TO_ACTIVE_BIT = 9, /* timeout handling */ MTIP_PF_PAUSE_IO = ((1 << MTIP_PF_IC_ACTIVE_BIT) | (1 << MTIP_PF_EH_ACTIVE_BIT) | (1 << MTIP_PF_SE_ACTIVE_BIT) | - (1 << MTIP_PF_DM_ACTIVE_BIT)), + (1 << MTIP_PF_DM_ACTIVE_BIT) | + (1 << MTIP_PF_TO_ACTIVE_BIT)), MTIP_PF_SVC_THD_ACTIVE_BIT = 4, MTIP_PF_ISSUE_CMDS_BIT = 5, @@ -147,7 +149,8 @@ enum { MTIP_PF_SVC_THD_WORK = ((1 << MTIP_PF_EH_ACTIVE_BIT) | (1 << MTIP_PF_ISSUE_CMDS_BIT) | (1 << MTIP_PF_REBUILD_BIT) | - (1 << MTIP_PF_SVC_THD_STOP_BIT)), + (1 << MTIP_PF_SVC_THD_STOP_BIT) | + (1 << MTIP_PF_TO_ACTIVE_BIT)), /* below are bit numbers in 'dd_flag' defined in driver_data */ MTIP_DDF_SEC_LOCK_BIT = 0, From 2c46344a83ea7265d391e978711c07fc6380d8d7 Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Wed, 24 Feb 2016 21:21:20 -0800 Subject: [PATCH 210/797] mtip32xx: Cleanup queued requests after surprise removal commit 008e56d200225321371748d95908e6222436f06d upstream. Fail all pending requests after surprise removal of a drive. Signed-off-by: Vignesh Gunasekaran Signed-off-by: Selvan Mani Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/mtip32xx/mtip32xx.c | 78 ++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 18 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 2a9001edce1d..55d3d1da72de 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -173,7 +173,13 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) { struct request *rq; + if (mtip_check_surprise_removal(dd->pdev)) + return NULL; + rq = blk_mq_alloc_request(dd->queue, 0, __GFP_RECLAIM, true); + if (IS_ERR(rq)) + return NULL; + return blk_mq_rq_to_pdu(rq); } @@ -575,6 +581,8 @@ static void mtip_completion(struct mtip_port *port, dev_warn(&port->dd->pdev->dev, "Internal command %d completed with TFE\n", tag); + command->comp_func = NULL; + command->comp_data = NULL; complete(waiting); } @@ -1009,12 +1017,14 @@ static bool mtip_pause_ncq(struct mtip_port *port, * * @port Pointer to port data structure * @timeout Max duration to wait (ms) + * @atomic gfp_t flag to indicate blockable context or not * * return value * 0 Success * -EBUSY Commands still active */ -static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) +static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout, + gfp_t atomic) { unsigned long to; unsigned int n; @@ -1025,16 +1035,21 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) to = jiffies + msecs_to_jiffies(timeout); do { if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) && - test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) { + test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags) && + atomic == GFP_KERNEL) { msleep(20); continue; /* svc thd is actively issuing commands */ } - msleep(100); + if (atomic == GFP_KERNEL) + msleep(100); + else { + cpu_relax(); + udelay(100); + } + if (mtip_check_surprise_removal(port->dd->pdev)) goto err_fault; - if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag)) - goto err_fault; /* * Ignore s_active bit 0 of array element 0. @@ -1096,6 +1111,10 @@ static int mtip_exec_internal_command(struct mtip_port *port, } int_cmd = mtip_get_int_command(dd); + if (!int_cmd) { + dbg_printk(MTIP_DRV_NAME "Unable to allocate tag for PIO cmd\n"); + return -EFAULT; + } set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); @@ -1108,7 +1127,7 @@ static int mtip_exec_internal_command(struct mtip_port *port, if (fis->command != ATA_CMD_STANDBYNOW1) { /* wait for io to complete if non atomic */ if (mtip_quiesce_io(port, - MTIP_QUIESCE_IO_TIMEOUT_MS) < 0) { + MTIP_QUIESCE_IO_TIMEOUT_MS, atomic) < 0) { dev_warn(&dd->pdev->dev, "Failed to quiesce IO\n"); mtip_put_int_command(dd, int_cmd); @@ -3354,10 +3373,6 @@ static int mtip_standby_drive(struct driver_data *dd) */ static int mtip_hw_exit(struct driver_data *dd) { - /* - * Send standby immediate (E0h) to the drive so that it - * saves its state. - */ if (!dd->sr) { /* de-initialize the port. */ mtip_deinit_port(dd->port); @@ -3974,7 +3989,7 @@ static int mtip_block_initialize(struct driver_data *dd) if (rv) { dev_err(&dd->pdev->dev, "Unable to allocate request queue\n"); - goto block_queue_alloc_init_error; + goto block_queue_alloc_tag_error; } /* Allocate the request queue. */ @@ -4086,8 +4101,9 @@ static int mtip_block_initialize(struct driver_data *dd) read_capacity_error: init_hw_cmds_error: blk_cleanup_queue(dd->queue); - blk_mq_free_tag_set(&dd->tags); block_queue_alloc_init_error: + blk_mq_free_tag_set(&dd->tags); +block_queue_alloc_tag_error: mtip_hw_debugfs_exit(dd); disk_index_error: spin_lock(&rssd_index_lock); @@ -4104,6 +4120,22 @@ static int mtip_block_initialize(struct driver_data *dd) return rv; } +static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) +{ + struct driver_data *dd = (struct driver_data *)data; + struct mtip_cmd *cmd; + + if (likely(!reserv)) + blk_mq_complete_request(rq, -ENODEV); + else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) { + + cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); + if (cmd->comp_func) + cmd->comp_func(dd->port, MTIP_TAG_INTERNAL, + cmd, -ENODEV); + } +} + /* * Block layer deinitialization function. * @@ -4135,12 +4167,23 @@ static int mtip_block_remove(struct driver_data *dd) } } - if (!dd->sr) - mtip_standby_drive(dd); + if (!dd->sr) { + /* + * Explicitly wait here for IOs to quiesce, + * as mtip_standby_drive usually won't wait for IOs. + */ + if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS, + GFP_KERNEL)) + mtip_standby_drive(dd); + } else dev_info(&dd->pdev->dev, "device %s surprise removal\n", dd->disk->disk_name); + blk_mq_freeze_queue_start(dd->queue); + blk_mq_stop_hw_queues(dd->queue); + blk_mq_all_tag_busy_iter(dd->tags.tags[0], mtip_no_dev_cleanup, dd); + /* * Delete our gendisk structure. This also removes the device * from /dev @@ -4555,16 +4598,15 @@ static void mtip_pci_remove(struct pci_dev *pdev) } while (atomic_read(&dd->irq_workers_active) != 0 && time_before(jiffies, to)); - fsync_bdev(dd->bdev); + if (!dd->sr) + fsync_bdev(dd->bdev); if (atomic_read(&dd->irq_workers_active) != 0) { dev_warn(&dd->pdev->dev, "Completion workers still active!\n"); } - if (dd->sr) - blk_mq_stop_hw_queues(dd->queue); - + blk_set_queue_dying(dd->queue); set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag); /* Clean up the block layer. */ From bbb4bee779b3301a3ab7924f85f38b96bdf1fd4a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 10 Mar 2016 11:33:43 +0100 Subject: [PATCH 211/797] ALSA: hda - Apply reboot D3 fix for CX20724 codec, too commit 56dc66ff1c6d71f9a38c4a7c000b72b921fe4c89 upstream. Just like CX20722, CX7024 codec also requires the power down at reboot in order to reduce the noise at reboot/shutdown. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=113511 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index ef198903c0c3..600af5878e75 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -204,8 +204,13 @@ static void cx_auto_reboot_notify(struct hda_codec *codec) { struct conexant_spec *spec = codec->spec; - if (codec->core.vendor_id != 0x14f150f2) + switch (codec->core.vendor_id) { + case 0x14f150f2: /* CX20722 */ + case 0x14f150f4: /* CX20724 */ + break; + default: return; + } /* Turn the CX20722 codec into D3 to avoid spurious noises from the internal speaker during (and after) reboot */ From ae8168541087b26f946f31555e977c146aba637a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 10 Mar 2016 20:56:20 +0100 Subject: [PATCH 212/797] ALSA: pcm: Avoid "BUG:" string for warnings again commit 0ab1ace856205d10cbc1924b2d931c01ffd216a6 upstream. The commit [d507941beb1e: ALSA: pcm: Correct PCM BUG error message] made the warning prefix back to "BUG:" due to its previous wrong prefix. But a kernel message containing "BUG:" seems taken as an Oops message wrongly by some brain-dead daemons, and it annoys users in the end. Instead of teaching daemons, change the string again to a more reasonable one. Fixes: 507941beb1e ('ALSA: pcm: Correct PCM BUG error message') Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 6b5a811e01a5..3a9b66c6e09c 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -322,7 +322,7 @@ static int snd_pcm_update_hw_ptr0(struct snd_pcm_substream *substream, char name[16]; snd_pcm_debug_name(substream, name, sizeof(name)); pcm_err(substream->pcm, - "BUG: %s, pos = %ld, buffer size = %ld, period size = %ld\n", + "invalid position: %s, pos = %ld, buffer size = %ld, period size = %ld\n", name, pos, runtime->buffer_size, runtime->period_size); } From 5fae159b7d25987747919ccb73a8813da81abd97 Mon Sep 17 00:00:00 2001 From: "Vittorio Gambaletta (VittGam)" Date: Sun, 13 Mar 2016 22:19:34 +0100 Subject: [PATCH 213/797] ALSA: intel8x0: Add clock quirk entry for AD1981B on IBM ThinkPad X41. commit 4061db03dd71d195b9973ee466f6ed32f6a3fc16 upstream. The clock measurement on the AC'97 audio card found in the IBM ThinkPad X41 will often fail, so add a quirk entry to fix it. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=441087 Signed-off-by: Vittorio Gambaletta Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/intel8x0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index 42bcbac801a3..ccdab29a8b66 100644 --- a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -2879,6 +2879,7 @@ static void intel8x0_measure_ac97_clock(struct intel8x0 *chip) static struct snd_pci_quirk intel8x0_clock_list[] = { SND_PCI_QUIRK(0x0e11, 0x008a, "AD1885", 41000), + SND_PCI_QUIRK(0x1014, 0x0581, "AD1981B", 48000), SND_PCI_QUIRK(0x1028, 0x00be, "AD1885", 44100), SND_PCI_QUIRK(0x1028, 0x0177, "AD1980", 48000), SND_PCI_QUIRK(0x1028, 0x01ad, "AD1981B", 48000), From ee2a37ab39b013589647ced2e6526c7358cb2111 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 10 Mar 2016 12:02:49 +0100 Subject: [PATCH 214/797] ALSA: hda - Don't handle ELD notify from invalid port commit 4f8e4f3537cafc4de128e6bfdf83baa78bc60eb1 upstream. The current Intel HDMI codec driver supports only three fixed ports from port B to port D. However, i915 driver may assign a DP on other ports, e.g. port A, when no eDP is used. This incompatibility is caught later at pin_nid_to_pin_index() and results in a warning message like "HDMI: pin nid 4 not registered" at each time. This patch filters out such invalid events beforehand, so that the kernel won't be too grumbling. Reported-by: Stefan Assmann Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 70c945603379..f7bcd8dbac14 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -2353,6 +2353,10 @@ static void intel_pin_eld_notify(void *audio_ptr, int port) struct hda_codec *codec = audio_ptr; int pin_nid = port + 0x04; + /* we assume only from port-B to port-D */ + if (port < 1 || port > 3) + return; + /* skip notification during system suspend (but not in runtime PM); * the state will be updated at resume */ From db894649e34abceade484c35e5acd346e74e916d Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 11 Mar 2016 12:04:02 +0800 Subject: [PATCH 215/797] ALSA: hda - fix the mic mute button and led problem for a Lenovo AIO commit 6ef2f68fa38bf415830f67903d87180d933e0f47 upstream. This Lenovo ThinkCentre AIO also uses Line2 as mic mute button and uses GPIO2 to control the mic mute led, so applying this quirk can make both the button and led work. BugLink: https://bugs.launchpad.net/bugs/1555912 Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c2430b36e1ce..6968b796baa3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5529,6 +5529,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), + SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP), From 1dac534145d0f84b3fcadf5b69d8de3ad147f471 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 15 Mar 2016 16:44:55 +0100 Subject: [PATCH 216/797] ALSA: hda - Fix unconditional GPIO toggle via automute commit 1f7c6658962fa1260c1658d681bd6bb0c746b99a upstream. Cirrus HD-audio driver may adjust GPIO pins for EAPD dynamically depending on the jack plug state. This works fine for the auto-mute mode where the speaker gets muted upon the HP jack plug. OTOH, when the auto-mute mode is off, this turns off the EAPD unexpectedly depending on the jack state, which results in the silent speaker output. This patch fixes the silent speaker output issue by setting GPIO bits constantly when the auto-mute mode is off. Reported-and-tested-by: moosotc@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_cirrus.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index c1c855a6c0af..a47e8ae0eb30 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -174,8 +174,12 @@ static void cs_automute(struct hda_codec *codec) snd_hda_gen_update_outputs(codec); if (spec->gpio_eapd_hp || spec->gpio_eapd_speaker) { - spec->gpio_data = spec->gen.hp_jack_present ? - spec->gpio_eapd_hp : spec->gpio_eapd_speaker; + if (spec->gen.automute_speaker) + spec->gpio_data = spec->gen.hp_jack_present ? + spec->gpio_eapd_hp : spec->gpio_eapd_speaker; + else + spec->gpio_data = + spec->gpio_eapd_hp | spec->gpio_eapd_speaker; snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, spec->gpio_data); } From d2a70d6055c5da54d3f45805a19c37a693acd07f Mon Sep 17 00:00:00 2001 From: Kamal Mostafa Date: Wed, 27 Jan 2016 22:29:33 -0800 Subject: [PATCH 217/797] tools/hv: Use include/uapi with __EXPORTED_HEADERS__ commit 50fe6dd10069e7c062e27f29606f6e91ea979399 upstream. Use the local uapi headers to keep in sync with "recently" added #define's (e.g. VSS_OP_REGISTER1). Fixes: 3eb2094c59e8 ("Adding makefile for tools/hv") Signed-off-by: Kamal Mostafa Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- tools/hv/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/hv/Makefile b/tools/hv/Makefile index a8ab79556926..a8c4644022a6 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -5,6 +5,8 @@ PTHREAD_LIBS = -lpthread WARNINGS = -Wall -Wextra CFLAGS = $(WARNINGS) -g $(PTHREAD_LIBS) $(shell getconf LFS_CFLAGS) +CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include + all: hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon %: %.c $(CC) $(CFLAGS) -o $@ $^ From 93272beafa9d9a5933590c90d2fa525e86e67032 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Wed, 9 Mar 2016 23:47:25 -0500 Subject: [PATCH 218/797] jbd2: fix FS corruption possibility in jbd2_journal_destroy() on umount path commit c0a2ad9b50dd80eeccd73d9ff962234590d5ec93 upstream. On umount path, jbd2_journal_destroy() writes latest transaction ID (->j_tail_sequence) to be used at next mount. The bug is that ->j_tail_sequence is not holding latest transaction ID in some cases. So, at next mount, there is chance to conflict with remaining (not overwritten yet) transactions. mount (id=10) write transaction (id=11) write transaction (id=12) umount (id=10) <= the bug doesn't write latest ID mount (id=10) write transaction (id=11) crash mount [recovery process] transaction (id=11) transaction (id=12) <= valid transaction ID, but old commit must not replay Like above, this bug become the cause of recovery failure, or FS corruption. So why ->j_tail_sequence doesn't point latest ID? Because if checkpoint transactions was reclaimed by memory pressure (i.e. bdev_try_to_free_page()), then ->j_tail_sequence is not updated. (And another case is, __jbd2_journal_clean_checkpoint_list() is called with empty transaction.) So in above cases, ->j_tail_sequence is not pointing latest transaction ID at umount path. Plus, REQ_FLUSH for checkpoint is not done too. So, to fix this problem with minimum changes, this patch updates ->j_tail_sequence, and issue REQ_FLUSH. (With more complex changes, some optimizations would be possible to avoid unnecessary REQ_FLUSH for example though.) BTW, journal->j_tail_sequence = ++journal->j_transaction_sequence; Increment of ->j_transaction_sequence seems to be unnecessary, but ext3 does this. Signed-off-by: OGAWA Hirofumi Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/journal.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 81e622681c82..624a57a9c4aa 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1408,11 +1408,12 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, /** * jbd2_mark_journal_empty() - Mark on disk journal as empty. * @journal: The journal to update. + * @write_op: With which operation should we write the journal sb * * Update a journal's dynamic superblock fields to show that journal is empty. * Write updated superblock to disk waiting for IO to complete. */ -static void jbd2_mark_journal_empty(journal_t *journal) +static void jbd2_mark_journal_empty(journal_t *journal, int write_op) { journal_superblock_t *sb = journal->j_superblock; @@ -1430,7 +1431,7 @@ static void jbd2_mark_journal_empty(journal_t *journal) sb->s_start = cpu_to_be32(0); read_unlock(&journal->j_state_lock); - jbd2_write_superblock(journal, WRITE_FUA); + jbd2_write_superblock(journal, write_op); /* Log is no longer empty */ write_lock(&journal->j_state_lock); @@ -1716,7 +1717,13 @@ int jbd2_journal_destroy(journal_t *journal) if (journal->j_sb_buffer) { if (!is_journal_aborted(journal)) { mutex_lock(&journal->j_checkpoint_mutex); - jbd2_mark_journal_empty(journal); + + write_lock(&journal->j_state_lock); + journal->j_tail_sequence = + ++journal->j_transaction_sequence; + write_unlock(&journal->j_state_lock); + + jbd2_mark_journal_empty(journal, WRITE_FLUSH_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } else err = -EIO; @@ -1975,7 +1982,7 @@ int jbd2_journal_flush(journal_t *journal) * the magic code for a fully-recovered superblock. Any future * commits of data to the journal will restore the current * s_start value. */ - jbd2_mark_journal_empty(journal); + jbd2_mark_journal_empty(journal, WRITE_FUA); mutex_unlock(&journal->j_checkpoint_mutex); write_lock(&journal->j_state_lock); J_ASSERT(!journal->j_running_transaction); @@ -2021,7 +2028,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) if (write) { /* Lock to make assertions happy... */ mutex_lock(&journal->j_checkpoint_mutex); - jbd2_mark_journal_empty(journal); + jbd2_mark_journal_empty(journal, WRITE_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } From 1a8f4a490871df59d1f3cfe28fae4458d2cbef7f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 15 Dec 2015 16:38:22 +0100 Subject: [PATCH 219/797] brd: Fix discard request processing commit 5e4298be45e83ecdffaabb370eea9396889b07f1 upstream. Avoid that discard requests with size => PAGE_SIZE fail with -EIO. Refuse discard requests if the discard size is not a multiple of the page size. Fixes: 2dbe54957636 ("brd: Refuse improperly aligned discard requests") Signed-off-by: Bart Van Assche Reviewed-by: Jan Kara Cc: Christoph Hellwig Cc: Robert Elliot Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/brd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/brd.c b/drivers/block/brd.c index a5880f4ab40e..1914c63ca8b1 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -338,7 +338,7 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) if (unlikely(bio->bi_rw & REQ_DISCARD)) { if (sector & ((PAGE_SIZE >> SECTOR_SHIFT) - 1) || - bio->bi_iter.bi_size & PAGE_MASK) + bio->bi_iter.bi_size & ~PAGE_MASK) goto io_error; discard_from_brd(brd, sector, bio->bi_iter.bi_size); goto out; From 84512e476ce92fbdb60d4687e3ea230dbf0655c8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:03:09 -0800 Subject: [PATCH 220/797] IB/srpt: Simplify srpt_handle_tsk_mgmt() commit 51093254bf879bc9ce96590400a87897c7498463 upstream. Let the target core check task existence instead of the SRP target driver. Additionally, let the target core check the validity of the task management request instead of the ib_srpt driver. This patch fixes the following kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 IP: [] srpt_handle_new_iu+0x6d7/0x790 [ib_srpt] Oops: 0002 [#1] SMP Call Trace: [] srpt_process_completion+0xde/0x570 [ib_srpt] [] srpt_compl_thread+0x13f/0x160 [ib_srpt] [] kthread+0xcf/0xe0 [] ret_from_fork+0x7c/0xb0 Signed-off-by: Bart Van Assche Fixes: 3e4f574857ee ("ib_srpt: Convert TMR path to target_submit_tmr") Tested-by: Alex Estrin Reviewed-by: Christoph Hellwig Cc: Nicholas Bellinger Cc: Sagi Grimberg Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srpt/ib_srpt.c | 59 +-------------------------- 1 file changed, 1 insertion(+), 58 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 2e2fe818ca9f..eaabf3125846 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1737,47 +1737,6 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch, return -1; } -/** - * srpt_rx_mgmt_fn_tag() - Process a task management function by tag. - * @ch: RDMA channel of the task management request. - * @fn: Task management function to perform. - * @req_tag: Tag of the SRP task management request. - * @mgmt_ioctx: I/O context of the task management request. - * - * Returns zero if the target core will process the task management - * request asynchronously. - * - * Note: It is assumed that the initiator serializes tag-based task management - * requests. - */ -static int srpt_rx_mgmt_fn_tag(struct srpt_send_ioctx *ioctx, u64 tag) -{ - struct srpt_device *sdev; - struct srpt_rdma_ch *ch; - struct srpt_send_ioctx *target; - int ret, i; - - ret = -EINVAL; - ch = ioctx->ch; - BUG_ON(!ch); - BUG_ON(!ch->sport); - sdev = ch->sport->sdev; - BUG_ON(!sdev); - spin_lock_irq(&sdev->spinlock); - for (i = 0; i < ch->rq_size; ++i) { - target = ch->ioctx_ring[i]; - if (target->cmd.se_lun == ioctx->cmd.se_lun && - target->cmd.tag == tag && - srpt_get_cmd_state(target) != SRPT_STATE_DONE) { - ret = 0; - /* now let the target core abort &target->cmd; */ - break; - } - } - spin_unlock_irq(&sdev->spinlock); - return ret; -} - static int srp_tmr_to_tcm(int fn) { switch (fn) { @@ -1812,7 +1771,6 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, struct se_cmd *cmd; struct se_session *sess = ch->sess; uint64_t unpacked_lun; - uint32_t tag = 0; int tcm_tmr; int rc; @@ -1828,25 +1786,10 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT); send_ioctx->cmd.tag = srp_tsk->tag; tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func); - if (tcm_tmr < 0) { - send_ioctx->cmd.se_tmr_req->response = - TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED; - goto fail; - } unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_tsk->lun, sizeof(srp_tsk->lun)); - - if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK) { - rc = srpt_rx_mgmt_fn_tag(send_ioctx, srp_tsk->task_tag); - if (rc < 0) { - send_ioctx->cmd.se_tmr_req->response = - TMR_TASK_DOES_NOT_EXIST; - goto fail; - } - tag = srp_tsk->task_tag; - } rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL, unpacked_lun, - srp_tsk, tcm_tmr, GFP_KERNEL, tag, + srp_tsk, tcm_tmr, GFP_KERNEL, srp_tsk->task_tag, TARGET_SCF_ACK_KREF); if (rc != 0) { send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED; From 32bb1185093c67c2280e440685ebc6b23fd47743 Mon Sep 17 00:00:00 2001 From: Eric Wheeler Date: Fri, 26 Feb 2016 14:33:56 -0800 Subject: [PATCH 221/797] bcache: cleaned up error handling around register_cache() commit 9b299728ed777428b3908ac72ace5f8f84b97789 upstream. Fix null pointer dereference by changing register_cache() to return an int instead of being void. This allows it to return -ENOMEM or -ENODEV and enables upper layers to handle the OOM case without NULL pointer issues. See this thread: http://thread.gmane.org/gmane.linux.kernel.bcache.devel/3521 Fixes this error: gargamel:/sys/block/md5/bcache# echo /dev/sdh2 > /sys/fs/bcache/register bcache: register_cache() error opening sdh2: cannot allocate memory BUG: unable to handle kernel NULL pointer dereference at 00000000000009b8 IP: [] cache_set_flush+0x102/0x15c [bcache] PGD 120dff067 PUD 1119a3067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: veth ip6table_filter ip6_tables (...) CPU: 4 PID: 3371 Comm: kworker/4:3 Not tainted 4.4.2-amd64-i915-volpreempt-20160213bc1 #3 Hardware name: System manufacturer System Product Name/P8H67-M PRO, BIOS 3904 04/27/2013 Workqueue: events cache_set_flush [bcache] task: ffff88020d5dc280 ti: ffff88020b6f8000 task.ti: ffff88020b6f8000 RIP: 0010:[] [] cache_set_flush+0x102/0x15c [bcache] Signed-off-by: Eric Wheeler Tested-by: Marc MERLIN Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/super.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 8d0ead98eb6e..f3f98c3d7f67 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1828,11 +1828,12 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca) return 0; } -static void register_cache(struct cache_sb *sb, struct page *sb_page, +static int register_cache(struct cache_sb *sb, struct page *sb_page, struct block_device *bdev, struct cache *ca) { char name[BDEVNAME_SIZE]; - const char *err = "cannot allocate memory"; + const char *err = NULL; + int ret = 0; memcpy(&ca->sb, sb, sizeof(struct cache_sb)); ca->bdev = bdev; @@ -1847,27 +1848,35 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page, if (blk_queue_discard(bdev_get_queue(ca->bdev))) ca->discard = CACHE_DISCARD(&ca->sb); - if (cache_alloc(sb, ca) != 0) + ret = cache_alloc(sb, ca); + if (ret != 0) goto err; - err = "error creating kobject"; - if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) - goto err; + if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) { + err = "error calling kobject_add"; + ret = -ENOMEM; + goto out; + } mutex_lock(&bch_register_lock); err = register_cache_set(ca); mutex_unlock(&bch_register_lock); - if (err) - goto err; + if (err) { + ret = -ENODEV; + goto out; + } pr_info("registered cache device %s", bdevname(bdev, name)); + out: kobject_put(&ca->kobj); - return; + err: - pr_notice("error opening %s: %s", bdevname(bdev, name), err); - goto out; + if (err) + pr_notice("error opening %s: %s", bdevname(bdev, name), err); + + return ret; } /* Global interfaces/init */ @@ -1965,7 +1974,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (!ca) goto err_close; - register_cache(sb, sb_page, bdev, ca); + if (register_cache(sb, sb_page, bdev, ca) != 0) + goto err_close; } out: if (sb_page) From b58e781068d9a5fd6b0ee77f595c3dbaa0d2b7aa Mon Sep 17 00:00:00 2001 From: Eric Wheeler Date: Fri, 26 Feb 2016 14:39:06 -0800 Subject: [PATCH 222/797] bcache: fix race of writeback thread starting before complete initialization commit 07cc6ef8edc47f8b4fc1e276d31127a0a5863d4d upstream. The bch_writeback_thread might BUG_ON in read_dirty() if dc->sb==BDEV_STATE_DIRTY and bch_sectors_dirty_init has not yet completed its related initialization. This patch downs the dc->writeback_lock until after initialization is complete, thus preventing bch_writeback_thread from proceeding prematurely. See this thread: http://thread.gmane.org/gmane.linux.kernel.bcache.devel/3453 Signed-off-by: Eric Wheeler Tested-by: Marc MERLIN Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/super.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f3f98c3d7f67..6b07a0c8c729 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1015,8 +1015,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) */ atomic_set(&dc->count, 1); - if (bch_cached_dev_writeback_start(dc)) + /* Block writeback thread, but spawn it */ + down_write(&dc->writeback_lock); + if (bch_cached_dev_writeback_start(dc)) { + up_write(&dc->writeback_lock); return -ENOMEM; + } if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { bch_sectors_dirty_init(dc); @@ -1028,6 +1032,9 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) bch_cached_dev_run(dc); bcache_device_link(&dc->disk, c, "bdev"); + /* Allow the writeback thread to proceed */ + up_write(&dc->writeback_lock); + pr_info("Caching %s as %s on set %pU", bdevname(dc->bdev, buf), dc->disk.disk->disk_name, dc->disk.c->sb.set_uuid); From ca75edc44088cc40792161fd2ca650c5cfe8ee9f Mon Sep 17 00:00:00 2001 From: Eric Wheeler Date: Mon, 7 Mar 2016 15:17:50 -0800 Subject: [PATCH 223/797] bcache: fix cache_set_flush() NULL pointer dereference on OOM commit f8b11260a445169989d01df75d35af0f56178f95 upstream. When bch_cache_set_alloc() fails to kzalloc the cache_set, the asyncronous closure handling tries to dereference a cache_set that hadn't yet been allocated inside of cache_set_flush() which is called by __cache_set_unregister() during cleanup. This appears to happen only during an OOM condition on bcache_register. Signed-off-by: Eric Wheeler Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/super.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 6b07a0c8c729..a296425a7270 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1373,6 +1373,9 @@ static void cache_set_flush(struct closure *cl) struct btree *b; unsigned i; + if (!c) + closure_return(cl); + bch_cache_accounting_destroy(&c->accounting); kobject_put(&c->internal); From 8b42fc47e1b64cb661fed8d96f874effbdf1d7f1 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 17 Mar 2016 14:20:25 -0700 Subject: [PATCH 224/797] mm: memcontrol: reclaim when shrinking memory.high below usage commit 588083bb37a3cea8533c392370a554417c8f29cb upstream. When setting memory.high below usage, nothing happens until the next charge comes along, and then it will only reclaim its own charge and not the now potentially huge excess of the new memory.high. This can cause groups to stay in excess of their memory.high indefinitely. To fix that, when shrinking memory.high, kick off a reclaim cycle that goes after the delta. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ee6acd279953..5d081ff28663 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5121,6 +5121,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned long nr_pages; unsigned long high; int err; @@ -5131,6 +5132,11 @@ static ssize_t memory_high_write(struct kernfs_open_file *of, memcg->high = high; + nr_pages = page_counter_read(&memcg->memory); + if (nr_pages > high) + try_to_free_mem_cgroup_pages(memcg, nr_pages - high, + GFP_KERNEL, true); + memcg_wb_domain_size_changed(memcg); return nbytes; } From 0ccab5b139971a2a3f48df24d1ee8be2dbf84042 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 17 Mar 2016 14:20:28 -0700 Subject: [PATCH 225/797] mm: memcontrol: reclaim and OOM kill when shrinking memory.max below usage commit b6e6edcfa40561e9c8abe5eecf1c96f8e5fd9c6f upstream. Setting the original memory.limit_in_bytes hardlimit is subject to a race condition when the desired value is below the current usage. The code tries a few times to first reclaim and then see if the usage has dropped to where we would like it to be, but there is no locking, and the workload is free to continue making new charges up to the old limit. Thus, attempting to shrink a workload relies on pure luck and hope that the workload happens to cooperate. To fix this in the cgroup2 memory.max knob, do it the other way round: set the limit first, then try enforcement. And if reclaim is not able to succeed, trigger OOM kills in the group. Keep going until the new limit is met, we run out of OOM victims and there's only unreclaimable memory left, or the task writing to memory.max is killed. This allows users to shrink groups reliably, and the behavior is consistent with what happens when new charges are attempted in excess of memory.max. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5d081ff28663..fc0bcc41d57f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1332,7 +1332,7 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg) return limit; } -static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, +static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, int order) { struct oom_control oc = { @@ -1410,6 +1410,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, } unlock: mutex_unlock(&oom_lock); + return chosen; } #if MAX_NUMNODES > 1 @@ -5158,6 +5159,8 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned int nr_reclaims = MEM_CGROUP_RECLAIM_RETRIES; + bool drained = false; unsigned long max; int err; @@ -5166,9 +5169,36 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, if (err) return err; - err = mem_cgroup_resize_limit(memcg, max); - if (err) - return err; + xchg(&memcg->memory.limit, max); + + for (;;) { + unsigned long nr_pages = page_counter_read(&memcg->memory); + + if (nr_pages <= max) + break; + + if (signal_pending(current)) { + err = -EINTR; + break; + } + + if (!drained) { + drain_all_stock(memcg); + drained = true; + continue; + } + + if (nr_reclaims) { + if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max, + GFP_KERNEL, true)) + nr_reclaims--; + continue; + } + + mem_cgroup_events(memcg, MEMCG_OOM, 1); + if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0)) + break; + } memcg_wb_domain_size_changed(memcg); return nbytes; From ee52f62c636c0c151ea92ffbf5ef940be51b0d22 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 17 Mar 2016 14:17:16 -0700 Subject: [PATCH 226/797] ia64: define ioremap_uc() commit b0f84ac352762ed02d7ea9f284942a8cab7f9077 upstream. All architectures now need ioremap_uc(), ia64 seems defines this already through its ioremap_nocache() and it already ensures it *only* uses UC. This is needed since v4.3 to complete an allyesconfig compile on ia64, there were others archs that needed this, and this one seems to have fallen through the cracks. Signed-off-by: Luis R. Rodriguez Reported-by: kbuild test robot Acked-by: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/ia64/include/asm/io.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h index 9041bbe2b7b4..8fdb9c7eeb66 100644 --- a/arch/ia64/include/asm/io.h +++ b/arch/ia64/include/asm/io.h @@ -436,6 +436,7 @@ static inline void __iomem * ioremap_cache (unsigned long phys_addr, unsigned lo return ioremap(phys_addr, size); } #define ioremap_cache ioremap_cache +#define ioremap_uc ioremap_nocache /* From 5f4a82d5e3492c26fb0263ca7f007180612e8b54 Mon Sep 17 00:00:00 2001 From: Joshua Hunt Date: Thu, 17 Mar 2016 14:17:23 -0700 Subject: [PATCH 227/797] watchdog: don't run proc_watchdog_update if new value is same as old commit a1ee1932aa6bea0bb074f5e3ced112664e4637ed upstream. While working on a script to restore all sysctl params before a series of tests I found that writing any value into the /proc/sys/kernel/{nmi_watchdog,soft_watchdog,watchdog,watchdog_thresh} causes them to call proc_watchdog_update(). NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter. NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter. NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter. NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter. There doesn't appear to be a reason for doing this work every time a write occurs, so only do it when the values change. Signed-off-by: Josh Hunt Acked-by: Don Zickus Reviewed-by: Aaron Tomlin Cc: Ulrich Obergfell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/watchdog.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 18f34cf75f74..198137b1cadc 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -907,6 +907,9 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, * both lockup detectors are disabled if proc_watchdog_update() * returns an error. */ + if (old == new) + goto out; + err = proc_watchdog_update(); } out: @@ -951,7 +954,7 @@ int proc_soft_watchdog(struct ctl_table *table, int write, int proc_watchdog_thresh(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int err, old; + int err, old, new; get_online_cpus(); mutex_lock(&watchdog_proc_mutex); @@ -971,6 +974,10 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, /* * Update the sample period. Restore on failure. */ + new = ACCESS_ONCE(watchdog_thresh); + if (old == new) + goto out; + set_sample_period(); err = proc_watchdog_update(); if (err) { From 6a4cdbf56201f983f8177e3845fcb47d25d4fcb0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 28 Feb 2016 17:44:09 +0200 Subject: [PATCH 228/797] watchdog: rc32434_wdt: fix ioctl error handling commit 10e7ac22cdd4d211cef99afcb9371b70cb175be6 upstream. Calling return copy_to_user(...) in an ioctl will not do the right thing if there's a pagefault: copy_to_user returns the number of bytes not copied in this case. Fix up watchdog/rc32434_wdt to do return copy_to_user(...)) ? -EFAULT : 0; instead. Signed-off-by: Michael S. Tsirkin Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/rc32434_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/rc32434_wdt.c b/drivers/watchdog/rc32434_wdt.c index 71e78ef4b736..3a75f3b53452 100644 --- a/drivers/watchdog/rc32434_wdt.c +++ b/drivers/watchdog/rc32434_wdt.c @@ -237,7 +237,7 @@ static long rc32434_wdt_ioctl(struct file *file, unsigned int cmd, return -EINVAL; /* Fall through */ case WDIOC_GETTIMEOUT: - return copy_to_user(argp, &timeout, sizeof(int)); + return copy_to_user(argp, &timeout, sizeof(int)) ? -EFAULT : 0; default: return -ENOTTY; } From 7435429a8a7f3a83f760a1f72958ee53db408e27 Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Wed, 10 Feb 2016 00:49:11 +0300 Subject: [PATCH 229/797] Bluetooth: Add new AR3012 ID 0489:e095 commit 28c971d82fb58ef7cba22e5308be6d2d2590473d upstream. T: Bus=01 Lev=01 Prnt=01 Port=04 Cnt=02 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0489 ProdID=e095 Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb This device requires ar3k/AthrBT_0x31010100.dfu and ar3k/ramps_0x31010100_40.dfu firmware files that are not in linux-firmware yet. BugLink: https://bugs.launchpad.net/bugs/1542944 Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 0c4a748fef7a..0beaa52df66b 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -82,6 +82,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0489, 0xe05f) }, { USB_DEVICE(0x0489, 0xe076) }, { USB_DEVICE(0x0489, 0xe078) }, + { USB_DEVICE(0x0489, 0xe095) }, { USB_DEVICE(0x04c5, 0x1330) }, { USB_DEVICE(0x04CA, 0x3004) }, { USB_DEVICE(0x04CA, 0x3005) }, @@ -147,6 +148,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0489, 0xe05f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe076), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe078), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0489, 0xe095), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3005), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 342ec8d203e3..79107597a594 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -196,6 +196,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe05f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe076), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe078), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0489, 0xe095), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3005), .driver_info = BTUSB_ATH3012 }, From f0de3cec40ee0f365751e11a477e47dd5568c34e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 11 Mar 2016 09:56:33 +0200 Subject: [PATCH 230/797] Bluetooth: Fix potential buffer overflow with Add Advertising commit 6a0e78072c2ae7b20b14e0249d8108441ea928d2 upstream. The Add Advertising command handler does the appropriate checks for the AD and Scan Response data, however fails to take into account the general length of the mgmt command itself, which could lead to potential buffer overflows. This patch adds the necessary check that the mgmt command length is consistent with the given ad and scan_rsp lengths. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/mgmt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7f22119276f3..b1b0a1c0bd8d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7155,6 +7155,10 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, status); + if (data_len != sizeof(*cp) + cp->adv_data_len + cp->scan_rsp_len) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); + flags = __le32_to_cpu(cp->flags); timeout = __le16_to_cpu(cp->timeout); duration = __le16_to_cpu(cp->duration); From 36591ef19ab6e82cfb3580880ba1fefd843aa8ed Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Mar 2016 20:43:04 -0400 Subject: [PATCH 231/797] cgroup: ignore css_sets associated with dead cgroups during migration commit 2b021cbf3cb6208f0d40fd2f1869f237934340ed upstream. Before 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups"), all dead tasks were associated with init_css_set. If a zombie task is requested for migration, while migration prep operations would still be performed on init_css_set, the actual migration would ignore zombie tasks. As init_css_set is always valid, this worked fine. However, after 2e91fa7f6d45, zombie tasks stay with the css_set it was associated with at the time of death. Let's say a task T associated with cgroup A on hierarchy H-1 and cgroup B on hiearchy H-2. After T becomes a zombie, it would still remain associated with A and B. If A only contains zombie tasks, it can be removed. On removal, A gets marked offline but stays pinned until all zombies are drained. At this point, if migration is initiated on T to a cgroup C on hierarchy H-2, migration path would try to prepare T's css_set for migration and trigger the following. WARNING: CPU: 0 PID: 1576 at kernel/cgroup.c:474 cgroup_get+0x121/0x160() CPU: 0 PID: 1576 Comm: bash Not tainted 4.4.0-work+ #289 ... Call Trace: [] dump_stack+0x4e/0x82 [] warn_slowpath_common+0x78/0xb0 [] warn_slowpath_null+0x15/0x20 [] cgroup_get+0x121/0x160 [] link_css_set+0x7b/0x90 [] find_css_set+0x3bc/0x5e0 [] cgroup_migrate_prepare_dst+0x89/0x1f0 [] cgroup_attach_task+0x157/0x230 [] __cgroup_procs_write+0x2b7/0x470 [] cgroup_tasks_write+0xc/0x10 [] cgroup_file_write+0x30/0x1b0 [] kernfs_fop_write+0x13c/0x180 [] __vfs_write+0x23/0xe0 [] vfs_write+0xa4/0x1a0 [] SyS_write+0x44/0xa0 [] entry_SYSCALL_64_fastpath+0x12/0x6f It doesn't make sense to prepare migration for css_sets pointing to dead cgroups as they are guaranteed to contain only zombies which are ignored later during migration. This patch makes cgroup destruction path mark all affected css_sets as dead and updates the migration path to ignore them during preparation. Signed-off-by: Tejun Heo Fixes: 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups") Signed-off-by: Greg Kroah-Hartman --- include/linux/cgroup-defs.h | 3 +++ kernel/cgroup.c | 20 ++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8e30faeab183..a7c7f74808a4 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -216,6 +216,9 @@ struct css_set { /* all css_task_iters currently walking this cset */ struct list_head task_iters; + /* dead and being drained, ignore for migration */ + bool dead; + /* For RCU-protected deletion */ struct rcu_head rcu_head; }; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index fb1ecfd2decd..dc94f8beb097 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2498,6 +2498,14 @@ static void cgroup_migrate_add_src(struct css_set *src_cset, lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&css_set_lock); + /* + * If ->dead, @src_set is associated with one or more dead cgroups + * and doesn't contain any migratable tasks. Ignore it early so + * that the rest of migration path doesn't get confused by it. + */ + if (src_cset->dead) + return; + src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); if (!list_empty(&src_cset->mg_preload_node)) @@ -5131,6 +5139,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) __releases(&cgroup_mutex) __acquires(&cgroup_mutex) { struct cgroup_subsys_state *css; + struct cgrp_cset_link *link; int ssid; lockdep_assert_held(&cgroup_mutex); @@ -5151,11 +5160,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) return -EBUSY; /* - * Mark @cgrp dead. This prevents further task migration and child - * creation by disabling cgroup_lock_live_group(). + * Mark @cgrp and the associated csets dead. The former prevents + * further task migration and child creation by disabling + * cgroup_lock_live_group(). The latter makes the csets ignored by + * the migration path. */ cgrp->self.flags &= ~CSS_ONLINE; + spin_lock_bh(&css_set_lock); + list_for_each_entry(link, &cgrp->cset_links, cset_link) + link->cset->dead = true; + spin_unlock_bh(&css_set_lock); + /* initiate massacre of all css's */ for_each_css(css, ssid, cgrp) kill_css(css); From 9b4a50fae597168ed1eb10e0ca60e73ac649963f Mon Sep 17 00:00:00 2001 From: Dmitri Epshtein Date: Sat, 12 Mar 2016 18:44:18 +0100 Subject: [PATCH 232/797] net: mvneta: enable change MAC address when interface is up commit 928b6519afeb2a5e2dc61154380b545ed66c476a upstream. Function eth_prepare_mac_addr_change() is called as part of MAC address change. This function check if interface is running. To enable change MAC address when interface is running: IFF_LIVE_ADDR_CHANGE flag must be set to dev->priv_flags field Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit") Signed-off-by: Dmitri Epshtein Signed-off-by: Gregory CLEMENT Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ed622fa29dfa..a4ac6fedac75 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3404,7 +3404,7 @@ static int mvneta_probe(struct platform_device *pdev) dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO; dev->hw_features |= dev->features; dev->vlan_features |= dev->features; - dev->priv_flags |= IFF_UNICAST_FLT; + dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE; dev->gso_max_segs = MVNETA_MAX_TSO_SEGS; err = register_netdev(dev); From da191fac4bfa7ba6372e10365591a2759a3298ea Mon Sep 17 00:00:00 2001 From: Vinayak Menon Date: Mon, 22 Feb 2016 19:15:44 +0530 Subject: [PATCH 233/797] of: alloc anywhere from memblock if range not specified commit e53b50c0cbe392c946807abf7d07615a3c588642 upstream. early_init_dt_alloc_reserved_memory_arch passes end as 0 to __memblock_alloc_base, when limits are not specified. But __memblock_alloc_base takes end value of 0 as MEMBLOCK_ALLOC_ACCESSIBLE and limits the end to memblock.current_limit. This results in regions never being placed in HIGHMEM area, for e.g. CMA. Let __memblock_alloc_base allocate from anywhere in memory if limits are not specified. Acked-by: Marek Szyprowski Signed-off-by: Vinayak Menon Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/of_reserved_mem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 1a3556a9e9ea..ed01c0172e4a 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -32,11 +32,13 @@ int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, bool nomap, phys_addr_t *res_base) { + phys_addr_t base; /* * We use __memblock_alloc_base() because memblock_alloc_base() * panic()s on allocation failure. */ - phys_addr_t base = __memblock_alloc_base(size, align, end); + end = !end ? MEMBLOCK_ALLOC_ANYWHERE : end; + base = __memblock_alloc_base(size, align, end); if (!base) return -ENOMEM; From 994f9db39113a268394fcba06537cabfbb40cb2b Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Thu, 19 Mar 2015 11:10:54 +0000 Subject: [PATCH 234/797] vfs: show_vfsstat: do not ignore errors from show_devname method commit 5f8d498d4364f544fee17125787a47553db02afa upstream. Explicitly check show_devname method return code and bail out in case of an error. This fixes regression introduced by commit 9d4d65748a5c. Signed-off-by: Dmitry V. Levin Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/proc_namespace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 8ebd9a334085..87645955990d 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -197,6 +197,8 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt) if (sb->s_op->show_devname) { seq_puts(m, "device "); err = sb->s_op->show_devname(m, mnt_path.dentry); + if (err) + goto out; } else { if (r->mnt_devname) { seq_puts(m, "device "); From ab14444f6f3dd3cd0a47ad4bcc35fed256d1e9a7 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 10 Mar 2016 21:19:06 +0100 Subject: [PATCH 235/797] splice: handle zero nr_pages in splice_to_pipe() commit d6785d9152147596f60234157da2b02540c3e60f upstream. Running the following command: busybox cat /sys/kernel/debug/tracing/trace_pipe > /dev/null with any tracing enabled pretty very quickly leads to various NULL pointer dereferences and VM BUG_ON()s, such as these: BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 IP: [] generic_pipe_buf_release+0xc/0x40 Call Trace: [] splice_direct_to_actor+0x143/0x1e0 [] ? generic_pipe_buf_nosteal+0x10/0x10 [] do_splice_direct+0x8f/0xb0 [] do_sendfile+0x199/0x380 [] SyS_sendfile64+0x90/0xa0 [] entry_SYSCALL_64_fastpath+0x12/0x6d page dumped because: VM_BUG_ON_PAGE(atomic_read(&page->_count) == 0) kernel BUG at include/linux/mm.h:367! invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC RIP: [] generic_pipe_buf_release+0x3c/0x40 Call Trace: [] splice_direct_to_actor+0x143/0x1e0 [] ? generic_pipe_buf_nosteal+0x10/0x10 [] do_splice_direct+0x8f/0xb0 [] do_sendfile+0x199/0x380 [] SyS_sendfile64+0x90/0xa0 [] tracesys_phase2+0x84/0x89 (busybox's cat uses sendfile(2), unlike the coreutils version) This is because tracing_splice_read_pipe() can call splice_to_pipe() with spd->nr_pages == 0. spd_pages underflows in splice_to_pipe() and we fill the page pointers and the other fields of the pipe_buffers with garbage. All other callers of splice_to_pipe() avoid calling it when nr_pages == 0, and we could make tracing_splice_read_pipe() do that too, but it seems reasonable to have splice_to_page() handle this condition gracefully. Signed-off-by: Rabin Vincent Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/splice.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/splice.c b/fs/splice.c index 4cf700d50b40..0f77e9682857 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -185,6 +185,9 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, unsigned int spd_pages = spd->nr_pages; int ret, do_wakeup, page_nr; + if (!spd_pages) + return 0; + ret = 0; do_wakeup = 0; page_nr = 0; From f2e1e0a0afd3549c87c97ec2b3363b7325c6e3d5 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 9 Feb 2016 01:02:38 +0300 Subject: [PATCH 236/797] xtensa: ISS: don't hang if stdin EOF is reached commit 362014c8d9d51d504c167c44ac280169457732be upstream. Simulator stdin may be connected to a file, when its end is reached kernel hangs in infinite loop inside rs_poll, because simc_poll always signals that descriptor 0 is readable and simc_read always returns 0. Check simc_read return value and exit loop if it's not positive. Also don't rewind polling timer if it's zero. Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/platforms/iss/console.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c index 70cb408bc20d..92d785fefb6d 100644 --- a/arch/xtensa/platforms/iss/console.c +++ b/arch/xtensa/platforms/iss/console.c @@ -100,21 +100,23 @@ static void rs_poll(unsigned long priv) { struct tty_port *port = (struct tty_port *)priv; int i = 0; + int rd = 1; unsigned char c; spin_lock(&timer_lock); while (simc_poll(0)) { - simc_read(0, &c, 1); + rd = simc_read(0, &c, 1); + if (rd <= 0) + break; tty_insert_flip_char(port, c, TTY_NORMAL); i++; } if (i) tty_flip_buffer_push(port); - - - mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE); + if (rd) + mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE); spin_unlock(&timer_lock); } From cd8af682db6a51a5130f48b07d211c808f321997 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 25 Feb 2016 23:27:51 +0300 Subject: [PATCH 237/797] xtensa: fix preemption in {clear,copy}_user_highpage commit a67cc9aa2dfc6e66addf240bbd79e16e01565e81 upstream. Disabling pagefault makes little sense there, preemption disabling is what was meant. Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/mm/cache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index d75aa1476da7..1a804a2f9a5b 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -97,11 +97,11 @@ void clear_user_highpage(struct page *page, unsigned long vaddr) unsigned long paddr; void *kvaddr = coherent_kvaddr(page, TLBTEMP_BASE_1, vaddr, &paddr); - pagefault_disable(); + preempt_disable(); kmap_invalidate_coherent(page, vaddr); set_bit(PG_arch_1, &page->flags); clear_page_alias(kvaddr, paddr); - pagefault_enable(); + preempt_enable(); } void copy_user_highpage(struct page *dst, struct page *src, @@ -113,11 +113,11 @@ void copy_user_highpage(struct page *dst, struct page *src, void *src_vaddr = coherent_kvaddr(src, TLBTEMP_BASE_2, vaddr, &src_paddr); - pagefault_disable(); + preempt_disable(); kmap_invalidate_coherent(dst, vaddr); set_bit(PG_arch_1, &dst->flags); copy_page_alias(dst_vaddr, src_vaddr, dst_paddr, src_paddr); - pagefault_enable(); + preempt_enable(); } #endif /* DCACHE_WAY_SIZE > PAGE_SIZE */ From 1f841628aca5d4133ad2aba0e2f279c8ab65b9d8 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 3 Mar 2016 18:34:29 +0300 Subject: [PATCH 238/797] xtensa: clear all DBREAKC registers on start commit 7de7ac785ae18a2cdc78d7560f48e3213d9ea0ab upstream. There are XCHAL_NUM_DBREAK registers, clear them all. This also fixes cryptic assembler error message with binutils 2.25 when XCHAL_NUM_DBREAK is 0: as: out of memory allocating 18446744073709551575 bytes after a total of 495616 bytes Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/head.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index 9ed55649ac8e..05e1df943856 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -128,7 +128,7 @@ ENTRY(_startup) wsr a0, icountlevel .set _index, 0 - .rept XCHAL_NUM_DBREAK - 1 + .rept XCHAL_NUM_DBREAK wsr a0, SREG_DBREAKC + _index .set _index, _index + 1 .endr From f640dae8943ecbcc9ee6710aec89bba594512336 Mon Sep 17 00:00:00 2001 From: Lada Trimasova Date: Wed, 9 Mar 2016 20:21:04 +0300 Subject: [PATCH 239/797] ARC: [BE] readl()/writel() to work in Big Endian CPU configuration commit f778cc65717687a3d3f26dd21bef62cd059f1b8b upstream. read{l,w}() write{l,w}() primitives should use le{16,32}_to_cpu() and cpu_to_le{16,32}() respectively to ensure device registers are read correctly in Big Endian CPU configuration. Per Arnd Bergmann | Most drivers using readl() or readl_relaxed() expect those to perform byte | swaps on big-endian architectures, as the registers tend to be fixed endian This was needed for getting UART to work correctly on a Big Endian ARC. The ARC accessors originally were fine, and the bug got introduced inadventently by commit b8a033023994 ("ARCv2: barriers") Fixes: b8a033023994 ("ARCv2: barriers") Link: http://lkml.kernel.org/r/201603100845.30602.arnd@arndb.de Cc: Alexey Brodkin Cc: Arnd Bergmann Signed-off-by: Lada Trimasova [vgupta: beefed up changelog, added Fixes/stable tags] Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/io.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index 694ece8a0243..27b17adea50d 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -129,15 +129,23 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr) #define writel(v,c) ({ __iowmb(); writel_relaxed(v,c); }) /* - * Relaxed API for drivers which can handle any ordering themselves + * Relaxed API for drivers which can handle barrier ordering themselves + * + * Also these are defined to perform little endian accesses. + * To provide the typical device register semantics of fixed endian, + * swap the byte order for Big Endian + * + * http://lkml.kernel.org/r/201603100845.30602.arnd@arndb.de */ #define readb_relaxed(c) __raw_readb(c) -#define readw_relaxed(c) __raw_readw(c) -#define readl_relaxed(c) __raw_readl(c) +#define readw_relaxed(c) ({ u16 __r = le16_to_cpu((__force __le16) \ + __raw_readw(c)); __r; }) +#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32) \ + __raw_readl(c)); __r; }) #define writeb_relaxed(v,c) __raw_writeb(v,c) -#define writew_relaxed(v,c) __raw_writew(v,c) -#define writel_relaxed(v,c) __raw_writel(v,c) +#define writew_relaxed(v,c) __raw_writew((__force u16) cpu_to_le16(v),c) +#define writel_relaxed(v,c) __raw_writel((__force u32) cpu_to_le32(v),c) #include From f3c5b82c36e98876ab507d3bc062100eecaba158 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 8 Mar 2016 19:31:24 +0530 Subject: [PATCH 240/797] ARC: bitops: Remove non relevant comments commit 2a41b6dc28dc71c1a3f1622612a26edc58f7561e upstream. commit 80f420842ff42 removed the ARC bitops microoptimization but failed to prune the comments to same effect Fixes: 80f420842ff42 ("ARC: Make ARC bitops "safer" (add anti-optimization)") Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/bitops.h | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 57c1f33844d4..0352fb8d21b9 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -35,21 +35,6 @@ static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ \ m += nr >> 5; \ \ - /* \ - * ARC ISA micro-optimization: \ - * \ - * Instructions dealing with bitpos only consider lower 5 bits \ - * e.g (x << 33) is handled like (x << 1) by ASL instruction \ - * (mem pointer still needs adjustment to point to next word) \ - * \ - * Hence the masking to clamp @nr arg can be elided in general. \ - * \ - * However if @nr is a constant (above assumed in a register), \ - * and greater than 31, gcc can optimize away (x << 33) to 0, \ - * as overflow, given the 32-bit ISA. Thus masking needs to be \ - * done for const @nr, but no code is generated due to gcc \ - * const prop. \ - */ \ nr &= 0x1f; \ \ __asm__ __volatile__( \ From 6b3ae95f03d9ff3877549e601c88f7650e3ada29 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 3 Mar 2016 10:54:57 +0100 Subject: [PATCH 241/797] quota: Fix possible GPF due to uninitialised pointers commit ab73ef46398e2c0159f3a71de834586422d2a44a upstream. When dqget() in __dquot_initialize() fails e.g. due to IO error, __dquot_initialize() will pass an array of uninitialized pointers to dqput_all() and thus can lead to deference of random data. Fix the problem by properly initializing the array. Signed-off-by: Nikolay Borisov Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/quota/dquot.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index ef0d64b2a6d9..353ff31dcee1 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1398,7 +1398,7 @@ static int dquot_active(const struct inode *inode) static int __dquot_initialize(struct inode *inode, int type) { int cnt, init_needed = 0; - struct dquot **dquots, *got[MAXQUOTAS]; + struct dquot **dquots, *got[MAXQUOTAS] = {}; struct super_block *sb = inode->i_sb; qsize_t rsv; int ret = 0; @@ -1415,7 +1415,6 @@ static int __dquot_initialize(struct inode *inode, int type) int rc; struct dquot *dquot; - got[cnt] = NULL; if (type != -1 && cnt != type) continue; /* From b34291f71d0fb3d09728fd43d6da350ff6a179d6 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 2 Mar 2016 09:51:09 +1100 Subject: [PATCH 242/797] xfs: fix two memory leaks in xfs_attr_list.c error paths commit 2e83b79b2d6c78bf1b4aa227938a214dcbddc83f upstream. This plugs 2 trivial leaks in xfs_attr_shortform_list and xfs_attr3_leaf_list_int. Signed-off-by: Mateusz Guzik Reviewed-by: Eric Sandeen Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_attr_list.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 0ef7c2ed3f8a..4fa14820e2e2 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -202,8 +202,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) sbp->namelen, sbp->valuelen, &sbp->name[sbp->namelen]); - if (error) + if (error) { + kmem_free(sbuf); return error; + } if (context->seen_enough) break; cursor->offset++; @@ -454,14 +456,13 @@ xfs_attr3_leaf_list_int( args.rmtblkcnt = xfs_attr3_rmt_blocks( args.dp->i_mount, valuelen); retval = xfs_attr_rmtval_get(&args); - if (retval) - return retval; - retval = context->put_listent(context, - entry->flags, - name_rmt->name, - (int)name_rmt->namelen, - valuelen, - args.value); + if (!retval) + retval = context->put_listent(context, + entry->flags, + name_rmt->name, + (int)name_rmt->namelen, + valuelen, + args.value); kmem_free(args.value); } else { retval = context->put_listent(context, From 2b9eb2b2234c44c509384549a023c3f3bde5c59a Mon Sep 17 00:00:00 2001 From: Nate Dailey Date: Mon, 29 Feb 2016 10:43:58 -0500 Subject: [PATCH 243/797] raid1: include bio_end_io_list in nr_queued to prevent freeze_array hang commit ccfc7bf1f09d6190ef86693ddc761d5fe3fa47cb upstream. If raid1d is handling a mix of read and write errors, handle_read_error's call to freeze_array can get stuck. This can happen because, though the bio_end_io_list is initially drained, writes can be added to it via handle_write_finished as the retry_list is processed. These writes contribute to nr_pending but are not included in nr_queued. If a later entry on the retry_list triggers a call to handle_read_error, freeze array hangs waiting for nr_pending == nr_queued+extra. The writes on the bio_end_io_list aren't included in nr_queued so the condition will never be satisfied. To prevent the hang, include bio_end_io_list writes in nr_queued. There's probably a better way to handle decrementing nr_queued, but this seemed like the safest way to avoid breaking surrounding code. I'm happy to supply the script I used to repro this hang. Fixes: 55ce74d4bfe1b(md/raid1: ensure device failure recorded before write request returns.) Signed-off-by: Nate Dailey Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid1.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index c4b913409226..515554c7365b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2274,6 +2274,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio) if (fail) { spin_lock_irq(&conf->device_lock); list_add(&r1_bio->retry_list, &conf->bio_end_io_list); + conf->nr_queued++; spin_unlock_irq(&conf->device_lock); md_wakeup_thread(conf->mddev->thread); } else { @@ -2391,8 +2392,10 @@ static void raid1d(struct md_thread *thread) LIST_HEAD(tmp); spin_lock_irqsave(&conf->device_lock, flags); if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) { - list_add(&tmp, &conf->bio_end_io_list); - list_del_init(&conf->bio_end_io_list); + while (!list_empty(&conf->bio_end_io_list)) { + list_move(conf->bio_end_io_list.prev, &tmp); + conf->nr_queued--; + } } spin_unlock_irqrestore(&conf->device_lock, flags); while (!list_empty(&tmp)) { From fad8b6fc040bf1d9807ee0e3358275d06b14cf96 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 16 Feb 2016 16:44:24 -0500 Subject: [PATCH 244/797] md/raid5: Compare apples to apples (or sectors to sectors) commit e7597e69dec59b65c5525db1626b9d34afdfa678 upstream. 'max_discard_sectors' is in sectors, while 'stripe' is in bytes. This fixes the problem where DISCARD would get disabled on some larger RAID5 configurations (6 or more drives in my testing), while it worked as expected with smaller configurations. Fixes: 620125f2bf8 ("MD: raid5 trim support") Signed-off-by: Jes Sorensen Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 704ef7fcfbf8..c13921adec93 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7015,8 +7015,8 @@ static int run(struct mddev *mddev) } if (discard_supported && - mddev->queue->limits.max_discard_sectors >= stripe && - mddev->queue->limits.discard_granularity >= stripe) + mddev->queue->limits.max_discard_sectors >= (stripe >> 9) && + mddev->queue->limits.discard_granularity >= stripe) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); else From 8568767fe1d63fbbdeacf2f69da6f84ad277d4f7 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 24 Feb 2016 17:38:28 -0800 Subject: [PATCH 245/797] RAID5: check_reshape() shouldn't call mddev_suspend commit 27a353c026a879a1001e5eac4bda75b16262c44a upstream. check_reshape() is called from raid5d thread. raid5d thread shouldn't call mddev_suspend(), because mddev_suspend() waits for all IO finish but IO is handled in raid5d thread, we could easily deadlock here. This issue is introduced by 738a273 ("md/raid5: fix allocation of 'scribble' array.") Reported-and-tested-by: Artur Paszkiewicz Reviewed-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 18 ++++++++++++++++++ drivers/md/raid5.h | 2 ++ 2 files changed, 20 insertions(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c13921adec93..d3e747cd0b34 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2091,6 +2091,14 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors) unsigned long cpu; int err = 0; + /* + * Never shrink. And mddev_suspend() could deadlock if this is called + * from raid5d. In that case, scribble_disks and scribble_sectors + * should equal to new_disks and new_sectors + */ + if (conf->scribble_disks >= new_disks && + conf->scribble_sectors >= new_sectors) + return 0; mddev_suspend(conf->mddev); get_online_cpus(); for_each_present_cpu(cpu) { @@ -2112,6 +2120,10 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors) } put_online_cpus(); mddev_resume(conf->mddev); + if (!err) { + conf->scribble_disks = new_disks; + conf->scribble_sectors = new_sectors; + } return err; } @@ -6414,6 +6426,12 @@ static int raid5_alloc_percpu(struct r5conf *conf) } put_online_cpus(); + if (!err) { + conf->scribble_disks = max(conf->raid_disks, + conf->previous_raid_disks); + conf->scribble_sectors = max(conf->chunk_sectors, + conf->prev_chunk_sectors); + } return err; } diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index a415e1cd39b8..ae6068deefdf 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -510,6 +510,8 @@ struct r5conf { * conversions */ } __percpu *percpu; + int scribble_disks; + int scribble_sectors; #ifdef CONFIG_HOTPLUG_CPU struct notifier_block cpu_notify; #endif From ad072f606586a70eab8fd168465a6df4c695a9e9 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 25 Feb 2016 16:24:42 -0800 Subject: [PATCH 246/797] RAID5: revert e9e4c377e2f563 to fix a livelock commit 6ab2a4b806ae21b6c3e47c5ff1285ec06d505325 upstream. Revert commit e9e4c377e2f563(md/raid5: per hash value and exclusive wait_for_stripe) The problem is raid5_get_active_stripe waits on conf->wait_for_stripe[hash]. Assume hash is 0. My test release stripes in this order: - release all stripes with hash 0 - raid5_get_active_stripe still sleeps since active_stripes > max_nr_stripes * 3 / 4 - release all stripes with hash other than 0. active_stripes becomes 0 - raid5_get_active_stripe still sleeps, since nobody wakes up wait_for_stripe[0] The system live locks. The problem is active_stripes isn't a per-hash count. Revert the patch makes the live lock go away. Cc: Yuanhan Liu Cc: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 27 ++++++++------------------- drivers/md/raid5.h | 2 +- 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d3e747cd0b34..22bac0188af6 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -340,8 +340,7 @@ static void release_inactive_stripe_list(struct r5conf *conf, int hash) { int size; - unsigned long do_wakeup = 0; - int i = 0; + bool do_wakeup = false; unsigned long flags; if (hash == NR_STRIPE_HASH_LOCKS) { @@ -362,19 +361,15 @@ static void release_inactive_stripe_list(struct r5conf *conf, !list_empty(list)) atomic_dec(&conf->empty_inactive_list_nr); list_splice_tail_init(list, conf->inactive_list + hash); - do_wakeup |= 1 << hash; + do_wakeup = true; spin_unlock_irqrestore(conf->hash_locks + hash, flags); } size--; hash--; } - for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) { - if (do_wakeup & (1 << i)) - wake_up(&conf->wait_for_stripe[i]); - } - if (do_wakeup) { + wake_up(&conf->wait_for_stripe); if (atomic_read(&conf->active_stripes) == 0) wake_up(&conf->wait_for_quiescent); if (conf->retry_read_aligned) @@ -687,15 +682,14 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector, if (!sh) { set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); - wait_event_exclusive_cmd( - conf->wait_for_stripe[hash], + wait_event_lock_irq( + conf->wait_for_stripe, !list_empty(conf->inactive_list + hash) && (atomic_read(&conf->active_stripes) < (conf->max_nr_stripes * 3 / 4) || !test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)), - spin_unlock_irq(conf->hash_locks + hash), - spin_lock_irq(conf->hash_locks + hash)); + *(conf->hash_locks + hash)); clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); } else { @@ -720,9 +714,6 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector, } } while (sh == NULL); - if (!list_empty(conf->inactive_list + hash)) - wake_up(&conf->wait_for_stripe[hash]); - spin_unlock_irq(conf->hash_locks + hash); return sh; } @@ -2204,7 +2195,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) cnt = 0; list_for_each_entry(nsh, &newstripes, lru) { lock_device_hash_lock(conf, hash); - wait_event_exclusive_cmd(conf->wait_for_stripe[hash], + wait_event_cmd(conf->wait_for_stripe, !list_empty(conf->inactive_list + hash), unlock_device_hash_lock(conf, hash), lock_device_hash_lock(conf, hash)); @@ -6522,9 +6513,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) seqcount_init(&conf->gen_lock); mutex_init(&conf->cache_size_mutex); init_waitqueue_head(&conf->wait_for_quiescent); - for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) { - init_waitqueue_head(&conf->wait_for_stripe[i]); - } + init_waitqueue_head(&conf->wait_for_stripe); init_waitqueue_head(&conf->wait_for_overlap); INIT_LIST_HEAD(&conf->handle_list); INIT_LIST_HEAD(&conf->hold_list); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index ae6068deefdf..517d4b68a1be 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -524,7 +524,7 @@ struct r5conf { atomic_t empty_inactive_list_nr; struct llist_head released_stripes; wait_queue_head_t wait_for_quiescent; - wait_queue_head_t wait_for_stripe[NR_STRIPE_HASH_LOCKS]; + wait_queue_head_t wait_for_stripe; wait_queue_head_t wait_for_overlap; unsigned long cache_state; #define R5_INACTIVE_BLOCKED 1 /* release of inactive stripes blocked, From b82ed7dc00f3472d4a7bca472e05ed9744881bc8 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 14 Mar 2016 11:49:32 -0700 Subject: [PATCH 247/797] raid10: include bio_end_io_list in nr_queued to prevent freeze_array hang commit 23ddba80ebe836476bb2fa1f5ef305dd1c63dc0b upstream. This is the raid10 counterpart of the bug fixed by Nate (raid1: include bio_end_io_list in nr_queued to prevent freeze_array hang) Fixes: 95af587e95(md/raid10: ensure device failure recorded before write request returns) Cc: Nate Dailey Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ce959b4ae4df..ebb0dd612ebd 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2664,6 +2664,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) if (fail) { spin_lock_irq(&conf->device_lock); list_add(&r10_bio->retry_list, &conf->bio_end_io_list); + conf->nr_queued++; spin_unlock_irq(&conf->device_lock); md_wakeup_thread(conf->mddev->thread); } else { @@ -2691,8 +2692,10 @@ static void raid10d(struct md_thread *thread) LIST_HEAD(tmp); spin_lock_irqsave(&conf->device_lock, flags); if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) { - list_add(&tmp, &conf->bio_end_io_list); - list_del_init(&conf->bio_end_io_list); + while (!list_empty(&conf->bio_end_io_list)) { + list_move(conf->bio_end_io_list.prev, &tmp); + conf->nr_queued--; + } } spin_unlock_irqrestore(&conf->device_lock, flags); while (!list_empty(&tmp)) { From d5e30f2b934bf9964e1bf5fb1c1bd4d1bc865b5d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 9 Mar 2016 12:58:25 +1100 Subject: [PATCH 248/797] md/raid5: preserve STRIPE_PREREAD_ACTIVE in break_stripe_batch_list commit 550da24f8d62fe81f3c13e3ec27602d6e44d43dc upstream. break_stripe_batch_list breaks up a batch and copies some flags from the batch head to the members, preserving others. It doesn't preserve or copy STRIPE_PREREAD_ACTIVE. This is not normally a problem as STRIPE_PREREAD_ACTIVE is cleared when a stripe_head is added to a batch, and is not set on stripe_heads already in a batch. However there is no locking to ensure one thread doesn't set the flag after it has just been cleared in another. This does occasionally happen. md/raid5 maintains a count of the number of stripe_heads with STRIPE_PREREAD_ACTIVE set: conf->preread_active_stripes. When break_stripe_batch_list clears STRIPE_PREREAD_ACTIVE inadvertently this could becomes incorrect and will never again return to zero. md/raid5 delays the handling of some stripe_heads until preread_active_stripes becomes zero. So when the above mention race happens, those stripe_heads become blocked and never progress, resulting is write to the array handing. So: change break_stripe_batch_list to preserve STRIPE_PREREAD_ACTIVE in the members of a batch. URL: https://bugzilla.kernel.org/show_bug.cgi?id=108741 URL: https://bugzilla.redhat.com/show_bug.cgi?id=1258153 URL: http://thread.gmane.org/5649C0E9.2030204@zoner.cz Reported-by: Martin Svec (and others) Tested-by: Tom Weber Fixes: 1b956f7a8f9a ("md/raid5: be more selective about distributing flags across batch.") Signed-off-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 22bac0188af6..10ce885445f6 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4241,7 +4241,6 @@ static void break_stripe_batch_list(struct stripe_head *head_sh, WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) | (1 << STRIPE_SYNCING) | (1 << STRIPE_REPLACED) | - (1 << STRIPE_PREREAD_ACTIVE) | (1 << STRIPE_DELAYED) | (1 << STRIPE_BIT_DELAY) | (1 << STRIPE_FULL_WRITE) | @@ -4256,6 +4255,7 @@ static void break_stripe_batch_list(struct stripe_head *head_sh, (1 << STRIPE_REPLACED))); set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS | + (1 << STRIPE_PREREAD_ACTIVE) | (1 << STRIPE_DEGRADED)), head_sh->state & (1 << STRIPE_INSYNC)); From 2775a60447ae12350711b443be1083117b6f13b8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Mar 2016 09:29:40 +0800 Subject: [PATCH 249/797] md: multipath: don't hardcopy bio in .make_request path commit fafcde3ac1a418688a734365203a12483b83907a upstream. Inside multipath_make_request(), multipath maps the incoming bio into low level device's bio, but it is totally wrong to copy the bio into mapped bio via '*mapped_bio = *bio'. For example, .__bi_remaining is kept in the copy, especially if the incoming bio is chained to via bio splitting, so .bi_end_io can't be called for the mapped bio at all in the completing path in this kind of situation. This patch fixes the issue by using clone style. Reported-and-tested-by: Andrea Righi Signed-off-by: Ming Lei Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/multipath.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 0a72ab6e6c20..dd483bb2e111 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -129,7 +129,9 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio) } multipath = conf->multipaths + mp_bh->path; - mp_bh->bio = *bio; + bio_init(&mp_bh->bio); + __bio_clone_fast(&mp_bh->bio, bio); + mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset; mp_bh->bio.bi_bdev = multipath->rdev->bdev; mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT; From 32b9807433507058fce25092a0d11abac494d00d Mon Sep 17 00:00:00 2001 From: Robert Doebbelin Date: Mon, 7 Mar 2016 09:50:56 +0100 Subject: [PATCH 250/797] fuse: do not use iocb after it may have been freed commit 7cabc61e01a0a8b663bd2b4c982aa53048218734 upstream. There's a race in fuse_direct_IO(), whereby is_sync_kiocb() is called on an iocb that could have been freed if async io has already completed. The fix in this case is simple and obvious: cache the result before starting io. It was discovered by KASan: kernel: ================================================================== kernel: BUG: KASan: use after free in fuse_direct_IO+0xb1a/0xcc0 at addr ffff88036c414390 Signed-off-by: Robert Doebbelin Signed-off-by: Miklos Szeredi Fixes: bcba24ccdc82 ("fuse: enable asynchronous processing direct IO") Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 570ca4053c80..6991b5cc0056 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2786,6 +2786,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) loff_t i_size; size_t count = iov_iter_count(iter); struct fuse_io_priv *io; + bool is_sync = is_sync_kiocb(iocb); pos = offset; inode = file->f_mapping->host; @@ -2825,11 +2826,11 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) * to wait on real async I/O requests, so we must submit this request * synchronously. */ - if (!is_sync_kiocb(iocb) && (offset + count > i_size) && + if (!is_sync && (offset + count > i_size) && iov_iter_rw(iter) == WRITE) io->async = false; - if (io->async && is_sync_kiocb(iocb)) + if (io->async && is_sync) io->done = &wait; if (iov_iter_rw(iter) == WRITE) { @@ -2843,7 +2844,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) fuse_aio_complete(io, ret < 0 ? ret : 0, -1); /* we have a non-extending, async request, so return */ - if (!is_sync_kiocb(iocb)) + if (!is_sync) return -EIOCBQUEUED; wait_for_completion(&wait); From 37bd8c883ea5f3b90ae1788a1f76c93cb0dbeba5 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Fri, 11 Mar 2016 10:35:34 -0600 Subject: [PATCH 251/797] fuse: Add reference counting for fuse_io_priv commit 744742d692e37ad5c20630e57d526c8f2e2fe3c9 upstream. The 'reqs' member of fuse_io_priv serves two purposes. First is to track the number of oustanding async requests to the server and to signal that the io request is completed. The second is to be a reference count on the structure to know when it can be freed. For sync io requests these purposes can be at odds. fuse_direct_IO() wants to block until the request is done, and since the signal is sent when 'reqs' reaches 0 it cannot keep a reference to the object. Yet it needs to use the object after the userspace server has completed processing requests. This leads to some handshaking and special casing that it needlessly complicated and responsible for at least one race condition. It's much cleaner and safer to maintain a separate reference count for the object lifecycle and to let 'reqs' just be a count of outstanding requests to the userspace server. Then we can know for sure when it is safe to free the object without any handshaking or special cases. The catch here is that most of the time these objects are stack allocated and should not be freed. Initializing these objects with a single reference that is never released prevents accidental attempts to free the objects. Fixes: 9d5722b7777e ("fuse: handle synchronous iocbs internally") Signed-off-by: Seth Forshee Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/cuse.c | 4 ++-- fs/fuse/file.c | 28 +++++++++++++++++++++------- fs/fuse/fuse_i.h | 9 +++++++++ 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 8e3ee1936c7e..c5b6b7165489 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -90,7 +90,7 @@ static struct list_head *cuse_conntbl_head(dev_t devt) static ssize_t cuse_read_iter(struct kiocb *kiocb, struct iov_iter *to) { - struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp }; + struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(kiocb->ki_filp); loff_t pos = 0; return fuse_direct_io(&io, to, &pos, FUSE_DIO_CUSE); @@ -98,7 +98,7 @@ static ssize_t cuse_read_iter(struct kiocb *kiocb, struct iov_iter *to) static ssize_t cuse_write_iter(struct kiocb *kiocb, struct iov_iter *from) { - struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp }; + struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(kiocb->ki_filp); loff_t pos = 0; /* * No locking or generic_write_checks(), the server is diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 6991b5cc0056..c2e340d6ec6e 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -528,6 +528,11 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) } } +static void fuse_io_release(struct kref *kref) +{ + kfree(container_of(kref, struct fuse_io_priv, refcnt)); +} + static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io) { if (io->err) @@ -585,8 +590,9 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) } io->iocb->ki_complete(io->iocb, res, 0); - kfree(io); } + + kref_put(&io->refcnt, fuse_io_release); } static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req) @@ -613,6 +619,7 @@ static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req, size_t num_bytes, struct fuse_io_priv *io) { spin_lock(&io->lock); + kref_get(&io->refcnt); io->size += num_bytes; io->reqs++; spin_unlock(&io->lock); @@ -691,7 +698,7 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode, static int fuse_do_readpage(struct file *file, struct page *page) { - struct fuse_io_priv io = { .async = 0, .file = file }; + struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(file); struct inode *inode = page->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; @@ -984,7 +991,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, size_t res; unsigned offset; unsigned i; - struct fuse_io_priv io = { .async = 0, .file = file }; + struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(file); for (i = 0; i < req->num_pages; i++) fuse_wait_on_page_writeback(inode, req->pages[i]->index); @@ -1398,7 +1405,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io, static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to) { - struct fuse_io_priv io = { .async = 0, .file = iocb->ki_filp }; + struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb->ki_filp); return __fuse_direct_read(&io, to, &iocb->ki_pos); } @@ -1406,7 +1413,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - struct fuse_io_priv io = { .async = 0, .file = file }; + struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(file); ssize_t res; if (is_bad_inode(inode)) @@ -2807,6 +2814,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) if (!io) return -ENOMEM; spin_lock_init(&io->lock); + kref_init(&io->refcnt); io->reqs = 1; io->bytes = -1; io->size = 0; @@ -2830,8 +2838,14 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) iov_iter_rw(iter) == WRITE) io->async = false; - if (io->async && is_sync) + if (io->async && is_sync) { + /* + * Additional reference to keep io around after + * calling fuse_aio_complete() + */ + kref_get(&io->refcnt); io->done = &wait; + } if (iov_iter_rw(iter) == WRITE) { ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE); @@ -2851,7 +2865,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) ret = fuse_get_res_by_io(io); } - kfree(io); + kref_put(&io->refcnt, fuse_io_release); if (iov_iter_rw(iter) == WRITE) { if (ret > 0) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 405113101db8..604cd42dafef 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -22,6 +22,7 @@ #include #include #include +#include /** Max number of pages that can be used in a single read request */ #define FUSE_MAX_PAGES_PER_REQ 32 @@ -243,6 +244,7 @@ struct fuse_args { /** The request IO state (for asynchronous processing) */ struct fuse_io_priv { + struct kref refcnt; int async; spinlock_t lock; unsigned reqs; @@ -256,6 +258,13 @@ struct fuse_io_priv { struct completion *done; }; +#define FUSE_IO_PRIV_SYNC(f) \ +{ \ + .refcnt = { ATOMIC_INIT(1) }, \ + .async = 0, \ + .file = f, \ +} + /** * Request flags * From 74b23f79f16802d01315db8b028518ef0abd7bc8 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 22 Mar 2016 14:25:36 -0700 Subject: [PATCH 252/797] fs/coredump: prevent fsuid=0 dumps into user-controlled directories commit 378c6520e7d29280f400ef2ceaf155c86f05a71a upstream. This commit fixes the following security hole affecting systems where all of the following conditions are fulfilled: - The fs.suid_dumpable sysctl is set to 2. - The kernel.core_pattern sysctl's value starts with "/". (Systems where kernel.core_pattern starts with "|/" are not affected.) - Unprivileged user namespace creation is permitted. (This is true on Linux >=3.8, but some distributions disallow it by default using a distro patch.) Under these conditions, if a program executes under secure exec rules, causing it to run with the SUID_DUMP_ROOT flag, then unshares its user namespace, changes its root directory and crashes, the coredump will be written using fsuid=0 and a path derived from kernel.core_pattern - but this path is interpreted relative to the root directory of the process, allowing the attacker to control where a coredump will be written with root privileges. To fix the security issue, always interpret core_pattern for dumps that are written under SUID_DUMP_ROOT relative to the root directory of init. Signed-off-by: Jann Horn Acked-by: Kees Cook Cc: Al Viro Cc: "Eric W. Biederman" Cc: Andy Lutomirski Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/um/drivers/mconsole_kern.c | 2 +- fs/coredump.c | 30 ++++++++++++++++++++++++++---- fs/fhandle.c | 2 +- fs/open.c | 6 ++---- include/linux/fs.h | 2 +- kernel/sysctl_binary.c | 2 +- 6 files changed, 32 insertions(+), 12 deletions(-) diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 29880c9b324e..e22e57298522 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -133,7 +133,7 @@ void mconsole_proc(struct mc_request *req) ptr += strlen("proc"); ptr = skip_spaces(ptr); - file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY); + file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY, 0); if (IS_ERR(file)) { mconsole_reply(req, "Failed to open file", 1, 0); printk(KERN_ERR "open /proc/%s: %ld\n", ptr, PTR_ERR(file)); diff --git a/fs/coredump.c b/fs/coredump.c index 1777331eee76..dfc87c5f5a54 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -32,6 +32,9 @@ #include #include #include +#include +#include +#include #include #include @@ -627,6 +630,8 @@ void do_coredump(const siginfo_t *siginfo) } } else { struct inode *inode; + int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW | + O_LARGEFILE | O_EXCL; if (cprm.limit < binfmt->min_coredump) goto fail_unlock; @@ -665,10 +670,27 @@ void do_coredump(const siginfo_t *siginfo) * what matters is that at least one of the two processes * writes its coredump successfully, not which one. */ - cprm.file = filp_open(cn.corename, - O_CREAT | 2 | O_NOFOLLOW | - O_LARGEFILE | O_EXCL, - 0600); + if (need_suid_safe) { + /* + * Using user namespaces, normal user tasks can change + * their current->fs->root to point to arbitrary + * directories. Since the intention of the "only dump + * with a fully qualified path" rule is to control where + * coredumps may be placed using root privileges, + * current->fs->root must not be used. Instead, use the + * root directory of init_task. + */ + struct path root; + + task_lock(&init_task); + get_fs_root(init_task.fs, &root); + task_unlock(&init_task); + cprm.file = file_open_root(root.dentry, root.mnt, + cn.corename, open_flags, 0600); + path_put(&root); + } else { + cprm.file = filp_open(cn.corename, open_flags, 0600); + } if (IS_ERR(cprm.file)) goto fail_unlock; diff --git a/fs/fhandle.c b/fs/fhandle.c index d59712dfa3e7..ca3c3dd01789 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -228,7 +228,7 @@ long do_handle_open(int mountdirfd, path_put(&path); return fd; } - file = file_open_root(path.dentry, path.mnt, "", open_flag); + file = file_open_root(path.dentry, path.mnt, "", open_flag, 0); if (IS_ERR(file)) { put_unused_fd(fd); retval = PTR_ERR(file); diff --git a/fs/open.c b/fs/open.c index b6f1e96a7c0b..6a24f988d253 100644 --- a/fs/open.c +++ b/fs/open.c @@ -995,14 +995,12 @@ struct file *filp_open(const char *filename, int flags, umode_t mode) EXPORT_SYMBOL(filp_open); struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, - const char *filename, int flags) + const char *filename, int flags, umode_t mode) { struct open_flags op; - int err = build_open_flags(flags, 0, &op); + int err = build_open_flags(flags, mode, &op); if (err) return ERR_PTR(err); - if (flags & O_CREAT) - return ERR_PTR(-EINVAL); return do_file_open_root(dentry, mnt, filename, &op); } EXPORT_SYMBOL(file_open_root); diff --git a/include/linux/fs.h b/include/linux/fs.h index 3aa514254161..22c5a0cf16e3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2217,7 +2217,7 @@ extern long do_sys_open(int dfd, const char __user *filename, int flags, extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, - const char *, int); + const char *, int, umode_t); extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 7e7746a42a62..10a1d7dc9313 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -1321,7 +1321,7 @@ static ssize_t binary_sysctl(const int *name, int nlen, } mnt = task_active_pid_ns(current)->proc_mnt; - file = file_open_root(mnt->mnt_root, mnt, pathname, flags); + file = file_open_root(mnt->mnt_root, mnt, pathname, flags, 0); result = PTR_ERR(file); if (IS_ERR(file)) goto out_putname; From 91b95d59aa38ca5a1b9835c108ae1aed533e5e76 Mon Sep 17 00:00:00 2001 From: Aurelien Jacquiot Date: Tue, 22 Mar 2016 14:25:42 -0700 Subject: [PATCH 253/797] rapidio/rionet: fix deadlock on SMP commit 36915976eca58f2eefa040ba8f9939672564df61 upstream. Fix deadlocking during concurrent receive and transmit operations on SMP platforms caused by the use of incorrect lock: on transmit 'tx_lock' spinlock should be used instead of 'lock' which is used for receive operation. This fix is applicable to kernel versions starting from v2.15. Signed-off-by: Aurelien Jacquiot Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Andre van Herk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/net/rionet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index 01f08a7751f7..e7034c55e796 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -280,7 +280,7 @@ static void rionet_outb_msg_event(struct rio_mport *mport, void *dev_id, int mbo struct net_device *ndev = dev_id; struct rionet_private *rnet = netdev_priv(ndev); - spin_lock(&rnet->lock); + spin_lock(&rnet->tx_lock); if (netif_msg_intr(rnet)) printk(KERN_INFO @@ -299,7 +299,7 @@ static void rionet_outb_msg_event(struct rio_mport *mport, void *dev_id, int mbo if (rnet->tx_cnt < RIONET_TX_RING_SIZE) netif_wake_queue(ndev); - spin_unlock(&rnet->lock); + spin_unlock(&rnet->tx_lock); } static int rionet_open(struct net_device *ndev) From a918d2bcea6aab6e671bfb0901cbecc3cf68fca1 Mon Sep 17 00:00:00 2001 From: Insu Yun Date: Wed, 6 Jan 2016 12:44:01 -0500 Subject: [PATCH 254/797] ipr: Fix out-of-bounds null overwrite commit d63c7dd5bcb9441af0526d370c43a65ca2c980d9 upstream. Return value of snprintf is not bound by size value, 2nd argument. (https://www.kernel.org/doc/htmldocs/kernel-api/API-snprintf.html). Return value is number of printed chars, can be larger than 2nd argument. Therefore, it can write null byte out of bounds ofbuffer. Since snprintf puts null, it does not need to put additional null byte. Signed-off-by: Insu Yun Reviewed-by: Shane Seymour Signed-off-by: Martin K. Petersen Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ipr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 536cd5a80422..1c3759bab80b 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -4003,13 +4003,12 @@ static ssize_t ipr_store_update_fw(struct device *dev, struct ipr_sglist *sglist; char fname[100]; char *src; - int len, result, dnld_size; + int result, dnld_size; if (!capable(CAP_SYS_ADMIN)) return -EACCES; - len = snprintf(fname, 99, "%s", buf); - fname[len-1] = '\0'; + snprintf(fname, sizeof(fname), "%s", buf); if (request_firmware(&fw_entry, fname, &ioa_cfg->pdev->dev)) { dev_err(&ioa_cfg->pdev->dev, "Firmware file %s not found\n", fname); From 6677a2ab036f28134b60ad4ed6fd2e72db579b8a Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 25 Feb 2016 13:54:20 -0300 Subject: [PATCH 255/797] ipr: Fix regression when loading firmware commit 21b81716c6bff24cda52dc75588455f879ddbfe9 upstream. Commit d63c7dd5bcb9 ("ipr: Fix out-of-bounds null overwrite") removed the end of line handling when storing the update_fw sysfs attribute. This changed the userpace API because it started refusing writes terminated by a line feed, which broke the update tools we already have. This patch re-adds that handling, so both a write terminated by a line feed or not can make it through with the update. Fixes: d63c7dd5bcb9 ("ipr: Fix out-of-bounds null overwrite") Signed-off-by: Gabriel Krisman Bertazi Cc: Insu Yun Acked-by: Brian King Signed-off-by: Martin K. Petersen Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ipr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 1c3759bab80b..43ac62623bf2 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -4003,6 +4003,7 @@ static ssize_t ipr_store_update_fw(struct device *dev, struct ipr_sglist *sglist; char fname[100]; char *src; + char *endline; int result, dnld_size; if (!capable(CAP_SYS_ADMIN)) @@ -4010,6 +4011,10 @@ static ssize_t ipr_store_update_fw(struct device *dev, snprintf(fname, sizeof(fname), "%s", buf); + endline = strchr(fname, '\n'); + if (endline) + *endline = '\0'; + if (request_firmware(&fw_entry, fname, &ioa_cfg->pdev->dev)) { dev_err(&ioa_cfg->pdev->dev, "Firmware file %s not found\n", fname); return -EIO; From 0ae6554c2bbfa89218cf3a6e8d8d10581334f551 Mon Sep 17 00:00:00 2001 From: Matti Gottlieb Date: Sun, 14 Feb 2016 17:05:39 +0200 Subject: [PATCH 256/797] iwlwifi: mvm: Fix paging memory leak commit 905e36ae172c83a30894a3adefab7d4f850fcf54 upstream. If the opmode is stopped and started again we did not free the paging buffers. Fix that. In addition when freeing the firmware's paging download buffer, set the pointer to NULL. Signed-off-by: Matti Gottlieb Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/fw.c | 4 +++- drivers/net/wireless/iwlwifi/mvm/mvm.h | 3 +++ drivers/net/wireless/iwlwifi/mvm/ops.c | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/fw.c b/drivers/net/wireless/iwlwifi/mvm/fw.c index d906fa13ba97..610c442c7ab2 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/iwlwifi/mvm/fw.c @@ -106,7 +106,7 @@ static int iwl_send_tx_ant_cfg(struct iwl_mvm *mvm, u8 valid_tx_ant) sizeof(tx_ant_cmd), &tx_ant_cmd); } -static void iwl_free_fw_paging(struct iwl_mvm *mvm) +void iwl_free_fw_paging(struct iwl_mvm *mvm) { int i; @@ -126,6 +126,8 @@ static void iwl_free_fw_paging(struct iwl_mvm *mvm) get_order(mvm->fw_paging_db[i].fw_paging_size)); } kfree(mvm->trans->paging_download_buf); + mvm->trans->paging_download_buf = NULL; + memset(mvm->fw_paging_db, 0, sizeof(mvm->fw_paging_db)); } diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h index 4bde2d027dcd..244e26c26821 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h @@ -1190,6 +1190,9 @@ void iwl_mvm_rx_umac_scan_complete_notif(struct iwl_mvm *mvm, void iwl_mvm_rx_umac_scan_iter_complete_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb); +/* Paging */ +void iwl_free_fw_paging(struct iwl_mvm *mvm); + /* MVM debugfs */ #ifdef CONFIG_IWLWIFI_DEBUGFS int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir); diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c index 13c97f665ba8..c3adf2bcdc85 100644 --- a/drivers/net/wireless/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/iwlwifi/mvm/ops.c @@ -645,6 +645,8 @@ static void iwl_op_mode_mvm_stop(struct iwl_op_mode *op_mode) for (i = 0; i < NVM_MAX_NUM_SECTIONS; i++) kfree(mvm->nvm_sections[i].data); + iwl_free_fw_paging(mvm); + iwl_mvm_tof_clean(mvm); ieee80211_free_hw(mvm->hw); From 791b5b0d2d01542a87af4b5f8fb2504ce2d5b352 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 2 Mar 2016 11:47:29 -0500 Subject: [PATCH 257/797] drm/radeon: disable runtime pm on PX laptops without dGPU power control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e64c952efb8e0c15ae82cec8e455ab4910690ef1 upstream. Some PX laptops don't provide an ACPI method to control dGPU power. On those systems, the driver is responsible for handling the dGPU power state. Disable runtime PM on them until support for this is implemented. Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_atpx_handler.c | 8 ++++---- drivers/gpu/drm/radeon/radeon_device.c | 8 +++++++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index c4b4f298a283..9bc408c9f9f6 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -62,6 +62,10 @@ bool radeon_has_atpx(void) { return radeon_atpx_priv.atpx_detected; } +bool radeon_has_atpx_dgpu_power_cntl(void) { + return radeon_atpx_priv.atpx.functions.power_cntl; +} + /** * radeon_atpx_call - call an ATPX method * @@ -141,10 +145,6 @@ static void radeon_atpx_parse_functions(struct radeon_atpx_functions *f, u32 mas */ static int radeon_atpx_validate(struct radeon_atpx *atpx) { - /* make sure required functions are enabled */ - /* dGPU power control is required */ - atpx->functions.power_cntl = true; - if (atpx->functions.px_params) { union acpi_object *info; struct atpx_px_params output; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index c566993a2ec3..f78f111e68de 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -103,6 +103,12 @@ static const char radeon_family_name[][16] = { "LAST", }; +#if defined(CONFIG_VGA_SWITCHEROO) +bool radeon_has_atpx_dgpu_power_cntl(void); +#else +static inline bool radeon_has_atpx_dgpu_power_cntl(void) { return false; } +#endif + #define RADEON_PX_QUIRK_DISABLE_PX (1 << 0) #define RADEON_PX_QUIRK_LONG_WAKEUP (1 << 1) @@ -1433,7 +1439,7 @@ int radeon_device_init(struct radeon_device *rdev, * ignore it */ vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode); - if (rdev->flags & RADEON_IS_PX) + if ((rdev->flags & RADEON_IS_PX) && radeon_has_atpx_dgpu_power_cntl()) runtime = true; vga_switcheroo_register_client(rdev->pdev, &radeon_switcheroo_ops, runtime); if (runtime) From a4ecd0324b3cdea3abc4b0f5d7e2829530400317 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Sun, 6 Mar 2016 02:39:53 +0100 Subject: [PATCH 258/797] drm/radeon: Don't drop DP 2.7 Ghz link setup on some cards. commit 459ee1c3fd097ab56ababd8ff4bb7ef6a792de33 upstream. As observed on Apple iMac10,1, DCE-3.2, RV-730, link rate of 2.7 Ghz is not selected, because the args.v1.ucConfig flag setting for 2.7 Ghz gets overwritten by a following assignment of the transmitter to use. Move link rate setup a few lines down to fix this. In practice this didn't have any positive or negative effect on display setup on the tested iMac10,1 so i don't know if backporting to stable makes sense or not. Signed-off-by: Mario Kleiner Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atombios_encoders.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index bb292143997e..adf74f4366bb 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -892,8 +892,6 @@ atombios_dig_encoder_setup2(struct drm_encoder *encoder, int action, int panel_m else args.v1.ucLaneNum = 4; - if (ENCODER_MODE_IS_DP(args.v1.ucEncoderMode) && (dp_clock == 270000)) - args.v1.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ; switch (radeon_encoder->encoder_id) { case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: args.v1.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER1; @@ -910,6 +908,10 @@ atombios_dig_encoder_setup2(struct drm_encoder *encoder, int action, int panel_m args.v1.ucConfig |= ATOM_ENCODER_CONFIG_LINKB; else args.v1.ucConfig |= ATOM_ENCODER_CONFIG_LINKA; + + if (ENCODER_MODE_IS_DP(args.v1.ucEncoderMode) && (dp_clock == 270000)) + args.v1.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ; + break; case 2: case 3: From 5b5abb9b85e97630e07b2b6d33f4739a2eb4e872 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 2 Mar 2016 12:10:20 -0500 Subject: [PATCH 259/797] drm/amdgpu: disable runtime pm on PX laptops without dGPU power control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bedf2a65c1aa8fb29ba8527fd00c0f68ec1f55f1 upstream. Some PX laptops don't provide an ACPI method to control dGPU power. On those systems, the driver is responsible for handling the dGPU power state. Disable runtime PM on them until support for this is implemented. Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 +++++++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 5a8fbadbd27b..8ac49812a716 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -63,6 +63,10 @@ bool amdgpu_has_atpx(void) { return amdgpu_atpx_priv.atpx_detected; } +bool amdgpu_has_atpx_dgpu_power_cntl(void) { + return amdgpu_atpx_priv.atpx.functions.power_cntl; +} + /** * amdgpu_atpx_call - call an ATPX method * @@ -142,10 +146,6 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas */ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx) { - /* make sure required functions are enabled */ - /* dGPU power control is required */ - atpx->functions.power_cntl = true; - if (atpx->functions.px_params) { union acpi_object *info; struct atpx_px_params output; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c961fe093e12..9d88023df836 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -61,6 +61,12 @@ static const char *amdgpu_asic_name[] = { "LAST", }; +#if defined(CONFIG_VGA_SWITCHEROO) +bool amdgpu_has_atpx_dgpu_power_cntl(void); +#else +static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; } +#endif + bool amdgpu_device_is_px(struct drm_device *dev) { struct amdgpu_device *adev = dev->dev_private; @@ -1469,7 +1475,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (amdgpu_runtime_pm == 1) runtime = true; - if (amdgpu_device_is_px(ddev)) + if (amdgpu_device_is_px(ddev) && amdgpu_has_atpx_dgpu_power_cntl()) runtime = true; vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, runtime); if (runtime) From 044ebd1d5a691b1638ae622994d02f74f6e43591 Mon Sep 17 00:00:00 2001 From: Ken Wang Date: Thu, 17 Mar 2016 17:26:57 +0800 Subject: [PATCH 260/797] drm/amdgpu: include the right version of gmc header files for iceland commit 16a8a49be1b878ef6dd5d1663d456e254e54ae3d upstream. Signed-off-by: Ken Wang Reviewed-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 2cf50180cc51..b1c7a9b3631b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -32,8 +32,8 @@ #include "oss/oss_2_4_d.h" #include "oss/oss_2_4_sh_mask.h" -#include "gmc/gmc_8_1_d.h" -#include "gmc/gmc_8_1_sh_mask.h" +#include "gmc/gmc_7_1_d.h" +#include "gmc/gmc_7_1_sh_mask.h" #include "gca/gfx_8_0_d.h" #include "gca/gfx_8_0_enum.h" From bcda0fd9a8e18ea72f32542d9a1c4b010f89c5d2 Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Thu, 11 Feb 2016 16:30:51 -0500 Subject: [PATCH 261/797] IB/ipoib: fix for rare multicast join race condition commit 08bc327629cbd63bb2f66677e4b33b643695097c upstream. A narrow window for race condition still exist between multicast join thread and *dev_flush workers. A kernel crash caused by prolong erratic link state changes was observed (most likely a faulty cabling): [167275.656270] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 [167275.665973] IP: [] ipoib_mcast_join+0xae/0x1d0 [ib_ipoib] [167275.674443] PGD 0 [167275.677373] Oops: 0000 [#1] SMP ... [167275.977530] Call Trace: [167275.982225] [] ? ipoib_mcast_free+0x200/0x200 [ib_ipoib] [167275.992024] [] ipoib_mcast_join_task+0x2a7/0x490 [ib_ipoib] [167276.002149] [] process_one_work+0x17b/0x470 [167276.010754] [] worker_thread+0x11b/0x400 [167276.019088] [] ? rescuer_thread+0x400/0x400 [167276.027737] [] kthread+0xcf/0xe0 Here was a hit spot: ipoib_mcast_join() { .............. rec.qkey = priv->broadcast->mcmember.qkey; ^^^^^^^ ..... } Proposed patch should prevent multicast join task to continue if link state change is detected. Signed-off-by: Alex Estrin Changes from v4: - as suggested by Doug Ledford, optimized spinlock usage, i.e. ipoib_mcast_join() is called with lock held. Changes from v3: - sync with priv->lock before flag check. Chages from v2: - Move check for OPER_UP flag state to mcast_join() to ensure no event worker is in progress. - minor style fixes. Changes from v1: - No need to lock again if error detected. Signed-off-by: Doug Ledford Cc: Nikolay Borisov Signed-off-by: Greg Kroah-Hartman --- .../infiniband/ulp/ipoib/ipoib_multicast.c | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index f357ca67a41c..87799de90a1d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -456,7 +456,10 @@ static int ipoib_mcast_join_complete(int status, return status; } -static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) +/* + * Caller must hold 'priv->lock' + */ +static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_sa_multicast *multicast; @@ -466,6 +469,10 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) ib_sa_comp_mask comp_mask; int ret = 0; + if (!priv->broadcast || + !test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) + return -EINVAL; + ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw); rec.mgid = mcast->mcmember.mgid; @@ -525,20 +532,23 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) rec.join_state = 4; #endif } + spin_unlock_irq(&priv->lock); multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, &rec, comp_mask, GFP_KERNEL, ipoib_mcast_join_complete, mcast); + spin_lock_irq(&priv->lock); if (IS_ERR(multicast)) { ret = PTR_ERR(multicast); ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); - spin_lock_irq(&priv->lock); /* Requeue this join task with a backoff delay */ __ipoib_mcast_schedule_join_thread(priv, mcast, 1); clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); spin_unlock_irq(&priv->lock); complete(&mcast->done); + spin_lock_irq(&priv->lock); } + return 0; } void ipoib_mcast_join_task(struct work_struct *work) @@ -620,9 +630,10 @@ void ipoib_mcast_join_task(struct work_struct *work) /* Found the next unjoined group */ init_completion(&mcast->done); set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); - spin_unlock_irq(&priv->lock); - ipoib_mcast_join(dev, mcast); - spin_lock_irq(&priv->lock); + if (ipoib_mcast_join(dev, mcast)) { + spin_unlock_irq(&priv->lock); + return; + } } else if (!delay_until || time_before(mcast->delay_until, delay_until)) delay_until = mcast->delay_until; @@ -641,10 +652,9 @@ void ipoib_mcast_join_task(struct work_struct *work) if (mcast) { init_completion(&mcast->done); set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); + ipoib_mcast_join(dev, mcast); } spin_unlock_irq(&priv->lock); - if (mcast) - ipoib_mcast_join(dev, mcast); } int ipoib_mcast_start_thread(struct net_device *dev) From aa60f652eeebb8345ed8c543d1a8d6b71f902de3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 18 Mar 2016 12:27:43 -0400 Subject: [PATCH 262/797] tracing: Have preempt(irqs)off trace preempt disabled functions commit cb86e05390debcc084cfdb0a71ed4c5dbbec517d upstream. Joel Fernandes reported that the function tracing of preempt disabled sections was not being reported when running either the preemptirqsoff or preemptoff tracers. This was due to the fact that the function tracer callback for those tracers checked if irqs were disabled before tracing. But this fails when we want to trace preempt off locations as well. Joel explained that he wanted to see funcitons where interrupts are enabled but preemption was disabled. The expected output he wanted: <...>-2265 1d.h1 3419us : preempt_count_sub <-irq_exit <...>-2265 1d..1 3419us : __do_softirq <-irq_exit <...>-2265 1d..1 3419us : msecs_to_jiffies <-__do_softirq <...>-2265 1d..1 3420us : irqtime_account_irq <-__do_softirq <...>-2265 1d..1 3420us : __local_bh_disable_ip <-__do_softirq <...>-2265 1..s1 3421us : run_timer_softirq <-__do_softirq <...>-2265 1..s1 3421us : hrtimer_run_pending <-run_timer_softirq <...>-2265 1..s1 3421us : _raw_spin_lock_irq <-run_timer_softirq <...>-2265 1d.s1 3422us : preempt_count_add <-_raw_spin_lock_irq <...>-2265 1d.s2 3422us : _raw_spin_unlock_irq <-run_timer_softirq <...>-2265 1..s2 3422us : preempt_count_sub <-_raw_spin_unlock_irq <...>-2265 1..s1 3423us : rcu_bh_qs <-__do_softirq <...>-2265 1d.s1 3423us : irqtime_account_irq <-__do_softirq <...>-2265 1d.s1 3423us : __local_bh_enable <-__do_softirq There's a comment saying that the irq disabled check is because there's a possible race that tracing_cpu may be set when the function is executed. But I don't remember that race. For now, I added a check for preemption being enabled too to not record the function, as there would be no race if that was the case. I need to re-investigate this, as I'm now thinking that the tracing_cpu will always be correct. But no harm in keeping the check for now, except for the slight performance hit. Link: http://lkml.kernel.org/r/1457770386-88717-1-git-send-email-agnel.joel@gmail.com Fixes: 5e6d2b9cfa3a "tracing: Use one prologue for the preempt irqs off tracer function tracers" Reported-by: Joel Fernandes Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_irqsoff.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index e4e56589ec1d..be3222b7d72e 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -109,8 +109,12 @@ static int func_prolog_dec(struct trace_array *tr, return 0; local_save_flags(*flags); - /* slight chance to get a false positive on tracing_cpu */ - if (!irqs_disabled_flags(*flags)) + /* + * Slight chance to get a false positive on tracing_cpu, + * although I'm starting to think there isn't a chance. + * Leave this for now just to be paranoid. + */ + if (!irqs_disabled_flags(*flags) && !preempt_count()) return 0; *data = per_cpu_ptr(tr->trace_buffer.data, cpu); From aab3ba82f8445abe99f33e743e7316d0d55fee7b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 18 Mar 2016 15:46:48 -0400 Subject: [PATCH 263/797] tracing: Fix crash from reading trace_pipe with sendfile commit a29054d9478d0435ab01b7544da4f674ab13f533 upstream. If tracing contains data and the trace_pipe file is read with sendfile(), then it can trigger a NULL pointer dereference and various BUG_ON within the VM code. There's a patch to fix this in the splice_to_pipe() code, but it's also a good idea to not let that happen from trace_pipe either. Link: http://lkml.kernel.org/r/1457641146-9068-1-git-send-email-rabin@rab.in Reported-by: Rabin Vincent Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d9293402ee68..8305cbb2d5a2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4949,7 +4949,10 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, spd.nr_pages = i; - ret = splice_to_pipe(pipe, &spd); + if (i) + ret = splice_to_pipe(pipe, &spd); + else + ret = 0; out: splice_shrink_spd(&spd); return ret; From 3dba3f672dfd0d5d961a294dac5bee18759cda6a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 22 Mar 2016 17:30:58 -0400 Subject: [PATCH 264/797] tracing: Fix trace_printk() to print when not using bprintk() commit 3debb0a9ddb16526de8b456491b7db60114f7b5e upstream. The trace_printk() code will allocate extra buffers if the compile detects that a trace_printk() is used. To do this, the format of the trace_printk() is saved to the __trace_printk_fmt section, and if that section is bigger than zero, the buffers are allocated (along with a message that this has happened). If trace_printk() uses a format that is not a constant, and thus something not guaranteed to be around when the print happens, the compiler optimizes the fmt out, as it is not used, and the __trace_printk_fmt section is not filled. This means the kernel will not allocate the special buffers needed for the trace_printk() and the trace_printk() will not write anything to the tracing buffer. Adding a "__used" to the variable in the __trace_printk_fmt section will keep it around, even though it is set to NULL. This will keep the string from being printed in the debugfs/tracing/printk_formats section as it is not needed. Reported-by: Vlastimil Babka Fixes: 07d777fe8c398 "tracing: Add percpu buffers for trace_printk()" Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- include/linux/kernel.h | 6 +++--- kernel/trace/trace_printk.c | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 350dfb08aee3..924853d33a13 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -607,7 +607,7 @@ do { \ #define do_trace_printk(fmt, args...) \ do { \ - static const char *trace_printk_fmt \ + static const char *trace_printk_fmt __used \ __attribute__((section("__trace_printk_fmt"))) = \ __builtin_constant_p(fmt) ? fmt : NULL; \ \ @@ -651,7 +651,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...); */ #define trace_puts(str) ({ \ - static const char *trace_printk_fmt \ + static const char *trace_printk_fmt __used \ __attribute__((section("__trace_printk_fmt"))) = \ __builtin_constant_p(str) ? str : NULL; \ \ @@ -673,7 +673,7 @@ extern void trace_dump_stack(int skip); #define ftrace_vprintk(fmt, vargs) \ do { \ if (__builtin_constant_p(fmt)) { \ - static const char *trace_printk_fmt \ + static const char *trace_printk_fmt __used \ __attribute__((section("__trace_printk_fmt"))) = \ __builtin_constant_p(fmt) ? fmt : NULL; \ \ diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 060df67dbdd1..f96f0383f6c6 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -296,6 +296,9 @@ static int t_show(struct seq_file *m, void *v) const char *str = *fmt; int i; + if (!*fmt) + return 0; + seq_printf(m, "0x%lx : \"", *(unsigned long *)fmt); /* From 2bfb8435386c44f89753b0417303110ff596dba9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 9 Mar 2016 12:40:54 +0100 Subject: [PATCH 265/797] bitops: Do not default to __clear_bit() for __clear_bit_unlock() commit f75d48644c56a31731d17fa693c8175328957e1d upstream. __clear_bit_unlock() is a special little snowflake. While it carries the non-atomic '__' prefix, it is specifically documented to pair with test_and_set_bit() and therefore should be 'somewhat' atomic. Therefore the generic implementation of __clear_bit_unlock() cannot use the fully non-atomic __clear_bit() as a default. If an arch is able to do better; is must provide an implementation of __clear_bit_unlock() itself. Specifically, this came up as a result of hackbench livelock'ing in slab_lock() on ARC with SMP + SLUB + !LLSC. The issue was incorrect pairing of atomic ops. slab_lock() -> bit_spin_lock() -> test_and_set_bit() slab_unlock() -> __bit_spin_unlock() -> __clear_bit() The non serializing __clear_bit() was getting "lost" 80543b8e: ld_s r2,[r13,0] <--- (A) Finds PG_locked is set 80543b90: or r3,r2,1 <--- (B) other core unlocks right here 80543b94: st_s r3,[r13,0] <--- (C) sets PG_locked (overwrites unlock) Fixes ARC STAR 9000817404 (and probably more). Reported-by: Vineet Gupta Tested-by: Vineet Gupta Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Christoph Lameter Cc: David Rientjes Cc: Helge Deller Cc: James E.J. Bottomley Cc: Joonsoo Kim Cc: Linus Torvalds Cc: Noam Camus Cc: Paul E. McKenney Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160309114054.GJ6356@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/bitops/lock.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h index c30266e94806..8ef0ccbf8167 100644 --- a/include/asm-generic/bitops/lock.h +++ b/include/asm-generic/bitops/lock.h @@ -29,16 +29,16 @@ do { \ * @nr: the bit to set * @addr: the address to start counting from * - * This operation is like clear_bit_unlock, however it is not atomic. - * It does provide release barrier semantics so it can be used to unlock - * a bit lock, however it would only be used if no other CPU can modify - * any bits in the memory until the lock is released (a good example is - * if the bit lock itself protects access to the other bits in the word). + * A weaker form of clear_bit_unlock() as used by __bit_lock_unlock(). If all + * the bits in the word are protected by this lock some archs can use weaker + * ops to safely unlock. + * + * See for example x86's implementation. */ #define __clear_bit_unlock(nr, addr) \ do { \ - smp_mb(); \ - __clear_bit(nr, addr); \ + smp_mb__before_atomic(); \ + clear_bit(nr, addr); \ } while (0) #endif /* _ASM_GENERIC_BITOPS_LOCK_H_ */ From 0a10eaa1a91a738523ce2f9e55cd9f6f655e31f1 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 18 Feb 2016 00:16:14 +0100 Subject: [PATCH 266/797] scripts/coccinelle: modernize & commit 1b669e713f277a4d4b3cec84e13d16544ac8286d upstream. & is no longer allowed in column 0, since Coccinelle 1.0.4. Signed-off-by: Julia Lawall Tested-by: Nishanth Menon Signed-off-by: Michal Marek Signed-off-by: Greg Kroah-Hartman --- scripts/coccinelle/iterators/use_after_iter.cocci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/coccinelle/iterators/use_after_iter.cocci b/scripts/coccinelle/iterators/use_after_iter.cocci index f085f5968c52..ce8cc9c006e5 100644 --- a/scripts/coccinelle/iterators/use_after_iter.cocci +++ b/scripts/coccinelle/iterators/use_after_iter.cocci @@ -123,7 +123,7 @@ list_remove_head(x,c,...) | sizeof(<+...c...+>) | -&c->member + &c->member | c = E | From a42c9e4f1b3751743dd58773d45d2eb3ee4ca5ac Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 25 Jan 2016 09:45:47 -0700 Subject: [PATCH 267/797] scripts/kconfig: allow building with make 3.80 again commit 42f9d3c6888bceef6dc7ba72c77acf47347dcf05 upstream. Documentation/Changes still lists this as the minimal required version, so it ought to remain usable for the time being. Fixes: d2036f30cf ("scripts/kconfig/Makefile: Allow KBUILD_DEFCONFIG to be a target") Signed-off-by: Jan Beulich Cc: Michael Ellerman Signed-off-by: Michal Marek Signed-off-by: Greg Kroah-Hartman --- scripts/kconfig/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index d79cba4ce3eb..ebced77deb9c 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -96,13 +96,15 @@ savedefconfig: $(obj)/conf defconfig: $(obj)/conf ifeq ($(KBUILD_DEFCONFIG),) $< $(silent) --defconfig $(Kconfig) -else ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG)),) +else +ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG)),) @$(kecho) "*** Default configuration is based on '$(KBUILD_DEFCONFIG)'" $(Q)$< $(silent) --defconfig=arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG) $(Kconfig) else @$(kecho) "*** Default configuration is based on target '$(KBUILD_DEFCONFIG)'" $(Q)$(MAKE) -f $(srctree)/Makefile $(KBUILD_DEFCONFIG) endif +endif %_defconfig: $(obj)/conf $(Q)$< $(silent) --defconfig=arch/$(SRCARCH)/configs/$@ $(Kconfig) From f24fe0da244641bc4a19cff1cb7c7cb3dd50b88a Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 26 Feb 2016 16:15:17 +0100 Subject: [PATCH 268/797] kbuild/mkspec: fix grub2 installkernel issue commit c8b08ca558c0067bc9e15ce3f1e70af260410bb2 upstream. mkspec is copying built kernel to temporrary location /boot/vmlinuz-$KERNELRELEASE-rpm and runs installkernel on it. This however directly leads to grub2 menuentry for this suffixed binary being generated as well during the run of installkernel script. Later in the process the temporary -rpm suffixed files are removed, and therefore we end up with spurious (and non-functional) grub2 menu entries for each installed kernel RPM. Fix that by using a different temporary name (prefixed by '.'), so that the binary is not recognized as an actual kernel binary and no menuentry is created for it. Signed-off-by: Jiri Kosina Fixes: 3c9c7a14b627 ("rpm-pkg: add %post section to create initramfs and grub hooks") Signed-off-by: Michal Marek Signed-off-by: Greg Kroah-Hartman --- scripts/package/mkspec | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/package/mkspec b/scripts/package/mkspec index 71004daefe31..fe44d68e9344 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -131,11 +131,11 @@ echo 'rm -rf $RPM_BUILD_ROOT' echo "" echo "%post" echo "if [ -x /sbin/installkernel -a -r /boot/vmlinuz-$KERNELRELEASE -a -r /boot/System.map-$KERNELRELEASE ]; then" -echo "cp /boot/vmlinuz-$KERNELRELEASE /boot/vmlinuz-$KERNELRELEASE-rpm" -echo "cp /boot/System.map-$KERNELRELEASE /boot/System.map-$KERNELRELEASE-rpm" +echo "cp /boot/vmlinuz-$KERNELRELEASE /boot/.vmlinuz-$KERNELRELEASE-rpm" +echo "cp /boot/System.map-$KERNELRELEASE /boot/.System.map-$KERNELRELEASE-rpm" echo "rm -f /boot/vmlinuz-$KERNELRELEASE /boot/System.map-$KERNELRELEASE" -echo "/sbin/installkernel $KERNELRELEASE /boot/vmlinuz-$KERNELRELEASE-rpm /boot/System.map-$KERNELRELEASE-rpm" -echo "rm -f /boot/vmlinuz-$KERNELRELEASE-rpm /boot/System.map-$KERNELRELEASE-rpm" +echo "/sbin/installkernel $KERNELRELEASE /boot/.vmlinuz-$KERNELRELEASE-rpm /boot/.System.map-$KERNELRELEASE-rpm" +echo "rm -f /boot/.vmlinuz-$KERNELRELEASE-rpm /boot/.System.map-$KERNELRELEASE-rpm" echo "fi" echo "" echo "%files" From 507fafe0e83fcc011bcb23af37179b7d412624b3 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 22 Mar 2016 15:11:03 -0700 Subject: [PATCH 269/797] MAINTAINERS: Update mailing list and web page for hwmon subsystem commit 968ce1b1f45a7d76b5471b19bd035dbecc72f32d upstream. The old web page for the hwmon subsystem is no longer operational, and the mailing list has become unreliable. Move both to kernel.org. Reviewed-by: Jean Delvare Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 96 ++++++++++++++++++++++++++--------------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index d826f1b9eb02..4c3e1d2ac31b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -230,13 +230,13 @@ F: kernel/sys_ni.c ABIT UGURU 1,2 HARDWARE MONITOR DRIVER M: Hans de Goede -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: drivers/hwmon/abituguru.c ABIT UGURU 3 HARDWARE MONITOR DRIVER M: Alistair John Strachan -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: drivers/hwmon/abituguru3.c @@ -373,14 +373,14 @@ S: Maintained ADM1025 HARDWARE MONITOR DRIVER M: Jean Delvare -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/adm1025 F: drivers/hwmon/adm1025.c ADM1029 HARDWARE MONITOR DRIVER M: Corentin Labbe -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: drivers/hwmon/adm1029.c @@ -425,7 +425,7 @@ F: drivers/video/backlight/adp8860_bl.c ADS1015 HARDWARE MONITOR DRIVER M: Dirk Eibach -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/ads1015 F: drivers/hwmon/ads1015.c @@ -438,7 +438,7 @@ F: drivers/macintosh/therm_adt746x.c ADT7475 HARDWARE MONITOR DRIVER M: Jean Delvare -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/adt7475 F: drivers/hwmon/adt7475.c @@ -615,7 +615,7 @@ F: include/linux/ccp.h AMD FAM15H PROCESSOR POWER MONITORING DRIVER M: Andreas Herrmann -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/fam15h_power F: drivers/hwmon/fam15h_power.c @@ -779,7 +779,7 @@ F: drivers/input/mouse/bcm5974.c APPLE SMC DRIVER M: Henrik Rydberg -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Odd fixes F: drivers/hwmon/applesmc.c @@ -1777,7 +1777,7 @@ F: include/media/as3645a.h ASC7621 HARDWARE MONITOR DRIVER M: George Joseph -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/asc7621 F: drivers/hwmon/asc7621.c @@ -1864,7 +1864,7 @@ F: drivers/net/wireless/ath/carl9170/ ATK0110 HWMON DRIVER M: Luca Tettamanti -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: drivers/hwmon/asus_atk0110.c @@ -2984,7 +2984,7 @@ F: mm/swap_cgroup.c CORETEMP HARDWARE MONITORING DRIVER M: Fenghua Yu -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/coretemp F: drivers/hwmon/coretemp.c @@ -3549,7 +3549,7 @@ T: git git://git.infradead.org/users/vkoul/slave-dma.git DME1737 HARDWARE MONITOR DRIVER M: Juerg Haefliger -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/dme1737 F: drivers/hwmon/dme1737.c @@ -4262,7 +4262,7 @@ F: include/video/exynos_mipi* F71805F HARDWARE MONITORING DRIVER M: Jean Delvare -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/f71805f F: drivers/hwmon/f71805f.c @@ -4341,7 +4341,7 @@ F: fs/* FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER M: Riku Voipio -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: drivers/hwmon/f75375s.c F: include/linux/f75375s.h @@ -4883,8 +4883,8 @@ F: drivers/media/usb/hackrf/ HARDWARE MONITORING M: Jean Delvare M: Guenter Roeck -L: lm-sensors@lm-sensors.org -W: http://www.lm-sensors.org/ +L: linux-hwmon@vger.kernel.org +W: http://hwmon.wiki.kernel.org/ T: quilt http://jdelvare.nerim.net/devel/linux/jdelvare-hwmon/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git S: Maintained @@ -5393,7 +5393,7 @@ F: drivers/usb/atm/ueagle-atm.c INA209 HARDWARE MONITOR DRIVER M: Guenter Roeck -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/ina209 F: Documentation/devicetree/bindings/i2c/ina209.txt @@ -5401,7 +5401,7 @@ F: drivers/hwmon/ina209.c INA2XX HARDWARE MONITOR DRIVER M: Guenter Roeck -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/ina2xx F: drivers/hwmon/ina2xx.c @@ -5884,7 +5884,7 @@ F: drivers/isdn/hardware/eicon/ IT87 HARDWARE MONITORING DRIVER M: Jean Delvare -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/it87 F: drivers/hwmon/it87.c @@ -5920,7 +5920,7 @@ F: drivers/media/dvb-frontends/ix2505v* JC42.4 TEMPERATURE SENSOR DRIVER M: Guenter Roeck -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: drivers/hwmon/jc42.c F: Documentation/hwmon/jc42 @@ -5970,14 +5970,14 @@ F: drivers/tty/serial/jsm/ K10TEMP HARDWARE MONITORING DRIVER M: Clemens Ladisch -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/k10temp F: drivers/hwmon/k10temp.c K8TEMP HARDWARE MONITORING DRIVER M: Rudolf Marek -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/k8temp F: drivers/hwmon/k8temp.c @@ -6485,27 +6485,27 @@ F: net/llc/ LM73 HARDWARE MONITOR DRIVER M: Guillaume Ligneul -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: drivers/hwmon/lm73.c LM78 HARDWARE MONITOR DRIVER M: Jean Delvare -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/lm78 F: drivers/hwmon/lm78.c LM83 HARDWARE MONITOR DRIVER M: Jean Delvare -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/lm83 F: drivers/hwmon/lm83.c LM90 HARDWARE MONITOR DRIVER M: Jean Delvare -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/lm90 F: Documentation/devicetree/bindings/hwmon/lm90.txt @@ -6513,7 +6513,7 @@ F: drivers/hwmon/lm90.c LM95234 HARDWARE MONITOR DRIVER M: Guenter Roeck -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/lm95234 F: drivers/hwmon/lm95234.c @@ -6580,7 +6580,7 @@ F: drivers/scsi/sym53c8xx_2/ LTC4261 HARDWARE MONITOR DRIVER M: Guenter Roeck -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/ltc4261 F: drivers/hwmon/ltc4261.c @@ -6749,28 +6749,28 @@ F: include/uapi/linux/matroxfb.h MAX16065 HARDWARE MONITOR DRIVER M: Guenter Roeck -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/max16065 F: drivers/hwmon/max16065.c MAX20751 HARDWARE MONITOR DRIVER M: Guenter Roeck -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/max20751 F: drivers/hwmon/max20751.c MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER M: "Hans J. Koch" -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/max6650 F: drivers/hwmon/max6650.c MAX6697 HARDWARE MONITOR DRIVER M: Guenter Roeck -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/max6697 F: Documentation/devicetree/bindings/i2c/max6697.txt @@ -7303,7 +7303,7 @@ F: drivers/scsi/NCR_D700.* NCT6775 HARDWARE MONITOR DRIVER M: Guenter Roeck -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/nct6775 F: drivers/hwmon/nct6775.c @@ -8064,7 +8064,7 @@ F: drivers/video/logo/logo_parisc* PC87360 HARDWARE MONITORING DRIVER M: Jim Cromie -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/pc87360 F: drivers/hwmon/pc87360.c @@ -8076,7 +8076,7 @@ F: drivers/char/pc8736x_gpio.c PC87427 HARDWARE MONITORING DRIVER M: Jean Delvare -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/pc87427 F: drivers/hwmon/pc87427.c @@ -8415,8 +8415,8 @@ F: drivers/rtc/rtc-puv3.c PMBUS HARDWARE MONITORING DRIVERS M: Guenter Roeck -L: lm-sensors@lm-sensors.org -W: http://www.lm-sensors.org/ +L: linux-hwmon@vger.kernel.org +W: http://hwmon.wiki.kernel.org/ W: http://www.roeck-us.net/linux/drivers/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git S: Maintained @@ -8610,7 +8610,7 @@ F: drivers/media/usb/pwc/* PWM FAN DRIVER M: Kamil Debski -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/hwmon/pwm-fan.txt F: Documentation/hwmon/pwm-fan @@ -9882,28 +9882,28 @@ F: Documentation/devicetree/bindings/media/i2c/nokia,smia.txt SMM665 HARDWARE MONITOR DRIVER M: Guenter Roeck -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/smm665 F: drivers/hwmon/smm665.c SMSC EMC2103 HARDWARE MONITOR DRIVER M: Steve Glendinning -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/emc2103 F: drivers/hwmon/emc2103.c SMSC SCH5627 HARDWARE MONITOR DRIVER M: Hans de Goede -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Supported F: Documentation/hwmon/sch5627 F: drivers/hwmon/sch5627.c SMSC47B397 HARDWARE MONITOR DRIVER M: Jean Delvare -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/smsc47b397 F: drivers/hwmon/smsc47b397.c @@ -10830,7 +10830,7 @@ F: include/linux/mmc/sh_mobile_sdhi.h TMP401 HARDWARE MONITOR DRIVER M: Guenter Roeck -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/tmp401 F: drivers/hwmon/tmp401.c @@ -11564,14 +11564,14 @@ F: Documentation/networking/vrf.txt VT1211 HARDWARE MONITOR DRIVER M: Juerg Haefliger -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/vt1211 F: drivers/hwmon/vt1211.c VT8231 HARDWARE MONITOR DRIVER M: Roger Lucas -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: drivers/hwmon/vt8231.c @@ -11590,21 +11590,21 @@ F: drivers/w1/ W83791D HARDWARE MONITORING DRIVER M: Marc Hulsman -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/w83791d F: drivers/hwmon/w83791d.c W83793 HARDWARE MONITORING DRIVER M: Rudolf Marek -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/w83793 F: drivers/hwmon/w83793.c W83795 HARDWARE MONITORING DRIVER M: Jean Delvare -L: lm-sensors@lm-sensors.org +L: linux-hwmon@vger.kernel.org S: Maintained F: drivers/hwmon/w83795.c From 65b55d179b275cec668b2fffa23f884b55575726 Mon Sep 17 00:00:00 2001 From: John Dahlstrom Date: Sat, 27 Feb 2016 00:09:58 -0600 Subject: [PATCH 270/797] ideapad-laptop: Add ideapad Y700 (15) to the no_hw_rfkill DMI list commit 4db9675d927a71faa66e5ab128d2390d6329750b upstream. Some Lenovo ideapad models lack a physical rfkill switch. On Lenovo models ideapad Y700 Touch-15ISK and ideapad Y700-15ISK, ideapad-laptop would wrongly report all radios as blocked by hardware which caused wireless network connections to fail. Add these models without an rfkill switch to the no_hw_rfkill list. Signed-off-by: John Dahlstrom Cc: # 3.17.x-: 4fa9dab: ideapad_laptop: Lenovo G50-30 fix rfkill reports wireless blocked Signed-off-by: Darren Hart Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/ideapad-laptop.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index d78ee151c9e4..be3bc2f4edd4 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -864,6 +864,20 @@ static const struct dmi_system_id no_hw_rfkill_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo G50-30"), }, }, + { + .ident = "Lenovo ideapad Y700-15ISK", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad Y700-15ISK"), + }, + }, + { + .ident = "Lenovo ideapad Y700 Touch-15ISK", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad Y700 Touch-15ISK"), + }, + }, { .ident = "Lenovo ideapad Y700-17ISK", .matches = { From d69bc6d28c9662bde7230d88e405c8694d05d94c Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Wed, 16 Mar 2016 18:15:47 +0800 Subject: [PATCH 271/797] mmc: block: fix ABI regression of mmc_blk_ioctl commit 83c742c344c08c2bbe338d45c6ec63110e9d5e3d upstream. If mmc_blk_ioctl returns -EINVAL, blkdev_ioctl continues to work without returning err to user-space. But now we check CAP_SYS_RAWIO firstly, so we return -EPERM to blkdev_ioctl, which make blkdev_ioctl return -EPERM to user-space directly. So this will break all the ioctl with BLKROSET. Now we find Android-adb suffer it for the following log: remount of /system failed; couldn't make block device writable: Operation not permitted openat(AT_FDCWD, "/dev/block/platform/ff420000.dwmmc/by-name/system", O_RDONLY) = 3 ioctl(3, BLKROSET, 0) = -1 EPERM (Operation not permitted) Fixes: a5f5774c55a2 ("mmc: block: Add new ioctl to send multi commands") Signed-off-by: Shawn Lin Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/card/block.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index d8486168415a..553113eb1bdb 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -596,6 +596,14 @@ static int mmc_blk_ioctl_cmd(struct block_device *bdev, struct mmc_card *card; int err = 0, ioc_err = 0; + /* + * The caller must have CAP_SYS_RAWIO, and must be calling this on the + * whole block device, not on a partition. This prevents overspray + * between sibling partitions. + */ + if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains)) + return -EPERM; + idata = mmc_blk_ioctl_copy_from_user(ic_ptr); if (IS_ERR(idata)) return PTR_ERR(idata); @@ -638,6 +646,14 @@ static int mmc_blk_ioctl_multi_cmd(struct block_device *bdev, int i, err = 0, ioc_err = 0; __u64 num_of_cmds; + /* + * The caller must have CAP_SYS_RAWIO, and must be calling this on the + * whole block device, not on a partition. This prevents overspray + * between sibling partitions. + */ + if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains)) + return -EPERM; + if (copy_from_user(&num_of_cmds, &user->num_of_cmds, sizeof(num_of_cmds))) return -EFAULT; @@ -693,14 +709,6 @@ static int mmc_blk_ioctl_multi_cmd(struct block_device *bdev, static int mmc_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { - /* - * The caller must have CAP_SYS_RAWIO, and must be calling this on the - * whole block device, not on a partition. This prevents overspray - * between sibling partitions. - */ - if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains)) - return -EPERM; - switch (cmd) { case MMC_IOC_CMD: return mmc_blk_ioctl_cmd(bdev, From 0a060276f0f429bb0402e7fa6581d472934300fb Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 16 Feb 2016 13:06:41 +0900 Subject: [PATCH 272/797] mmc: mmc_spi: Add Card Detect comments and fix CD GPIO case commit bcdc9f260bdce09913db1464be9817170d51044a upstream. This patch fixes the MMC SPI driver from doing polling card detect when a CD GPIO that supports interrupts is specified using the gpios DT property. Without this patch the DT node below results in the following output: spi_gpio: spi-gpio { /* SD2 @ CN12 */ compatible = "spi-gpio"; #address-cells = <1>; #size-cells = <0>; gpio-sck = <&gpio6 16 GPIO_ACTIVE_HIGH>; gpio-mosi = <&gpio6 17 GPIO_ACTIVE_HIGH>; gpio-miso = <&gpio6 18 GPIO_ACTIVE_HIGH>; num-chipselects = <1>; cs-gpios = <&gpio6 21 GPIO_ACTIVE_LOW>; status = "okay"; spi@0 { compatible = "mmc-spi-slot"; reg = <0>; voltage-ranges = <3200 3400>; spi-max-frequency = <25000000>; gpios = <&gpio6 22 GPIO_ACTIVE_LOW>; /* CD */ }; }; # dmesg | grep mmc mmc_spi spi32766.0: SD/MMC host mmc0, no WP, no poweroff, cd polling mmc0: host does not support reading read-only switch, assuming write-enable mmc0: new SDHC card on SPI mmcblk0: mmc0:0000 SU04G 3.69 GiB mmcblk0: p1 With this patch applied the "cd polling" portion above disappears. Signed-off-by: Magnus Damm Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/mmc_spi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 1c1b45ef3faf..aad3243a48fc 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1436,6 +1436,12 @@ static int mmc_spi_probe(struct spi_device *spi) host->pdata->cd_debounce); if (status != 0) goto fail_add_host; + + /* The platform has a CD GPIO signal that may support + * interrupts, so let mmc_gpiod_request_cd_irq() decide + * if polling is needed or not. + */ + mmc->caps &= ~MMC_CAP_NEEDS_POLL; mmc_gpiod_request_cd_irq(mmc); } From cd3c71fbb75025f89f64bdd87123d7c247b69a96 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 26 Jan 2016 13:40:58 +0000 Subject: [PATCH 273/797] mmc: sdhci: fix data timeout (part 1) commit fafcfda9e78cae8796d1799f14e6457790797555 upstream. The data timeout gives the minimum amount of time that should be waited before timing out if no data is received from the card. Simply dividing the nanosecond part by 1000 does not give this required guarantee, since such a division rounds down. Use DIV_ROUND_UP() to give the desired timeout. Signed-off-by: Russell King Signed-off-by: Adrian Hunter Tested-by: Gregory CLEMENT Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 8814eb6b83bf..14e118fc0097 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -666,7 +666,7 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd) if (!data) target_timeout = cmd->busy_timeout * 1000; else { - target_timeout = data->timeout_ns / 1000; + target_timeout = DIV_ROUND_UP(data->timeout_ns, 1000); if (host->clock) target_timeout += data->timeout_clks / host->clock; } From e113935b8fca965d685084c347864ffb18e32f62 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 26 Jan 2016 13:41:04 +0000 Subject: [PATCH 274/797] mmc: sdhci: fix data timeout (part 2) commit 7f05538af71c7d30b5fc821cbe9f318edc645961 upstream. The calculation for the timeout based on the number of card clocks is incorrect. The calculation assumed: timeout in microseconds = clock cycles / clock in Hz which is clearly a several orders of magnitude wrong. Fix this by multiplying the clock cycles by 1000000 prior to dividing by the Hz based clock. Also, as per part 1, ensure that the division rounds up. As this needs 64-bit math via do_div(), avoid it if the clock cycles is zero. Signed-off-by: Russell King Signed-off-by: Adrian Hunter Tested-by: Gregory CLEMENT Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 14e118fc0097..c993d392b470 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -667,8 +667,19 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd) target_timeout = cmd->busy_timeout * 1000; else { target_timeout = DIV_ROUND_UP(data->timeout_ns, 1000); - if (host->clock) - target_timeout += data->timeout_clks / host->clock; + if (host->clock && data->timeout_clks) { + unsigned long long val; + + /* + * data->timeout_clks is in units of clock cycles. + * host->clock is in Hz. target_timeout is in us. + * Hence, us = 1000000 * cycles / Hz. Round up. + */ + val = 1000000 * data->timeout_clks; + if (do_div(val, host->clock)) + target_timeout++; + target_timeout += val; + } } /* From 86beab2febf17b3384f9a1cd333ae7c06a31d0a0 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 7 Mar 2016 13:33:55 +0200 Subject: [PATCH 275/797] mmc: sdhci: Fix override of timeout clk wrt max_busy_timeout commit 995136247915c5cee633d55ba23f6eebf67aa567 upstream. Normally the timeout clock frequency is read from the capabilities register. It is also possible to set the value prior to calling sdhci_add_host() in which case that value will override the capabilities register value. However that was being done after calculating max_busy_timeout so that max_busy_timeout was being calculated using the wrong value of timeout_clk. Fix that by moving the override before max_busy_timeout is calculated. The result is that the max_busy_timeout and max_discard increase for BSW devices so that, for example, the time for mkfs.ext4 on a 64GB eMMC drops from about 1 minute 40 seconds to about 20 seconds. Note, in the future, the capabilities setting will be tidied up and this override won't be used anymore. However this fix is needed for stable. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index c993d392b470..1a802af827ed 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -3106,14 +3106,14 @@ int sdhci_add_host(struct sdhci_host *host) if (caps[0] & SDHCI_TIMEOUT_CLK_UNIT) host->timeout_clk *= 1000; + if (override_timeout_clk) + host->timeout_clk = override_timeout_clk; + mmc->max_busy_timeout = host->ops->get_max_timeout_count ? host->ops->get_max_timeout_count(host) : 1 << 27; mmc->max_busy_timeout /= host->timeout_clk; } - if (override_timeout_clk) - host->timeout_clk = override_timeout_clk; - mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23; mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD; From 451e4ff10d95a1390c49afcce5d5c339f97b0048 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 19 Jan 2016 10:01:08 +0100 Subject: [PATCH 276/797] clk: rockchip: rk3368: fix cpuclk mux bit of big cpu-cluster commit 535ebd428aeb07c3327947281306f2943f2c9faa upstream. Both clusters have their mux bit in bit 7 of their respective register. For whatever reason the big cluster currently lists bit 15 which is definitly wrong. Fixes: 3536c97a52db ("clk: rockchip: add rk3368 clock controller") Reported-by: Zhang Qing Signed-off-by: Heiko Stuebner Reviewed-by: zhangqing Signed-off-by: Greg Kroah-Hartman --- drivers/clk/rockchip/clk-rk3368.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/rockchip/clk-rk3368.c b/drivers/clk/rockchip/clk-rk3368.c index 7e6b783e6eee..c3b2da08e015 100644 --- a/drivers/clk/rockchip/clk-rk3368.c +++ b/drivers/clk/rockchip/clk-rk3368.c @@ -165,7 +165,7 @@ static const struct rockchip_cpuclk_reg_data rk3368_cpuclkb_data = { .core_reg = RK3368_CLKSEL_CON(0), .div_core_shift = 0, .div_core_mask = 0x1f, - .mux_core_shift = 15, + .mux_core_shift = 7, }; static const struct rockchip_cpuclk_reg_data rk3368_cpuclkl_data = { From 20c736ddb2819e071dcefa087b5d33e19b462e4e Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 19 Jan 2016 10:09:22 +0100 Subject: [PATCH 277/797] clk: rockchip: rk3368: fix cpuclk core dividers commit c6d5fe2ca8286f35a79f7345c9378c39d48a1527 upstream. Similar to commit 9880d4277f6a ("clk: rockchip: fix rk3288 cpuclk core dividers") it seems the cpuclk dividers are one to high on the rk3368 as well. And again similar to the previous fix, we opt to make the divider list contain the values to be written to use the same paradigm for them on all supported socs. Fixes: 3536c97a52db ("clk: rockchip: add rk3368 clock controller") Reported-by: Zhang Qing Signed-off-by: Heiko Stuebner Reviewed-by: zhangqing Signed-off-by: Greg Kroah-Hartman --- drivers/clk/rockchip/clk-rk3368.c | 40 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/clk/rockchip/clk-rk3368.c b/drivers/clk/rockchip/clk-rk3368.c index c3b2da08e015..4e9907272b12 100644 --- a/drivers/clk/rockchip/clk-rk3368.c +++ b/drivers/clk/rockchip/clk-rk3368.c @@ -218,29 +218,29 @@ static const struct rockchip_cpuclk_reg_data rk3368_cpuclkl_data = { } static struct rockchip_cpuclk_rate_table rk3368_cpuclkb_rates[] __initdata = { - RK3368_CPUCLKB_RATE(1512000000, 2, 6, 6), - RK3368_CPUCLKB_RATE(1488000000, 2, 5, 5), - RK3368_CPUCLKB_RATE(1416000000, 2, 5, 5), - RK3368_CPUCLKB_RATE(1200000000, 2, 4, 4), - RK3368_CPUCLKB_RATE(1008000000, 2, 4, 4), - RK3368_CPUCLKB_RATE( 816000000, 2, 3, 3), - RK3368_CPUCLKB_RATE( 696000000, 2, 3, 3), - RK3368_CPUCLKB_RATE( 600000000, 2, 2, 2), - RK3368_CPUCLKB_RATE( 408000000, 2, 2, 2), - RK3368_CPUCLKB_RATE( 312000000, 2, 2, 2), + RK3368_CPUCLKB_RATE(1512000000, 1, 5, 5), + RK3368_CPUCLKB_RATE(1488000000, 1, 4, 4), + RK3368_CPUCLKB_RATE(1416000000, 1, 4, 4), + RK3368_CPUCLKB_RATE(1200000000, 1, 3, 3), + RK3368_CPUCLKB_RATE(1008000000, 1, 3, 3), + RK3368_CPUCLKB_RATE( 816000000, 1, 2, 2), + RK3368_CPUCLKB_RATE( 696000000, 1, 2, 2), + RK3368_CPUCLKB_RATE( 600000000, 1, 1, 1), + RK3368_CPUCLKB_RATE( 408000000, 1, 1, 1), + RK3368_CPUCLKB_RATE( 312000000, 1, 1, 1), }; static struct rockchip_cpuclk_rate_table rk3368_cpuclkl_rates[] __initdata = { - RK3368_CPUCLKL_RATE(1512000000, 2, 7, 7), - RK3368_CPUCLKL_RATE(1488000000, 2, 6, 6), - RK3368_CPUCLKL_RATE(1416000000, 2, 6, 6), - RK3368_CPUCLKL_RATE(1200000000, 2, 5, 5), - RK3368_CPUCLKL_RATE(1008000000, 2, 5, 5), - RK3368_CPUCLKL_RATE( 816000000, 2, 4, 4), - RK3368_CPUCLKL_RATE( 696000000, 2, 3, 3), - RK3368_CPUCLKL_RATE( 600000000, 2, 3, 3), - RK3368_CPUCLKL_RATE( 408000000, 2, 2, 2), - RK3368_CPUCLKL_RATE( 312000000, 2, 2, 2), + RK3368_CPUCLKL_RATE(1512000000, 1, 6, 6), + RK3368_CPUCLKL_RATE(1488000000, 1, 5, 5), + RK3368_CPUCLKL_RATE(1416000000, 1, 5, 5), + RK3368_CPUCLKL_RATE(1200000000, 1, 4, 4), + RK3368_CPUCLKL_RATE(1008000000, 1, 4, 4), + RK3368_CPUCLKL_RATE( 816000000, 1, 3, 3), + RK3368_CPUCLKL_RATE( 696000000, 1, 2, 2), + RK3368_CPUCLKL_RATE( 600000000, 1, 2, 2), + RK3368_CPUCLKL_RATE( 408000000, 1, 1, 1), + RK3368_CPUCLKL_RATE( 312000000, 1, 1, 1), }; static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = { From c7e33d74fbd86e518c6f98b9c6f35badc1d3719d Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 20 Jan 2016 19:22:38 +0100 Subject: [PATCH 278/797] clk: rockchip: rk3368: fix parents of video encoder/decoder commit 0f28d98463498c61c61a38aacbf9f69e92e85e9d upstream. The vdpu and vepu clocks can also be parented to the npll and current parent list also is wrong as it would use the npll as "usbphy" source, so adapt the parent to the correct one. Fixes: 3536c97a52db ("clk: rockchip: add rk3368 clock controller") Signed-off-by: Heiko Stuebner Reviewed-by: zhangqing Signed-off-by: Greg Kroah-Hartman --- drivers/clk/rockchip/clk-rk3368.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/rockchip/clk-rk3368.c b/drivers/clk/rockchip/clk-rk3368.c index 4e9907272b12..aa419373d35a 100644 --- a/drivers/clk/rockchip/clk-rk3368.c +++ b/drivers/clk/rockchip/clk-rk3368.c @@ -384,10 +384,10 @@ static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = { * Clock-Architecture Diagram 3 */ - COMPOSITE(0, "aclk_vepu", mux_pll_src_cpll_gpll_usb_p, 0, + COMPOSITE(0, "aclk_vepu", mux_pll_src_cpll_gpll_npll_usb_p, 0, RK3368_CLKSEL_CON(15), 6, 2, MFLAGS, 0, 5, DFLAGS, RK3368_CLKGATE_CON(4), 6, GFLAGS), - COMPOSITE(0, "aclk_vdpu", mux_pll_src_cpll_gpll_usb_p, 0, + COMPOSITE(0, "aclk_vdpu", mux_pll_src_cpll_gpll_npll_usb_p, 0, RK3368_CLKSEL_CON(15), 14, 2, MFLAGS, 8, 5, DFLAGS, RK3368_CLKGATE_CON(4), 7, GFLAGS), From afb1f06758abb5cfa01a54298b57e0cce3bf3273 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 20 Jan 2016 21:47:57 +0100 Subject: [PATCH 279/797] clk: rockchip: rk3368: fix hdmi_cec gate-register commit fd0c0740fac17a014704ef89d8c8b1768711ca59 upstream. Fix a typo making the sclk_hdmi_cec access a wrong register to handle its gate. Fixes: 3536c97a52db ("clk: rockchip: add rk3368 clock controller") Signed-off-by: Heiko Stuebner Reviewed-by: zhangqing Signed-off-by: Greg Kroah-Hartman --- drivers/clk/rockchip/clk-rk3368.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/rockchip/clk-rk3368.c b/drivers/clk/rockchip/clk-rk3368.c index aa419373d35a..1b148694b633 100644 --- a/drivers/clk/rockchip/clk-rk3368.c +++ b/drivers/clk/rockchip/clk-rk3368.c @@ -442,7 +442,7 @@ static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = { GATE(SCLK_HDMI_HDCP, "sclk_hdmi_hdcp", "xin24m", 0, RK3368_CLKGATE_CON(4), 13, GFLAGS), GATE(SCLK_HDMI_CEC, "sclk_hdmi_cec", "xin32k", 0, - RK3368_CLKGATE_CON(5), 12, GFLAGS), + RK3368_CLKGATE_CON(4), 12, GFLAGS), COMPOSITE_NODIV(0, "vip_src", mux_pll_src_cpll_gpll_p, 0, RK3368_CLKSEL_CON(21), 15, 1, MFLAGS, From 5f9403e710e03098b06c321aee6b31621efca5b1 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Tue, 26 Jan 2016 16:34:00 +0300 Subject: [PATCH 280/797] clk: rockchip: add hclk_cpubus to the list of rk3188 critical clocks commit e8b63288b37dbb8457b510c9d96f6006da4653f6 upstream. hclk_cpubus needs to keep running because it is needed for devices like the rom, i2s0 or spdif to be accessible via cpu. Without that all accesses to devices (readl/writel) return wrong data. So add it to the list of critical clocks. Fixes: 78eaf6095cc763c ("clk: rockchip: disable unused clocks") Signed-off-by: Alexander Kochetkov Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- drivers/clk/rockchip/clk-rk3188.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index abb47608713b..fe728f8dcbe4 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -718,6 +718,7 @@ static const char *const rk3188_critical_clocks[] __initconst = { "hclk_peri", "pclk_cpu", "pclk_peri", + "hclk_cpubus" }; static void __init rk3188_common_clk_init(struct device_node *np) From b3822a1078c87a6f74270741bc4cc660e4f11bae Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Feb 2016 19:03:57 -0800 Subject: [PATCH 281/797] clk: bcm2835: Fix setting of PLL divider clock rates commit 773b3966dd3cdaeb68e7f2edfe5656abac1dc411 upstream. Our dividers weren't being set successfully because CM_PASSWORD wasn't included in the register write. It looks easier to just compute the divider to write ourselves than to update clk-divider for the ability to OR in some arbitrary bits on write. Fixes about half of the video modes on my HDMI monitor (everything except 720x400). Signed-off-by: Eric Anholt Signed-off-by: Michael Turquette Signed-off-by: Greg Kroah-Hartman --- drivers/clk/bcm/clk-bcm2835.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index 39bf5820297e..4f9830c1b121 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -1097,13 +1097,15 @@ static int bcm2835_pll_divider_set_rate(struct clk_hw *hw, struct bcm2835_pll_divider *divider = bcm2835_pll_divider_from_hw(hw); struct bcm2835_cprman *cprman = divider->cprman; const struct bcm2835_pll_divider_data *data = divider->data; - u32 cm; - int ret; + u32 cm, div, max_div = 1 << A2W_PLL_DIV_BITS; - ret = clk_divider_ops.set_rate(hw, rate, parent_rate); - if (ret) - return ret; + div = DIV_ROUND_UP_ULL(parent_rate, rate); + div = min(div, max_div); + if (div == max_div) + div = 0; + + cprman_write(cprman, data->a2w_reg, div); cm = cprman_read(cprman, data->cm_reg); cprman_write(cprman, data->cm_reg, cm | data->load_mask); cprman_write(cprman, data->cm_reg, cm & ~data->load_mask); From 9b3e8070857db21b9d3973387cd9aae5944d71ad Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Mon, 14 Mar 2016 22:47:37 -0700 Subject: [PATCH 282/797] target: Fix target_release_cmd_kref shutdown comp leak commit 5e47f1985d7107331c3f64fb3ec83d66fd73577e upstream. This patch fixes an active I/O shutdown bug for fabric drivers using target_wait_for_sess_cmds(), where se_cmd descriptor shutdown would result in hung tasks waiting indefinitely for se_cmd->cmd_wait_comp to complete(). To address this bug, drop the incorrect list_del_init() usage in target_wait_for_sess_cmds() and always complete() during se_cmd target_release_cmd_kref() put, in order to let caller invoke the final fabric release callback into se_cmd->se_tfo->release_cmd() code. Reported-by: Himanshu Madhani Tested-by: Himanshu Madhani Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_transport.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 94f4ffac723f..d151bc3d6971 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2618,8 +2618,6 @@ void target_wait_for_sess_cmds(struct se_session *se_sess) list_for_each_entry_safe(se_cmd, tmp_cmd, &se_sess->sess_wait_list, se_cmd_list) { - list_del_init(&se_cmd->se_cmd_list); - pr_debug("Waiting for se_cmd: %p t_state: %d, fabric state:" " %d\n", se_cmd, se_cmd->t_state, se_cmd->se_tfo->get_cmd_state(se_cmd)); From a91eb042e1bacdc61a2ca5cb184aa99c24ed5cdb Mon Sep 17 00:00:00 2001 From: Jenny Derzhavetz Date: Wed, 24 Feb 2016 19:23:58 +0200 Subject: [PATCH 283/797] iser-target: Fix identification of login rx descriptor type commit b89a7c25462b164db280abc3b05d4d9d888d40e9 upstream. Once connection request is accepted, one rx descriptor is posted to receive login request. This descriptor has rx type, but is outside the main pool of rx descriptors, and thus was mistreated as tx type. Signed-off-by: Jenny Derzhavetz Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 8a51c3b5d657..addb57265cb2 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2035,7 +2035,8 @@ is_isert_tx_desc(struct isert_conn *isert_conn, void *wr_id) void *start = isert_conn->rx_descs; int len = ISERT_QP_MAX_RECV_DTOS * sizeof(*isert_conn->rx_descs); - if (wr_id >= start && wr_id < start + len) + if ((wr_id >= start && wr_id < start + len) || + (wr_id == isert_conn->login_req_buf)) return false; return true; From b0d31bbebb1f293ef196c4c84e682b58b0e9cf7b Mon Sep 17 00:00:00 2001 From: Jenny Derzhavetz Date: Wed, 24 Feb 2016 19:23:59 +0200 Subject: [PATCH 284/797] iser-target: Add new state ISER_CONN_BOUND to isert_conn commit aea92980601f7ddfcb3c54caa53a43726314fe46 upstream. We need an indication that isert_conn->iscsi_conn binding has happened so we'll know not to invoke a connection reinstatement on an unbound connection which will lead to a bogus isert_conn->conn dereferece. Signed-off-by: Jenny Derzhavetz Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 7 +++++-- drivers/infiniband/ulp/isert/ib_isert.h | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index addb57265cb2..0919d6add4e5 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -820,7 +820,7 @@ isert_put_conn(struct isert_conn *isert_conn) * @isert_conn: isert connection struct * * Notes: - * In case the connection state is FULL_FEATURE, move state + * In case the connection state is BOUND, move state * to TEMINATING and start teardown sequence (rdma_disconnect). * In case the connection state is UP, complete flush as well. * @@ -836,6 +836,7 @@ isert_conn_terminate(struct isert_conn *isert_conn) case ISER_CONN_TERMINATING: break; case ISER_CONN_UP: + case ISER_CONN_BOUND: case ISER_CONN_FULL_FEATURE: /* FALLTHRU */ isert_info("Terminating conn %p state %d\n", isert_conn, isert_conn->state); @@ -2062,7 +2063,8 @@ isert_cq_comp_err(struct isert_conn *isert_conn, struct ib_wc *wc) isert_completion_put(desc, isert_cmd, ib_dev, true); } else { isert_conn->post_recv_buf_count--; - if (!isert_conn->post_recv_buf_count) + if (!isert_conn->post_recv_buf_count && + isert_conn->state >= ISER_CONN_BOUND) iscsit_cause_connection_reinstatement(isert_conn->conn, 0); } } @@ -3194,6 +3196,7 @@ isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) conn->context = isert_conn; isert_conn->conn = conn; + isert_conn->state = ISER_CONN_BOUND; isert_set_conn_info(np, conn, isert_conn); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 3d7fbc47c343..d9635203be63 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -50,6 +50,7 @@ enum iser_ib_op_code { enum iser_conn_state { ISER_CONN_INIT, ISER_CONN_UP, + ISER_CONN_BOUND, ISER_CONN_FULL_FEATURE, ISER_CONN_TERMINATING, ISER_CONN_DOWN, From 60f0f01da74b14a0b27becf30a70155a8db23445 Mon Sep 17 00:00:00 2001 From: Jenny Derzhavetz Date: Wed, 24 Feb 2016 19:24:00 +0200 Subject: [PATCH 285/797] iser-target: Separate flows for np listeners and connections cma events commit f81bf458208ef6d12b2fc08091204e3859dcdba4 upstream. No need to restrict this check to specific events. Signed-off-by: Jenny Derzhavetz Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 0919d6add4e5..48e2394cd8c7 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -884,14 +884,9 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id, enum rdma_cm_event_type event) { struct isert_np *isert_np = cma_id->context; - struct isert_conn *isert_conn; + struct isert_conn *isert_conn = cma_id->qp->qp_context; bool terminating = false; - if (isert_np->cm_id == cma_id) - return isert_np_cma_handler(cma_id->context, event); - - isert_conn = cma_id->qp->qp_context; - mutex_lock(&isert_conn->mutex); terminating = (isert_conn->state == ISER_CONN_TERMINATING); isert_conn_terminate(isert_conn); @@ -930,12 +925,16 @@ isert_connect_error(struct rdma_cm_id *cma_id) static int isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { + struct isert_np *isert_np = cma_id->context; int ret = 0; isert_info("%s (%d): status %d id %p np %p\n", rdma_event_msg(event->event), event->event, event->status, cma_id, cma_id->context); + if (isert_np->cm_id == cma_id) + return isert_np_cma_handler(cma_id->context, event->event); + switch (event->event) { case RDMA_CM_EVENT_CONNECT_REQUEST: ret = isert_connect_request(cma_id, event); From 48f447bcebd889aab7193659841de7962bf52a56 Mon Sep 17 00:00:00 2001 From: Jenny Derzhavetz Date: Wed, 24 Feb 2016 19:24:01 +0200 Subject: [PATCH 286/797] iser-target: Rework connection termination commit 6d1fba0c2cc7efe42fd761ecbba833ed0ea7b07e upstream. When we receive an event that triggers connection termination, we have a a couple of things we may want to do: 1. In case we are already terminating, bailout early 2. In case we are connected but not bound, disconnect and schedule a connection cleanup silently (don't reinstate) 3. In case we are connected and bound, disconnect and reinstate the connection This rework fixes a bug that was detected against a mis-behaved initiator which rejected our rdma_cm accept, in this stage the isert_conn is no bound and reinstate caused a bogus dereference. What's great about this is that we don't need the post_recv_buf_count anymore, so get rid of it. Signed-off-by: Jenny Derzhavetz Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 107 ++++++++++++------------ drivers/infiniband/ulp/isert/ib_isert.h | 1 - 2 files changed, 52 insertions(+), 56 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 48e2394cd8c7..b0edb66a291b 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -66,6 +66,7 @@ isert_rdma_accept(struct isert_conn *isert_conn); struct rdma_cm_id *isert_setup_id(struct isert_np *isert_np); static void isert_release_work(struct work_struct *work); +static void isert_wait4flush(struct isert_conn *isert_conn); static inline bool isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd) @@ -815,6 +816,25 @@ isert_put_conn(struct isert_conn *isert_conn) kref_put(&isert_conn->kref, isert_release_kref); } +static void +isert_handle_unbound_conn(struct isert_conn *isert_conn) +{ + struct isert_np *isert_np = isert_conn->cm_id->context; + + mutex_lock(&isert_np->mutex); + if (!list_empty(&isert_conn->node)) { + /* + * This means iscsi doesn't know this connection + * so schedule a cleanup ourselves + */ + list_del_init(&isert_conn->node); + isert_put_conn(isert_conn); + complete(&isert_conn->wait); + queue_work(isert_release_wq, &isert_conn->release_work); + } + mutex_unlock(&isert_np->mutex); +} + /** * isert_conn_terminate() - Initiate connection termination * @isert_conn: isert connection struct @@ -832,24 +852,19 @@ isert_conn_terminate(struct isert_conn *isert_conn) { int err; - switch (isert_conn->state) { - case ISER_CONN_TERMINATING: - break; - case ISER_CONN_UP: - case ISER_CONN_BOUND: - case ISER_CONN_FULL_FEATURE: /* FALLTHRU */ - isert_info("Terminating conn %p state %d\n", - isert_conn, isert_conn->state); - isert_conn->state = ISER_CONN_TERMINATING; - err = rdma_disconnect(isert_conn->cm_id); - if (err) - isert_warn("Failed rdma_disconnect isert_conn %p\n", - isert_conn); - break; - default: - isert_warn("conn %p teminating in state %d\n", - isert_conn, isert_conn->state); - } + if (isert_conn->state >= ISER_CONN_TERMINATING) + return; + + isert_info("Terminating conn %p state %d\n", + isert_conn, isert_conn->state); + isert_conn->state = ISER_CONN_TERMINATING; + err = rdma_disconnect(isert_conn->cm_id); + if (err) + isert_warn("Failed rdma_disconnect isert_conn %p\n", + isert_conn); + + isert_info("conn %p completing wait\n", isert_conn); + complete(&isert_conn->wait); } static int @@ -883,30 +898,27 @@ static int isert_disconnected_handler(struct rdma_cm_id *cma_id, enum rdma_cm_event_type event) { - struct isert_np *isert_np = cma_id->context; struct isert_conn *isert_conn = cma_id->qp->qp_context; - bool terminating = false; mutex_lock(&isert_conn->mutex); - terminating = (isert_conn->state == ISER_CONN_TERMINATING); - isert_conn_terminate(isert_conn); + switch (isert_conn->state) { + case ISER_CONN_TERMINATING: + break; + case ISER_CONN_UP: + isert_conn_terminate(isert_conn); + isert_wait4flush(isert_conn); + isert_handle_unbound_conn(isert_conn); + break; + case ISER_CONN_BOUND: + case ISER_CONN_FULL_FEATURE: /* FALLTHRU */ + iscsit_cause_connection_reinstatement(isert_conn->conn, 0); + break; + default: + isert_warn("conn %p teminating in state %d\n", + isert_conn, isert_conn->state); + } mutex_unlock(&isert_conn->mutex); - isert_info("conn %p completing wait\n", isert_conn); - complete(&isert_conn->wait); - - if (terminating) - goto out; - - mutex_lock(&isert_np->mutex); - if (!list_empty(&isert_conn->node)) { - list_del_init(&isert_conn->node); - isert_put_conn(isert_conn); - queue_work(isert_release_wq, &isert_conn->release_work); - } - mutex_unlock(&isert_np->mutex); - -out: return 0; } @@ -980,13 +992,10 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count) rx_wr--; rx_wr->next = NULL; /* mark end of work requests list */ - isert_conn->post_recv_buf_count += count; ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr, &rx_wr_failed); - if (ret) { + if (ret) isert_err("ib_post_recv() failed with ret: %d\n", ret); - isert_conn->post_recv_buf_count -= count; - } return ret; } @@ -1002,12 +1011,9 @@ isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc) rx_wr.num_sge = 1; rx_wr.next = NULL; - isert_conn->post_recv_buf_count++; ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_failed); - if (ret) { + if (ret) isert_err("ib_post_recv() failed with ret: %d\n", ret); - isert_conn->post_recv_buf_count--; - } return ret; } @@ -1120,12 +1126,9 @@ isert_rdma_post_recvl(struct isert_conn *isert_conn) rx_wr.sg_list = &sge; rx_wr.num_sge = 1; - isert_conn->post_recv_buf_count++; ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_fail); - if (ret) { + if (ret) isert_err("ib_post_recv() failed: %d\n", ret); - isert_conn->post_recv_buf_count--; - } return ret; } @@ -1620,7 +1623,6 @@ isert_rcv_completion(struct iser_rx_desc *desc, ib_dma_sync_single_for_device(ib_dev, rx_dma, rx_buflen, DMA_FROM_DEVICE); - isert_conn->post_recv_buf_count--; } static int @@ -2060,11 +2062,6 @@ isert_cq_comp_err(struct isert_conn *isert_conn, struct ib_wc *wc) isert_unmap_tx_desc(desc, ib_dev); else isert_completion_put(desc, isert_cmd, ib_dev, true); - } else { - isert_conn->post_recv_buf_count--; - if (!isert_conn->post_recv_buf_count && - isert_conn->state >= ISER_CONN_BOUND) - iscsit_cause_connection_reinstatement(isert_conn->conn, 0); } } diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index d9635203be63..1874d21daee0 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -145,7 +145,6 @@ struct isert_device; struct isert_conn { enum iser_conn_state state; - int post_recv_buf_count; u32 responder_resources; u32 initiator_depth; bool pi_support; From 9ef1ecc409b1e1113bf5e4b6bdd47137e5f9cebb Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 29 Feb 2016 20:21:21 -0500 Subject: [PATCH 287/797] nfsd4: fix bad bounds checking commit 4aed9c46afb80164401143aa0fdcfe3798baa9d5 upstream. A number of spots in the xdr decoding follow a pattern like n = be32_to_cpup(p++); READ_BUF(n + 4); where n is a u32. The only bounds checking is done in READ_BUF itself, but since it's checking (n + 4), it won't catch cases where n is very large, (u32)(-4) or higher. I'm not sure exactly what the consequences are, but we've seen crashes soon after. Instead, just break these up into two READ_BUF()s. Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 51c9e9ca39a4..12935209deca 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1072,8 +1072,9 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename READ_BUF(4); rename->rn_snamelen = be32_to_cpup(p++); - READ_BUF(rename->rn_snamelen + 4); + READ_BUF(rename->rn_snamelen); SAVEMEM(rename->rn_sname, rename->rn_snamelen); + READ_BUF(4); rename->rn_tnamelen = be32_to_cpup(p++); READ_BUF(rename->rn_tnamelen); SAVEMEM(rename->rn_tname, rename->rn_tnamelen); @@ -1155,13 +1156,14 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient READ_BUF(8); setclientid->se_callback_prog = be32_to_cpup(p++); setclientid->se_callback_netid_len = be32_to_cpup(p++); - - READ_BUF(setclientid->se_callback_netid_len + 4); + READ_BUF(setclientid->se_callback_netid_len); SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len); + READ_BUF(4); setclientid->se_callback_addr_len = be32_to_cpup(p++); - READ_BUF(setclientid->se_callback_addr_len + 4); + READ_BUF(setclientid->se_callback_addr_len); SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len); + READ_BUF(4); setclientid->se_callback_ident = be32_to_cpup(p++); DECODE_TAIL; @@ -1815,8 +1817,9 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) READ_BUF(4); argp->taglen = be32_to_cpup(p++); - READ_BUF(argp->taglen + 8); + READ_BUF(argp->taglen); SAVEMEM(argp->tag, argp->taglen); + READ_BUF(8); argp->minorversion = be32_to_cpup(p++); argp->opcnt = be32_to_cpup(p++); max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2); From 56fb92d684cc51baf3a421a3ac19af441a52f413 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 2 Mar 2016 16:36:21 -0800 Subject: [PATCH 288/797] nfsd: fix deadlock secinfo+readdir compound commit 2f6fc056e899bd0144a08da5cacaecbe8997cd74 upstream. nfsd_lookup_dentry exits with the parent filehandle locked. fh_put also unlocks if necessary (nfsd filehandle locking is probably too lenient), so it gets unlocked eventually, but if the following op in the compound needs to lock it again, we can deadlock. A fuzzer ran into this; normal clients don't send a secinfo followed by a readdir in the same compound. Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a9f096c7e99f..7d5351cd67fb 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -877,6 +877,7 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &exp, &dentry); if (err) return err; + fh_unlock(&cstate->current_fh); if (d_really_is_negative(dentry)) { exp_put(exp); err = nfserr_noent; From d287698c43d7915e422d298985891eb609b511b8 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Fri, 11 Mar 2016 11:43:39 +0100 Subject: [PATCH 289/797] ARM: dts: at91: sama5d3 Xplained: don't disable hsmci regulator commit ae3fc8ea08e405682f1fa959f94b6e4126afbc1b upstream. If enabling the hsmci regulator on card detection, the board can reboot on sd card insertion. Keeping the regulator always enabled fixes this issue. Signed-off-by: Ludovic Desroches Fixes: 1b53e3416dd0 ("ARM: at91/dt: sama5d3 xplained: add fixed regulator for vmmc0") Signed-off-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91-sama5d3_xplained.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/at91-sama5d3_xplained.dts b/arch/arm/boot/dts/at91-sama5d3_xplained.dts index ff888d21c786..f3e2b96c06a3 100644 --- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts @@ -303,6 +303,7 @@ vcc_mmc0_reg: fixedregulator@0 { regulator-name = "mmc0-card-supply"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; + regulator-always-on; }; gpio_keys { From fe81b4d996fbf36cf4e84869b6c5f4394f5e5af9 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Fri, 11 Mar 2016 11:35:10 +0100 Subject: [PATCH 290/797] ARM: dts: at91: sama5d4 Xplained: don't disable hsmci regulator commit b02acd4e62602a6ab307da84388a16bf60106c48 upstream. If enabling the hsmci regulator on card detection, the board can reboot on sd card insertion. Keeping the regulator always enabled fixes this issue. Signed-off-by: Ludovic Desroches Fixes: 8d545f32bd77 ("ARM: at91/dt: sama5d4 xplained: add regulators for v(q)mmc1 supplies") Signed-off-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91-sama5d4_xplained.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts index 569026e8f96c..da84e65b56ef 100644 --- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts @@ -268,5 +268,6 @@ vcc_mmc1_reg: fixedregulator@1 { regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; vin-supply = <&vcc_3v3_reg>; + regulator-always-on; }; }; From b05e5a587ddc11255de76657e6b4b0e960783cc3 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 23 Mar 2016 00:11:20 +0100 Subject: [PATCH 291/797] ACPI / PM: Runtime resume devices when waking from hibernate commit fbda4b38fa3995aa0777fe9cbbdcb223c6292083 upstream. Commit 58a1fbbb2ee8 ("PM / PCI / ACPI: Kick devices that might have been reset by firmware") added a runtime resume for devices that were runtime suspended when the system entered suspend-to-RAM. Briefly, the motivation was to ensure that devices did not remain in a reset-power-on state after resume, potentially preventing deep SoC-wide low-power states from being entered on idle. Currently we're not doing the same when leaving suspend-to-disk and this asymmetry is a problem if drivers rely on the automatic resume triggered by pm_complete_with_resume_check(). Fix it. Fixes: 58a1fbbb2ee8 (PM / PCI / ACPI: Kick devices that might have been reset by firmware) Signed-off-by: Lukas Wunner Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/sleep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 0d94621dc856..e3322adaaae0 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -714,6 +714,7 @@ static int acpi_hibernation_enter(void) static void acpi_hibernation_leave(void) { + pm_set_resume_via_firmware(); /* * If ACPI is not enabled by the BIOS and the boot kernel, we need to * enable it here. From d78ddcfbe7ab8c5f4ff0b8f20b2bbda710fc0e91 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Mar 2016 13:50:03 -0400 Subject: [PATCH 292/797] writeback, cgroup: fix premature wb_put() in locked_inode_to_wb_and_lock_list() commit 614a4e3773148a31f58dc174bbf578ceb63510c2 upstream. locked_inode_to_wb_and_lock_list() wb_get()'s the wb associated with the target inode, unlocks inode, locks the wb's list_lock and verifies that the inode is still associated with the wb. To prevent the wb going away between dropping inode lock and acquiring list_lock, the wb is pinned while inode lock is held. The wb reference is put right after acquiring list_lock citing that the wb won't be dereferenced anymore. This isn't true. If the inode is still associated with the wb, the inode has reference and it's safe to return the wb; however, if inode has been switched, the wb still needs to be unlocked which is a dereference and can lead to use-after-free if it it races with wb destruction. Fix it by putting the reference after releasing list_lock. Signed-off-by: Tejun Heo Fixes: 87e1d789bf55 ("writeback: implement [locked_]inode_to_wb_and_lock_list()") Tested-by: Tahsin Erdogan Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/fs-writeback.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 7a8ea1351584..e84b698e1d6c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -281,13 +281,15 @@ locked_inode_to_wb_and_lock_list(struct inode *inode) wb_get(wb); spin_unlock(&inode->i_lock); spin_lock(&wb->list_lock); - wb_put(wb); /* not gonna deref it anymore */ /* i_wb may have changed inbetween, can't use inode_to_wb() */ - if (likely(wb == inode->i_wb)) - return wb; /* @inode already has ref */ + if (likely(wb == inode->i_wb)) { + wb_put(wb); /* @inode already has ref */ + return wb; + } spin_unlock(&wb->list_lock); + wb_put(wb); cpu_relax(); spin_lock(&inode->i_lock); } From 842ec116c7070c8bfc785b609acb19fb29a59cb0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Mar 2016 13:52:04 -0400 Subject: [PATCH 293/797] writeback, cgroup: fix use of the wrong bdi_writeback which mismatches the inode commit aaf2559332ba272671bb870464a99b909b29a3a1 upstream. When cgroup writeback is in use, there can be multiple wb's (bdi_writeback's) per bdi and an inode may switch among them dynamically. In a couple places, the wrong wb was used leading to performing operations on the wrong list under the wrong lock corrupting the io lists. * writeback_single_inode() was taking @wb parameter and used it to remove the inode from io lists if it becomes clean after writeback. The callers of this function were always passing in the root wb regardless of the actual wb that the inode was associated with, which could also change while writeback is in progress. Fix it by dropping the @wb parameter and using inode_to_wb_and_lock_list() to determine and lock the associated wb. * After writeback_sb_inodes() writes out an inode, it re-locks @wb and inode to remove it from or move it to the right io list. It assumes that the inode is still associated with @wb; however, the inode may have switched to another wb while writeback was in progress. Fix it by using inode_to_wb_and_lock_list() to determine and lock the associated wb after writeback is complete. As the function requires the original @wb->list_lock locked for the next iteration, in the unlikely case where the inode has changed association, switch the locks. Kudos to Tahsin for pinpointing these subtle breakages. Signed-off-by: Tejun Heo Fixes: d10c80955265 ("writeback: implement foreign cgroup inode bdi_writeback switching") Link: http://lkml.kernel.org/g/CAAeU0aMYeM_39Y2+PaRvyB1nqAPYZSNngJ1eBRmrxn7gKAt2Mg@mail.gmail.com Reported-and-diagnosed-by: Tahsin Erdogan Tested-by: Tahsin Erdogan Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/fs-writeback.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e84b698e1d6c..60d6fc2e0e4b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1341,10 +1341,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode() * and does more profound writeback list handling in writeback_sb_inodes(). */ -static int -writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, - struct writeback_control *wbc) +static int writeback_single_inode(struct inode *inode, + struct writeback_control *wbc) { + struct bdi_writeback *wb; int ret = 0; spin_lock(&inode->i_lock); @@ -1382,7 +1382,8 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, ret = __writeback_single_inode(inode, wbc); wbc_detach_inode(wbc); - spin_lock(&wb->list_lock); + + wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); /* * If inode is clean, remove it from writeback lists. Otherwise don't @@ -1457,6 +1458,7 @@ static long writeback_sb_inodes(struct super_block *sb, while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); + struct bdi_writeback *tmp_wb; if (inode->i_sb != sb) { if (work->sb) { @@ -1547,15 +1549,23 @@ static long writeback_sb_inodes(struct super_block *sb, cond_resched(); } - - spin_lock(&wb->list_lock); + /* + * Requeue @inode if still dirty. Be careful as @inode may + * have been switched to another wb in the meantime. + */ + tmp_wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); if (!(inode->i_state & I_DIRTY_ALL)) wrote++; - requeue_inode(inode, wb, &wbc); + requeue_inode(inode, tmp_wb, &wbc); inode_sync_complete(inode); spin_unlock(&inode->i_lock); + if (unlikely(tmp_wb != wb)) { + spin_unlock(&tmp_wb->list_lock); + spin_lock(&wb->list_lock); + } + /* * bail out to wb_writeback() often enough to check * background threshold and other termination conditions. @@ -2342,7 +2352,6 @@ EXPORT_SYMBOL(sync_inodes_sb); */ int write_inode_now(struct inode *inode, int sync) { - struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; struct writeback_control wbc = { .nr_to_write = LONG_MAX, .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE, @@ -2354,7 +2363,7 @@ int write_inode_now(struct inode *inode, int sync) wbc.nr_to_write = 0; might_sleep(); - return writeback_single_inode(inode, wb, &wbc); + return writeback_single_inode(inode, &wbc); } EXPORT_SYMBOL(write_inode_now); @@ -2371,7 +2380,7 @@ EXPORT_SYMBOL(write_inode_now); */ int sync_inode(struct inode *inode, struct writeback_control *wbc) { - return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc); + return writeback_single_inode(inode, wbc); } EXPORT_SYMBOL(sync_inode); From ed12031e713305ef7836d5672ae2ab4e1446ce08 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 17 Mar 2016 17:12:54 -0700 Subject: [PATCH 294/797] Input: synaptics - handle spurious release of trackstick buttons, again commit 82be788c96ed5978d3cb4a00079e26b981a3df3f upstream. Looks like the fimware 8.2 still has the extra buttons spurious release bug. Link: https://bugzilla.kernel.org/show_bug.cgi?id=114321 Signed-off-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 6025eb430c0a..a41d8328c064 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -862,8 +862,9 @@ static void synaptics_report_ext_buttons(struct psmouse *psmouse, if (!SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap)) return; - /* Bug in FW 8.1, buttons are reported only when ExtBit is 1 */ - if (SYN_ID_FULL(priv->identity) == 0x801 && + /* Bug in FW 8.1 & 8.2, buttons are reported only when ExtBit is 1 */ + if ((SYN_ID_FULL(priv->identity) == 0x801 || + SYN_ID_FULL(priv->identity) == 0x802) && !((psmouse->packet[0] ^ psmouse->packet[3]) & 0x02)) return; From af18c4ca4b1728e2149844656bbf1aa8d7382682 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 17 Mar 2016 14:00:17 -0700 Subject: [PATCH 295/797] Input: ims-pcu - sanity check against missing interfaces commit a0ad220c96692eda76b2e3fd7279f3dcd1d8a8ff upstream. A malicious device missing interface can make the driver oops. Add sanity checking. Signed-off-by: Oliver Neukum Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/ims-pcu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c index ac1fa5f44580..9c0ea36913b4 100644 --- a/drivers/input/misc/ims-pcu.c +++ b/drivers/input/misc/ims-pcu.c @@ -1663,6 +1663,8 @@ static int ims_pcu_parse_cdc_data(struct usb_interface *intf, struct ims_pcu *pc pcu->ctrl_intf = usb_ifnum_to_if(pcu->udev, union_desc->bMasterInterface0); + if (!pcu->ctrl_intf) + return -EINVAL; alt = pcu->ctrl_intf->cur_altsetting; pcu->ep_ctrl = &alt->endpoint[0].desc; @@ -1670,6 +1672,8 @@ static int ims_pcu_parse_cdc_data(struct usb_interface *intf, struct ims_pcu *pc pcu->data_intf = usb_ifnum_to_if(pcu->udev, union_desc->bSlaveInterface0); + if (!pcu->data_intf) + return -EINVAL; alt = pcu->data_intf->cur_altsetting; if (alt->desc.bNumEndpoints != 2) { From a1d0a23831ccde9dbd5279a5d45790a96f18ad32 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Wed, 23 Mar 2016 11:53:46 -0700 Subject: [PATCH 296/797] Input: ati_remote2 - fix crashes on detecting device with invalid descriptor commit 950336ba3e4a1ffd2ca60d29f6ef386dd2c7351d upstream. The ati_remote2 driver expects at least two interfaces with one endpoint each. If given malicious descriptor that specify one interface or no endpoints, it will crash in the probe function. Ensure there is at least two interfaces and one endpoint for each interface before using it. The full disclosure: http://seclists.org/bugtraq/2016/Mar/90 Reported-by: Ralf Spenneberg Signed-off-by: Vladis Dronov Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/ati_remote2.c | 36 ++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c index cfd58e87da26..1c5914cae853 100644 --- a/drivers/input/misc/ati_remote2.c +++ b/drivers/input/misc/ati_remote2.c @@ -817,26 +817,49 @@ static int ati_remote2_probe(struct usb_interface *interface, const struct usb_d ar2->udev = udev; + /* Sanity check, first interface must have an endpoint */ + if (alt->desc.bNumEndpoints < 1 || !alt->endpoint) { + dev_err(&interface->dev, + "%s(): interface 0 must have an endpoint\n", __func__); + r = -ENODEV; + goto fail1; + } ar2->intf[0] = interface; ar2->ep[0] = &alt->endpoint[0].desc; + /* Sanity check, the device must have two interfaces */ ar2->intf[1] = usb_ifnum_to_if(udev, 1); + if ((udev->actconfig->desc.bNumInterfaces < 2) || !ar2->intf[1]) { + dev_err(&interface->dev, "%s(): need 2 interfaces, found %d\n", + __func__, udev->actconfig->desc.bNumInterfaces); + r = -ENODEV; + goto fail1; + } + r = usb_driver_claim_interface(&ati_remote2_driver, ar2->intf[1], ar2); if (r) goto fail1; + + /* Sanity check, second interface must have an endpoint */ alt = ar2->intf[1]->cur_altsetting; + if (alt->desc.bNumEndpoints < 1 || !alt->endpoint) { + dev_err(&interface->dev, + "%s(): interface 1 must have an endpoint\n", __func__); + r = -ENODEV; + goto fail2; + } ar2->ep[1] = &alt->endpoint[0].desc; r = ati_remote2_urb_init(ar2); if (r) - goto fail2; + goto fail3; ar2->channel_mask = channel_mask; ar2->mode_mask = mode_mask; r = ati_remote2_setup(ar2, ar2->channel_mask); if (r) - goto fail2; + goto fail3; usb_make_path(udev, ar2->phys, sizeof(ar2->phys)); strlcat(ar2->phys, "/input0", sizeof(ar2->phys)); @@ -845,11 +868,11 @@ static int ati_remote2_probe(struct usb_interface *interface, const struct usb_d r = sysfs_create_group(&udev->dev.kobj, &ati_remote2_attr_group); if (r) - goto fail2; + goto fail3; r = ati_remote2_input_init(ar2); if (r) - goto fail3; + goto fail4; usb_set_intfdata(interface, ar2); @@ -857,10 +880,11 @@ static int ati_remote2_probe(struct usb_interface *interface, const struct usb_d return 0; - fail3: + fail4: sysfs_remove_group(&udev->dev.kobj, &ati_remote2_attr_group); - fail2: + fail3: ati_remote2_urb_cleanup(ar2); + fail2: usb_driver_release_interface(&ati_remote2_driver, ar2->intf[1]); fail1: kfree(ar2); From e4b0e673428391a5ce5827d594d9809ad57fba9e Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Fri, 25 Mar 2016 14:21:26 -0700 Subject: [PATCH 297/797] ocfs2/dlm: fix race between convert and recovery commit ac7cf246dfdbec3d8fed296c7bf30e16f5099dac upstream. There is a race window between dlmconvert_remote and dlm_move_lockres_to_recovery_list, which will cause a lock with OCFS2_LOCK_BUSY in grant list, thus system hangs. dlmconvert_remote { spin_lock(&res->spinlock); list_move_tail(&lock->list, &res->converting); lock->convert_pending = 1; spin_unlock(&res->spinlock); status = dlm_send_remote_convert_request(); >>>>>> race window, master has queued ast and return DLM_NORMAL, and then down before sending ast. this node detects master down and calls dlm_move_lockres_to_recovery_list, which will revert the lock to grant list. Then OCFS2_LOCK_BUSY won't be cleared as new master won't send ast any more because it thinks already be authorized. spin_lock(&res->spinlock); lock->convert_pending = 0; if (status != DLM_NORMAL) dlm_revert_pending_convert(res, lock); spin_unlock(&res->spinlock); } In this case, check if res->state has DLM_LOCK_RES_RECOVERING bit set (res is still in recovering) or res master changed (new master has finished recovery), reset the status to DLM_RECOVERING, then it will retry convert. Signed-off-by: Joseph Qi Reported-by: Yiwen Jiang Reviewed-by: Junxiao Bi Cc: Mark Fasheh Cc: Joel Becker Cc: Tariq Saeed Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlm/dlmconvert.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index e36d63ff1783..84de55ed865a 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -262,6 +262,7 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, struct dlm_lock *lock, int flags, int type) { enum dlm_status status; + u8 old_owner = res->owner; mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type, lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS); @@ -316,11 +317,19 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, spin_lock(&res->spinlock); res->state &= ~DLM_LOCK_RES_IN_PROGRESS; lock->convert_pending = 0; - /* if it failed, move it back to granted queue */ + /* if it failed, move it back to granted queue. + * if master returns DLM_NORMAL and then down before sending ast, + * it may have already been moved to granted queue, reset to + * DLM_RECOVERING and retry convert */ if (status != DLM_NORMAL) { if (status != DLM_NOTQUEUED) dlm_error(status); dlm_revert_pending_convert(res, lock); + } else if ((res->state & DLM_LOCK_RES_RECOVERING) || + (old_owner != res->owner)) { + mlog(0, "res %.*s is in recovering or has been recovered.\n", + res->lockname.len, res->lockname.name); + status = DLM_RECOVERING; } bail: spin_unlock(&res->spinlock); From eae2b56828230fe326167d67a0ec6e777e69afee Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Fri, 25 Mar 2016 14:21:29 -0700 Subject: [PATCH 298/797] ocfs2/dlm: fix BUG in dlm_move_lockres_to_recovery_list commit be12b299a83fc807bbaccd2bcb8ec50cbb0cb55c upstream. When master handles convert request, it queues ast first and then returns status. This may happen that the ast is sent before the request status because the above two messages are sent by two threads. And right after the ast is sent, if master down, it may trigger BUG in dlm_move_lockres_to_recovery_list in the requested node because ast handler moves it to grant list without clear lock->convert_pending. So remove BUG_ON statement and check if the ast is processed in dlmconvert_remote. Signed-off-by: Joseph Qi Reported-by: Yiwen Jiang Cc: Junxiao Bi Cc: Mark Fasheh Cc: Joel Becker Cc: Tariq Saeed Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlm/dlmconvert.c | 13 +++++++++++++ fs/ocfs2/dlm/dlmrecovery.c | 1 - 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index 84de55ed865a..f90931335c6b 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -288,6 +288,19 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, status = DLM_DENIED; goto bail; } + + if (lock->ml.type == type && lock->ml.convert_type == LKM_IVMODE) { + mlog(0, "last convert request returned DLM_RECOVERING, but " + "owner has already queued and sent ast to me. res %.*s, " + "(cookie=%u:%llu, type=%d, conv=%d)\n", + res->lockname.len, res->lockname.name, + dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), + dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), + lock->ml.type, lock->ml.convert_type); + status = DLM_NORMAL; + goto bail; + } + res->state |= DLM_LOCK_RES_IN_PROGRESS; /* move lock to local convert queue */ /* do not alter lock refcount. switching lists. */ diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 42f0cae93a0a..4a338803e7e9 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2064,7 +2064,6 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm, dlm_lock_get(lock); if (lock->convert_pending) { /* move converting lock back to granted */ - BUG_ON(i != DLM_CONVERTING_LIST); mlog(0, "node died with convert pending " "on %.*s. move back to granted list.\n", res->lockname.len, res->lockname.name); From 5dc7e939b6b8ca3d18d5814504954014578703e2 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 25 Mar 2016 14:21:50 -0700 Subject: [PATCH 299/797] mm/page_alloc: prevent merging between isolated and other pageblocks commit d9dddbf556674bf125ecd925b24e43a5cf2a568a upstream. Hanjun Guo has reported that a CMA stress test causes broken accounting of CMA and free pages: > Before the test, I got: > -bash-4.3# cat /proc/meminfo | grep Cma > CmaTotal: 204800 kB > CmaFree: 195044 kB > > > After running the test: > -bash-4.3# cat /proc/meminfo | grep Cma > CmaTotal: 204800 kB > CmaFree: 6602584 kB > > So the freed CMA memory is more than total.. > > Also the the MemFree is more than mem total: > > -bash-4.3# cat /proc/meminfo > MemTotal: 16342016 kB > MemFree: 22367268 kB > MemAvailable: 22370528 kB Laura Abbott has confirmed the issue and suspected the freepage accounting rewrite around 3.18/4.0 by Joonsoo Kim. Joonsoo had a theory that this is caused by unexpected merging between MIGRATE_ISOLATE and MIGRATE_CMA pageblocks: > CMA isolates MAX_ORDER aligned blocks, but, during the process, > partialy isolated block exists. If MAX_ORDER is 11 and > pageblock_order is 9, two pageblocks make up MAX_ORDER > aligned block and I can think following scenario because pageblock > (un)isolation would be done one by one. > > (each character means one pageblock. 'C', 'I' means MIGRATE_CMA, > MIGRATE_ISOLATE, respectively. > > CC -> IC -> II (Isolation) > II -> CI -> CC (Un-isolation) > > If some pages are freed at this intermediate state such as IC or CI, > that page could be merged to the other page that is resident on > different type of pageblock and it will cause wrong freepage count. This was supposed to be prevented by CMA operating on MAX_ORDER blocks, but since it doesn't hold the zone->lock between pageblocks, a race window does exist. It's also likely that unexpected merging can occur between MIGRATE_ISOLATE and non-CMA pageblocks. This should be prevented in __free_one_page() since commit 3c605096d315 ("mm/page_alloc: restrict max order of merging on isolated pageblock"). However, we only check the migratetype of the pageblock where buddy merging has been initiated, not the migratetype of the buddy pageblock (or group of pageblocks) which can be MIGRATE_ISOLATE. Joonsoo has suggested checking for buddy migratetype as part of page_is_buddy(), but that would add extra checks in allocator hotpath and bloat-o-meter has shown significant code bloat (the function is inline). This patch reduces the bloat at some expense of more complicated code. The buddy-merging while-loop in __free_one_page() is initially bounded to pageblock_border and without any migratetype checks. The checks are placed outside, bumping the max_order if merging is allowed, and returning to the while-loop with a statement which can't be possibly considered harmful. This fixes the accounting bug and also removes the arguably weird state in the original commit 3c605096d315 where buddies could be left unmerged. Fixes: 3c605096d315 ("mm/page_alloc: restrict max order of merging on isolated pageblock") Link: https://lkml.org/lkml/2016/3/2/280 Signed-off-by: Vlastimil Babka Reported-by: Hanjun Guo Tested-by: Hanjun Guo Acked-by: Joonsoo Kim Debugged-by: Laura Abbott Debugged-by: Joonsoo Kim Cc: Mel Gorman Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Minchan Kim Cc: Yasuaki Ishimatsu Cc: Zhang Yanfei Cc: Michal Nazarewicz Cc: Naoya Horiguchi Cc: "Aneesh Kumar K.V" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 46 +++++++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9d666df5ef95..c69531afbd8f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -662,34 +662,28 @@ static inline void __free_one_page(struct page *page, unsigned long combined_idx; unsigned long uninitialized_var(buddy_idx); struct page *buddy; - unsigned int max_order = MAX_ORDER; + unsigned int max_order; + + max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1); VM_BUG_ON(!zone_is_initialized(zone)); VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); VM_BUG_ON(migratetype == -1); - if (is_migrate_isolate(migratetype)) { - /* - * We restrict max order of merging to prevent merge - * between freepages on isolate pageblock and normal - * pageblock. Without this, pageblock isolation - * could cause incorrect freepage accounting. - */ - max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1); - } else { + if (likely(!is_migrate_isolate(migratetype))) __mod_zone_freepage_state(zone, 1 << order, migratetype); - } - page_idx = pfn & ((1 << max_order) - 1); + page_idx = pfn & ((1 << MAX_ORDER) - 1); VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page); VM_BUG_ON_PAGE(bad_range(zone, page), page); +continue_merging: while (order < max_order - 1) { buddy_idx = __find_buddy_index(page_idx, order); buddy = page + (buddy_idx - page_idx); if (!page_is_buddy(page, buddy, order)) - break; + goto done_merging; /* * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page, * merge with it and move up one order. @@ -706,6 +700,32 @@ static inline void __free_one_page(struct page *page, page_idx = combined_idx; order++; } + if (max_order < MAX_ORDER) { + /* If we are here, it means order is >= pageblock_order. + * We want to prevent merge between freepages on isolate + * pageblock and normal pageblock. Without this, pageblock + * isolation could cause incorrect freepage or CMA accounting. + * + * We don't want to hit this code for the more frequent + * low-order merging. + */ + if (unlikely(has_isolate_pageblock(zone))) { + int buddy_mt; + + buddy_idx = __find_buddy_index(page_idx, order); + buddy = page + (buddy_idx - page_idx); + buddy_mt = get_pageblock_migratetype(buddy); + + if (migratetype != buddy_mt + && (is_migrate_isolate(migratetype) || + is_migrate_isolate(buddy_mt))) + goto done_merging; + } + max_order++; + goto continue_merging; + } + +done_merging: set_page_order(page, order); /* From ded1db97aca15db72850c78ad222625386af4ea4 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sat, 20 Feb 2016 22:27:48 +0200 Subject: [PATCH 300/797] mtd: onenand: fix deadlock in onenand_block_markbad commit 5e64c29e98bfbba1b527b0a164f9493f3db9e8cb upstream. Commit 5942ddbc500d ("mtd: introduce mtd_block_markbad interface") incorrectly changed onenand_block_markbad() to call mtd_block_markbad instead of onenand_chip's block_markbad function. As a result the function will now recurse and deadlock. Fix by reverting the change. Fixes: 5942ddbc500d ("mtd: introduce mtd_block_markbad interface") Signed-off-by: Aaro Koskinen Acked-by: Artem Bityutskiy Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/onenand/onenand_base.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c index 43b3392ffee7..652d01832873 100644 --- a/drivers/mtd/onenand/onenand_base.c +++ b/drivers/mtd/onenand/onenand_base.c @@ -2599,6 +2599,7 @@ static int onenand_default_block_markbad(struct mtd_info *mtd, loff_t ofs) */ static int onenand_block_markbad(struct mtd_info *mtd, loff_t ofs) { + struct onenand_chip *this = mtd->priv; int ret; ret = onenand_block_isbad(mtd, ofs); @@ -2610,7 +2611,7 @@ static int onenand_block_markbad(struct mtd_info *mtd, loff_t ofs) } onenand_get_device(mtd, FL_WRITING); - ret = mtd_block_markbad(mtd, ofs); + ret = this->block_markbad(mtd, ofs); onenand_release_device(mtd); return ret; } From 79d05ce04bfbe9885936ed985c2dd53d8500f617 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 13 Mar 2016 00:33:48 -0500 Subject: [PATCH 301/797] intel_idle: prevent SKL-H boot failure when C8+C9+C10 enabled commit d70e28f57e14a481977436695b0c9ba165472431 upstream. Some SKL-H configurations require "intel_idle.max_cstate=7" to boot. While that is an effective workaround, it disables C10. This patch detects the problematic configuration, and disables C8 and C9, keeping C10 enabled. Note that enabling SGX in BIOS SETUP can also prevent this issue, if the system BIOS provides that option. https://bugzilla.kernel.org/show_bug.cgi?id=109081 "Freezes with Intel i7 6700HQ (Skylake), unless intel_idle.max_cstate=7" Signed-off-by: Len Brown Signed-off-by: Greg Kroah-Hartman --- drivers/idle/intel_idle.c | 112 ++++++++++++++++++++++++++++++-------- 1 file changed, 88 insertions(+), 24 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index cd4510a63375..146eed70bdf4 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -65,7 +65,7 @@ #include #include -#define INTEL_IDLE_VERSION "0.4" +#define INTEL_IDLE_VERSION "0.4.1" #define PREFIX "intel_idle: " static struct cpuidle_driver intel_idle_driver = { @@ -993,37 +993,93 @@ static void intel_idle_cpuidle_devices_uninit(void) return; } +/* + * ivt_idle_state_table_update(void) + * + * Tune IVT multi-socket targets + * Assumption: num_sockets == (max_package_num + 1) + */ +static void ivt_idle_state_table_update(void) +{ + /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ + int cpu, package_num, num_sockets = 1; + + for_each_online_cpu(cpu) { + package_num = topology_physical_package_id(cpu); + if (package_num + 1 > num_sockets) { + num_sockets = package_num + 1; + + if (num_sockets > 4) { + cpuidle_state_table = ivt_cstates_8s; + return; + } + } + } + + if (num_sockets > 2) + cpuidle_state_table = ivt_cstates_4s; + + /* else, 1 and 2 socket systems use default ivt_cstates */ +} +/* + * sklh_idle_state_table_update(void) + * + * On SKL-H (model 0x5e) disable C8 and C9 if: + * C10 is enabled and SGX disabled + */ +static void sklh_idle_state_table_update(void) +{ + unsigned long long msr; + unsigned int eax, ebx, ecx, edx; + + + /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ + if (max_cstate <= 7) + return; + + /* if PC10 not present in CPUID.MWAIT.EDX */ + if ((mwait_substates & (0xF << 28)) == 0) + return; + + rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr); + + /* PC10 is not enabled in PKG C-state limit */ + if ((msr & 0xF) != 8) + return; + + ecx = 0; + cpuid(7, &eax, &ebx, &ecx, &edx); + + /* if SGX is present */ + if (ebx & (1 << 2)) { + + rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); + + /* if SGX is enabled */ + if (msr & (1 << 18)) + return; + } + + skl_cstates[5].disabled = 1; /* C8-SKL */ + skl_cstates[6].disabled = 1; /* C9-SKL */ +} /* * intel_idle_state_table_update() * * Update the default state_table for this CPU-id - * - * Currently used to access tuned IVT multi-socket targets - * Assumption: num_sockets == (max_package_num + 1) */ -void intel_idle_state_table_update(void) + +static void intel_idle_state_table_update(void) { - /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ - if (boot_cpu_data.x86_model == 0x3e) { /* IVT */ - int cpu, package_num, num_sockets = 1; + switch (boot_cpu_data.x86_model) { - for_each_online_cpu(cpu) { - package_num = topology_physical_package_id(cpu); - if (package_num + 1 > num_sockets) { - num_sockets = package_num + 1; - - if (num_sockets > 4) { - cpuidle_state_table = ivt_cstates_8s; - return; - } - } - } - - if (num_sockets > 2) - cpuidle_state_table = ivt_cstates_4s; - /* else, 1 and 2 socket systems use default ivt_cstates */ + case 0x3e: /* IVT */ + ivt_idle_state_table_update(); + break; + case 0x5e: /* SKL-H */ + sklh_idle_state_table_update(); + break; } - return; } /* @@ -1063,6 +1119,14 @@ static int __init intel_idle_cpuidle_driver_init(void) if (num_substates == 0) continue; + /* if state marked as disabled, skip it */ + if (cpuidle_state_table[cstate].disabled != 0) { + pr_debug(PREFIX "state %s is disabled", + cpuidle_state_table[cstate].name); + continue; + } + + if (((mwait_cstate + 1) > 2) && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) mark_tsc_unstable("TSC halts in idle" From 4cd4ebbdf533ed316ce377b66ae508cc6d1d0162 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 23 Mar 2016 00:11:20 +0100 Subject: [PATCH 302/797] PM / sleep: Clear pm_suspend_global_flags upon hibernate commit 276142730c39c9839465a36a90e5674a8c34e839 upstream. When suspending to RAM, waking up and later suspending to disk, we gratuitously runtime resume devices after the thaw phase. This does not occur if we always suspend to RAM or always to disk. pm_complete_with_resume_check(), which gets called from pci_pm_complete() among others, schedules a runtime resume if PM_SUSPEND_FLAG_FW_RESUME is set. The flag is set during a suspend-to-RAM cycle. It is cleared at the beginning of the suspend-to-RAM cycle but not afterwards and it is not cleared during a suspend-to-disk cycle at all. Fix it. Fixes: ef25ba047601 (PM / sleep: Add flags to indicate platform firmware involvement) Signed-off-by: Lukas Wunner Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/power/hibernate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index b7342a24f559..b7dd5718836e 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -339,6 +339,7 @@ int hibernation_snapshot(int platform_mode) pm_message_t msg; int error; + pm_suspend_clear_flags(); error = platform_begin(platform_mode); if (error) goto Close; From 9835db39bb8151cf4471a792a878318c8d07bf4d Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 18 Mar 2016 14:55:38 +0100 Subject: [PATCH 303/797] scsi_common: do not clobber fixed sense information commit ba08311647892cc7912de74525fd78416caf544a upstream. For fixed sense the information field is 32 bits, to we need to truncate the information field to avoid clobbering the sense code. Fixes: a1524f226a02 ("libata-eh: Set 'information' field for autosense") Signed-off-by: Hannes Reinecke Reviewed-by: Lee Duncan Reviewed-by: Bart Van Assche Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_common.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_common.c b/drivers/scsi/scsi_common.c index c126966130ab..ce79de822e46 100644 --- a/drivers/scsi/scsi_common.c +++ b/drivers/scsi/scsi_common.c @@ -278,8 +278,16 @@ int scsi_set_sense_information(u8 *buf, int buf_len, u64 info) ucp[3] = 0; put_unaligned_be64(info, &ucp[4]); } else if ((buf[0] & 0x7f) == 0x70) { - buf[0] |= 0x80; - put_unaligned_be64(info, &buf[3]); + /* + * Only set the 'VALID' bit if we can represent the value + * correctly; otherwise just fill out the lower bytes and + * clear the 'VALID' flag. + */ + if (info <= 0xffffffffUL) + buf[0] |= 0x80; + else + buf[0] &= 0x7f; + put_unaligned_be32((u32)info, &buf[3]); } return 0; From 2a8225ef46968444fb1c4c632ec28e4cc2be633f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 4 Mar 2016 15:59:42 +0100 Subject: [PATCH 304/797] sched/cputime: Fix steal time accounting vs. CPU hotplug commit e9532e69b8d1d1284e8ecf8d2586de34aec61244 upstream. On CPU hotplug the steal time accounting can keep a stale rq->prev_steal_time value over CPU down and up. So after the CPU comes up again the delta calculation in steal_account_process_tick() wreckages itself due to the unsigned math: u64 steal = paravirt_steal_clock(smp_processor_id()); steal -= this_rq()->prev_steal_time; So if steal is smaller than rq->prev_steal_time we end up with an insane large value which then gets added to rq->prev_steal_time, resulting in a permanent wreckage of the accounting. As a consequence the per CPU stats in /proc/stat become stale. Nice trick to tell the world how idle the system is (100%) while the CPU is 100% busy running tasks. Though we prefer realistic numbers. None of the accounting values which use a previous value to account for fractions is reset at CPU hotplug time. update_rq_clock_task() has a sanity check for prev_irq_time and prev_steal_time_rq, but that sanity check solely deals with clock warps and limits the /proc/stat visible wreckage. The prev_time values are still wrong. Solution is simple: Reset rq->prev_*_time when the CPU is plugged in again. Signed-off-by: Thomas Gleixner Acked-by: Rik van Riel Cc: Frederic Weisbecker Cc: Glauber Costa Cc: Linus Torvalds Cc: Peter Zijlstra Fixes: commit 095c0aa83e52 "sched: adjust scheduler cpu power for stolen time" Fixes: commit aa483808516c "sched: Remove irq time from available CPU power" Fixes: commit e6e6685accfa "KVM guest: Steal time accounting" Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1603041539490.3686@nanos Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 1 + kernel/sched/sched.h | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index eb70592f03f6..70e5e09341f1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5525,6 +5525,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) case CPU_UP_PREPARE: rq->calc_load_update = calc_load_update; + account_reset_rq(rq); break; case CPU_ONLINE: diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b242775bf670..0517abd7dd73 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1770,3 +1770,16 @@ static inline u64 irq_time_read(int cpu) } #endif /* CONFIG_64BIT */ #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ + +static inline void account_reset_rq(struct rq *rq) +{ +#ifdef CONFIG_IRQ_TIME_ACCOUNTING + rq->prev_irq_time = 0; +#endif +#ifdef CONFIG_PARAVIRT + rq->prev_steal_time = 0; +#endif +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING + rq->prev_steal_time_rq = 0; +#endif +} From d2b56a0758ead5987db9465e120643ede759f3ad Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 3 Mar 2016 20:50:41 +0100 Subject: [PATCH 305/797] perf/x86/pebs: Add workaround for broken OVFL status on HSW+ commit 8077eca079a212f26419c57226f28696b7100683 upstream. This patch fixes an issue with the GLOBAL_OVERFLOW_STATUS bits on Haswell, Broadwell and Skylake processors when using PEBS. The SDM stipulates that when the PEBS iterrupt threshold is crossed, an interrupt is posted and the kernel is interrupted. The kernel will find GLOBAL_OVF_SATUS bit 62 set indicating there are PEBS records to drain. But the bits corresponding to the actual counters should NOT be set. The kernel follows the SDM and assumes that all PEBS events are processed in the drain_pebs() callback. The kernel then checks for remaining overflows on any other (non-PEBS) events and processes these in the for_each_bit_set(&status) loop. As it turns out, under certain conditions on HSW and later processors, on PEBS buffer interrupt, bit 62 is set but the counter bits may be set as well. In that case, the kernel drains PEBS and generates SAMPLES with the EXACT tag, then it processes the counter bits, and generates normal (non-EXACT) SAMPLES. I ran into this problem by trying to understand why on HSW sampling on a PEBS event was sometimes returning SAMPLES without the EXACT tag. This should not happen on user level code because HSW has the eventing_ip which always point to the instruction that caused the event. The workaround in this patch simply ensures that the bits for the counters used for PEBS events are cleared after the PEBS buffer has been drained. With this fix 100% of the PEBS samples on my user code report the EXACT tag. Before: $ perf record -e cpu/event=0xd0,umask=0x81/upp ./multichase $ perf report -D | fgrep SAMPLES PERF_RECORD_SAMPLE(IP, 0x2): 11775/11775: 0x406de5 period: 73469 addr: 0 exact=Y \--- EXACT tag is missing After: $ perf record -e cpu/event=0xd0,umask=0x81/upp ./multichase $ perf report -D | fgrep SAMPLES PERF_RECORD_SAMPLE(IP, 0x4002): 11775/11775: 0x406de5 period: 73469 addr: 0 exact=Y \--- EXACT tag is set The problem tends to appear more often when multiple PEBS events are used. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: adrian.hunter@intel.com Cc: kan.liang@intel.com Cc: namhyung@kernel.org Link: http://lkml.kernel.org/r/1457034642-21837-3-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event_intel.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index e2a430021e46..ad6768f97b08 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1840,6 +1840,16 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) if (__test_and_clear_bit(62, (unsigned long *)&status)) { handled++; x86_pmu.drain_pebs(regs); + /* + * There are cases where, even though, the PEBS ovfl bit is set + * in GLOBAL_OVF_STATUS, the PEBS events may also have their + * overflow bits set for their counters. We must clear them + * here because they have been processed as exact samples in + * the drain_pebs() routine. They must not be processed again + * in the for_each_bit_set() loop for regular samples below. + */ + status &= ~cpuc->pebs_enabled; + status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; } /* From 886629ebb2acaeafc3102140c1c8b4ad52792484 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 3 Mar 2016 18:07:28 -0500 Subject: [PATCH 306/797] perf/x86/intel: Fix PEBS warning by only restoring active PMU in pmi commit c3d266c8a9838cc141b69548bc3b1b18808ae8c4 upstream. This patch tries to fix a PEBS warning found in my stress test. The following perf command can easily trigger the pebs warning or spurious NMI error on Skylake/Broadwell/Haswell platforms: sudo perf record -e 'cpu/umask=0x04,event=0xc4/pp,cycles,branches,ref-cycles,cache-misses,cache-references' --call-graph fp -b -c1000 -a Also the NMI watchdog must be enabled. For this case, the events number is larger than counter number. So perf has to do multiplexing. In perf_mux_hrtimer_handler, it does perf_pmu_disable(), schedule out old events, rotate_ctx, schedule in new events and finally perf_pmu_enable(). If the old events include precise event, the MSR_IA32_PEBS_ENABLE should be cleared when perf_pmu_disable(). The MSR_IA32_PEBS_ENABLE should keep 0 until the perf_pmu_enable() is called and the new event is precise event. However, there is a corner case which could restore PEBS_ENABLE to stale value during the above period. In perf_pmu_disable(), GLOBAL_CTRL will be set to 0 to stop overflow and followed PMI. But there may be pending PMI from an earlier overflow, which cannot be stopped. So even GLOBAL_CTRL is cleared, the kernel still be possible to get PMI. At the end of the PMI handler, __intel_pmu_enable_all() will be called, which will restore the stale values if old events haven't scheduled out. Once the stale pebs value is set, it's impossible to be corrected if the new events are non-precise. Because the pebs_enabled will be set to 0. x86_pmu.enable_all() will ignore the MSR_IA32_PEBS_ENABLE setting. As a result, the following NMI with stale PEBS_ENABLE trigger pebs warning. The pending PMI after enabled=0 will become harmless if the NMI handler does not change the state. This patch checks cpuc->enabled in pmi and only restore the state when PMU is active. Here is the dump: Call Trace: [] dump_stack+0x63/0x85 [] warn_slowpath_common+0x82/0xc0 [] warn_slowpath_null+0x1a/0x20 [] intel_pmu_drain_pebs_nhm+0x2be/0x320 [] intel_pmu_handle_irq+0x279/0x460 [] ? native_write_msr_safe+0x6/0x40 [] ? vunmap_page_range+0x20d/0x330 [] ? unmap_kernel_range_noflush+0x11/0x20 [] ? ghes_copy_tofrom_phys+0x10f/0x2a0 [] ? ghes_read_estatus+0x98/0x170 [] perf_event_nmi_handler+0x2d/0x50 [] nmi_handle+0x69/0x120 [] default_do_nmi+0xe6/0x100 [] do_nmi+0xe2/0x130 [] end_repeat_nmi+0x1a/0x1e [] ? native_write_msr_safe+0x6/0x40 [] ? native_write_msr_safe+0x6/0x40 [] ? native_write_msr_safe+0x6/0x40 <> [] ? x86_perf_event_set_period+0xd8/0x180 [] x86_pmu_start+0x4c/0x100 [] x86_pmu_enable+0x28d/0x300 [] perf_pmu_enable.part.81+0x7/0x10 [] perf_mux_hrtimer_handler+0x200/0x280 [] ? __perf_install_in_context+0xc0/0xc0 [] __hrtimer_run_queues+0xfd/0x280 [] hrtimer_interrupt+0xa8/0x190 [] ? __perf_read_group_add.part.61+0x1a0/0x1a0 [] local_apic_timer_interrupt+0x38/0x60 [] smp_apic_timer_interrupt+0x3d/0x50 [] apic_timer_interrupt+0x8c/0xa0 [] ? __perf_read_group_add.part.61+0x1a0/0x1a0 [] ? smp_call_function_single+0xd5/0x130 [] ? smp_call_function_single+0xcb/0x130 [] ? __perf_read_group_add.part.61+0x1a0/0x1a0 [] event_function_call+0x10a/0x120 [] ? ctx_resched+0x90/0x90 [] ? cpu_clock_event_read+0x30/0x30 [] ? _perf_event_disable+0x60/0x60 [] _perf_event_enable+0x5b/0x70 [] perf_event_for_each_child+0x38/0xa0 [] ? _perf_event_disable+0x60/0x60 [] perf_ioctl+0x12d/0x3c0 [] ? selinux_file_ioctl+0x95/0x1e0 [] do_vfs_ioctl+0xa1/0x5a0 [] ? sched_clock+0x9/0x10 [] SyS_ioctl+0x79/0x90 [] entry_SYSCALL_64_fastpath+0x1a/0xa4 ---[ end trace aef202839fe9a71d ]--- Uhhuh. NMI received for unknown reason 2d on CPU 2. Do you have a strange power saving mode enabled? Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1457046448-6184-1-git-send-email-kan.liang@intel.com [ Fixed various typos and other small details. ] Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event.c | 13 +++++++++++++ arch/x86/kernel/cpu/perf_event_intel.c | 15 +++++++++++++-- arch/x86/kernel/cpu/perf_event_knc.c | 4 +++- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2bf79d7c97df..a3aeb2cc361e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -593,6 +593,19 @@ void x86_pmu_disable_all(void) } } +/* + * There may be PMI landing after enabled=0. The PMI hitting could be before or + * after disable_all. + * + * If PMI hits before disable_all, the PMU will be disabled in the NMI handler. + * It will not be re-enabled in the NMI handler again, because enabled=0. After + * handling the NMI, disable_all will be called, which will not change the + * state either. If PMI hits after disable_all, the PMU is already disabled + * before entering NMI handler. The NMI handler will not change the state + * either. + * + * So either situation is harmless. + */ static void x86_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ad6768f97b08..98a01fd16c8c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1458,7 +1458,15 @@ static __initconst const u64 slm_hw_cache_event_ids }; /* - * Use from PMIs where the LBRs are already disabled. + * Used from PMIs where the LBRs are already disabled. + * + * This function could be called consecutively. It is required to remain in + * disabled state if called consecutively. + * + * During consecutive calls, the same disable value will be written to related + * registers, so the PMU state remains unchanged. hw.state in + * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive + * calls. */ static void __intel_pmu_disable_all(void) { @@ -1895,7 +1903,10 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) goto again; done: - __intel_pmu_enable_all(0, true); + /* Only restore PMU state when it's active. See x86_pmu_disable(). */ + if (cpuc->enabled) + __intel_pmu_enable_all(0, true); + /* * Only unmask the NMI after the overflow counters * have been reset. This avoids spurious NMIs on diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c index 5b0c232d1ee6..b931095e86d4 100644 --- a/arch/x86/kernel/cpu/perf_event_knc.c +++ b/arch/x86/kernel/cpu/perf_event_knc.c @@ -263,7 +263,9 @@ static int knc_pmu_handle_irq(struct pt_regs *regs) goto again; done: - knc_pmu_enable_all(0); + /* Only restore PMU state when it's active. See x86_pmu_disable(). */ + if (cpuc->enabled) + knc_pmu_enable_all(0); return handled; } From a54af124cd73a1429ad5a9d16ab878c71e367bf8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 1 Mar 2016 20:03:52 +0100 Subject: [PATCH 307/797] perf/x86/intel: Use PAGE_SIZE for PEBS buffer size on Core2 commit e72daf3f4d764c47fb71c9bdc7f9c54a503825b1 upstream. Using PAGE_SIZE buffers makes the WRMSR to PERF_GLOBAL_CTRL in intel_pmu_enable_all() mysteriously hang on Core2. As a workaround, we don't do this. The hard lockup is easily triggered by running 'perf test attr' repeatedly. Most of the time it gets stuck on sample session with small periods. # perf test attr -vv 14: struct perf_event_attr setup : --- start --- ... 'PERF_TEST_ATTR=/tmp/tmpuEKz3B /usr/bin/perf record -o /tmp/tmpuEKz3B/perf.data -c 123 kill >/dev/null 2>&1' ret 1 Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Wang Nan Link: http://lkml.kernel.org/r/20160301190352.GA8355@krava.redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event.h | 1 + arch/x86/kernel/cpu/perf_event_intel_ds.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index d0e35ebb2adb..0d7bc499d3eb 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -591,6 +591,7 @@ struct x86_pmu { pebs_active :1, pebs_broken :1; int pebs_record_size; + int pebs_buffer_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 5db1c7755548..4bf080b4f744 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -269,7 +269,7 @@ static int alloc_pebs_buffer(int cpu) if (!x86_pmu.pebs) return 0; - buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node); + buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); if (unlikely(!buffer)) return -ENOMEM; @@ -286,7 +286,7 @@ static int alloc_pebs_buffer(int cpu) per_cpu(insn_buffer, cpu) = ibuffer; } - max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; + max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size; ds->pebs_buffer_base = (u64)(unsigned long)buffer; ds->pebs_index = ds->pebs_buffer_base; @@ -1296,6 +1296,7 @@ void __init intel_ds_init(void) x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); + x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; if (x86_pmu.pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; int format = x86_pmu.intel_cap.pebs_format; @@ -1304,6 +1305,14 @@ void __init intel_ds_init(void) case 0: printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); + /* + * Using >PAGE_SIZE buffers makes the WRMSR to + * PERF_GLOBAL_CTRL in intel_pmu_enable_all() + * mysteriously hang on Core2. + * + * As a workaround, we don't do this. + */ + x86_pmu.pebs_buffer_size = PAGE_SIZE; x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; break; From 4b3d06d989b9535bb04f9339a862d06e3311522a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 1 Mar 2016 14:25:24 -0800 Subject: [PATCH 308/797] perf/x86/intel: Fix PEBS data source interpretation on Nehalem/Westmere commit e17dc65328057c00db7e1bfea249c8771a78b30b upstream. Jiri reported some time ago that some entries in the PEBS data source table in perf do not agree with the SDM. We investigated and the bits changed for Sandy Bridge, but the SDM was not updated. perf already implements the bits correctly for Sandy Bridge and later. This patch patches it up for Nehalem and Westmere. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/1456871124-15985-1-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event.h | 2 ++ arch/x86/kernel/cpu/perf_event_intel.c | 2 ++ arch/x86/kernel/cpu/perf_event_intel_ds.c | 11 ++++++++++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 0d7bc499d3eb..ee70445fbb1f 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -908,6 +908,8 @@ void intel_pmu_lbr_init_hsw(void); void intel_pmu_lbr_init_skl(void); +void intel_pmu_pebs_data_source_nhm(void); + int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 98a01fd16c8c..078de2e86b7a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -3336,6 +3336,7 @@ __init int intel_pmu_init(void) intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); + intel_pmu_pebs_data_source_nhm(); x86_add_quirk(intel_nehalem_quirk); pr_cont("Nehalem events, "); @@ -3398,6 +3399,7 @@ __init int intel_pmu_init(void) intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); + intel_pmu_pebs_data_source_nhm(); pr_cont("Westmere events, "); break; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 4bf080b4f744..7abb2b88572e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -51,7 +51,8 @@ union intel_x86_pebs_dse { #define OP_LH (P(OP, LOAD) | P(LVL, HIT)) #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) -static const u64 pebs_data_source[] = { +/* Version for Sandy Bridge and later */ +static u64 pebs_data_source[] = { P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */ OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */ OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */ @@ -70,6 +71,14 @@ static const u64 pebs_data_source[] = { OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */ }; +/* Patch up minor differences in the bits */ +void __init intel_pmu_pebs_data_source_nhm(void) +{ + pebs_data_source[0x05] = OP_LH | P(LVL, L3) | P(SNOOP, HIT); + pebs_data_source[0x06] = OP_LH | P(LVL, L3) | P(SNOOP, HITM); + pebs_data_source[0x07] = OP_LH | P(LVL, L3) | P(SNOOP, HITM); +} + static u64 precise_store_data(u64 status) { union intel_x86_pebs_dse dse; From b40108b826ed9e1c558f73b9dbabb8d80ded268b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 12 Apr 2016 09:09:26 -0700 Subject: [PATCH 309/797] Linux 4.4.7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 87d12b44ab66..5a493e785aca 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 6 +SUBLEVEL = 7 EXTRAVERSION = NAME = Blurry Fish Butt From 63c22e8fe29efe1d03980d5cf933c4d7b9a72d09 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 26 Mar 2016 12:28:05 -0700 Subject: [PATCH 310/797] hwmon: (max1111) Return -ENODEV from max1111_read_channel if not instantiated commit 3c2e2266a5bd2d1cef258e6e54dca1d99946379f upstream. arm:pxa_defconfig can result in the following crash if the max1111 driver is not instantiated. Unhandled fault: page domain fault (0x01b) at 0x00000000 pgd = c0004000 [00000000] *pgd=00000000 Internal error: : 1b [#1] PREEMPT ARM Modules linked in: CPU: 0 PID: 300 Comm: kworker/0:1 Not tainted 4.5.0-01301-g1701f680407c #10 Hardware name: SHARP Akita Workqueue: events sharpsl_charge_toggle task: c390a000 ti: c391e000 task.ti: c391e000 PC is at max1111_read_channel+0x20/0x30 LR is at sharpsl_pm_pxa_read_max1111+0x2c/0x3c pc : [] lr : [] psr: 20000013 ... [] (max1111_read_channel) from [] (sharpsl_pm_pxa_read_max1111+0x2c/0x3c) [] (sharpsl_pm_pxa_read_max1111) from [] (spitzpm_read_devdata+0x5c/0xc4) [] (spitzpm_read_devdata) from [] (sharpsl_check_battery_temp+0x78/0x110) [] (sharpsl_check_battery_temp) from [] (sharpsl_charge_toggle+0x48/0x110) [] (sharpsl_charge_toggle) from [] (process_one_work+0x14c/0x48c) [] (process_one_work) from [] (worker_thread+0x3c/0x5d4) [] (worker_thread) from [] (kthread+0xd0/0xec) [] (kthread) from [] (ret_from_fork+0x14/0x24) This can occur because the SPI controller driver (SPI_PXA2XX) is built as module and thus not necessarily loaded. While building SPI_PXA2XX into the kernel would make the problem disappear, it appears prudent to ensure that the driver is instantiated before accessing its data structures. Cc: Arnd Bergmann Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/max1111.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/hwmon/max1111.c b/drivers/hwmon/max1111.c index 36544c4f653c..303d0c9df907 100644 --- a/drivers/hwmon/max1111.c +++ b/drivers/hwmon/max1111.c @@ -85,6 +85,9 @@ static struct max1111_data *the_max1111; int max1111_read_channel(int channel) { + if (!the_max1111 || !the_max1111->spi) + return -ENODEV; + return max1111_read(&the_max1111->spi->dev, channel); } EXPORT_SYMBOL(max1111_read_channel); @@ -258,6 +261,9 @@ static int max1111_remove(struct spi_device *spi) { struct max1111_data *data = spi_get_drvdata(spi); +#ifdef CONFIG_SHARPSL_PM + the_max1111 = NULL; +#endif hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&spi->dev.kobj, &max1110_attr_group); sysfs_remove_group(&spi->dev.kobj, &max1111_attr_group); From 19c1764a19cdb41afebc2e66d7a75a7064c0000f Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Sun, 20 Mar 2016 23:23:46 +0100 Subject: [PATCH 311/797] PKCS#7: pkcs7_validate_trust(): initialize the _trusted output argument commit e54358915d0a00399c11c2c23ae1be674cba188a upstream. Despite what the DocBook comment to pkcs7_validate_trust() says, the *_trusted argument is never set to false. pkcs7_validate_trust() only positively sets *_trusted upon encountering a trusted PKCS#7 SignedInfo block. This is quite unfortunate since its callers, system_verify_data() for example, depend on pkcs7_validate_trust() clearing *_trusted on non-trust. Indeed, UBSAN splats when attempting to load the uninitialized local variable 'trusted' from system_verify_data() in pkcs7_validate_trust(): UBSAN: Undefined behaviour in crypto/asymmetric_keys/pkcs7_trust.c:194:14 load of value 82 is not a valid value for type '_Bool' [...] Call Trace: [] dump_stack+0xbc/0x117 [] ? _atomic_dec_and_lock+0x169/0x169 [] ubsan_epilogue+0xd/0x4e [] __ubsan_handle_load_invalid_value+0x111/0x158 [] ? val_to_string.constprop.12+0xcf/0xcf [] ? x509_request_asymmetric_key+0x114/0x370 [] ? kfree+0x220/0x370 [] ? public_key_verify_signature_2+0x32/0x50 [] pkcs7_validate_trust+0x524/0x5f0 [] system_verify_data+0xca/0x170 [] ? top_trace_array+0x9b/0x9b [] ? __vfs_read+0x279/0x3d0 [] mod_verify_sig+0x1ff/0x290 [...] The implication is that pkcs7_validate_trust() effectively grants trust when it really shouldn't have. Fix this by explicitly setting *_trusted to false at the very beginning of pkcs7_validate_trust(). Signed-off-by: Nicolai Stange Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/asymmetric_keys/pkcs7_trust.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/asymmetric_keys/pkcs7_trust.c b/crypto/asymmetric_keys/pkcs7_trust.c index 90d6d47965b0..ecdb5a2ce085 100644 --- a/crypto/asymmetric_keys/pkcs7_trust.c +++ b/crypto/asymmetric_keys/pkcs7_trust.c @@ -178,6 +178,8 @@ int pkcs7_validate_trust(struct pkcs7_message *pkcs7, int cached_ret = -ENOKEY; int ret; + *_trusted = false; + for (p = pkcs7->certs; p; p = p->next) p->seen = false; From 7cdf5d71b408f110657f2f441f7d37c2ebde2839 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 8 Apr 2016 18:11:33 +0200 Subject: [PATCH 312/797] parisc: Avoid function pointers for kernel exception routines commit e3893027a300927049efc1572f852201eb785142 upstream. We want to avoid the kernel module loader to create function pointers for the kernel fixup routines of get_user() and put_user(). Changing the external reference from function type to int type fixes this. This unbreaks exception handling for get_user() and put_user() when called from a kernel module. Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/parisc_ksyms.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 568b2c61ea02..3cad8aadc69e 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -47,11 +47,11 @@ EXPORT_SYMBOL(__cmpxchg_u64); EXPORT_SYMBOL(lclear_user); EXPORT_SYMBOL(lstrnlen_user); -/* Global fixups */ -extern void fixup_get_user_skip_1(void); -extern void fixup_get_user_skip_2(void); -extern void fixup_put_user_skip_1(void); -extern void fixup_put_user_skip_2(void); +/* Global fixups - defined as int to avoid creation of function pointers */ +extern int fixup_get_user_skip_1; +extern int fixup_get_user_skip_2; +extern int fixup_put_user_skip_1; +extern int fixup_put_user_skip_2; EXPORT_SYMBOL(fixup_get_user_skip_1); EXPORT_SYMBOL(fixup_get_user_skip_2); EXPORT_SYMBOL(fixup_put_user_skip_1); From ec353a589df2b1bea7e001266e97e4fd4347e1bf Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 8 Apr 2016 18:18:48 +0200 Subject: [PATCH 313/797] parisc: Fix kernel crash with reversed copy_from_user() commit ef72f3110d8b19f4c098a0bff7ed7d11945e70c6 upstream. The kernel module testcase (lib/test_user_copy.c) exhibited a kernel crash on parisc if the parameters for copy_from_user were reversed ("illegal reversed copy_to_user" testcase). Fix this potential crash by checking the fault handler if the faulting address is in the exception table. Signed-off-by: Helge Deller Cc: Kees Cook Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/traps.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 553b09855cfd..77e2262c97f6 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -798,6 +798,9 @@ void notrace handle_interruption(int code, struct pt_regs *regs) if (fault_space == 0 && !faulthandler_disabled()) { + /* Clean up and return if in exception table. */ + if (fixup_exception(regs)) + return; pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC); parisc_terminate("Kernel Fault", regs, code, fault_address); } From 4d6deebe06c5383dffe9cba859378ca708d4076d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 8 Apr 2016 18:32:52 +0200 Subject: [PATCH 314/797] parisc: Unbreak handling exceptions from kernel modules commit 2ef4dfd9d9f288943e249b78365a69e3ea3ec072 upstream. Handling exceptions from modules never worked on parisc. It was just masked by the fact that exceptions from modules don't happen during normal use. When a module triggers an exception in get_user() we need to load the main kernel dp value before accessing the exception_data structure, and afterwards restore the original dp value of the module on exit. Noticed-by: Mikulas Patocka Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/uaccess.h | 1 + arch/parisc/kernel/asm-offsets.c | 1 + arch/parisc/lib/fixup.S | 6 ++++++ arch/parisc/mm/fault.c | 1 + 4 files changed, 9 insertions(+) diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 0abdd4c607ed..1960b87c1c8b 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -76,6 +76,7 @@ struct exception_table_entry { */ struct exception_data { unsigned long fault_ip; + unsigned long fault_gp; unsigned long fault_space; unsigned long fault_addr; }; diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index d2f62570a7b1..78d30d2ea2d8 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -299,6 +299,7 @@ int main(void) #endif BLANK(); DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip)); + DEFINE(EXCDATA_GP, offsetof(struct exception_data, fault_gp)); DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space)); DEFINE(EXCDATA_ADDR, offsetof(struct exception_data, fault_addr)); BLANK(); diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S index 536ef66bb94b..1052b747e011 100644 --- a/arch/parisc/lib/fixup.S +++ b/arch/parisc/lib/fixup.S @@ -26,6 +26,7 @@ #ifdef CONFIG_SMP .macro get_fault_ip t1 t2 + loadgp addil LT%__per_cpu_offset,%r27 LDREG RT%__per_cpu_offset(%r1),\t1 /* t2 = smp_processor_id() */ @@ -40,14 +41,19 @@ LDREG RT%exception_data(%r1),\t1 /* t1 = this_cpu_ptr(&exception_data) */ add,l \t1,\t2,\t1 + /* %r27 = t1->fault_gp - restore gp */ + LDREG EXCDATA_GP(\t1), %r27 /* t1 = t1->fault_ip */ LDREG EXCDATA_IP(\t1), \t1 .endm #else .macro get_fault_ip t1 t2 + loadgp /* t1 = this_cpu_ptr(&exception_data) */ addil LT%exception_data,%r27 LDREG RT%exception_data(%r1),\t2 + /* %r27 = t2->fault_gp - restore gp */ + LDREG EXCDATA_GP(\t2), %r27 /* t1 = t2->fault_ip */ LDREG EXCDATA_IP(\t2), \t1 .endm diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index a762864ec92e..f9064449908a 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -151,6 +151,7 @@ int fixup_exception(struct pt_regs *regs) struct exception_data *d; d = this_cpu_ptr(&exception_data); d->fault_ip = regs->iaoq[0]; + d->fault_gp = regs->gr[27]; d->fault_space = regs->isr; d->fault_addr = regs->ior; From 57f21bd260958fbfda2ef819a7fa8a0054df0c68 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 1 Apr 2016 12:28:16 +0200 Subject: [PATCH 315/797] ALSA: timer: Use mod_timer() for rearming the system timer commit 4a07083ed613644c96c34a7dd2853dc5d7c70902 upstream. ALSA system timer backend stops the timer via del_timer() without sync and leaves del_timer_sync() at the close instead. This is because of the restriction by the design of ALSA timer: namely, the stop callback may be called from the timer handler, and calling the sync shall lead to a hangup. However, this also triggers a kernel BUG() when the timer is rearmed immediately after stopping without sync: kernel BUG at kernel/time/timer.c:966! Call Trace: [] snd_timer_s_start+0x13e/0x1a0 [] snd_timer_interrupt+0x504/0xec0 [] ? debug_check_no_locks_freed+0x290/0x290 [] snd_timer_s_function+0xb4/0x120 [] call_timer_fn+0x162/0x520 [] ? call_timer_fn+0xcd/0x520 [] ? snd_timer_interrupt+0xec0/0xec0 .... It's the place where add_timer() checks the pending timer. It's clear that this may happen after the immediate restart without sync in our cases. So, the workaround here is just to use mod_timer() instead of add_timer(). This looks like a band-aid fix, but it's a right move, as snd_timer_interrupt() takes care of the continuous rearm of timer. Reported-by: Jiri Slaby Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index f24c9fccf008..b982d1b089bd 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1051,8 +1051,8 @@ static int snd_timer_s_start(struct snd_timer * timer) njiff += timer->sticks - priv->correction; priv->correction = 0; } - priv->last_expires = priv->tlist.expires = njiff; - add_timer(&priv->tlist); + priv->last_expires = njiff; + mod_timer(&priv->tlist, njiff); return 0; } From 4568babcf7b22fcde8574892a349cb7759e45b0b Mon Sep 17 00:00:00 2001 From: Bobi Mihalca Date: Wed, 23 Mar 2016 13:23:55 +0200 Subject: [PATCH 316/797] ALSA: hda - Asus N750JV external subwoofer fixup commit 70cf2cbd685e218c3ffd105d9fb6cf0f8d767481 upstream. ASUS N750JV needs the same fixup as N550 for enabling its subwoofer. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=115181 Signed-off-by: Bobi Mihalca Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6968b796baa3..1b460c1f8678 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6643,6 +6643,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A), SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A), + SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_BASS_1A), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16), From 0d55f19ac372fa3cad40df3ccf8ce463012d6748 Mon Sep 17 00:00:00 2001 From: Bobi Mihalca Date: Wed, 23 Mar 2016 13:26:11 +0200 Subject: [PATCH 317/797] ALSA: hda - Fix white noise on Asus N750JV headphone commit 9d4dc5840f93bcb002fa311693349deae7702bc5 upstream. For reducing the noise from the headphone output on ASUS N750JV, call the existing fixup, alc_fixup_auto_mute_via_amp(), additionally. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=115181 Signed-off-by: Bobi Mihalca Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1b460c1f8678..ba4e5b921b7c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6379,6 +6379,7 @@ enum { ALC668_FIXUP_AUTO_MUTE, ALC668_FIXUP_DELL_DISABLE_AAMIX, ALC668_FIXUP_DELL_XPS13, + ALC662_FIXUP_ASUS_Nx50, }; static const struct hda_fixup alc662_fixups[] = { @@ -6619,6 +6620,12 @@ static const struct hda_fixup alc662_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_bass_chmap, }, + [ALC662_FIXUP_ASUS_Nx50] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_auto_mute_via_amp, + .chained = true, + .chain_id = ALC662_FIXUP_BASS_1A + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -6643,7 +6650,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A), SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A), - SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_BASS_1A), + SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16), From 3495017eaaee88cd0e85742b710fee3bbc2b3d08 Mon Sep 17 00:00:00 2001 From: Bobi Mihalca Date: Wed, 23 Mar 2016 13:32:33 +0200 Subject: [PATCH 318/797] ALSA: hda - Apply fix for white noise on Asus N550JV, too commit 83a9efb5b8170b7cffef4f62656656e1d8ad2ccd upstream. Apply the new fixup that is used for ASUS N750JV to another similar model, N500JV, too, for reducing the headphone noise. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=115181 Signed-off-by: Bobi Mihalca Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ba4e5b921b7c..3671eb89dd28 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6648,7 +6648,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), - SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A), + SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50), SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A), SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), From fb4cfc6e0a465ccdeddd47567c42fbb197253aca Mon Sep 17 00:00:00 2001 From: Xishi Qiu Date: Fri, 1 Apr 2016 14:31:20 -0700 Subject: [PATCH 319/797] mm: fix invalid node in alloc_migrate_target() commit 6f25a14a7053b69917e2ebea0d31dd444cd31fd5 upstream. It is incorrect to use next_node to find a target node, it will return MAX_NUMNODES or invalid node. This will lead to crash in buddy system allocation. Fixes: c8721bbbdd36 ("mm: memory-hotplug: enable memory hotplug to handle hugepage") Signed-off-by: Xishi Qiu Acked-by: Vlastimil Babka Acked-by: Naoya Horiguchi Cc: Joonsoo Kim Cc: David Rientjes Cc: "Laura Abbott" Cc: Hui Zhu Cc: Wang Xiaoqiang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_isolation.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 4568fd58f70a..00c96462cc36 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -283,11 +283,11 @@ struct page *alloc_migrate_target(struct page *page, unsigned long private, * now as a simple work-around, we use the next node for destination. */ if (PageHuge(page)) { - nodemask_t src = nodemask_of_node(page_to_nid(page)); - nodemask_t dst; - nodes_complement(dst, src); + int node = next_online_node(page_to_nid(page)); + if (node == MAX_NUMNODES) + node = first_online_node; return alloc_huge_page_node(page_hstate(compound_head(page)), - next_node(page_to_nid(page), dst)); + node); } if (PageHighMem(page)) From d38ae1c2be13f28629ae08de79f7d511ab79d1ee Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Tue, 8 Mar 2016 10:03:56 +0100 Subject: [PATCH 320/797] powerpc/mm: Fixup preempt underflow with huge pages commit 08a5bb2921e490939f78f38fd0d02858bb709942 upstream. hugepd_free() used __get_cpu_var() once. Nothing ensured that the code accessing the variable did not migrate from one CPU to another and soon this was noticed by Tiejun Chen in 94b09d755462 ("powerpc/hugetlb: Replace __get_cpu_var with get_cpu_var"). So we had it fixed. Christoph Lameter was doing his __get_cpu_var() replaces and forgot PowerPC. Then he noticed this and sent his fixed up batch again which got applied as 69111bac42f5 ("powerpc: Replace __get_cpu_var uses"). The careful reader will noticed one little detail: get_cpu_var() got replaced with this_cpu_ptr(). So now we have a put_cpu_var() which does a preempt_enable() and nothing that does preempt_disable() so we underflow the preempt counter. Cc: Benjamin Herrenschmidt Cc: Christoph Lameter Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/hugetlbpage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 9833fee493ec..807f1594701d 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -486,13 +486,13 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) { struct hugepd_freelist **batchp; - batchp = this_cpu_ptr(&hugepd_freelist_cur); + batchp = &get_cpu_var(hugepd_freelist_cur); if (atomic_read(&tlb->mm->mm_users) < 2 || cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))) { kmem_cache_free(hugepte_cache, hugepte); - put_cpu_var(hugepd_freelist_cur); + put_cpu_var(hugepd_freelist_cur); return; } From fa07cf6613c9cfb744be453005db2268d1811e73 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 7 Apr 2016 19:58:44 -0700 Subject: [PATCH 321/797] libnvdimm: fix smart data retrieval commit 211291126698c8f047617565b2e2e7f822f86354 upstream. It appears that smart data retrieval has been broken the since the initial implementation. Fix the payload size to be 128-bytes per the specification. Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 496b9b662dc6..5f47356d6942 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -335,7 +335,7 @@ static const struct nd_cmd_desc __nd_cmd_dimm_descs[] = { [ND_CMD_IMPLEMENTED] = { }, [ND_CMD_SMART] = { .out_num = 2, - .out_sizes = { 4, 8, }, + .out_sizes = { 4, 128, }, }, [ND_CMD_SMART_THRESHOLD] = { .out_num = 2, From 966bf1bea449c9f0ef89051b2025c748d28baa2b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 7 Apr 2016 19:59:27 -0700 Subject: [PATCH 322/797] libnvdimm, pfn: fix uuid validation commit e5670563f588ed1c0603819350c0f02cec23f5c5 upstream. If we detect a namespace has a stale info block in the init path, we should overwrite with the latest configuration. In fact, we already return -ENODEV when the parent uuid is invalid, the same should be done for the 'self' uuid. Otherwise we can get into a condition where userspace is unable to reconfigure the pfn-device without directly / manually invalidating the info block. Reported-by: Jeff Moyer Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/pfn_devs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 71805a1aa0f3..9d3974591cd6 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -275,7 +275,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) } else { /* from init we validate */ if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0) - return -EINVAL; + return -ENODEV; } /* From d4429b81f68bc696535c455f2fd6e86d2b99dd4f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 31 Mar 2016 09:38:51 +0200 Subject: [PATCH 323/797] compiler-gcc: disable -ftracer for __noclone functions commit 95272c29378ee7dc15f43fa2758cb28a5913a06d upstream. -ftracer can duplicate asm blocks causing compilation to fail in noclone functions. For example, KVM declares a global variable in an asm like asm("2: ... \n .pushsection data \n .global vmx_return \n vmx_return: .long 2b"); and -ftracer causes a double declaration. Cc: Andrew Morton Cc: Michal Marek Cc: kvm@vger.kernel.org Reported-by: Linda Walsh Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler-gcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 22ab246feed3..eeae401a2412 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -199,7 +199,7 @@ #define unreachable() __builtin_unreachable() /* Mark a function definition as prohibited from being cloned. */ -#define __noclone __attribute__((__noclone__)) +#define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) #endif /* GCC_VERSION >= 40500 */ From 6fcee661a2edbf40b6154aaad4166776a9bad294 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 24 Mar 2016 16:54:34 +0000 Subject: [PATCH 324/797] arm64: opcodes.h: Add arm big-endian config options before including arm header commit a6002ec5a8c68e69706b2efd6db6d682d0ab672c upstream. arm and arm64 use different config options to specify big endian. This needs taking into account when including code/headers between the two architectures. A case in point is PAN, which uses the __instr_arm() macro to output instructions. The macro comes from opcodes.h, which lives under arch/arm. On a big-endian build the mismatched config options mean the instruction isn't byte swapped correctly, resulting in undefined instruction exceptions during boot: | alternatives: patching kernel code | kdevtmpfs[87]: undefined instruction: pc=ffffffc0004505b4 | kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c | kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c | kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c | kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c | kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c | kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c | kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c | kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c | kdevtmpfs[87]: undefined instruction: pc=ffffffc00076231c | Internal error: Oops - undefined instruction: 0 [#1] SMP | Modules linked in: | CPU: 0 PID: 87 Comm: kdevtmpfs Not tainted 4.1.16+ #5 | Hardware name: Hisilicon PhosphorHi1382 EVB (DT) | task: ffffffc336591700 ti: ffffffc3365a4000 task.ti: ffffffc3365a4000 | PC is at dump_instr+0x68/0x100 | LR is at do_undefinstr+0x1d4/0x2a4 | pc : [] lr : [] pstate: 604001c5 | sp : ffffffc3365a6450 Reported-by: Hanjun Guo Tested-by: Xuefeng Wang Signed-off-by: James Morse Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/opcodes.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/opcodes.h b/arch/arm64/include/asm/opcodes.h index 4e603ea36ad3..123f45d92cd1 100644 --- a/arch/arm64/include/asm/opcodes.h +++ b/arch/arm64/include/asm/opcodes.h @@ -1 +1,5 @@ +#ifdef CONFIG_CPU_BIG_ENDIAN +#define CONFIG_CPU_ENDIAN_BE8 CONFIG_CPU_BIG_ENDIAN +#endif + #include <../../arm/include/asm/opcodes.h> From 7ed78a4894600f29f88f85d22d411c8116d27a8c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 25 Feb 2016 16:15:05 -0500 Subject: [PATCH 325/797] drm/dp: move hw_mutex up the call stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7779c5e23c5132c22a219f1f5554ef81dd15ee91 upstream. 1) don't let other threads trying to bang on aux channel interrupt the defer timeout/logic 2) don't let other threads interrupt the i2c over aux logic Technically, according to people who actually have the DP spec, this should not be required. In practice, it makes some troublesome Dell monitor (and perhaps others) work, so probably a case of "It's compliant if it works with windows" on the hw vendor's part.. v2: rebased to come before DPCD/AUX logging patch for easier backport to stable branches. Reported-by: Dave Wysochanski Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1274157 Signed-off-by: Rob Clark Reviewed-by: Ville Syrjälä Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_helper.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index 9535c5b60387..7e5a97204051 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -178,7 +178,7 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request, { struct drm_dp_aux_msg msg; unsigned int retry; - int err; + int err = 0; memset(&msg, 0, sizeof(msg)); msg.address = offset; @@ -186,6 +186,8 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request, msg.buffer = buffer; msg.size = size; + mutex_lock(&aux->hw_mutex); + /* * The specification doesn't give any recommendation on how often to * retry native transactions. We used to retry 7 times like for @@ -194,25 +196,24 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request, */ for (retry = 0; retry < 32; retry++) { - mutex_lock(&aux->hw_mutex); err = aux->transfer(aux, &msg); - mutex_unlock(&aux->hw_mutex); if (err < 0) { if (err == -EBUSY) continue; - return err; + goto unlock; } switch (msg.reply & DP_AUX_NATIVE_REPLY_MASK) { case DP_AUX_NATIVE_REPLY_ACK: if (err < size) - return -EPROTO; - return err; + err = -EPROTO; + goto unlock; case DP_AUX_NATIVE_REPLY_NACK: - return -EIO; + err = -EIO; + goto unlock; case DP_AUX_NATIVE_REPLY_DEFER: usleep_range(AUX_RETRY_INTERVAL, AUX_RETRY_INTERVAL + 100); @@ -221,7 +222,11 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request, } DRM_DEBUG_KMS("too many retries, giving up\n"); - return -EIO; + err = -EIO; + +unlock: + mutex_unlock(&aux->hw_mutex); + return err; } /** @@ -543,9 +548,7 @@ static int drm_dp_i2c_do_msg(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) int max_retries = max(7, drm_dp_i2c_retry_count(msg, dp_aux_i2c_speed_khz)); for (retry = 0, defer_i2c = 0; retry < (max_retries + defer_i2c); retry++) { - mutex_lock(&aux->hw_mutex); ret = aux->transfer(aux, msg); - mutex_unlock(&aux->hw_mutex); if (ret < 0) { if (ret == -EBUSY) continue; @@ -684,6 +687,8 @@ static int drm_dp_i2c_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, memset(&msg, 0, sizeof(msg)); + mutex_lock(&aux->hw_mutex); + for (i = 0; i < num; i++) { msg.address = msgs[i].addr; drm_dp_i2c_msg_set_request(&msg, &msgs[i]); @@ -738,6 +743,8 @@ static int drm_dp_i2c_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, msg.size = 0; (void)drm_dp_i2c_do_msg(aux, &msg); + mutex_unlock(&aux->hw_mutex); + return err; } From dd4fea89f865b0e36cb92c35377d97a82d620e4b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 30 Mar 2016 11:40:43 +0200 Subject: [PATCH 326/797] drm/udl: Use unlocked gem unreferencing commit 72b9ff0612ad8fc969b910cd00ac16b57a1a9ba4 upstream. For drm_gem_object_unreference callers are required to hold dev->struct_mutex, which these paths don't. Enforcing this requirement has become a bit more strict with commit ef4c6270bf2867e2f8032e9614d1a8cfc6c71663 Author: Daniel Vetter Date: Thu Oct 15 09:36:25 2015 +0200 drm/gem: Check locking in drm_gem_object_unreference Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/udl/udl_fb.c | 2 +- drivers/gpu/drm/udl/udl_gem.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index 62c7b1dafaa4..73e41a8613da 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -539,7 +539,7 @@ static int udlfb_create(struct drm_fb_helper *helper, out_destroy_fbi: drm_fb_helper_release_fbi(helper); out_gfree: - drm_gem_object_unreference(&ufbdev->ufb.obj->base); + drm_gem_object_unreference_unlocked(&ufbdev->ufb.obj->base); out: return ret; } diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c index 2a0a784ab6ee..d7528e0d8442 100644 --- a/drivers/gpu/drm/udl/udl_gem.c +++ b/drivers/gpu/drm/udl/udl_gem.c @@ -52,7 +52,7 @@ udl_gem_create(struct drm_file *file, return ret; } - drm_gem_object_unreference(&obj->base); + drm_gem_object_unreference_unlocked(&obj->base); *handle_p = handle; return 0; } From 1456f5cf1565ede6a0adcb43f27d66eede96876f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 25 Mar 2016 10:31:04 -0400 Subject: [PATCH 327/797] drm/radeon: add a dpm quirk for sapphire Dual-X R7 370 2G D5 commit f971f2263deaa4a441e377b385c11aee0f3b3f9a upstream. bug: https://bugs.freedesktop.org/show_bug.cgi?id=94692 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/si_dpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index a82b891ae1fe..7d7500413238 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2926,6 +2926,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = { /* PITCAIRN - https://bugs.freedesktop.org/show_bug.cgi?id=76490 */ { PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0xe271, 0, 120000 }, + { PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0x2015, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x1462, 0x2015, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x1043, 0x2015, 0, 120000 }, From ddf58bfd05fc6b71858228962dffcaa91d1b4d53 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 28 Mar 2016 10:16:40 -0400 Subject: [PATCH 328/797] drm/radeon: add another R7 370 quirk commit a64663d9870364bd2a2df62bf0d3a9fbe5ea62a8 upstream. bug: https://bugzilla.kernel.org/show_bug.cgi?id=115291 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/si_dpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 7d7500413238..444935dec7af 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2930,6 +2930,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = { { PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x1462, 0x2015, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x1043, 0x2015, 0, 120000 }, + { PCI_VENDOR_ID_ATI, 0x6811, 0x148c, 0x2015, 0, 120000 }, { 0, 0, 0, 0 }, }; From 0bccb7a91e02086d7fcdd61042d508e012ba87ef Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 28 Mar 2016 10:21:20 -0400 Subject: [PATCH 329/797] drm/radeon: add a dpm quirk for all R7 370 parts commit 0e5585dc870af947fab2af96a88c2d8b4270247c upstream. Higher mclk values are not stable due to a bug somewhere. Limit them for now. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/si_dpm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 444935dec7af..7285adb27099 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -3010,6 +3010,10 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, } ++p; } + /* limit mclk on all R7 370 parts for stability */ + if (rdev->pdev->device == 0x6811 && + rdev->pdev->revision == 0x81) + max_mclk = 120000; if (rps->vce_active) { rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; From 1e84f8b8dad8c270006b89385b4b60e1cc1ccf4c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 31 Mar 2016 16:07:38 -0400 Subject: [PATCH 330/797] drm/amdgpu/gmc: move vram type fetching into sw_init commit d1518a1db31a25682ea09c4b135fa72d9883be42 upstream. early_init gets called before atom asic init so on non-posted cards, the vram type is not initialized. Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 16 ++++++++-------- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 272110cc18c2..ea87033bfaf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -898,14 +898,6 @@ static int gmc_v7_0_early_init(void *handle) gmc_v7_0_set_gart_funcs(adev); gmc_v7_0_set_irq_funcs(adev); - if (adev->flags & AMD_IS_APU) { - adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; - } else { - u32 tmp = RREG32(mmMC_SEQ_MISC0); - tmp &= MC_SEQ_MISC0__MT__MASK; - adev->mc.vram_type = gmc_v7_0_convert_vram_type(tmp); - } - return 0; } @@ -926,6 +918,14 @@ static int gmc_v7_0_sw_init(void *handle) if (r) return r; + if (adev->flags & AMD_IS_APU) { + adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; + } else { + u32 tmp = RREG32(mmMC_SEQ_MISC0); + tmp &= MC_SEQ_MISC0__MT__MASK; + adev->mc.vram_type = gmc_v7_0_convert_vram_type(tmp); + } + r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index ba4ad00ba8b4..f035b5b99cb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -852,14 +852,6 @@ static int gmc_v8_0_early_init(void *handle) gmc_v8_0_set_gart_funcs(adev); gmc_v8_0_set_irq_funcs(adev); - if (adev->flags & AMD_IS_APU) { - adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; - } else { - u32 tmp = RREG32(mmMC_SEQ_MISC0); - tmp &= MC_SEQ_MISC0__MT__MASK; - adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp); - } - return 0; } @@ -880,6 +872,14 @@ static int gmc_v8_0_sw_init(void *handle) if (r) return r; + if (adev->flags & AMD_IS_APU) { + adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; + } else { + u32 tmp = RREG32(mmMC_SEQ_MISC0); + tmp &= MC_SEQ_MISC0__MT__MASK; + adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp); + } + r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault); if (r) return r; From 40a8f74321fb892dec5757ca44ec3850bde26a24 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 31 Mar 2016 16:41:32 -0400 Subject: [PATCH 331/797] drm/amdgpu/gmc: use proper register for vram type on Fiji commit b634de4f446c062a0c95ec4d150b4cf7c85e3526 upstream. The offset changed on Fiji. Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index f035b5b99cb5..08423089fb84 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -862,6 +862,8 @@ static int gmc_v8_0_late_init(void *handle) return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); } +#define mmMC_SEQ_MISC0_FIJI 0xA71 + static int gmc_v8_0_sw_init(void *handle) { int r; @@ -875,7 +877,12 @@ static int gmc_v8_0_sw_init(void *handle) if (adev->flags & AMD_IS_APU) { adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; } else { - u32 tmp = RREG32(mmMC_SEQ_MISC0); + u32 tmp; + + if (adev->asic_type == CHIP_FIJI) + tmp = RREG32(mmMC_SEQ_MISC0_FIJI); + else + tmp = RREG32(mmMC_SEQ_MISC0); tmp &= MC_SEQ_MISC0__MT__MASK; adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp); } From 0cc60c58ba3fcbc8467a3d56b98c664b0fc7d9bb Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Fri, 18 Mar 2016 10:11:07 -0400 Subject: [PATCH 332/797] xen/events: Mask a moving irq commit ff1e22e7a638a0782f54f81a6c9cb139aca2da35 upstream. Moving an unmasked irq may result in irq handler being invoked on both source and target CPUs. With 2-level this can happen as follows: On source CPU: evtchn_2l_handle_events() -> generic_handle_irq() -> handle_edge_irq() -> eoi_pirq(): irq_move_irq(data); /***** WE ARE HERE *****/ if (VALID_EVTCHN(evtchn)) clear_evtchn(evtchn); If at this moment target processor is handling an unrelated event in evtchn_2l_handle_events()'s loop it may pick up our event since target's cpu_evtchn_mask claims that this event belongs to it *and* the event is unmasked and still pending. At the same time, source CPU will continue executing its own handle_edge_irq(). With FIFO interrupt the scenario is similar: irq_move_irq() may result in a EVTCHNOP_unmask hypercall which, in turn, may make the event pending on the target CPU. We can avoid this situation by moving and clearing the event while keeping event masked. Signed-off-by: Boris Ostrovsky Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_base.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 524c22146429..44367783f07a 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -484,9 +484,19 @@ static void eoi_pirq(struct irq_data *data) struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) }; int rc = 0; - irq_move_irq(data); + if (!VALID_EVTCHN(evtchn)) + return; - if (VALID_EVTCHN(evtchn)) + if (unlikely(irqd_is_setaffinity_pending(data))) { + int masked = test_and_set_mask(evtchn); + + clear_evtchn(evtchn); + + irq_move_masked_irq(data); + + if (!masked) + unmask_evtchn(evtchn); + } else clear_evtchn(evtchn); if (pirq_needs_eoi(data->irq)) { @@ -1357,9 +1367,19 @@ static void ack_dynirq(struct irq_data *data) { int evtchn = evtchn_from_irq(data->irq); - irq_move_irq(data); + if (!VALID_EVTCHN(evtchn)) + return; - if (VALID_EVTCHN(evtchn)) + if (unlikely(irqd_is_setaffinity_pending(data))) { + int masked = test_and_set_mask(evtchn); + + clear_evtchn(evtchn); + + irq_move_masked_irq(data); + + if (!masked) + unmask_evtchn(evtchn); + } else clear_evtchn(evtchn); } From d5322b91e61647630a91cacd7e22736c35dd98bd Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sun, 21 Feb 2016 10:12:39 +0300 Subject: [PATCH 333/797] tcp: convert cached rtt from usec to jiffies when feeding initial rto [ Upstream commit 9bdfb3b79e61c60e1a3e2dc05ad164528afa6b8a ] Currently it's converted into msecs, thus HZ=1000 intact. Signed-off-by: Konstantin Khlebnikov Fixes: 740b0f1841f6 ("tcp: switch rtt estimations to usec resolution") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_metrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index c8cbc2b4b792..a726d7853ce5 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -550,7 +550,7 @@ void tcp_init_metrics(struct sock *sk) */ if (crtt > tp->srtt_us) { /* Set RTO like tcp_rtt_estimator(), but from cached RTT. */ - crtt /= 8 * USEC_PER_MSEC; + crtt /= 8 * USEC_PER_SEC / HZ; inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk)); } else if (tp->srtt_us == 0) { /* RFC6298: 5.7 We've failed to get a valid RTT sample from From 207485dc4f22f8da29d2ba5e9e41c4c9e84d4e01 Mon Sep 17 00:00:00 2001 From: Bernie Harris Date: Mon, 22 Feb 2016 12:58:05 +1300 Subject: [PATCH 334/797] tunnel: Clear IPCB(skb)->opt before dst_link_failure called [ Upstream commit 5146d1f151122e868e594c7b45115d64825aee5f ] IPCB may contain data from previous layers (in the observed case the qdisc layer). In the observed scenario, the data was misinterpreted as ip header options, which later caused the ihl to be set to an invalid value (<5). This resulted in an infinite loop in the mips implementation of ip_fast_csum. This patch clears IPCB(skb)->opt before dst_link_failure can be called for various types of tunnels. This change only applies to encapsulated ipv4 packets. The code introduced in 11c21a30 which clears all of IPCB has been removed to be consistent with these changes, and instead the opt field is cleared unconditionally in ip_tunnel_xmit. The change in ip_tunnel_xmit applies to SIT, GRE, and IPIP tunnels. The relevant vti, l2tp, and pptp functions already contain similar code for clearing the IPCB. Signed-off-by: Bernie Harris Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 3 ++- net/ipv4/udp_tunnel.c | 2 ++ net/ipv6/ip6_gre.c | 2 ++ net/ipv6/ip6_tunnel.c | 2 ++ 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index cbb51f3fac06..ce30c8b72457 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -663,6 +663,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, inner_iph = (const struct iphdr *)skb_inner_network_header(skb); connected = (tunnel->parms.iph.daddr != 0); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + dst = tnl_params->daddr; if (dst == 0) { /* NBMA tunnel */ @@ -760,7 +762,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { tunnel->err_count--; - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); dst_link_failure(skb); } else tunnel->err_count = 0; diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index aba428626b52..280a9bdeddee 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -89,6 +89,8 @@ int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, uh->source = src_port; uh->len = htons(skb->len); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + udp_set_csum(nocheck, skb, src, dst, skb->len); return iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index e5ea177d34c6..4650c6824783 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -778,6 +778,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) __u32 mtu; int err; + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 137fca42aaa6..6c5dfec7a377 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1180,6 +1180,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + tproto = ACCESS_ONCE(t->parms.proto); if (tproto != IPPROTO_IPIP && tproto != 0) return -1; From e948c9adee31b56f90efd3eb240221fe257f5aba Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 22 Feb 2016 18:43:25 +0100 Subject: [PATCH 335/797] ipv4: only create late gso-skb if skb is already set up with CHECKSUM_PARTIAL [ Upstream commit a8c4a2522a0808c5c2143612909717d1115c40cf ] Otherwise we break the contract with GSO to only pass CHECKSUM_PARTIAL skbs down. This can easily happen with UDP+IPv4 sockets with the first MSG_MORE write smaller than the MTU, second write is a sendfile. Returning -EOPNOTSUPP lets the callers fall back into normal sendmsg path, were we calculate the checksum manually during copying. Commit d749c9cbffd6 ("ipv4: no CHECKSUM_PARTIAL on MSG_MORE corked sockets") started to exposes this bug. Fixes: d749c9cbffd6 ("ipv4: no CHECKSUM_PARTIAL on MSG_MORE corked sockets") Reported-by: Jiri Benc Cc: Jiri Benc Reported-by: Wakko Warner Cc: Wakko Warner Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_output.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 49f02856304d..f2ad5216c438 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1237,13 +1237,16 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, if (!skb) return -EINVAL; - cork->length += size; if ((size + skb->len > mtu) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { + if (skb->ip_summed != CHECKSUM_PARTIAL) + return -EOPNOTSUPP; + skb_shinfo(skb)->gso_size = mtu - fragheaderlen; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; } + cork->length += size; while (size > 0) { if (skb_is_gso(skb)) { From 05b0ca3ec3a2e49f29bfc2879c87699eb5a90ee4 Mon Sep 17 00:00:00 2001 From: Diego Viola Date: Tue, 23 Feb 2016 12:04:04 -0300 Subject: [PATCH 336/797] net: jme: fix suspend/resume on JMC260 [ Upstream commit ee50c130c82175eaa0820c96b6d3763928af2241 ] The JMC260 network card fails to suspend/resume because the call to jme_start_irq() was too early, moving the call to jme_start_irq() after the call to jme_reset_link() makes it work. Prior this change suspend/resume would fail unless /sys/power/pm_async=0 was explicitly specified. Relevant bug report: https://bugzilla.kernel.org/show_bug.cgi?id=112351 Signed-off-by: Diego Viola Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/jme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 060dd3922974..973dade2d07f 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -3312,13 +3312,14 @@ jme_resume(struct device *dev) jme_reset_phy_processor(jme); jme_phy_calibration(jme); jme_phy_setEA(jme); - jme_start_irq(jme); netif_device_attach(netdev); atomic_inc(&jme->link_changing); jme_reset_link(jme); + jme_start_irq(jme); + return 0; } From ea0519a2bc8d7e6244cc5201f7b92b896be60c07 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 23 Feb 2016 10:10:26 -0800 Subject: [PATCH 337/797] net: vrf: Remove direct access to skb->data [ Upstream commit 65c38aa653c14df49e19faad74bd375f36e61c57 ] Nik pointed that the VRF driver should be using skb_header_pointer instead of accessing skb->data and bits beyond directly which can be garbage. Fixes: 35402e313663 ("net: Add IPv6 support to VRF device") Cc: Nikolay Aleksandrov Signed-off-by: David Ahern Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vrf.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 0a242b200df4..903bda437839 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -114,20 +114,23 @@ static struct dst_ops vrf_dst_ops = { #if IS_ENABLED(CONFIG_IPV6) static bool check_ipv6_frame(const struct sk_buff *skb) { - const struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->data; - size_t hlen = sizeof(*ipv6h); + const struct ipv6hdr *ipv6h; + struct ipv6hdr _ipv6h; bool rc = true; - if (skb->len < hlen) + ipv6h = skb_header_pointer(skb, 0, sizeof(_ipv6h), &_ipv6h); + if (!ipv6h) goto out; if (ipv6h->nexthdr == NEXTHDR_ICMP) { const struct icmp6hdr *icmph; + struct icmp6hdr _icmph; - if (skb->len < hlen + sizeof(*icmph)) + icmph = skb_header_pointer(skb, sizeof(_ipv6h), + sizeof(_icmph), &_icmph); + if (!icmph) goto out; - icmph = (struct icmp6hdr *)(skb->data + sizeof(*ipv6h)); switch (icmph->icmp6_type) { case NDISC_ROUTER_SOLICITATION: case NDISC_ROUTER_ADVERTISEMENT: From aad983b70b30a90d0e5fba4d8929d0db1191dd3f Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Tue, 23 Feb 2016 19:23:23 +0000 Subject: [PATCH 338/797] net: qca_spi: Don't clear IFF_BROADCAST [ Upstream commit 2b70bad23c89b121a3e4a00f8968d14ebb78887d ] Currently qcaspi_netdev_setup accidentally clears IFF_BROADCAST. So fix this by keeping the flags from ether_setup. Reported-by: Michael Heimpold Signed-off-by: Stefan Wahren Fixes: 291ab06ecf67 (net: qualcomm: new Ethernet over SPI driver for QCA7000) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qualcomm/qca_spi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 689a4a5c8dcf..f2ee3e5fb167 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -811,7 +811,6 @@ qcaspi_netdev_setup(struct net_device *dev) dev->netdev_ops = &qcaspi_netdev_ops; qcaspi_set_ethtool_ops(dev); dev->watchdog_timeo = QCASPI_TX_TIMEOUT; - dev->flags = IFF_MULTICAST; dev->tx_queue_len = 100; qca = netdev_priv(dev); From 2f0bd0e69e5b0a4ac00fd69f6591301b43fdeaae Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Tue, 23 Feb 2016 19:23:24 +0000 Subject: [PATCH 339/797] net: qca_spi: clear IFF_TX_SKB_SHARING [ Upstream commit a4690afeb0d2d7ba4d60dfa98a89f3bb1ce60ecd ] ether_setup sets IFF_TX_SKB_SHARING but this is not supported by qca_spi as it modifies the skb on xmit. Signed-off-by: Stefan Wahren Fixes: 291ab06ecf67 (net: qualcomm: new Ethernet over SPI driver for QCA7000) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qualcomm/qca_spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index f2ee3e5fb167..1ef03939d25f 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -811,6 +811,7 @@ qcaspi_netdev_setup(struct net_device *dev) dev->netdev_ops = &qcaspi_netdev_ops; qcaspi_set_ethtool_ops(dev); dev->watchdog_timeo = QCASPI_TX_TIMEOUT; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->tx_queue_len = 100; qca = netdev_priv(dev); From 44bc7d1b9777128656310c0c7b47cb952a7c7b2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Wed, 24 Feb 2016 04:21:42 +0100 Subject: [PATCH 340/797] net: fix bridge multicast packet checksum validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9b368814b336b0a1a479135eb2815edbc00efd3c ] We need to update the skb->csum after pulling the skb, otherwise an unnecessary checksum (re)computation can ocure for IGMP/MLD packets in the bridge code. Additionally this fixes the following splats for network devices / bridge ports with support for and enabled RX checksum offloading: [...] [ 43.986968] eth0: hw csum failure [ 43.990344] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.4.0 #2 [ 43.996193] Hardware name: BCM2709 [ 43.999647] [<800204e0>] (unwind_backtrace) from [<8001cf14>] (show_stack+0x10/0x14) [ 44.007432] [<8001cf14>] (show_stack) from [<801ab614>] (dump_stack+0x80/0x90) [ 44.014695] [<801ab614>] (dump_stack) from [<802e4548>] (__skb_checksum_complete+0x6c/0xac) [ 44.023090] [<802e4548>] (__skb_checksum_complete) from [<803a055c>] (ipv6_mc_validate_checksum+0x104/0x178) [ 44.032959] [<803a055c>] (ipv6_mc_validate_checksum) from [<802e111c>] (skb_checksum_trimmed+0x130/0x188) [ 44.042565] [<802e111c>] (skb_checksum_trimmed) from [<803a06e8>] (ipv6_mc_check_mld+0x118/0x338) [ 44.051501] [<803a06e8>] (ipv6_mc_check_mld) from [<803b2c98>] (br_multicast_rcv+0x5dc/0xd00) [ 44.060077] [<803b2c98>] (br_multicast_rcv) from [<803aa510>] (br_handle_frame_finish+0xac/0x51c) [...] Fixes: 9afd85c9e455 ("net: Export IGMP/MLD message validation code") Reported-by: Álvaro Fernández Rojas Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 17 +++++++++++++++++ net/core/skbuff.c | 22 ++++++++++++++++++++-- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 75f136a22a5e..d84c593012ed 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2724,6 +2724,23 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); +static inline void skb_postpush_rcsum(struct sk_buff *skb, + const void *start, unsigned int len) +{ + /* For performing the reverse operation to skb_postpull_rcsum(), + * we can instead of ... + * + * skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); + * + * ... just use this equivalent version here to save a few + * instructions. Feeding csum of 0 in csum_partial() and later + * on adding skb->csum is equivalent to feed skb->csum in the + * first place. + */ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_partial(start, len, skb->csum); +} + /** * pskb_trim_rcsum - trim received skb and update checksum * @skb: buffer to trim diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5bf88f58bee7..8616d1147c93 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2947,6 +2947,24 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page, } EXPORT_SYMBOL_GPL(skb_append_pagefrags); +/** + * skb_push_rcsum - push skb and update receive checksum + * @skb: buffer to update + * @len: length of data pulled + * + * This function performs an skb_push on the packet and updates + * the CHECKSUM_COMPLETE checksum. It should be used on + * receive path processing instead of skb_push unless you know + * that the checksum difference is zero (e.g., a valid IP header) + * or you are setting ip_summed to CHECKSUM_NONE. + */ +static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len) +{ + skb_push(skb, len); + skb_postpush_rcsum(skb, skb->data, len); + return skb->data; +} + /** * skb_pull_rcsum - pull skb and update receive checksum * @skb: buffer to update @@ -4084,9 +4102,9 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, if (!pskb_may_pull(skb_chk, offset)) goto err; - __skb_pull(skb_chk, offset); + skb_pull_rcsum(skb_chk, offset); ret = skb_chkf(skb_chk); - __skb_push(skb_chk, offset); + skb_push_rcsum(skb_chk, offset); if (ret) goto err; From a87c65252bb82cc82ed226f70922aa83709ea2fe Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 28 Feb 2016 10:03:51 +0800 Subject: [PATCH 341/797] sctp: lack the check for ports in sctp_v6_cmp_addr [ Upstream commit 40b4f0fd74e46c017814618d67ec9127ff20f157 ] As the member .cmp_addr of sctp_af_inet6, sctp_v6_cmp_addr should also check the port of addresses, just like sctp_v4_cmp_addr, cause it's invoked by sctp_cmp_addr_exact(). Now sctp_v6_cmp_addr just check the port when two addresses have different family, and lack the port check for two ipv6 addresses. that will make sctp_hash_cmp() cannot work well. so fix it by adding ports comparison in sctp_v6_cmp_addr(). Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/ipv6.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index ec529121f38a..ce46f1c7f133 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -526,6 +526,8 @@ static int sctp_v6_cmp_addr(const union sctp_addr *addr1, } return 0; } + if (addr1->v6.sin6_port != addr2->v6.sin6_port) + return 0; if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr)) return 0; /* If this is a linklocal address, compare the scope_id. */ From d9bbdcd83d63010fab254d5ed39116f9f58f1228 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 29 Feb 2016 15:03:33 -0800 Subject: [PATCH 342/797] mld, igmp: Fix reserved tailroom calculation [ Upstream commit 1837b2e2bcd23137766555a63867e649c0b637f0 ] The current reserved_tailroom calculation fails to take hlen and tlen into account. skb: [__hlen__|__data____________|__tlen___|__extra__] ^ ^ head skb_end_offset In this representation, hlen + data + tlen is the size passed to alloc_skb. "extra" is the extra space made available in __alloc_skb because of rounding up by kmalloc. We can reorder the representation like so: [__hlen__|__data____________|__extra__|__tlen___] ^ ^ head skb_end_offset The maximum space available for ip headers and payload without fragmentation is min(mtu, data + extra). Therefore, reserved_tailroom = data + extra + tlen - min(mtu, data + extra) = skb_end_offset - hlen - min(mtu, skb_end_offset - hlen - tlen) = skb_tailroom - min(mtu, skb_tailroom - tlen) ; after skb_reserve(hlen) Compare the second line to the current expression: reserved_tailroom = skb_end_offset - min(mtu, skb_end_offset) and we can see that hlen and tlen are not taken into account. The min() in the third line can be expanded into: if mtu < skb_tailroom - tlen: reserved_tailroom = skb_tailroom - mtu else: reserved_tailroom = tlen Depending on hlen, tlen, mtu and the number of multicast address records, the current code may output skbs that have less tailroom than dev->needed_tailroom or it may output more skbs than needed because not all space available is used. Fixes: 4c672e4b ("ipv6: mld: fix add_grhead skb_over_panic for devs with large MTUs") Signed-off-by: Benjamin Poirier Acked-by: Hannes Frederic Sowa Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 24 ++++++++++++++++++++++++ net/ipv4/igmp.c | 3 +-- net/ipv6/mcast.c | 3 +-- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d84c593012ed..4fde61804191 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1908,6 +1908,30 @@ static inline void skb_reserve(struct sk_buff *skb, int len) skb->tail += len; } +/** + * skb_tailroom_reserve - adjust reserved_tailroom + * @skb: buffer to alter + * @mtu: maximum amount of headlen permitted + * @needed_tailroom: minimum amount of reserved_tailroom + * + * Set reserved_tailroom so that headlen can be as large as possible but + * not larger than mtu and tailroom cannot be smaller than + * needed_tailroom. + * The required headroom should already have been reserved before using + * this function. + */ +static inline void skb_tailroom_reserve(struct sk_buff *skb, unsigned int mtu, + unsigned int needed_tailroom) +{ + SKB_LINEAR_ASSERT(skb); + if (mtu < skb_tailroom(skb) - needed_tailroom) + /* use at most mtu */ + skb->reserved_tailroom = skb_tailroom(skb) - mtu; + else + /* use up to all available space */ + skb->reserved_tailroom = needed_tailroom; +} + #define ENCAP_TYPE_ETHER 0 #define ENCAP_TYPE_IPPROTO 1 diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 05e4cba14162..b3086cf27027 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -356,9 +356,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) skb_dst_set(skb, &rt->dst); skb->dev = dev; - skb->reserved_tailroom = skb_end_offset(skb) - - min(mtu, skb_end_offset(skb)); skb_reserve(skb, hlen); + skb_tailroom_reserve(skb, mtu, tlen); skb_reset_network_header(skb); pip = ip_hdr(skb); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 5ee56d0a8699..d64ee7e83664 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1574,9 +1574,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) return NULL; skb->priority = TC_PRIO_CONTROL; - skb->reserved_tailroom = skb_end_offset(skb) - - min(mtu, skb_end_offset(skb)); skb_reserve(skb, hlen); + skb_tailroom_reserve(skb, mtu, tlen); if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { /* : From 7da899cee1f19bdaf2649e5e8c36c0e65ace5a5c Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 1 Mar 2016 11:07:09 +0100 Subject: [PATCH 343/797] tipc: Revert "tipc: use existing sk_write_queue for outgoing packet chain" [ Upstream commit f214fc402967e1bc94ad7f39faa03db5813d6849 ] reverts commit 94153e36e709e ("tipc: use existing sk_write_queue for outgoing packet chain") In Commit 94153e36e709e, we assume that we fill & empty the socket's sk_write_queue within the same lock_sock() session. This is not true if the link is congested. During congestion, the socket lock is released while we wait for the congestion to cease. This implementation causes a nullptr exception, if the user space program has several threads accessing the same socket descriptor. Consider two threads of the same program performing the following: Thread1 Thread2 -------------------- ---------------------- Enter tipc_sendmsg() Enter tipc_sendmsg() lock_sock() lock_sock() Enter tipc_link_xmit(), ret=ELINKCONG spin on socket lock.. sk_wait_event() : release_sock() grab socket lock : Enter tipc_link_xmit(), ret=0 : release_sock() Wakeup after congestion lock_sock() skb = skb_peek(pktchain); !! TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; In this case, the second thread transmits the buffers belonging to both thread1 and thread2 successfully. When the first thread wakeup after the congestion it assumes that the pktchain is intact and operates on the skb's in it, which leads to the following exception: [2102.439969] BUG: unable to handle kernel NULL pointer dereference at 00000000000000d0 [2102.440074] IP: [] __tipc_link_xmit+0x2b0/0x4d0 [tipc] [2102.440074] PGD 3fa3f067 PUD 3fa6b067 PMD 0 [2102.440074] Oops: 0000 [#1] SMP [2102.440074] CPU: 2 PID: 244 Comm: sender Not tainted 3.12.28 #1 [2102.440074] RIP: 0010:[] [] __tipc_link_xmit+0x2b0/0x4d0 [tipc] [...] [2102.440074] Call Trace: [2102.440074] [] ? schedule+0x29/0x70 [2102.440074] [] ? tipc_node_unlock+0x46/0x170 [tipc] [2102.440074] [] tipc_link_xmit+0x51/0xf0 [tipc] [2102.440074] [] tipc_send_stream+0x11e/0x4f0 [tipc] [2102.440074] [] ? __wake_up_sync+0x20/0x20 [2102.440074] [] tipc_send_packet+0x1c/0x20 [tipc] [2102.440074] [] sock_sendmsg+0xa8/0xd0 [2102.440074] [] ? release_sock+0x145/0x170 [2102.440074] [] ___sys_sendmsg+0x3d8/0x3e0 [2102.440074] [] ? _raw_spin_unlock+0xe/0x10 [2102.440074] [] ? handle_mm_fault+0x6ca/0x9d0 [2102.440074] [] ? set_next_entity+0x85/0xa0 [2102.440074] [] ? _raw_spin_unlock_irq+0xe/0x20 [2102.440074] [] ? finish_task_switch+0x5c/0xc0 [2102.440074] [] ? __schedule+0x34c/0x950 [2102.440074] [] __sys_sendmsg+0x42/0x80 [2102.440074] [] SyS_sendmsg+0x12/0x20 [2102.440074] [] system_call_fastpath+0x16/0x1b In this commit, we maintain the skb list always in the stack. Signed-off-by: Parthasarathy Bhuvaragan Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/socket.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b53246fb0412..e53003cf7703 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -673,7 +673,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; - struct sk_buff_head *pktchain = &sk->sk_write_queue; + struct sk_buff_head pktchain; struct iov_iter save = msg->msg_iter; uint mtu; int rc; @@ -687,14 +687,16 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, msg_set_nameupper(mhdr, seq->upper); msg_set_hdr_sz(mhdr, MCAST_H_SIZE); + skb_queue_head_init(&pktchain); + new_mtu: mtu = tipc_bcast_get_mtu(net); - rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain); + rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain); if (unlikely(rc < 0)) return rc; do { - rc = tipc_bcast_xmit(net, pktchain); + rc = tipc_bcast_xmit(net, &pktchain); if (likely(!rc)) return dsz; @@ -704,7 +706,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, if (!rc) continue; } - __skb_queue_purge(pktchain); + __skb_queue_purge(&pktchain); if (rc == -EMSGSIZE) { msg->msg_iter = save; goto new_mtu; @@ -863,7 +865,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; u32 dnode, dport; - struct sk_buff_head *pktchain = &sk->sk_write_queue; + struct sk_buff_head pktchain; struct sk_buff *skb; struct tipc_name_seq *seq; struct iov_iter save; @@ -924,17 +926,18 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) msg_set_hdr_sz(mhdr, BASIC_H_SIZE); } + skb_queue_head_init(&pktchain); save = m->msg_iter; new_mtu: mtu = tipc_node_get_mtu(net, dnode, tsk->portid); - rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain); + rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain); if (rc < 0) return rc; do { - skb = skb_peek(pktchain); + skb = skb_peek(&pktchain); TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; - rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid); + rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid); if (likely(!rc)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; @@ -946,7 +949,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) if (!rc) continue; } - __skb_queue_purge(pktchain); + __skb_queue_purge(&pktchain); if (rc == -EMSGSIZE) { m->msg_iter = save; goto new_mtu; @@ -1016,7 +1019,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *mhdr = &tsk->phdr; - struct sk_buff_head *pktchain = &sk->sk_write_queue; + struct sk_buff_head pktchain; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); u32 portid = tsk->portid; int rc = -EINVAL; @@ -1044,17 +1047,19 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); dnode = tsk_peer_node(tsk); + skb_queue_head_init(&pktchain); next: save = m->msg_iter; mtu = tsk->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); - rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain); + rc = tipc_msg_build(mhdr, m, sent, send, mtu, &pktchain); if (unlikely(rc < 0)) return rc; + do { if (likely(!tsk_conn_cong(tsk))) { - rc = tipc_node_xmit(net, pktchain, dnode, portid); + rc = tipc_node_xmit(net, &pktchain, dnode, portid); if (likely(!rc)) { tsk->sent_unacked++; sent += send; @@ -1063,7 +1068,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) goto next; } if (rc == -EMSGSIZE) { - __skb_queue_purge(pktchain); + __skb_queue_purge(&pktchain); tsk->max_pkt = tipc_node_get_mtu(net, dnode, portid); m->msg_iter = save; @@ -1077,7 +1082,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) rc = tipc_wait_for_sndpkt(sock, &timeo); } while (!rc); - __skb_queue_purge(pktchain); + __skb_queue_purge(&pktchain); return sent ? sent : rc; } From 242fab1419e149ffc64b8b778fa1dabab34ff2ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Tue, 1 Mar 2016 14:31:02 +0100 Subject: [PATCH 344/797] qmi_wwan: add Sierra Wireless EM74xx device ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit bf13c94ccb33c3182efc92ce4989506a0f541243 ] The MC74xx and EM74xx modules use different IDs by default, according to the Lenovo EM7455 driver for Windows. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 982e0acd1a36..df77467c7e93 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -718,8 +718,10 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1199, 0x9061, 8)}, /* Sierra Wireless Modem */ {QMI_FIXED_INTF(0x1199, 0x9070, 8)}, /* Sierra Wireless MC74xx/EM74xx */ {QMI_FIXED_INTF(0x1199, 0x9070, 10)}, /* Sierra Wireless MC74xx/EM74xx */ - {QMI_FIXED_INTF(0x1199, 0x9071, 8)}, /* Sierra Wireless MC74xx/EM74xx */ - {QMI_FIXED_INTF(0x1199, 0x9071, 10)}, /* Sierra Wireless MC74xx/EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x9071, 8)}, /* Sierra Wireless MC74xx */ + {QMI_FIXED_INTF(0x1199, 0x9071, 10)}, /* Sierra Wireless MC74xx */ + {QMI_FIXED_INTF(0x1199, 0x9079, 8)}, /* Sierra Wireless EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ From b80398d91c2bdb23d503c5d742ba5c4541269112 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 1 Mar 2016 16:15:16 +0100 Subject: [PATCH 345/797] ipv6: re-enable fragment header matching in ipv6_find_hdr [ Upstream commit 5d150a985520bbe3cb2aa1ceef24a7e32f20c15f ] When ipv6_find_hdr is used to find a fragment header (caller specifies target NEXTHDR_FRAGMENT) we erronously return -ENOENT for all fragments with nonzero offset. Before commit 9195bb8e381d, when target was specified, we did not enter the exthdr walk loop as nexthdr == target so this used to work. Now we do (so we can skip empty route headers). When we then stumble upon a frag with nonzero frag_off we must return -ENOENT ("header not found") only if the caller did not specifically request NEXTHDR_FRAGMENT. This allows nfables exthdr expression to match ipv6 fragments, e.g. via nft add rule ip6 filter input frag frag-off gt 0 Fixes: 9195bb8e381d ("ipv6: improve ipv6_find_hdr() to skip empty routing headers") Signed-off-by: Florian Westphal Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/exthdrs_core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 5c5d23e59da5..9508a20fbf61 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -257,7 +257,11 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, *fragoff = _frag_off; return hp->nexthdr; } - return -ENOENT; + if (!found) + return -ENOENT; + if (fragoff) + *fragoff = _frag_off; + break; } hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) { From 32cb6781a96f24287a7b3c8716f47b0e8768709d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 2 Mar 2016 02:32:08 +0100 Subject: [PATCH 346/797] vxlan: fix missing options_len update on RX with collect metadata [ Upstream commit 4024fcf70556311521e7b6cf79fa50e16f31013a ] When signalling to metadata consumers that the metadata_dst entry carries additional GBP extension data for vxlan (TUNNEL_VXLAN_OPT), the dst's vxlan_metadata information is populated, but options_len is left to zero. F.e. in ovs, ovs_flow_key_extract() checks for options_len before extracting the data through ip_tunnel_info_opts_get(). Geneve uses ip_tunnel_info_opts_set() helper in receive path, which sets options_len internally, vxlan however uses ip_tunnel_info_opts(), so when filling vxlan_metadata, we do need to update options_len. Fixes: 4c22279848c5 ("ip-tunnel: Use API to access tunnel metadata options.") Signed-off-by: Daniel Borkmann Acked-by: Thomas Graf Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e0fcda4ddd55..3c0df70e2f53 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1306,8 +1306,10 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) gbp = (struct vxlanhdr_gbp *)vxh; md->gbp = ntohs(gbp->policy_id); - if (tun_dst) + if (tun_dst) { tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT; + tun_dst->u.tun_info.options_len = sizeof(*md); + } if (gbp->dont_learn) md->gbp |= VXLAN_GBP_DONT_LEARN; From 3aaa64b61a787d0aedc5c3fe36102419fc4eeb93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 3 Mar 2016 22:20:53 +0100 Subject: [PATCH 347/797] cdc_ncm: toggle altsetting to force reset before setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 48906f62c96cc2cd35753e59310cb70eb08cc6a5 ] Some devices will silently fail setup unless they are reset first. This is necessary even if the data interface is already in altsetting 0, which it will be when the device is probed for the first time. Briefly toggling the altsetting forces a function reset regardless of the initial state. This fixes a setup problem observed on a number of Huawei devices, appearing to operate in NTB-32 mode even if we explicitly set them to NTB-16 mode. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/cdc_ncm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index e8a1144c5a8b..8c2bb77db049 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -794,7 +794,11 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ iface_no = ctx->data->cur_altsetting->desc.bInterfaceNumber; - /* reset data interface */ + /* Reset data interface. Some devices will not reset properly + * unless they are configured first. Toggle the altsetting to + * force a reset + */ + usb_set_interface(dev->udev, iface_no, data_altsetting); temp = usb_set_interface(dev->udev, iface_no, 0); if (temp) { dev_dbg(&intf->dev, "set interface failed\n"); From 2d11623bd01a311c868d2e90fbdda8b5eec39a2f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 7 Mar 2016 11:31:10 +0100 Subject: [PATCH 348/797] usbnet: cleanup after bind() in probe() [ Upstream commit 1666984c8625b3db19a9abc298931d35ab7bc64b ] In case bind() works, but a later error forces bailing in probe() in error cases work and a timer may be scheduled. They must be killed. This fixes an error case related to the double free reported in http://www.spinics.net/lists/netdev/msg367669.html and needs to go on top of Linus' fix to cdc-ncm. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/usbnet.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 0744bf2ef2d6..c2ea4e5666fb 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1766,6 +1766,13 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) if (info->unbind) info->unbind (dev, udev); out1: + /* subdrivers must undo all they did in bind() if they + * fail it, but we may fail later and a deferred kevent + * may trigger an error resubmitting itself and, worse, + * schedule a timer. So we kill it all just in case. + */ + cancel_work_sync(&dev->kevent); + del_timer_sync(&dev->delay); free_netdev(net); out: return status; From 8a2226c17e0e2256f71e6b3175b6d3455b479f02 Mon Sep 17 00:00:00 2001 From: Bill Sommerfeld Date: Fri, 4 Mar 2016 14:47:21 -0800 Subject: [PATCH 349/797] udp6: fix UDP/IPv6 encap resubmit path [ Upstream commit 59dca1d8a6725a121dae6c452de0b2611d5865dc ] IPv4 interprets a negative return value from a protocol handler as a request to redispatch to a new protocol. In contrast, IPv6 interprets a negative value as an error, and interprets a positive value as a request for redispatch. UDP for IPv6 was unaware of this difference. Change __udp6_lib_rcv() to return a positive value for redispatch. Note that the socket's encap_rcv hook still needs to return a negative value to request dispatch, and in the case of IPv6 packets, adjust IP6CB(skb)->nhoff to identify the byte containing the next protocol. Signed-off-by: Bill Sommerfeld Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/udp.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9da3287a3923..1e293a552693 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -916,11 +916,9 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); - /* a return value > 0 means to resubmit the input, but - * it wants the return to be -protocol, or 0 - */ + /* a return value > 0 means to resubmit the input */ if (ret > 0) - return -ret; + return ret; return 0; } From 36b9c7cc09fc6d90e155f322f3f1d6fd3f53b0b7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 6 Mar 2016 09:29:21 -0800 Subject: [PATCH 350/797] tcp: fix tcpi_segs_in after connection establishment [ Upstream commit a9d99ce28ed359d68cf6f3c1a69038aefedf6d6a ] If final packet (ACK) of 3WHS is lost, it appears we do not properly account the following incoming segment into tcpi_segs_in While we are at it, starts segs_in with one, to count the SYN packet. We do not yet count number of SYN we received for a request sock, we might add this someday. packetdrill script showing proper behavior after fix : // Tests tcpi_segs_in when 3rd packet (ACK) of 3WHS is lost 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 +0 < S 0:0(0) win 32792 +0 > S. 0:0(0) ack 1 +.020 < P. 1:1001(1000) ack 1 win 32792 +0 accept(3, ..., ...) = 4 +.000 %{ assert tcpi_segs_in == 2, 'tcpi_segs_in=%d' % tcpi_segs_in }% Fixes: 2efd055c53c06 ("tcp: add tcpi_segs_in and tcpi_segs_out to tcp_info") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_minisocks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ac6b1961ffeb..9475a2748a9a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -458,7 +458,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; - newtp->segs_in = 0; + newtp->segs_in = 1; newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; @@ -818,6 +818,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, int ret = 0; int state = child->sk_state; + tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); if (!sock_owned_by_user(child)) { ret = tcp_rcv_state_process(child, skb); /* Wakeup parent, send SIGIO */ From cd8101d8ece8d776b79b7e0528cc8ba6d6c33562 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 7 Mar 2016 19:36:44 +0100 Subject: [PATCH 351/797] ppp: release rtnl mutex when interface creation fails [ Upstream commit 6faac63a6986f29ef39827f460edd3a5ba64ad5c ] Add missing rtnl_unlock() in the error path of ppp_create_interface(). Fixes: 58a89ecaca53 ("ppp: fix lockdep splat in ppp_dev_uninit()") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/ppp_generic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 9a863c6a6a33..40b303ed63b7 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -2803,6 +2803,7 @@ static struct ppp *ppp_create_interface(struct net *net, int unit, out2: mutex_unlock(&pn->all_ppp_mutex); + rtnl_unlock(); free_netdev(dev); out1: *retp = ret; From 8b8d278aa4de9335682bbd4a3bb619af015c859e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 9 Mar 2016 21:58:32 -0500 Subject: [PATCH 352/797] net: validate variable length ll headers [ Upstream commit 2793a23aacbd754dbbb5cb75093deb7e4103bace ] Netdevice parameter hard_header_len is variously interpreted both as an upper and lower bound on link layer header length. The field is used as upper bound when reserving room at allocation, as lower bound when validating user input in PF_PACKET. Clarify the definition to be maximum header length. For validation of untrusted headers, add an optional validate member to header_ops. Allow bypassing of validation by passing CAP_SYS_RAWIO, for instance for deliberate testing of corrupt input. In this case, pad trailing bytes, as some device drivers expect completely initialized headers. See also http://comments.gmane.org/gmane.linux.network/401064 Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3143c847bddb..04c068e55353 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -265,6 +265,7 @@ struct header_ops { void (*cache_update)(struct hh_cache *hh, const struct net_device *dev, const unsigned char *haddr); + bool (*validate)(const char *ll_header, unsigned int len); }; /* These flag bits are private to the generic network queueing @@ -1398,8 +1399,7 @@ enum netdev_priv_flags { * @dma: DMA channel * @mtu: Interface MTU value * @type: Interface hardware type - * @hard_header_len: Hardware header length, which means that this is the - * minimum size of a packet. + * @hard_header_len: Maximum hardware header length. * * @needed_headroom: Extra headroom the hardware may need, but not in all * cases can this be guaranteed @@ -2493,6 +2493,24 @@ static inline int dev_parse_header(const struct sk_buff *skb, return dev->header_ops->parse(skb, haddr); } +/* ll_header must have at least hard_header_len allocated */ +static inline bool dev_validate_header(const struct net_device *dev, + char *ll_header, int len) +{ + if (likely(len >= dev->hard_header_len)) + return true; + + if (capable(CAP_SYS_RAWIO)) { + memset(ll_header + len, 0, dev->hard_header_len - len); + return true; + } + + if (dev->header_ops && dev->header_ops->validate) + return dev->header_ops->validate(ll_header, len); + + return false; +} + typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len); int register_gifconf(unsigned int family, gifconf_func_t *gifconf); static inline int unregister_gifconf(unsigned int family) From abd42587771aff48c39312116e8b8db851ea0fa5 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 9 Mar 2016 21:58:33 -0500 Subject: [PATCH 353/797] ax25: add link layer header validation function [ Upstream commit ea47781c26510e5d97f80f9aceafe9065bd5e3aa ] As variable length protocol, AX25 fails link layer header validation tests based on a minimum length. header_ops.validate allows protocols to validate headers that are shorter than hard_header_len. Implement this callback for AX25. See also http://comments.gmane.org/gmane.linux.network/401064 Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ax25/ax25_ip.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index b563a3f5f2a8..2fa3be965101 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -228,8 +228,23 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) } #endif +static bool ax25_validate_header(const char *header, unsigned int len) +{ + ax25_digi digi; + + if (!len) + return false; + + if (header[0]) + return true; + + return ax25_addr_parse(header + 1, len - 1, NULL, NULL, &digi, NULL, + NULL); +} + const struct header_ops ax25_header_ops = { .create = ax25_hard_header, + .validate = ax25_validate_header, }; EXPORT_SYMBOL(ax25_header_ops); From edb60bc7bb4ee6e3862aa8840a65cce47e09dcfe Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 9 Mar 2016 21:58:34 -0500 Subject: [PATCH 354/797] packet: validate variable length ll headers [ Upstream commit 9ed988cd591500c040b2a6257bc68543e08ceeef ] Replace link layer header validation check ll_header_truncate with more generic dev_validate_header. Validation based on hard_header_len incorrectly drops valid packets in variable length protocols, such as AX25. dev_validate_header calls header_ops.validate for such protocols to ensure correctness below hard_header_len. See also http://comments.gmane.org/gmane.linux.network/401064 Fixes 9c7077622dd9 ("packet: make packet_snd fail on len smaller than l2 header") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 992396aa635c..da1ae0e13cb5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1916,6 +1916,10 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, goto retry; } + if (!dev_validate_header(dev, skb->data, len)) { + err = -EINVAL; + goto out_unlock; + } if (len > (dev->mtu + dev->hard_header_len + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { err = -EMSGSIZE; @@ -2326,18 +2330,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb) sock_wfree(skb); } -static bool ll_header_truncated(const struct net_device *dev, int len) -{ - /* net device doesn't like empty head */ - if (unlikely(len < dev->hard_header_len)) { - net_warn_ratelimited("%s: packet size is too short (%d < %d)\n", - current->comm, len, dev->hard_header_len); - return true; - } - - return false; -} - static void tpacket_set_protocol(const struct net_device *dev, struct sk_buff *skb) { @@ -2420,19 +2412,19 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, if (unlikely(err < 0)) return -EINVAL; } else if (dev->hard_header_len) { - if (ll_header_truncated(dev, tp_len)) - return -EINVAL; + int hdrlen = min_t(int, dev->hard_header_len, tp_len); skb_push(skb, dev->hard_header_len); - err = skb_store_bits(skb, 0, data, - dev->hard_header_len); + err = skb_store_bits(skb, 0, data, hdrlen); if (unlikely(err)) return err; + if (!dev_validate_header(dev, skb->data, hdrlen)) + return -EINVAL; if (!skb->protocol) tpacket_set_protocol(dev, skb); - data += dev->hard_header_len; - to_write -= dev->hard_header_len; + data += hdrlen; + to_write -= hdrlen; } offset = offset_in_page(data); @@ -2763,9 +2755,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); if (unlikely(offset < 0)) goto out_free; - } else { - if (ll_header_truncated(dev, len)) - goto out_free; } /* Returns -EFAULT on error */ @@ -2773,6 +2762,12 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (err) goto out_free; + if (sock->type == SOCK_RAW && + !dev_validate_header(dev, skb->data, len)) { + err = -EINVAL; + goto out_free; + } + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (!gso_type && (len > dev->mtu + reserve + extra_len) && From e8e43232627082328fa4016fab1960360360f167 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 9 Mar 2016 20:02:33 -0800 Subject: [PATCH 355/797] bpf: avoid copying junk bytes in bpf_get_current_comm() [ Upstream commit cdc4e47da8f4c32eeb6b2061a8a834f4362a12b7 ] Lots of places in the kernel use memcpy(buf, comm, TASK_COMM_LEN); but the result is typically passed to print("%s", buf) and extra bytes after zero don't cause any harm. In bpf the result of bpf_get_current_comm() is used as the part of map key and was causing spurious hash map mismatches. Use strlcpy() to guarantee zero-terminated string. bpf verifier checks that output buffer is zero-initialized, so even for short task names the output buffer don't have junk bytes. Note it's not a security concern, since kprobe+bpf is root only. Fixes: ffeedafbf023 ("bpf: introduce current->pid, tgid, uid, gid, comm accessors") Reported-by: Tobias Waldekranz Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 4504ca66118d..50da680c479f 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -166,7 +166,7 @@ static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5) if (!task) return -EINVAL; - memcpy(buf, task->comm, min_t(size_t, size, sizeof(task->comm))); + strlcpy(buf, task->comm, min_t(size_t, size, sizeof(task->comm))); return 0; } From a95fc0f757728d08acae89c6194f8ea9e89ec3fe Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 8 Mar 2016 01:36:28 +0300 Subject: [PATCH 356/797] sh_eth: fix NULL pointer dereference in sh_eth_ring_format() [ Upstream commit c1b7fca65070bfadca94dd53a4e6b71cd4f69715 ] In a low memory situation, if netdev_alloc_skb() fails on a first RX ring loop iteration in sh_eth_ring_format(), 'rxdesc' is still NULL. Avoid kernel oops by adding the 'rxdesc' check after the loop. Reported-by: Wolfram Sang Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/sh_eth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 6a8fc0f341ff..a1702f4ac6ff 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1212,7 +1212,8 @@ static void sh_eth_ring_format(struct net_device *ndev) mdp->dirty_rx = (u32) (i - mdp->num_rx_ring); /* Mark the last entry as wrapping the ring. */ - rxdesc->status |= cpu_to_edmac(mdp, RD_RDLE); + if (rxdesc) + rxdesc->status |= cpu_to_edmac(mdp, RD_RDLE); memset(mdp->tx_ring, 0, tx_ringsize); From 8352a292fe92ae2397b60701495b576e9afbc012 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 8 Mar 2016 01:37:09 +0300 Subject: [PATCH 357/797] sh_eth: advance 'rxdesc' later in sh_eth_ring_format() [ Upstream commit d0ba913488dc8c55d1880f5ed34f096dc45fb05d ] Iff dma_map_single() fails, 'rxdesc' should point to the last filled RX descriptor, so that it can be marked as the last one, however the driver would have already advanced it by that time. In order to fix that, only fill an RX descriptor once all the data for it is ready. Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/sh_eth.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index a1702f4ac6ff..36fc9427418f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1185,11 +1185,8 @@ static void sh_eth_ring_format(struct net_device *ndev) break; sh_eth_set_receive_align(skb); - /* RX descriptor */ - rxdesc = &mdp->rx_ring[i]; /* The size of the buffer is a multiple of 32 bytes. */ buf_len = ALIGN(mdp->rx_buf_sz, 32); - rxdesc->len = cpu_to_edmac(mdp, buf_len << 16); dma_addr = dma_map_single(&ndev->dev, skb->data, buf_len, DMA_FROM_DEVICE); if (dma_mapping_error(&ndev->dev, dma_addr)) { @@ -1197,6 +1194,10 @@ static void sh_eth_ring_format(struct net_device *ndev) break; } mdp->rx_skbuff[i] = skb; + + /* RX descriptor */ + rxdesc = &mdp->rx_ring[i]; + rxdesc->len = cpu_to_edmac(mdp, buf_len << 16); rxdesc->addr = cpu_to_edmac(mdp, dma_addr); rxdesc->status = cpu_to_edmac(mdp, RD_RACT | RD_RFP); From 12dd6d869b22ae114f81962e346cd5428b358b72 Mon Sep 17 00:00:00 2001 From: Rajesh Borundia Date: Tue, 8 Mar 2016 02:39:57 -0500 Subject: [PATCH 358/797] qlcnic: Remove unnecessary usage of atomic_t [ Upstream commit 5bf93251cee1fb66141d1d2eaff86e04a9397bdf ] o atomic_t usage is incorrect as we are not implementing any atomicity. Signed-off-by: Rajesh Borundia Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 2 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 46bbea8e023c..d18667b1b5b7 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -1099,7 +1099,7 @@ struct qlcnic_mailbox { unsigned long status; spinlock_t queue_lock; /* Mailbox queue lock */ spinlock_t aen_lock; /* Mailbox response/AEN lock */ - atomic_t rsp_status; + u32 rsp_status; u32 num_cmds; }; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index 37a731be7d39..e3d1bb722903 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -491,7 +491,7 @@ irqreturn_t qlcnic_83xx_clear_legacy_intr(struct qlcnic_adapter *adapter) static inline void qlcnic_83xx_notify_mbx_response(struct qlcnic_mailbox *mbx) { - atomic_set(&mbx->rsp_status, QLC_83XX_MBX_RESPONSE_ARRIVED); + mbx->rsp_status = QLC_83XX_MBX_RESPONSE_ARRIVED; complete(&mbx->completion); } @@ -510,7 +510,7 @@ static void qlcnic_83xx_poll_process_aen(struct qlcnic_adapter *adapter) if (event & QLCNIC_MBX_ASYNC_EVENT) { __qlcnic_83xx_process_aen(adapter); } else { - if (atomic_read(&mbx->rsp_status) != rsp_status) + if (mbx->rsp_status != rsp_status) qlcnic_83xx_notify_mbx_response(mbx); } out: @@ -1023,7 +1023,7 @@ static void qlcnic_83xx_process_aen(struct qlcnic_adapter *adapter) if (event & QLCNIC_MBX_ASYNC_EVENT) { __qlcnic_83xx_process_aen(adapter); } else { - if (atomic_read(&mbx->rsp_status) != rsp_status) + if (mbx->rsp_status != rsp_status) qlcnic_83xx_notify_mbx_response(mbx); } } @@ -4050,7 +4050,6 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work) struct qlcnic_adapter *adapter = mbx->adapter; const struct qlcnic_mbx_ops *mbx_ops = mbx->ops; struct device *dev = &adapter->pdev->dev; - atomic_t *rsp_status = &mbx->rsp_status; struct list_head *head = &mbx->cmd_q; struct qlcnic_hardware_context *ahw; struct qlcnic_cmd_args *cmd = NULL; @@ -4063,7 +4062,7 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work) return; } - atomic_set(rsp_status, QLC_83XX_MBX_RESPONSE_WAIT); + mbx->rsp_status = QLC_83XX_MBX_RESPONSE_WAIT; spin_lock(&mbx->queue_lock); From b39af5aa111dd05472c8b1b0d3f114f4e34f61fe Mon Sep 17 00:00:00 2001 From: Rajesh Borundia Date: Tue, 8 Mar 2016 02:39:58 -0500 Subject: [PATCH 359/797] qlcnic: Fix mailbox completion handling during spurious interrupt [ Upstream commit 819bfe764dceec2f6b4551768453f374b4c60443 ] o While the driver is in the middle of a MB completion processing and it receives a spurious MB interrupt, it is mistaken as a good MB completion interrupt leading to premature completion of the next MB request. Fix the driver to guard against this by checking the current state of MB processing and ignore the spurious interrupt. Also added a stats counter to record this condition. Signed-off-by: Rajesh Borundia Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 1 + .../net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 15 +++++++++++---- .../net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 3 ++- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index d18667b1b5b7..55007f1e6bbc 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -566,6 +566,7 @@ struct qlcnic_adapter_stats { u64 tx_dma_map_error; u64 spurious_intr; u64 mac_filter_limit_overrun; + u64 mbx_spurious_intr; }; /* diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index e3d1bb722903..f9640d5ce6ba 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -2338,9 +2338,9 @@ static void qlcnic_83xx_handle_link_aen(struct qlcnic_adapter *adapter, static irqreturn_t qlcnic_83xx_handle_aen(int irq, void *data) { + u32 mask, resp, event, rsp_status = QLC_83XX_MBX_RESPONSE_ARRIVED; struct qlcnic_adapter *adapter = data; struct qlcnic_mailbox *mbx; - u32 mask, resp, event; unsigned long flags; mbx = adapter->ahw->mailbox; @@ -2350,10 +2350,14 @@ static irqreturn_t qlcnic_83xx_handle_aen(int irq, void *data) goto out; event = readl(QLCNIC_MBX_FW(adapter->ahw, 0)); - if (event & QLCNIC_MBX_ASYNC_EVENT) + if (event & QLCNIC_MBX_ASYNC_EVENT) { __qlcnic_83xx_process_aen(adapter); - else - qlcnic_83xx_notify_mbx_response(mbx); + } else { + if (mbx->rsp_status != rsp_status) + qlcnic_83xx_notify_mbx_response(mbx); + else + adapter->stats.mbx_spurious_intr++; + } out: mask = QLCRDX(adapter->ahw, QLCNIC_DEF_INT_MASK); @@ -4053,6 +4057,7 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work) struct list_head *head = &mbx->cmd_q; struct qlcnic_hardware_context *ahw; struct qlcnic_cmd_args *cmd = NULL; + unsigned long flags; ahw = adapter->ahw; @@ -4062,7 +4067,9 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work) return; } + spin_lock_irqsave(&mbx->aen_lock, flags); mbx->rsp_status = QLC_83XX_MBX_RESPONSE_WAIT; + spin_unlock_irqrestore(&mbx->aen_lock, flags); spin_lock(&mbx->queue_lock); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index 494e8105adee..0a2318cad34d 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -59,7 +59,8 @@ static const struct qlcnic_stats qlcnic_gstrings_stats[] = { QLC_OFF(stats.mac_filter_limit_overrun)}, {"spurious intr", QLC_SIZEOF(stats.spurious_intr), QLC_OFF(stats.spurious_intr)}, - + {"mbx spurious intr", QLC_SIZEOF(stats.mbx_spurious_intr), + QLC_OFF(stats.mbx_spurious_intr)}, }; static const char qlcnic_device_gstrings_stats[][ETH_GSTRING_LEN] = { From a96f3553d5d1d6650f608f14162ed403d64e2b66 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 8 Mar 2016 15:18:54 -0500 Subject: [PATCH 360/797] macvtap: always pass ethernet header in linear [ Upstream commit 8e2ad4113ce4671686740f808ff2795395c39eef ] The stack expects link layer headers in the skb linear section. Macvtap can create skbs with llheader in frags in edge cases: when (IFF_VNET_HDR is off or vnet_hdr.hdr_len < ETH_HLEN) and prepad + len > PAGE_SIZE and vnet_hdr.flags has no or bad csum. Add checks to ensure linear is always at least ETH_HLEN. At this point, len is already ensured to be >= ETH_HLEN. For backwards compatiblity, rounds up short vnet_hdr.hdr_len. This differs from tap and packet, which return an error. Fixes b9fb9ee07e67 ("macvtap: add GSO/csum offload support") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvtap.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 0fc521941c71..159a68782bec 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -760,6 +760,8 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, macvtap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN; if (copylen > good_linear) copylen = good_linear; + else if (copylen < ETH_HLEN) + copylen = ETH_HLEN; linear = copylen; i = *from; iov_iter_advance(&i, copylen); @@ -769,10 +771,11 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (!zerocopy) { copylen = len; - if (macvtap16_to_cpu(q, vnet_hdr.hdr_len) > good_linear) + linear = macvtap16_to_cpu(q, vnet_hdr.hdr_len); + if (linear > good_linear) linear = good_linear; - else - linear = macvtap16_to_cpu(q, vnet_hdr.hdr_len); + else if (linear < ETH_HLEN) + linear = ETH_HLEN; } skb = macvtap_alloc_skb(&q->sk, MACVTAP_RESERVE, copylen, From 7d870cff8ece6088dd9e26f54a3fd1b4b899ddf9 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 8 Mar 2016 12:59:33 -0800 Subject: [PATCH 361/797] mlxsw: spectrum: Check requested ageing time is valid [ Upstream commit 869f63a4d28144c03c8f4a4c0d1e8f31f8c11a10 ] Commit c62987bbd8a1 ("bridge: push bridge setting ageing_time down to switchdev") added a check for minimum and maximum ageing time, but this breaks existing behaviour where one can set ageing time to 0 for a non-learning bridge. Push this check down to the driver and allow the check in the bridge layer to be removed. Currently ageing time 0 is refused by the driver, but we can later add support for this functionality. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 ++ drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 4365c8bccc6d..605f6410f867 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -61,6 +61,8 @@ struct mlxsw_sp { #define MLXSW_SP_DEFAULT_LEARNING_INTERVAL 100 unsigned int interval; /* ms */ } fdb_notify; +#define MLXSW_SP_MIN_AGEING_TIME 10 +#define MLXSW_SP_MAX_AGEING_TIME 1000000 #define MLXSW_SP_DEFAULT_AGEING_TIME 300 u32 ageing_time; struct { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 7dbeafa65934..d4c4c2b5156c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -232,8 +232,13 @@ static int mlxsw_sp_port_attr_br_ageing_set(struct mlxsw_sp_port *mlxsw_sp_port, unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock_t); u32 ageing_time = jiffies_to_msecs(ageing_jiffies) / 1000; - if (switchdev_trans_ph_prepare(trans)) - return 0; + if (switchdev_trans_ph_prepare(trans)) { + if (ageing_time < MLXSW_SP_MIN_AGEING_TIME || + ageing_time > MLXSW_SP_MAX_AGEING_TIME) + return -ERANGE; + else + return 0; + } return mlxsw_sp_ageing_set(mlxsw_sp, ageing_time); } From c3d8f507e7fedeeab81bd9dafa2d63d82be159a3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 8 Mar 2016 12:59:34 -0800 Subject: [PATCH 362/797] rocker: set FDB cleanup timer according to lowest ageing time [ Upstream commit 88de1cd457e5cb664d6d437e2ea4750d089165f5 ] In rocker, ageing time is a per-port attribute, so the next time the FDB cleanup timer fires should be set according to the lowest ageing time. This will later allow us to delete the BR_MIN_AGEING_TIME macro, which was added to guarantee minimum ageing time in the bridge layer, thereby breaking existing behavior. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/rocker/rocker.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 52ec3d6e056a..2b34622a4bfe 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -239,6 +239,7 @@ struct rocker { struct { u64 id; } hw; + unsigned long ageing_time; spinlock_t cmd_ring_lock; /* for cmd ring accesses */ struct rocker_dma_ring_info cmd_ring; struct rocker_dma_ring_info event_ring; @@ -3704,7 +3705,7 @@ static void rocker_fdb_cleanup(unsigned long data) struct rocker_port *rocker_port; struct rocker_fdb_tbl_entry *entry; struct hlist_node *tmp; - unsigned long next_timer = jiffies + BR_MIN_AGEING_TIME; + unsigned long next_timer = jiffies + rocker->ageing_time; unsigned long expires; unsigned long lock_flags; int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_REMOVE | @@ -4367,8 +4368,12 @@ static int rocker_port_bridge_ageing_time(struct rocker_port *rocker_port, struct switchdev_trans *trans, u32 ageing_time) { + struct rocker *rocker = rocker_port->rocker; + if (!switchdev_trans_ph_prepare(trans)) { rocker_port->ageing_time = clock_t_to_jiffies(ageing_time); + if (rocker_port->ageing_time < rocker->ageing_time) + rocker->ageing_time = rocker_port->ageing_time; mod_timer(&rocker_port->rocker->fdb_cleanup_timer, jiffies); } @@ -5206,10 +5211,13 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_init_tbls; } + rocker->ageing_time = BR_DEFAULT_AGEING_TIME; setup_timer(&rocker->fdb_cleanup_timer, rocker_fdb_cleanup, (unsigned long) rocker); mod_timer(&rocker->fdb_cleanup_timer, jiffies); + rocker->ageing_time = BR_DEFAULT_AGEING_TIME; + err = rocker_probe_ports(rocker); if (err) { dev_err(&pdev->dev, "failed to probe ports\n"); From acbea202fbba11c52df2fd4040c19bb796fd37fa Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 8 Mar 2016 12:59:35 -0800 Subject: [PATCH 363/797] bridge: allow zero ageing time [ Upstream commit 4c656c13b254d598e83e586b7b4d36a2043dad85 ] This fixes a regression in the bridge ageing time caused by: commit c62987bbd8a1 ("bridge: push bridge setting ageing_time down to switchdev") There are users of Linux bridge which use the feature that if ageing time is set to 0 it causes entries to never expire. See: https://www.linuxfoundation.org/collaborate/workgroups/networking/bridge For a pure software bridge, it is unnecessary for the code to have arbitrary restrictions on what values are allowable. Signed-off-by: Stephen Hemminger Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/if_bridge.h | 4 ---- net/bridge/br_stp.c | 11 ++++++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index a338a688ee4a..dcb89e3515db 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -46,10 +46,6 @@ struct br_ip_list { #define BR_LEARNING_SYNC BIT(9) #define BR_PROXYARP_WIFI BIT(10) -/* values as per ieee8021QBridgeFdbAgingTime */ -#define BR_MIN_AGEING_TIME (10 * HZ) -#define BR_MAX_AGEING_TIME (1000000 * HZ) - #define BR_DEFAULT_AGEING_TIME (300 * HZ) extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 5f3f64553179..0e658f47a5da 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -567,6 +567,14 @@ int br_set_max_age(struct net_bridge *br, unsigned long val) } +/* Set time interval that dynamic forwarding entries live + * For pure software bridge, allow values outside the 802.1 + * standard specification for special cases: + * 0 - entry never ages (all permanant) + * 1 - entry disappears (no persistance) + * + * Offloaded switch entries maybe more restrictive + */ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) { struct switchdev_attr attr = { @@ -577,9 +585,6 @@ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) unsigned long t = clock_t_to_jiffies(ageing_time); int err; - if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME) - return -ERANGE; - err = switchdev_port_attr_set(br->dev, &attr); if (err) return err; From 54789759917f127cfadcca730f44ea67d557a9b0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 13 Mar 2016 23:28:00 -0400 Subject: [PATCH 364/797] ipv4: Don't do expensive useless work during inetdev destroy. [ Upstream commit fbd40ea0180a2d328c5adc61414dc8bab9335ce2 ] When an inetdev is destroyed, every address assigned to the interface is removed. And in this scenerio we do two pointless things which can be very expensive if the number of assigned interfaces is large: 1) Address promotion. We are deleting all addresses, so there is no point in doing this. 2) A full nf conntrack table purge for every address. We only need to do this once, as is already caught by the existing masq_dev_notifier so masq_inet_event() can skip this. Reported-by: Solar Designer Signed-off-by: David S. Miller Tested-by: Cyrill Gorcunov Signed-off-by: Greg Kroah-Hartman --- net/ipv4/devinet.c | 4 ++++ net/ipv4/fib_frontend.c | 4 ++++ net/ipv4/netfilter/nf_nat_masquerade_ipv4.c | 12 ++++++++++-- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f6303b17546b..0212591b0077 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -334,6 +334,9 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, ASSERT_RTNL(); + if (in_dev->dead) + goto no_promotions; + /* 1. Deleting primary ifaddr forces deletion all secondaries * unless alias promotion is set **/ @@ -380,6 +383,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, fib_del_ifaddr(ifa, ifa1); } +no_promotions: /* 2. Unlink it */ *ifap = ifa1->ifa_next; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 473447593060..21add552e56a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -922,6 +922,9 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) subnet = 1; } + if (in_dev->dead) + goto no_promotions; + /* Deletion is more complicated than add. * We should take care of not to delete too much :-) * @@ -997,6 +1000,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) } } +no_promotions: if (!(ok & BRD_OK)) fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); if (subnet && ifa->ifa_prefixlen < 31) { diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c index c6eb42100e9a..ea91058b5f6f 100644 --- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c @@ -108,10 +108,18 @@ static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; + struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev; struct netdev_notifier_info info; - netdev_notifier_info_init(&info, dev); + /* The masq_dev_notifier will catch the case of the device going + * down. So if the inetdev is dead and being destroyed we have + * no work to do. Otherwise this is an individual address removal + * and we have to perform the flush. + */ + if (idev->dead) + return NOTIFY_DONE; + + netdev_notifier_info_init(&info, idev->dev); return masq_device_event(this, event, &info); } From 405f10a39443ae9ccacf51f18511dfc827e09108 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 14 Mar 2016 09:56:35 -0300 Subject: [PATCH 365/797] net: Fix use after free in the recvmmsg exit path [ Upstream commit 34b88a68f26a75e4fded796f1a49c40f82234b7d ] The syzkaller fuzzer hit the following use-after-free: Call Trace: [] __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:295 [] __sys_recvmmsg+0x6fa/0x7f0 net/socket.c:2261 [< inline >] SYSC_recvmmsg net/socket.c:2281 [] SyS_recvmmsg+0x16f/0x180 net/socket.c:2270 [] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 And, as Dmitry rightly assessed, that is because we can drop the reference and then touch it when the underlying recvmsg calls return some packets and then hit an error, which will make recvmmsg to set sock->sk->sk_err, oops, fix it. Reported-and-Tested-by: Dmitry Vyukov Cc: Alexander Potapenko Cc: Eric Dumazet Cc: Kostya Serebryany Cc: Sasha Levin Fixes: a2e2725541fa ("net: Introduce recvmmsg socket syscall") http://lkml.kernel.org/r/20160122211644.GC2470@redhat.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/socket.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/net/socket.c b/net/socket.c index d730ef9dfbf0..263b334ec5e4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2238,31 +2238,31 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, break; } + if (err == 0) + goto out_put; + + if (datagrams == 0) { + datagrams = err; + goto out_put; + } + + /* + * We may return less entries than requested (vlen) if the + * sock is non block and there aren't enough datagrams... + */ + if (err != -EAGAIN) { + /* + * ... or if recvmsg returns an error after we + * received some datagrams, where we record the + * error to return on the next call or if the + * app asks about it using getsockopt(SO_ERROR). + */ + sock->sk->sk_err = -err; + } out_put: fput_light(sock->file, fput_needed); - if (err == 0) - return datagrams; - - if (datagrams != 0) { - /* - * We may return less entries than requested (vlen) if the - * sock is non block and there aren't enough datagrams... - */ - if (err != -EAGAIN) { - /* - * ... or if recvmsg returns an error after we - * received some datagrams, where we record the - * error to return on the next call or if the - * app asks about it using getsockopt(SO_ERROR). - */ - sock->sk->sk_err = -err; - } - - return datagrams; - } - - return err; + return datagrams; } SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, From 13684fe9dc61c38b4241474ea4f9e28a59c9518c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 14 Mar 2016 15:18:34 +0100 Subject: [PATCH 366/797] mlx4: add missing braces in verify_qp_parameters [ Upstream commit baefd7015cdb304ce6c94f9679d0486c71954766 ] The implementation of QP paravirtualization back in linux-3.7 included some code that looks very dubious, and gcc-6 has grown smart enough to warn about it: drivers/net/ethernet/mellanox/mlx4/resource_tracker.c: In function 'verify_qp_parameters': drivers/net/ethernet/mellanox/mlx4/resource_tracker.c:3154:5: error: statement is indented as if it were guarded by... [-Werror=misleading-indentation] if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { ^~ drivers/net/ethernet/mellanox/mlx4/resource_tracker.c:3144:4: note: ...this 'if' clause, but it is not if (slave != mlx4_master_func_num(dev)) >From looking at the context, I'm reasonably sure that the indentation is correct but that it should have contained curly braces from the start, as the update_gid() function in the same patch correctly does. Signed-off-by: Arnd Bergmann Fixes: 54679e148287 ("mlx4: Implement QP paravirtualization and maintain phys_pkey_cache for smp_snoop") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index cad6c44df91c..d314d96dcb1c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -3132,7 +3132,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev, case QP_TRANS_RTS2RTS: case QP_TRANS_SQD2SQD: case QP_TRANS_SQD2RTS: - if (slave != mlx4_master_func_num(dev)) + if (slave != mlx4_master_func_num(dev)) { if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) @@ -3151,6 +3151,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev, if (qp_ctx->alt_path.mgid_index >= num_gids) return -EINVAL; } + } break; default: break; From 6e6ede49a9bdc8e6762216fe1760c4183791676c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 14 Mar 2016 15:18:35 +0100 Subject: [PATCH 367/797] farsync: fix off-by-one bug in fst_add_one [ Upstream commit e725a66c0202b5f36c2f9d59d26a65c53bbf21f7 ] gcc-6 finds an out of bounds access in the fst_add_one function when calculating the end of the mmio area: drivers/net/wan/farsync.c: In function 'fst_add_one': drivers/net/wan/farsync.c:418:53: error: index 2 denotes an offset greater than size of 'u8[2][8192] {aka unsigned char[2][8192]}' [-Werror=array-bounds] #define BUF_OFFSET(X) (BFM_BASE + offsetof(struct buf_window, X)) ^ include/linux/compiler-gcc.h:158:21: note: in definition of macro '__compiler_offsetof' __builtin_offsetof(a, b) ^ drivers/net/wan/farsync.c:418:37: note: in expansion of macro 'offsetof' #define BUF_OFFSET(X) (BFM_BASE + offsetof(struct buf_window, X)) ^~~~~~~~ drivers/net/wan/farsync.c:2519:36: note: in expansion of macro 'BUF_OFFSET' + BUF_OFFSET ( txBuffer[i][NUM_TX_BUFFER][0]); ^~~~~~~~~~ The warning is correct, but not critical because this appears to be a write-only variable that is set by each WAN driver but never accessed afterwards. I'm taking the minimal fix here, using the correct pointer by pointing 'mem_end' to the last byte inside of the register area as all other WAN drivers do, rather than the first byte outside of it. An alternative would be to just remove the mem_end member entirely. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/wan/farsync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 44541dbc5c28..69b994f3b8c5 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -2516,7 +2516,7 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->mem_start = card->phys_mem + BUF_OFFSET ( txBuffer[i][0][0]); dev->mem_end = card->phys_mem - + BUF_OFFSET ( txBuffer[i][NUM_TX_BUFFER][0]); + + BUF_OFFSET ( txBuffer[i][NUM_TX_BUFFER - 1][LEN_RX_BUFFER - 1]); dev->base_addr = card->pci_conf; dev->irq = card->irq; From a317579bb62ec6c1cb6bd7e5d0d8a25a746832f2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 14 Mar 2016 15:18:36 +0100 Subject: [PATCH 368/797] ath9k: fix buffer overrun for ar9287 [ Upstream commit 83d6f1f15f8cce844b0a131cbc63e444620e48b5 ] Code that was added back in 2.6.38 has an obvious overflow when accessing a static array, and at the time it was added only a code comment was put in front of it as a reminder to have it reviewed properly. This has not happened, but gcc-6 now points to the specific overflow: drivers/net/wireless/ath/ath9k/eeprom.c: In function 'ath9k_hw_get_gain_boundaries_pdadcs': drivers/net/wireless/ath/ath9k/eeprom.c:483:44: error: array subscript is above array bounds [-Werror=array-bounds] maxPwrT4[i] = data_9287[idxL].pwrPdg[i][4]; ~~~~~~~~~~~~~~~~~~~~~~~~~^~~ It turns out that the correct array length exists in the local 'intercepts' variable of this function, so we can just use that instead of hardcoding '4', so this patch changes all three instances to use that variable. The other two instances were already correct, but it's more consistent this way. Signed-off-by: Arnd Bergmann Fixes: 940cd2c12ebf ("ath9k_hw: merge the ar9287 version of ath9k_hw_get_gain_boundaries_pdadcs") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/eeprom.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/eeprom.c b/drivers/net/wireless/ath/ath9k/eeprom.c index cc81482c934d..113a43fca9cf 100644 --- a/drivers/net/wireless/ath/ath9k/eeprom.c +++ b/drivers/net/wireless/ath/ath9k/eeprom.c @@ -403,10 +403,9 @@ void ath9k_hw_get_gain_boundaries_pdadcs(struct ath_hw *ah, if (match) { if (AR_SREV_9287(ah)) { - /* FIXME: array overrun? */ for (i = 0; i < numXpdGains; i++) { minPwrT4[i] = data_9287[idxL].pwrPdg[i][0]; - maxPwrT4[i] = data_9287[idxL].pwrPdg[i][4]; + maxPwrT4[i] = data_9287[idxL].pwrPdg[i][intercepts - 1]; ath9k_hw_fill_vpd_table(minPwrT4[i], maxPwrT4[i], data_9287[idxL].pwrPdg[i], data_9287[idxL].vpdPdg[i], @@ -416,7 +415,7 @@ void ath9k_hw_get_gain_boundaries_pdadcs(struct ath_hw *ah, } else if (eeprom_4k) { for (i = 0; i < numXpdGains; i++) { minPwrT4[i] = data_4k[idxL].pwrPdg[i][0]; - maxPwrT4[i] = data_4k[idxL].pwrPdg[i][4]; + maxPwrT4[i] = data_4k[idxL].pwrPdg[i][intercepts - 1]; ath9k_hw_fill_vpd_table(minPwrT4[i], maxPwrT4[i], data_4k[idxL].pwrPdg[i], data_4k[idxL].vpdPdg[i], @@ -426,7 +425,7 @@ void ath9k_hw_get_gain_boundaries_pdadcs(struct ath_hw *ah, } else { for (i = 0; i < numXpdGains; i++) { minPwrT4[i] = data_def[idxL].pwrPdg[i][0]; - maxPwrT4[i] = data_def[idxL].pwrPdg[i][4]; + maxPwrT4[i] = data_def[idxL].pwrPdg[i][intercepts - 1]; ath9k_hw_fill_vpd_table(minPwrT4[i], maxPwrT4[i], data_def[idxL].pwrPdg[i], data_def[idxL].vpdPdg[i], From 029464a380858e54ab750a5a536a0bdcd7180b1f Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 14 Mar 2016 21:17:16 +0100 Subject: [PATCH 369/797] ppp: ensure file->private_data can't be overridden [ Upstream commit e8e56ffd9d2973398b60ece1f1bebb8d67b4d032 ] Locking ppp_mutex must be done before dereferencing file->private_data, otherwise it could be modified before ppp_unattached_ioctl() takes the lock. This could lead ppp_unattached_ioctl() to override ->private_data, thus leaking reference to the ppp_file previously pointed to. v2: lock all ppp_ioctl() instead of just checking private_data in ppp_unattached_ioctl(), to avoid ambiguous behaviour. Fixes: f3ff8a4d80e8 ("ppp: push BKL down into the driver") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/ppp_generic.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 40b303ed63b7..35e8b5a6fd93 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -567,7 +567,7 @@ static int get_filter(void __user *arg, struct sock_filter **p) static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct ppp_file *pf = file->private_data; + struct ppp_file *pf; struct ppp *ppp; int err = -EFAULT, val, val2, i; struct ppp_idle idle; @@ -577,9 +577,14 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) void __user *argp = (void __user *)arg; int __user *p = argp; - if (!pf) - return ppp_unattached_ioctl(current->nsproxy->net_ns, - pf, file, cmd, arg); + mutex_lock(&ppp_mutex); + + pf = file->private_data; + if (!pf) { + err = ppp_unattached_ioctl(current->nsproxy->net_ns, + pf, file, cmd, arg); + goto out; + } if (cmd == PPPIOCDETACH) { /* @@ -594,7 +599,6 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) * this fd and reopening /dev/ppp. */ err = -EINVAL; - mutex_lock(&ppp_mutex); if (pf->kind == INTERFACE) { ppp = PF_TO_PPP(pf); rtnl_lock(); @@ -608,15 +612,13 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } else pr_warn("PPPIOCDETACH file->f_count=%ld\n", atomic_long_read(&file->f_count)); - mutex_unlock(&ppp_mutex); - return err; + goto out; } if (pf->kind == CHANNEL) { struct channel *pch; struct ppp_channel *chan; - mutex_lock(&ppp_mutex); pch = PF_TO_CHANNEL(pf); switch (cmd) { @@ -638,17 +640,16 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) err = chan->ops->ioctl(chan, cmd, arg); up_read(&pch->chan_sem); } - mutex_unlock(&ppp_mutex); - return err; + goto out; } if (pf->kind != INTERFACE) { /* can't happen */ pr_err("PPP: not interface or channel??\n"); - return -EINVAL; + err = -EINVAL; + goto out; } - mutex_lock(&ppp_mutex); ppp = PF_TO_PPP(pf); switch (cmd) { case PPPIOCSMRU: @@ -823,7 +824,10 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) default: err = -ENOTTY; } + +out: mutex_unlock(&ppp_mutex); + return err; } @@ -836,7 +840,6 @@ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf, struct ppp_net *pn; int __user *p = (int __user *)arg; - mutex_lock(&ppp_mutex); switch (cmd) { case PPPIOCNEWUNIT: /* Create a new ppp unit */ @@ -886,7 +889,7 @@ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf, default: err = -ENOTTY; } - mutex_unlock(&ppp_mutex); + return err; } From bd33d14acf43bdb040f203555b13765cd2b23d9e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Mar 2016 22:52:15 -0700 Subject: [PATCH 370/797] tcp/dccp: remove obsolete WARN_ON() in icmp handlers [ Upstream commit e316ea62e3203d524ff0239a40c56d3a39ad1b5c ] Now SYN_RECV request sockets are installed in ehash table, an ICMP handler can find a request socket while another cpu handles an incoming packet transforming this SYN_RECV request socket into an ESTABLISHED socket. We need to remove the now obsolete WARN_ON(req->sk), since req->sk is set when a new child is created and added into listener accept queue. If this race happens, the ICMP will do nothing special. Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet Reported-by: Ben Lazarus Reported-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ipv4.c | 2 -- net/ipv4/tcp_ipv4.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 902d606324a0..8be8f27bfacc 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -204,8 +204,6 @@ void dccp_req_err(struct sock *sk, u64 seq) * ICMPs are not backlogged, hence we cannot get an established * socket here. */ - WARN_ON(req->sk); - if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); } else { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8c7e63163e92..048418b049d8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -320,8 +320,6 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort) /* ICMPs are not backlogged, hence we cannot get * an established socket here. */ - WARN_ON(req->sk); - if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); } else if (abort) { From a5ce25f61bce0ffb1ff59071c06b948277c90a28 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Tue, 15 Mar 2016 07:13:45 -0400 Subject: [PATCH 371/797] qlge: Fix receive packets drop. [ Upstream commit 2c9a266afefe137bff06bbe0fc48b4d3b3cb348c ] When running small packets [length < 256 bytes] traffic, packets were being dropped due to invalid data in those packets which were delivered by the driver upto the stack. Using pci_dma_sync_single_for_cpu ensures copying latest and updated data into skb from the receive buffer. Signed-off-by: Sony Chacko Signed-off-by: Manish Chopra Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 997976426799..b28e73ea2c25 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -1648,7 +1648,18 @@ static void ql_process_mac_rx_skb(struct ql_adapter *qdev, return; } skb_reserve(new_skb, NET_IP_ALIGN); + + pci_dma_sync_single_for_cpu(qdev->pdev, + dma_unmap_addr(sbq_desc, mapaddr), + dma_unmap_len(sbq_desc, maplen), + PCI_DMA_FROMDEVICE); + memcpy(skb_put(new_skb, length), skb->data, length); + + pci_dma_sync_single_for_device(qdev->pdev, + dma_unmap_addr(sbq_desc, mapaddr), + dma_unmap_len(sbq_desc, maplen), + PCI_DMA_FROMDEVICE); skb = new_skb; /* Frame error, so drop the packet. */ From 7a0e9a08642993bfde0dd03a5a3f825869cc4d06 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Mar 2016 11:57:06 -0700 Subject: [PATCH 372/797] net: bcmgenet: fix dma api length mismatch [ Upstream commit eee577232203842b4dcadb7ab477a298479633ed ] When un-mapping skb->data in __bcmgenet_tx_reclaim(), we must use the length that was used in original dma_map_single(), instead of skb->len that might be bigger (includes the frags) We simply can store skb_len into tx_cb_ptr->dma_len and use it at unmap time. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Eric Dumazet Acked-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 17f017ab4dac..0fb3f8de88e9 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1197,7 +1197,7 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, dev->stats.tx_bytes += tx_cb_ptr->skb->len; dma_unmap_single(&dev->dev, dma_unmap_addr(tx_cb_ptr, dma_addr), - tx_cb_ptr->skb->len, + dma_unmap_len(tx_cb_ptr, dma_len), DMA_TO_DEVICE); bcmgenet_free_cb(tx_cb_ptr); } else if (dma_unmap_addr(tx_cb_ptr, dma_addr)) { @@ -1308,7 +1308,7 @@ static int bcmgenet_xmit_single(struct net_device *dev, } dma_unmap_addr_set(tx_cb_ptr, dma_addr, mapping); - dma_unmap_len_set(tx_cb_ptr, dma_len, skb->len); + dma_unmap_len_set(tx_cb_ptr, dma_len, skb_len); length_status = (skb_len << DMA_BUFLENGTH_SHIFT) | dma_desc_flags | (priv->hw_params->qtag_mask << DMA_TX_QTAG_SHIFT) | DMA_TX_APPEND_CRC; From 8178211eb7948b40b1f730e2d0b9b0a7a2ed62d1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Mar 2016 17:23:36 -0700 Subject: [PATCH 373/797] bonding: fix bond_get_stats() [ Upstream commit fe30937b65354c7fec244caebbdaae68e28ca797 ] bond_get_stats() can be called from rtnetlink (with RTNL held) or from /proc/net/dev seq handler (with RCU held) The logic added in commit 5f0c5f73e5ef ("bonding: make global bonding stats more reliable") kind of assumed only one cpu could run there. If multiple threads are reading /proc/net/dev, stats can be really messed up after a while. A second problem is that some fields are 32bit, so we need to properly handle the wrap around problem. Given that RTNL is not always held, we need to use bond_for_each_slave_rcu(). Fixes: 5f0c5f73e5ef ("bonding: make global bonding stats more reliable") Signed-off-by: Eric Dumazet Cc: Andy Gospodarek Cc: Jay Vosburgh Cc: Veaceslav Falico Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 63 ++++++++++++++++++--------------- include/net/bonding.h | 1 + 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 28bbca0af238..b3d70a7a5262 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3260,6 +3260,30 @@ static int bond_close(struct net_device *bond_dev) return 0; } +/* fold stats, assuming all rtnl_link_stats64 fields are u64, but + * that some drivers can provide 32bit values only. + */ +static void bond_fold_stats(struct rtnl_link_stats64 *_res, + const struct rtnl_link_stats64 *_new, + const struct rtnl_link_stats64 *_old) +{ + const u64 *new = (const u64 *)_new; + const u64 *old = (const u64 *)_old; + u64 *res = (u64 *)_res; + int i; + + for (i = 0; i < sizeof(*_res) / sizeof(u64); i++) { + u64 nv = new[i]; + u64 ov = old[i]; + + /* detects if this particular field is 32bit only */ + if (((nv | ov) >> 32) == 0) + res[i] += (u32)nv - (u32)ov; + else + res[i] += nv - ov; + } +} + static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 *stats) { @@ -3268,43 +3292,23 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, struct list_head *iter; struct slave *slave; + spin_lock(&bond->stats_lock); memcpy(stats, &bond->bond_stats, sizeof(*stats)); - bond_for_each_slave(bond, slave, iter) { - const struct rtnl_link_stats64 *sstats = + rcu_read_lock(); + bond_for_each_slave_rcu(bond, slave, iter) { + const struct rtnl_link_stats64 *new = dev_get_stats(slave->dev, &temp); - struct rtnl_link_stats64 *pstats = &slave->slave_stats; - stats->rx_packets += sstats->rx_packets - pstats->rx_packets; - stats->rx_bytes += sstats->rx_bytes - pstats->rx_bytes; - stats->rx_errors += sstats->rx_errors - pstats->rx_errors; - stats->rx_dropped += sstats->rx_dropped - pstats->rx_dropped; - - stats->tx_packets += sstats->tx_packets - pstats->tx_packets;; - stats->tx_bytes += sstats->tx_bytes - pstats->tx_bytes; - stats->tx_errors += sstats->tx_errors - pstats->tx_errors; - stats->tx_dropped += sstats->tx_dropped - pstats->tx_dropped; - - stats->multicast += sstats->multicast - pstats->multicast; - stats->collisions += sstats->collisions - pstats->collisions; - - stats->rx_length_errors += sstats->rx_length_errors - pstats->rx_length_errors; - stats->rx_over_errors += sstats->rx_over_errors - pstats->rx_over_errors; - stats->rx_crc_errors += sstats->rx_crc_errors - pstats->rx_crc_errors; - stats->rx_frame_errors += sstats->rx_frame_errors - pstats->rx_frame_errors; - stats->rx_fifo_errors += sstats->rx_fifo_errors - pstats->rx_fifo_errors; - stats->rx_missed_errors += sstats->rx_missed_errors - pstats->rx_missed_errors; - - stats->tx_aborted_errors += sstats->tx_aborted_errors - pstats->tx_aborted_errors; - stats->tx_carrier_errors += sstats->tx_carrier_errors - pstats->tx_carrier_errors; - stats->tx_fifo_errors += sstats->tx_fifo_errors - pstats->tx_fifo_errors; - stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors - pstats->tx_heartbeat_errors; - stats->tx_window_errors += sstats->tx_window_errors - pstats->tx_window_errors; + bond_fold_stats(stats, new, &slave->slave_stats); /* save off the slave stats for the next run */ - memcpy(pstats, sstats, sizeof(*sstats)); + memcpy(&slave->slave_stats, new, sizeof(*new)); } + rcu_read_unlock(); + memcpy(&bond->bond_stats, stats, sizeof(*stats)); + spin_unlock(&bond->stats_lock); return stats; } @@ -4118,6 +4122,7 @@ void bond_setup(struct net_device *bond_dev) struct bonding *bond = netdev_priv(bond_dev); spin_lock_init(&bond->mode_lock); + spin_lock_init(&bond->stats_lock); bond->params = bonding_defaults; /* Initialize pointers */ diff --git a/include/net/bonding.h b/include/net/bonding.h index c1740a2794a3..93abe5f6188d 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -214,6 +214,7 @@ struct bonding { * ALB mode (6) - to sync the use and modifications of its hash table */ spinlock_t mode_lock; + spinlock_t stats_lock; u8 send_peer_notif; u8 igmp_retrans; #ifdef CONFIG_PROC_FS From 2ddb181390475f4902406baa008c220f39aeaa69 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 22 Mar 2016 09:19:38 +0100 Subject: [PATCH 374/797] ipv4: fix broadcast packets reception [ Upstream commit ad0ea1989cc4d5905941d0a9e62c63ad6d859cef ] Currently, ingress ipv4 broadcast datagrams are dropped since, in udp_v4_early_demux(), ip_check_mc_rcu() is invoked even on bcast packets. This patch addresses the issue, invoking ip_check_mc_rcu() only for mcast packets. Fixes: 6e5403093261 ("ipv4/udp: Verify multicast group is ours in upd_v4_early_demux()") Signed-off-by: Paolo Abeni Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7f8ab46adf61..21fbb54f11d0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1990,10 +1990,14 @@ void udp_v4_early_demux(struct sk_buff *skb) if (!in_dev) return; - ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, - iph->protocol); - if (!ours) - return; + /* we are supposed to accept bcast packets */ + if (skb->pkt_type == PACKET_MULTICAST) { + ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, + iph->protocol); + if (!ours) + return; + } + sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, dif); } else if (skb->pkt_type == PACKET_HOST) { From 80de2e4115130a392dd528fe023a2508c15617a4 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Tue, 22 Mar 2016 14:56:57 -0400 Subject: [PATCH 375/797] ipv4: initialize flowi4_flags before calling fib_lookup() [ Upstream commit 4cfc86f3dae6ca38ed49cdd78f458a03d4d87992 ] Field fl4.flowi4_flags is not initialized in fib_compute_spec_dst() before calling fib_lookup(), which means fib_table_lookup() is using non-deterministic data at this line: if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { Fix by initializing the entire fl4 structure, which will prevent similar issues as fields are added in the future by ensuring that all fields are initialized to zero unless explicitly initialized to another value. Fixes: 58189ca7b2741 ("net: Fix vti use case with oif in dst lookups") Suggested-by: David Ahern Signed-off-by: Lance Richardson Acked-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_frontend.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 21add552e56a..8a9246deccfe 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -280,7 +280,6 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) struct in_device *in_dev; struct fib_result res; struct rtable *rt; - struct flowi4 fl4; struct net *net; int scope; @@ -296,14 +295,13 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) scope = RT_SCOPE_UNIVERSE; if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { - fl4.flowi4_oif = 0; - fl4.flowi4_iif = LOOPBACK_IFINDEX; - fl4.daddr = ip_hdr(skb)->saddr; - fl4.saddr = 0; - fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); - fl4.flowi4_scope = scope; - fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; - fl4.flowi4_tun_key.tun_id = 0; + struct flowi4 fl4 = { + .flowi4_iif = LOOPBACK_IFINDEX, + .daddr = ip_hdr(skb)->saddr, + .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), + .flowi4_scope = scope, + .flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0, + }; if (!fib_lookup(net, &fl4, &res, 0)) return FIB_RES_PREFSRC(net, res); } else { From 046ea8180ecaf5d8b5823e17714a09526ad7d321 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 23 Mar 2016 16:38:55 +0100 Subject: [PATCH 376/797] ppp: take reference on channels netns [ Upstream commit 1f461dcdd296eecedaffffc6bae2bfa90bd7eb89 ] Let channels hold a reference on their network namespace. Some channel types, like ppp_async and ppp_synctty, can have their userspace controller running in a different namespace. Therefore they can't rely on them to preclude their netns from being removed from under them. ================================================================== BUG: KASAN: use-after-free in ppp_unregister_channel+0x372/0x3a0 at addr ffff880064e217e0 Read of size 8 by task syz-executor/11581 ============================================================================= BUG net_namespace (Not tainted): kasan: bad access detected ----------------------------------------------------------------------------- Disabling lock debugging due to kernel taint INFO: Allocated in copy_net_ns+0x6b/0x1a0 age=92569 cpu=3 pid=6906 [< none >] ___slab_alloc+0x4c7/0x500 kernel/mm/slub.c:2440 [< none >] __slab_alloc+0x4c/0x90 kernel/mm/slub.c:2469 [< inline >] slab_alloc_node kernel/mm/slub.c:2532 [< inline >] slab_alloc kernel/mm/slub.c:2574 [< none >] kmem_cache_alloc+0x23a/0x2b0 kernel/mm/slub.c:2579 [< inline >] kmem_cache_zalloc kernel/include/linux/slab.h:597 [< inline >] net_alloc kernel/net/core/net_namespace.c:325 [< none >] copy_net_ns+0x6b/0x1a0 kernel/net/core/net_namespace.c:360 [< none >] create_new_namespaces+0x2f6/0x610 kernel/kernel/nsproxy.c:95 [< none >] copy_namespaces+0x297/0x320 kernel/kernel/nsproxy.c:150 [< none >] copy_process.part.35+0x1bf4/0x5760 kernel/kernel/fork.c:1451 [< inline >] copy_process kernel/kernel/fork.c:1274 [< none >] _do_fork+0x1bc/0xcb0 kernel/kernel/fork.c:1723 [< inline >] SYSC_clone kernel/kernel/fork.c:1832 [< none >] SyS_clone+0x37/0x50 kernel/kernel/fork.c:1826 [< none >] entry_SYSCALL_64_fastpath+0x16/0x7a kernel/arch/x86/entry/entry_64.S:185 INFO: Freed in net_drop_ns+0x67/0x80 age=575 cpu=2 pid=2631 [< none >] __slab_free+0x1fc/0x320 kernel/mm/slub.c:2650 [< inline >] slab_free kernel/mm/slub.c:2805 [< none >] kmem_cache_free+0x2a0/0x330 kernel/mm/slub.c:2814 [< inline >] net_free kernel/net/core/net_namespace.c:341 [< none >] net_drop_ns+0x67/0x80 kernel/net/core/net_namespace.c:348 [< none >] cleanup_net+0x4e5/0x600 kernel/net/core/net_namespace.c:448 [< none >] process_one_work+0x794/0x1440 kernel/kernel/workqueue.c:2036 [< none >] worker_thread+0xdb/0xfc0 kernel/kernel/workqueue.c:2170 [< none >] kthread+0x23f/0x2d0 kernel/drivers/block/aoe/aoecmd.c:1303 [< none >] ret_from_fork+0x3f/0x70 kernel/arch/x86/entry/entry_64.S:468 INFO: Slab 0xffffea0001938800 objects=3 used=0 fp=0xffff880064e20000 flags=0x5fffc0000004080 INFO: Object 0xffff880064e20000 @offset=0 fp=0xffff880064e24200 CPU: 1 PID: 11581 Comm: syz-executor Tainted: G B 4.4.0+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014 00000000ffffffff ffff8800662c7790 ffffffff8292049d ffff88003e36a300 ffff880064e20000 ffff880064e20000 ffff8800662c77c0 ffffffff816f2054 ffff88003e36a300 ffffea0001938800 ffff880064e20000 0000000000000000 Call Trace: [< inline >] __dump_stack kernel/lib/dump_stack.c:15 [] dump_stack+0x6f/0xa2 kernel/lib/dump_stack.c:50 [] print_trailer+0xf4/0x150 kernel/mm/slub.c:654 [] object_err+0x2f/0x40 kernel/mm/slub.c:661 [< inline >] print_address_description kernel/mm/kasan/report.c:138 [] kasan_report_error+0x215/0x530 kernel/mm/kasan/report.c:236 [< inline >] kasan_report kernel/mm/kasan/report.c:259 [] __asan_report_load8_noabort+0x3e/0x40 kernel/mm/kasan/report.c:280 [< inline >] ? ppp_pernet kernel/include/linux/compiler.h:218 [] ? ppp_unregister_channel+0x372/0x3a0 kernel/drivers/net/ppp/ppp_generic.c:2392 [< inline >] ppp_pernet kernel/include/linux/compiler.h:218 [] ppp_unregister_channel+0x372/0x3a0 kernel/drivers/net/ppp/ppp_generic.c:2392 [< inline >] ? ppp_pernet kernel/drivers/net/ppp/ppp_generic.c:293 [] ? ppp_unregister_channel+0xe6/0x3a0 kernel/drivers/net/ppp/ppp_generic.c:2392 [] ppp_asynctty_close+0xa3/0x130 kernel/drivers/net/ppp/ppp_async.c:241 [] ? async_lcp_peek+0x5b0/0x5b0 kernel/drivers/net/ppp/ppp_async.c:1000 [] tty_ldisc_close.isra.1+0x99/0xe0 kernel/drivers/tty/tty_ldisc.c:478 [] tty_ldisc_kill+0x40/0x170 kernel/drivers/tty/tty_ldisc.c:744 [] tty_ldisc_release+0x1b3/0x260 kernel/drivers/tty/tty_ldisc.c:772 [] tty_release+0xac1/0x13e0 kernel/drivers/tty/tty_io.c:1901 [] ? release_tty+0x320/0x320 kernel/drivers/tty/tty_io.c:1688 [] __fput+0x236/0x780 kernel/fs/file_table.c:208 [] ____fput+0x15/0x20 kernel/fs/file_table.c:244 [] task_work_run+0x16b/0x200 kernel/kernel/task_work.c:115 [< inline >] exit_task_work kernel/include/linux/task_work.h:21 [] do_exit+0x8b5/0x2c60 kernel/kernel/exit.c:750 [] ? debug_check_no_locks_freed+0x290/0x290 kernel/kernel/locking/lockdep.c:4123 [] ? mm_update_next_owner+0x6f0/0x6f0 kernel/kernel/exit.c:357 [] ? __dequeue_signal+0x136/0x470 kernel/kernel/signal.c:550 [] ? recalc_sigpending_tsk+0x13b/0x180 kernel/kernel/signal.c:145 [] do_group_exit+0x108/0x330 kernel/kernel/exit.c:880 [] get_signal+0x5e4/0x14f0 kernel/kernel/signal.c:2307 [< inline >] ? kretprobe_table_lock kernel/kernel/kprobes.c:1113 [] ? kprobe_flush_task+0xb5/0x450 kernel/kernel/kprobes.c:1158 [] do_signal+0x83/0x1c90 kernel/arch/x86/kernel/signal.c:712 [] ? recycle_rp_inst+0x310/0x310 kernel/include/linux/list.h:655 [] ? setup_sigcontext+0x780/0x780 kernel/arch/x86/kernel/signal.c:165 [] ? finish_task_switch+0x424/0x5f0 kernel/kernel/sched/core.c:2692 [< inline >] ? finish_lock_switch kernel/kernel/sched/sched.h:1099 [] ? finish_task_switch+0x120/0x5f0 kernel/kernel/sched/core.c:2678 [< inline >] ? context_switch kernel/kernel/sched/core.c:2807 [] ? __schedule+0x919/0x1bd0 kernel/kernel/sched/core.c:3283 [] exit_to_usermode_loop+0xf1/0x1a0 kernel/arch/x86/entry/common.c:247 [< inline >] prepare_exit_to_usermode kernel/arch/x86/entry/common.c:282 [] syscall_return_slowpath+0x19f/0x210 kernel/arch/x86/entry/common.c:344 [] int_ret_from_sys_call+0x25/0x9f kernel/arch/x86/entry/entry_64.S:281 Memory state around the buggy address: ffff880064e21680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff880064e21700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff880064e21780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff880064e21800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff880064e21880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fixes: 273ec51dd7ce ("net: ppp_generic - introduce net-namespace functionality v2") Reported-by: Baozeng Ding Signed-off-by: Guillaume Nault Reviewed-by: Cyrill Gorcunov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/ppp_generic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 35e8b5a6fd93..174e06ec7c2f 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -2293,7 +2293,7 @@ int ppp_register_net_channel(struct net *net, struct ppp_channel *chan) pch->ppp = NULL; pch->chan = chan; - pch->chan_net = net; + pch->chan_net = get_net(net); chan->ppp = pch; init_ppp_file(&pch->file, CHANNEL); pch->file.hdrlen = chan->hdrlen; @@ -2390,6 +2390,8 @@ ppp_unregister_channel(struct ppp_channel *chan) spin_lock_bh(&pn->all_channels_lock); list_del(&pch->list); spin_unlock_bh(&pn->all_channels_lock); + put_net(pch->chan_net); + pch->chan_net = NULL; pch->file.dead = 1; wake_up_interruptible(&pch->file.rwait); From 759e8f3896d4e7b6d8f374216ab3ae8191e22213 Mon Sep 17 00:00:00 2001 From: "subashab@codeaurora.org" Date: Wed, 23 Mar 2016 22:39:50 -0600 Subject: [PATCH 377/797] xfrm: Fix crash observed during device unregistration and decryption [ Upstream commit 071d36bf21bcc837be00cea55bcef8d129e7f609 ] A crash is observed when a decrypted packet is processed in receive path. get_rps_cpus() tries to dereference the skb->dev fields but it appears that the device is freed from the poison pattern. [] get_rps_cpu+0x94/0x2f0 [] netif_rx_internal+0x140/0x1cc [] netif_rx+0x74/0x94 [] xfrm_input+0x754/0x7d0 [] xfrm_input_resume+0x10/0x1c [] esp_input_done+0x20/0x30 [] process_one_work+0x244/0x3fc [] worker_thread+0x2f8/0x418 [] kthread+0xe0/0xec -013|get_rps_cpu( | dev = 0xFFFFFFC08B688000, | skb = 0xFFFFFFC0C76AAC00 -> ( | dev = 0xFFFFFFC08B688000 -> ( | name = "...................................................... | name_hlist = (next = 0xAAAAAAAAAAAAAAAA, pprev = 0xAAAAAAAAAAA Following are the sequence of events observed - - Encrypted packet in receive path from netdevice is queued - Encrypted packet queued for decryption (asynchronous) - Netdevice brought down and freed - Packet is decrypted and returned through callback in esp_input_done - Packet is queued again for process in network stack using netif_rx Since the device appears to have been freed, the dereference of skb->dev in get_rps_cpus() leads to an unhandled page fault exception. Fix this by holding on to device reference when queueing packets asynchronously and releasing the reference on call back return. v2: Make the change generic to xfrm as mentioned by Steffen and update the title to xfrm Suggested-by: Herbert Xu Signed-off-by: Jerome Stanislaus Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_input.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index ad7f5b3f9b61..1c4ad477ce93 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -292,12 +292,15 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) XFRM_SKB_CB(skb)->seq.input.hi = seq_hi; skb_dst_force(skb); + dev_hold(skb->dev); nexthdr = x->type->input(x, skb); if (nexthdr == -EINPROGRESS) return 0; resume: + dev_put(skb->dev); + spin_lock(&x->lock); if (nexthdr <= 0) { if (nexthdr == -EBADMSG) { From 9603d0a58d3069a1ec9fc94090d470ae520118f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Mon, 28 Mar 2016 22:38:16 +0200 Subject: [PATCH 378/797] qmi_wwan: add "D-Link DWM-221 B1" device id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e84810c7b85a2d7897797b3ad3e879168a8e032a ] Thomas reports: "Windows: 00 diagnostics 01 modem 02 at-port 03 nmea 04 nic Linux: T: Bus=02 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2001 ProdID=7e19 Rev=02.32 S: Manufacturer=Mobile Connect S: Product=Mobile Connect S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 5 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage" Reported-by: Thomas Schäfer Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index df77467c7e93..a34f491224c1 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -699,6 +699,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x1426, 2)}, /* ZTE MF91 */ {QMI_FIXED_INTF(0x19d2, 0x1428, 2)}, /* Telewell TW-LTE 4G v2 */ {QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */ + {QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */ {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */ {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ {QMI_FIXED_INTF(0x1199, 0x68a2, 8)}, /* Sierra Wireless MC7710 in QMI mode */ From 26dd42ebff94ff481af56704cd1b4dd32ca8579f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Mar 2016 08:43:41 -0700 Subject: [PATCH 379/797] ipv6: udp: fix UDP_MIB_IGNOREDMULTI updates [ Upstream commit 2d4212261fdf13e29728ddb5ea9d60c342cc92b5 ] IPv6 counters updates use a different macro than IPv4. Fixes: 36cbb2452cbaf ("udp: Increment UDP_MIB_IGNOREDMULTI for arriving unmatched multicasts") Signed-off-by: Eric Dumazet Cc: Rick Jones Cc: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1e293a552693..6665e1a0bfe1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -837,8 +837,8 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, flush_stack(stack, count, skb, count - 1); } else { if (!inner_flushed) - UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); + UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); consume_skb(skb); } return 0; From df371b19630cf045e00f1b09a721bb7103266796 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Tue, 29 Mar 2016 18:48:08 +0800 Subject: [PATCH 380/797] bridge: Allow set bridge ageing time when switchdev disabled [ Upstream commit 5e263f712691615fb802f06c98d7638c378f5d11 ] When NET_SWITCHDEV=n, switchdev_port_attr_set will return -EOPNOTSUPP, we should ignore this error code and continue to set the ageing time. Fixes: c62987bbd8a1 ("bridge: push bridge setting ageing_time down to switchdev") Signed-off-by: Haishuang Yan Acked-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_stp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 0e658f47a5da..eff69cb270d2 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -586,7 +586,7 @@ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) int err; err = switchdev_port_attr_set(br->dev, &attr); - if (err) + if (err && err != -EOPNOTSUPP) return err; br->ageing_time = t; From 18baf0e01eefd27156d8a8fc5ade1ad8930a1b3d Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 31 Mar 2016 18:10:31 +0200 Subject: [PATCH 381/797] rtnl: fix msg size calculation in if_nlmsg_size() [ Upstream commit c57c7a95da842807b475b823ed2e5435c42cb3b0 ] Size of the attribute IFLA_PHYS_PORT_NAME was missing. Fixes: db24a9044ee1 ("net: add support for phys_port_name") CC: David Ahern Signed-off-by: Nicolas Dichtel Acked-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 34ba7a08876d..ca966f7de351 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -905,6 +905,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ } From e137eeb38d2431ded3ec1aff84183258f1dd4162 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Mar 2016 02:13:18 +0200 Subject: [PATCH 382/797] tun, bpf: fix suspicious RCU usage in tun_{attach, detach}_filter [ Upstream commit 5a5abb1fa3b05dd6aa821525832644c1e7d2905f ] Sasha Levin reported a suspicious rcu_dereference_protected() warning found while fuzzing with trinity that is similar to this one: [ 52.765684] net/core/filter.c:2262 suspicious rcu_dereference_protected() usage! [ 52.765688] other info that might help us debug this: [ 52.765695] rcu_scheduler_active = 1, debug_locks = 1 [ 52.765701] 1 lock held by a.out/1525: [ 52.765704] #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x17/0x20 [ 52.765721] stack backtrace: [ 52.765728] CPU: 1 PID: 1525 Comm: a.out Not tainted 4.5.0+ #264 [...] [ 52.765768] Call Trace: [ 52.765775] [] dump_stack+0x85/0xc8 [ 52.765784] [] lockdep_rcu_suspicious+0xd5/0x110 [ 52.765792] [] sk_detach_filter+0x82/0x90 [ 52.765801] [] tun_detach_filter+0x35/0x90 [tun] [ 52.765810] [] __tun_chr_ioctl+0x354/0x1130 [tun] [ 52.765818] [] ? selinux_file_ioctl+0x130/0x210 [ 52.765827] [] tun_chr_ioctl+0x13/0x20 [tun] [ 52.765834] [] do_vfs_ioctl+0x96/0x690 [ 52.765843] [] ? security_file_ioctl+0x43/0x60 [ 52.765850] [] SyS_ioctl+0x79/0x90 [ 52.765858] [] do_syscall_64+0x62/0x140 [ 52.765866] [] entry_SYSCALL64_slow_path+0x25/0x25 Same can be triggered with PROVE_RCU (+ PROVE_RCU_REPEATEDLY) enabled from tun_attach_filter() when user space calls ioctl(tun_fd, TUN{ATTACH, DETACH}FILTER, ...) for adding/removing a BPF filter on tap devices. Since the fix in f91ff5b9ff52 ("net: sk_{detach|attach}_filter() rcu fixes") sk_attach_filter()/sk_detach_filter() now dereferences the filter with rcu_dereference_protected(), checking whether socket lock is held in control path. Since its introduction in 994051625981 ("tun: socket filter support"), tap filters are managed under RTNL lock from __tun_chr_ioctl(). Thus the sock_owned_by_user(sk) doesn't apply in this specific case and therefore triggers the false positive. Extend the BPF API with __sk_attach_filter()/__sk_detach_filter() pair that is used by tap filters and pass in lockdep_rtnl_is_held() for the rcu_dereference_protected() checks instead. Reported-by: Sasha Levin Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 8 +++++--- include/linux/filter.h | 4 ++++ net/core/filter.c | 33 +++++++++++++++++++++------------ 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index f0db770e8b2f..9bc7b0c7d471 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -621,7 +621,8 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte /* Re-attach the filter to persist device */ if (!skip_filter && (tun->filter_attached == true)) { - err = sk_attach_filter(&tun->fprog, tfile->socket.sk); + err = __sk_attach_filter(&tun->fprog, tfile->socket.sk, + lockdep_rtnl_is_held()); if (!err) goto out; } @@ -1804,7 +1805,7 @@ static void tun_detach_filter(struct tun_struct *tun, int n) for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); - sk_detach_filter(tfile->socket.sk); + __sk_detach_filter(tfile->socket.sk, lockdep_rtnl_is_held()); } tun->filter_attached = false; @@ -1817,7 +1818,8 @@ static int tun_attach_filter(struct tun_struct *tun) for (i = 0; i < tun->numqueues; i++) { tfile = rtnl_dereference(tun->tfiles[i]); - ret = sk_attach_filter(&tun->fprog, tfile->socket.sk); + ret = __sk_attach_filter(&tun->fprog, tfile->socket.sk, + lockdep_rtnl_is_held()); if (ret) { tun_detach_filter(tun, i); return ret; diff --git a/include/linux/filter.h b/include/linux/filter.h index 5972ffe5719a..5110d4211866 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -446,8 +446,12 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); +int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, + bool locked); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); +int __sk_detach_filter(struct sock *sk, bool locked); + int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/net/core/filter.c b/net/core/filter.c index 37157c4c1a78..f393a22b9d50 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1139,7 +1139,8 @@ void bpf_prog_destroy(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(bpf_prog_destroy); -static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) +static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk, + bool locked) { struct sk_filter *fp, *old_fp; @@ -1155,10 +1156,8 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) return -ENOMEM; } - old_fp = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); + old_fp = rcu_dereference_protected(sk->sk_filter, locked); rcu_assign_pointer(sk->sk_filter, fp); - if (old_fp) sk_filter_uncharge(sk, old_fp); @@ -1175,7 +1174,8 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) * occurs or there is insufficient memory for the filter a negative * errno code is returned. On success the return is zero. */ -int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, + bool locked) { unsigned int fsize = bpf_classic_proglen(fprog); unsigned int bpf_fsize = bpf_prog_size(fprog->len); @@ -1213,7 +1213,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - err = __sk_attach_prog(prog, sk); + err = __sk_attach_prog(prog, sk, locked); if (err < 0) { __bpf_prog_release(prog); return err; @@ -1221,7 +1221,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) return 0; } -EXPORT_SYMBOL_GPL(sk_attach_filter); +EXPORT_SYMBOL_GPL(__sk_attach_filter); + +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +{ + return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk)); +} int sk_attach_bpf(u32 ufd, struct sock *sk) { @@ -1240,7 +1245,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) return -EINVAL; } - err = __sk_attach_prog(prog, sk); + err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk)); if (err < 0) { bpf_prog_put(prog); return err; @@ -1913,7 +1918,7 @@ static int __init register_sk_filter_ops(void) } late_initcall(register_sk_filter_ops); -int sk_detach_filter(struct sock *sk) +int __sk_detach_filter(struct sock *sk, bool locked) { int ret = -ENOENT; struct sk_filter *filter; @@ -1921,8 +1926,7 @@ int sk_detach_filter(struct sock *sk) if (sock_flag(sk, SOCK_FILTER_LOCKED)) return -EPERM; - filter = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); + filter = rcu_dereference_protected(sk->sk_filter, locked); if (filter) { RCU_INIT_POINTER(sk->sk_filter, NULL); sk_filter_uncharge(sk, filter); @@ -1931,7 +1935,12 @@ int sk_detach_filter(struct sock *sk) return ret; } -EXPORT_SYMBOL_GPL(sk_detach_filter); +EXPORT_SYMBOL_GPL(__sk_detach_filter); + +int sk_detach_filter(struct sock *sk) +{ + return __sk_detach_filter(sk, sock_owned_by_user(sk)); +} int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) From 9daaadbe7ba903615811fdad3e50150eef8e222e Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 8 Apr 2016 13:26:48 +0800 Subject: [PATCH 383/797] tuntap: restore default qdisc [ Upstream commit 016adb7260f481168c03e09f785184d6d5278894 ] After commit f84bb1eac027 ("net: fix IFF_NO_QUEUE for drivers using alloc_netdev"), default qdisc was changed to noqueue because tuntap does not set tx_queue_len during .setup(). This patch restores default qdisc by setting tx_queue_len in tun_setup(). Fixes: f84bb1eac027 ("net: fix IFF_NO_QUEUE for drivers using alloc_netdev") Cc: Phil Sutter Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Acked-by: Phil Sutter Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9bc7b0c7d471..2d186bd66d43 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1001,7 +1001,6 @@ static void tun_net_init(struct net_device *dev) /* Zero header length */ dev->type = ARPHRD_NONE; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; - dev->tx_queue_len = TUN_READQ_SIZE; /* We prefer our own queue length */ break; case IFF_TAP: @@ -1013,7 +1012,6 @@ static void tun_net_init(struct net_device *dev) eth_hw_addr_random(dev); - dev->tx_queue_len = TUN_READQ_SIZE; /* We prefer our own queue length */ break; } } @@ -1464,6 +1462,8 @@ static void tun_setup(struct net_device *dev) dev->ethtool_ops = &tun_ethtool_ops; dev->destructor = tun_free_netdev; + /* We prefer our own queue length */ + dev->tx_queue_len = TUN_READQ_SIZE; } /* Trivial set of netlink ops to allow deleting tun or tap From ad730152036610d28f6f47326393aae3044e4d2f Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Sun, 3 Apr 2016 22:09:23 +0800 Subject: [PATCH 384/797] ipv4: l2tp: fix a potential issue in l2tp_ip_recv [ Upstream commit 5745b8232e942abd5e16e85fa9b27cc21324acf0 ] pskb_may_pull() can change skb->data, so we have to load ptr/optr at the right place. Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ip.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index ec22078b0914..42de4ccd159f 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -123,12 +123,11 @@ static int l2tp_ip_recv(struct sk_buff *skb) struct l2tp_tunnel *tunnel = NULL; int length; - /* Point to L2TP header */ - optr = ptr = skb->data; - if (!pskb_may_pull(skb, 4)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; session_id = ntohl(*((__be32 *) ptr)); ptr += 4; @@ -156,6 +155,9 @@ static int l2tp_ip_recv(struct sk_buff *skb) if (!pskb_may_pull(skb, length)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; + ptr += 4; pr_debug("%s: ip recv\n", tunnel->name); print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } From 9bd8af9979f543fb605fbfb79e0e8ceeffb88c9c Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Sun, 3 Apr 2016 22:09:24 +0800 Subject: [PATCH 385/797] ipv6: l2tp: fix a potential issue in l2tp_ip6_recv [ Upstream commit be447f305494e019dfc37ea4cdf3b0e4200b4eba ] pskb_may_pull() can change skb->data, so we have to load ptr/optr at the right place. Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ip6.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index a2c8747d2936..9ee4ddb6b397 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -135,12 +135,11 @@ static int l2tp_ip6_recv(struct sk_buff *skb) struct l2tp_tunnel *tunnel = NULL; int length; - /* Point to L2TP header */ - optr = ptr = skb->data; - if (!pskb_may_pull(skb, 4)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; session_id = ntohl(*((__be32 *) ptr)); ptr += 4; @@ -168,6 +167,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb) if (!pskb_may_pull(skb, length)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; + ptr += 4; pr_debug("%s: ip recv\n", tunnel->name); print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } From 4f4de9ab660c0dca4030b74613d8ac3cea5747c9 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 1 Apr 2016 17:17:50 -0300 Subject: [PATCH 386/797] ip6_tunnel: set rtnl_link_ops before calling register_netdevice [ Upstream commit b6ee376cb0b7fb4e7e07d6cd248bd40436fb9ba6 ] When creating an ip6tnl tunnel with ip tunnel, rtnl_link_ops is not set before ip6_tnl_create2 is called. When register_netdevice is called, there is no linkinfo attribute in the NEWLINK message because of that. Setting rtnl_link_ops before calling register_netdevice fixes that. Fixes: 0b112457229d ("ip6tnl: add support of link creation via rtnl") Signed-off-by: Thadeu Lima de Souza Cascardo Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 6c5dfec7a377..3991b21e24ad 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -343,12 +343,12 @@ static int ip6_tnl_create2(struct net_device *dev) t = netdev_priv(dev); + dev->rtnl_link_ops = &ip6_link_ops; err = register_netdevice(dev); if (err < 0) goto out; strcpy(t->parms.name, dev->name); - dev->rtnl_link_ops = &ip6_link_ops; dev_hold(dev); ip6_tnl_link(ip6n, t); From 5598928f39fc1ffd86b43444c50b378fd08a449e Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 5 Apr 2016 18:41:08 +0200 Subject: [PATCH 387/797] ipv6: Count in extension headers in skb->network_header [ Upstream commit 3ba3458fb9c050718b95275a3310b74415e767e2 ] When sending a UDPv6 message longer than MTU, account for the length of fragmentable IPv6 extension headers in skb->network_header offset. Same as we do in alloc_new_skb path in __ip6_append_data(). This ensures that later on __ip6_make_skb() will make space in headroom for fragmentable extension headers: /* move skb->data to ip header from ext header */ if (skb->data < skb_network_header(skb)) __skb_pull(skb, skb_network_offset(skb)); Prevents a splat due to skb_under_panic: skbuff: skb_under_panic: text:ffffffff8143397b len:2126 put:14 \ head:ffff880005bacf50 data:ffff880005bacf4a tail:0x48 end:0xc0 dev:lo ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:104! invalid opcode: 0000 [#1] KASAN CPU: 0 PID: 160 Comm: reproducer Not tainted 4.6.0-rc2 #65 [...] Call Trace: [] skb_push+0x79/0x80 [] eth_header+0x2b/0x100 [] neigh_resolve_output+0x210/0x310 [] ip6_finish_output2+0x4a7/0x7c0 [] ip6_output+0x16a/0x280 [] ip6_local_out+0xb1/0xf0 [] ip6_send_skb+0x45/0xd0 [] udp_v6_send_skb+0x246/0x5d0 [] udpv6_sendmsg+0xa6e/0x1090 [...] Reported-by: Ji Jianwen Signed-off-by: Jakub Sitnicki Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 31144c486c52..a175152d3e46 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1091,8 +1091,8 @@ static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, - int transhdrlen, int mtu, unsigned int flags, - const struct flowi6 *fl6) + int exthdrlen, int transhdrlen, int mtu, + unsigned int flags, const struct flowi6 *fl6) { struct sk_buff *skb; @@ -1117,7 +1117,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_put(skb, fragheaderlen + transhdrlen); /* initialize network header pointer */ - skb_reset_network_header(skb); + skb_set_network_header(skb, exthdrlen); /* initialize protocol header pointer */ skb->transport_header = skb->network_header + fragheaderlen; @@ -1359,7 +1359,7 @@ static int __ip6_append_data(struct sock *sk, (rt->dst.dev->features & NETIF_F_UFO) && (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, - hh_len, fragheaderlen, + hh_len, fragheaderlen, exthdrlen, transhdrlen, mtu, flags, fl6); if (err) goto error; From c1ea2d028ffb5b2aeaf183ac3207992f168c51a2 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 7 Apr 2016 21:28:38 -0700 Subject: [PATCH 388/797] mpls: find_outdev: check for err ptr in addition to NULL check [ Upstream commit 94a57f1f8a9de90ab4b0f8748361ff8be706c80c ] find_outdev calls inet{,6}_fib_lookup_dev() or dev_get_by_index() to find the output device. In case of an error, inet{,6}_fib_lookup_dev() returns error pointer and dev_get_by_index() returns NULL. But the function only checks for NULL and thus can end up calling dev_put on an ERR_PTR. This patch adds an additional check for err ptr after the NULL check. Before: Trying to add an mpls route with no oif from user, no available path to 10.1.1.8 and no default route: $ip -f mpls route add 100 as 200 via inet 10.1.1.8 [ 822.337195] BUG: unable to handle kernel NULL pointer dereference at 00000000000003a3 [ 822.340033] IP: [] mpls_nh_assign_dev+0x10b/0x182 [ 822.340033] PGD 1db38067 PUD 1de9e067 PMD 0 [ 822.340033] Oops: 0000 [#1] SMP [ 822.340033] Modules linked in: [ 822.340033] CPU: 0 PID: 11148 Comm: ip Not tainted 4.5.0-rc7+ #54 [ 822.340033] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5.1-0-g8936dbb-20141113_115728-nilsson.home.kraxel.org 04/01/2014 [ 822.340033] task: ffff88001db82580 ti: ffff88001dad4000 task.ti: ffff88001dad4000 [ 822.340033] RIP: 0010:[] [] mpls_nh_assign_dev+0x10b/0x182 [ 822.340033] RSP: 0018:ffff88001dad7a88 EFLAGS: 00010282 [ 822.340033] RAX: ffffffffffffff9b RBX: ffffffffffffff9b RCX: 0000000000000002 [ 822.340033] RDX: 00000000ffffff9b RSI: 0000000000000008 RDI: 0000000000000000 [ 822.340033] RBP: ffff88001ddc9ea0 R08: ffff88001e9f1768 R09: 0000000000000000 [ 822.340033] R10: ffff88001d9c1100 R11: ffff88001e3c89f0 R12: ffffffff8187e0c0 [ 822.340033] R13: ffffffff8187e0c0 R14: ffff88001ddc9e80 R15: 0000000000000004 [ 822.340033] FS: 00007ff9ed798700(0000) GS:ffff88001fc00000(0000) knlGS:0000000000000000 [ 822.340033] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 822.340033] CR2: 00000000000003a3 CR3: 000000001de89000 CR4: 00000000000006f0 [ 822.340033] Stack: [ 822.340033] 0000000000000000 0000000100000000 0000000000000000 0000000000000000 [ 822.340033] 0000000000000000 0801010a00000000 0000000000000000 0000000000000000 [ 822.340033] 0000000000000004 ffffffff8148749b ffffffff8187e0c0 000000000000001c [ 822.340033] Call Trace: [ 822.340033] [] ? mpls_rt_alloc+0x2b/0x3e [ 822.340033] [] ? mpls_rtm_newroute+0x358/0x3e2 [ 822.340033] [] ? get_page+0x5/0xa [ 822.340033] [] ? rtnetlink_rcv_msg+0x17e/0x191 [ 822.340033] [] ? __kmalloc_track_caller+0x8c/0x9e [ 822.340033] [] ? rht_key_hashfn.isra.20.constprop.57+0x14/0x1f [ 822.340033] [] ? __rtnl_unlock+0xc/0xc [ 822.340033] [] ? netlink_rcv_skb+0x36/0x82 [ 822.340033] [] ? rtnetlink_rcv+0x1f/0x28 [ 822.340033] [] ? netlink_unicast+0x106/0x189 [ 822.340033] [] ? netlink_sendmsg+0x27f/0x2c8 [ 822.340033] [] ? sock_sendmsg_nosec+0x10/0x1b [ 822.340033] [] ? ___sys_sendmsg+0x182/0x1e3 [ 822.340033] [] ? __alloc_pages_nodemask+0x11c/0x1e4 [ 822.340033] [] ? PageAnon+0x5/0xd [ 822.340033] [] ? __page_set_anon_rmap+0x45/0x52 [ 822.340033] [] ? get_page+0x5/0xa [ 822.340033] [] ? __lru_cache_add+0x1a/0x3a [ 822.340033] [] ? current_kernel_time64+0x9/0x30 [ 822.340033] [] ? __sys_sendmsg+0x3c/0x5a [ 822.340033] [] ? entry_SYSCALL_64_fastpath+0x12/0x6a [ 822.340033] Code: 83 08 04 00 00 65 ff 00 48 8b 3c 24 e8 40 7c f2 ff eb 13 48 c7 c3 9f ff ff ff eb 0f 89 ce e8 f1 ae f1 ff 48 89 c3 48 85 db 74 15 <48> 8b 83 08 04 00 00 65 ff 08 48 81 fb 00 f0 ff ff 76 0d eb 07 [ 822.340033] RIP [] mpls_nh_assign_dev+0x10b/0x182 [ 822.340033] RSP [ 822.340033] CR2: 00000000000003a3 [ 822.435363] ---[ end trace 98cc65e6f6b8bf11 ]--- After patch: $ip -f mpls route add 100 as 200 via inet 10.1.1.8 RTNETLINK answers: Network is unreachable Signed-off-by: Roopa Prabhu Reported-by: David Miller Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mpls/af_mpls.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index c32fc411a911..881bc2072809 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -518,6 +518,9 @@ static struct net_device *find_outdev(struct net *net, if (!dev) return ERR_PTR(-ENODEV); + if (IS_ERR(dev)) + return dev; + /* The caller is holding rtnl anyways, so release the dev reference */ dev_put(dev); From 4337f6e548c1203c626740639b6826981d26beee Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Apr 2016 12:27:08 +0200 Subject: [PATCH 389/797] USB: uas: Limit qdepth at the scsi-host level commit 198de51dbc3454d95b015ca0a055b673f85f01bb upstream. Commit 64d513ac31bd ("scsi: use host wide tags by default") causes the SCSI core to queue more commands then we can handle on devices with multiple LUNs, limit the queue depth at the scsi-host level instead of per slave to fix this. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1315013 Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index ce0cd6e20d4f..31872bcb0ad8 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -800,7 +800,6 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_BROKEN_FUA) sdev->broken_fua = 1; - scsi_change_queue_depth(sdev, devinfo->qdepth - 2); return 0; } @@ -932,6 +931,12 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) if (result) goto set_alt0; + /* + * 1 tag is reserved for untagged commands + + * 1 tag to avoid off by one errors in some bridge firmwares + */ + shost->can_queue = devinfo->qdepth - 2; + usb_set_intfdata(intf, shost); result = scsi_add_host(shost, &intf->dev); if (result) From f9a6b3caddf3ab9b9b490648018c8b02de2171f2 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Apr 2016 12:27:09 +0200 Subject: [PATCH 390/797] USB: uas: Add a new NO_REPORT_LUNS quirk commit 1363074667a6b7d0507527742ccd7bbed5e3ceaa upstream. Add a new NO_REPORT_LUNS quirk and set it for Seagate drives with an usb-id of: 0bc2:331a, as these will fail to respond to a REPORT_LUNS command. Reported-and-tested-by: David Webb Signed-off-by: Hans de Goede Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 2 ++ drivers/usb/storage/uas.c | 14 +++++++++++++- drivers/usb/storage/unusual_uas.h | 7 +++++++ drivers/usb/storage/usb.c | 5 ++++- include/linux/usb_usual.h | 2 ++ 5 files changed, 28 insertions(+), 2 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 742f69d18fc8..0e4102ae1a61 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3928,6 +3928,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. sector if the number is odd); i = IGNORE_DEVICE (don't bind to this device); + j = NO_REPORT_LUNS (don't use report luns + command, uas only); l = NOT_LOCKABLE (don't try to lock and unlock ejectable media); m = MAX_SECTORS_64 (don't transfer more diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 31872bcb0ad8..9baf081174ce 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -2,7 +2,7 @@ * USB Attached SCSI * Note that this is not the same as the USB Mass Storage driver * - * Copyright Hans de Goede for Red Hat, Inc. 2013 - 2014 + * Copyright Hans de Goede for Red Hat, Inc. 2013 - 2016 * Copyright Matthew Wilcox for Intel Corp, 2010 * Copyright Sarah Sharp for Intel Corp, 2010 * @@ -757,6 +757,17 @@ static int uas_eh_bus_reset_handler(struct scsi_cmnd *cmnd) return SUCCESS; } +static int uas_target_alloc(struct scsi_target *starget) +{ + struct uas_dev_info *devinfo = (struct uas_dev_info *) + dev_to_shost(starget->dev.parent)->hostdata; + + if (devinfo->flags & US_FL_NO_REPORT_LUNS) + starget->no_report_luns = 1; + + return 0; +} + static int uas_slave_alloc(struct scsi_device *sdev) { struct uas_dev_info *devinfo = @@ -807,6 +818,7 @@ static struct scsi_host_template uas_host_template = { .module = THIS_MODULE, .name = "uas", .queuecommand = uas_queuecommand, + .target_alloc = uas_target_alloc, .slave_alloc = uas_slave_alloc, .slave_configure = uas_slave_configure, .eh_abort_handler = uas_eh_abort_handler, diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index ccc113e83d88..53341a77d89f 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -64,6 +64,13 @@ UNUSUAL_DEV(0x0bc2, 0x3312, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_ATA_1X), +/* Reported-by: David Webb */ +UNUSUAL_DEV(0x0bc2, 0x331a, 0x0000, 0x9999, + "Seagate", + "Expansion Desk", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_REPORT_LUNS), + /* Reported-by: Hans de Goede */ UNUSUAL_DEV(0x0bc2, 0x3320, 0x0000, 0x9999, "Seagate", diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 43576ed31ccd..9de988a0f856 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -482,7 +482,7 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags) US_FL_NO_READ_DISC_INFO | US_FL_NO_READ_CAPACITY_16 | US_FL_INITIAL_READ10 | US_FL_WRITE_CACHE | US_FL_NO_ATA_1X | US_FL_NO_REPORT_OPCODES | - US_FL_MAX_SECTORS_240); + US_FL_MAX_SECTORS_240 | US_FL_NO_REPORT_LUNS); p = quirks; while (*p) { @@ -532,6 +532,9 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags) case 'i': f |= US_FL_IGNORE_DEVICE; break; + case 'j': + f |= US_FL_NO_REPORT_LUNS; + break; case 'l': f |= US_FL_NOT_LOCKABLE; break; diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 7f5f78bd15ad..245f57dbbb61 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -79,6 +79,8 @@ /* Cannot handle MI_REPORT_SUPPORTED_OPERATION_CODES */ \ US_FLAG(MAX_SECTORS_240, 0x08000000) \ /* Sets max_sectors to 240 */ \ + US_FLAG(NO_REPORT_LUNS, 0x10000000) \ + /* Cannot handle REPORT_LUNS */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; From ac1373d660d5ceb65ee20e3fd9de727579efe98f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 7 Jan 2016 11:01:00 +0100 Subject: [PATCH 391/797] cdc-acm: fix NULL pointer reference commit 29c6dd591bbd592472247441de9fa694acdabae8 upstream. The union descriptor must be checked. Its usage was conditional before the parser was introduced. This is important, because many RNDIS device, which also use the common parser, have bogus extra descriptors. Signed-off-by: Oliver Neukum Tested-by: Vasily Galkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/cdc_ether.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 3da70bf9936a..7cba2c3759df 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -160,6 +160,12 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf) info->u = header.usb_cdc_union_desc; info->header = header.usb_cdc_header_desc; info->ether = header.usb_cdc_ether_desc; + if (!info->u) { + if (rndis) + goto skip; + else /* in that case a quirk is mandatory */ + goto bad_desc; + } /* we need a master/control interface (what we're * probed with) and a slave/data interface; union * descriptors sort this all out. @@ -256,7 +262,7 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf) goto bad_desc; } - } else if (!info->header || !info->u || (!rndis && !info->ether)) { + } else if (!info->header || (!rndis && !info->ether)) { dev_dbg(&intf->dev, "missing cdc %s%s%sdescriptor\n", info->header ? "" : "header ", info->u ? "" : "union ", From 30d6a9fd6c11bd7b9e8f673da0f9caa22e2280fa Mon Sep 17 00:00:00 2001 From: Yuki Shibuya Date: Thu, 24 Mar 2016 05:17:03 +0000 Subject: [PATCH 392/797] KVM: x86: Inject pending interrupt even if pending nmi exist commit 321c5658c5e9192dea0d58ab67cf1791e45b2b26 upstream. Non maskable interrupts (NMI) are preferred to interrupts in current implementation. If a NMI is pending and NMI is blocked by the result of nmi_allowed(), pending interrupt is not injected and enable_irq_window() is not executed, even if interrupts injection is allowed. In old kernel (e.g. 2.6.32), schedule() is often called in NMI context. In this case, interrupts are needed to execute iret that intends end of NMI. The flag of blocking new NMI is not cleared until the guest execute the iret, and interrupts are blocked by pending NMI. Due to this, iret can't be invoked in the guest, and the guest is starved until block is cleared by some events (e.g. canceling injection). This patch injects pending interrupts, when it's allowed, even if NMI is blocked. And, If an interrupts is pending after executing inject_pending_event(), enable_irq_window() is executed regardless of NMI pending counter. Signed-off-by: Yuki Shibuya Suggested-by: Paolo Bonzini Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8bfc5fc6a39b..7eb4ebd3ebea 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6024,12 +6024,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) } /* try to inject new event if pending */ - if (vcpu->arch.nmi_pending) { - if (kvm_x86_ops->nmi_allowed(vcpu)) { - --vcpu->arch.nmi_pending; - vcpu->arch.nmi_injected = true; - kvm_x86_ops->set_nmi(vcpu); - } + if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) { + --vcpu->arch.nmi_pending; + vcpu->arch.nmi_injected = true; + kvm_x86_ops->set_nmi(vcpu); } else if (kvm_cpu_has_injectable_intr(vcpu)) { /* * Because interrupts can be injected asynchronously, we are @@ -6474,10 +6472,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (inject_pending_event(vcpu, req_int_win) != 0) req_immediate_exit = true; /* enable NMI/IRQ window open exits if needed */ - else if (vcpu->arch.nmi_pending) - kvm_x86_ops->enable_nmi_window(vcpu); - else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) - kvm_x86_ops->enable_irq_window(vcpu); + else { + if (vcpu->arch.nmi_pending) + kvm_x86_ops->enable_nmi_window(vcpu); + if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) + kvm_x86_ops->enable_irq_window(vcpu); + } if (kvm_lapic_enabled(vcpu)) { update_cr8_intercept(vcpu); From 84f2443e213727323064e581e281d778831fa859 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 29 Mar 2016 17:56:57 +0200 Subject: [PATCH 393/797] KVM: x86: reduce default value of halt_poll_ns parameter commit 14ebda3394fd3e5388747e742e510b0802a65d24 upstream. Windows lets applications choose the frequency of the timer tick, and in Windows 10 the maximum rate was changed from 1024 Hz to 2048 Hz. Unfortunately, because of the way the Windows API works, most applications who need a higher rate than the default 64 Hz will just do timeGetDevCaps(&tc, sizeof(tc)); timeBeginPeriod(tc.wPeriodMin); and pick the maximum rate. This causes very high CPU usage when playing media or games on Windows 10, even if the guest does not actually use the CPU very much, because the frequent timer tick causes halt_poll_ns to kick in. There is no really good solution, especially because Microsoft could sooner or later bump the limit to 4096 Hz, but for now the best we can do is lower a bit the upper limit for halt_poll_ns. :-( Reported-by: Jon Panozzo Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 30cfd64295a0..9d2abb2a41d2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -41,7 +41,7 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 -#define KVM_HALT_POLL_NS_DEFAULT 500000 +#define KVM_HALT_POLL_NS_DEFAULT 400000 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS From af22bb95d98c2df7eadecc9445c275b8847f54a9 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 3 Feb 2016 03:35:49 +0000 Subject: [PATCH 394/797] MIPS: Fix MSA ld unaligned failure cases commit fa8ff601d72bad3078ddf5ef17a5547700d06908 upstream. Copying the content of an MSA vector from user memory may involve TLB faults & mapping in pages. This will fail when preemption is disabled due to an inability to acquire mmap_sem from do_page_fault, which meant such vector loads to unmapped pages would always fail to be emulated. Fix this by disabling preemption later only around the updating of vector register state. This change does however introduce a race between performing the load into thread context & the thread being preempted, saving its current live context & clobbering the loaded value. This should be a rare occureence, so optimise for the fast path by simply repeating the load if we are preempted. Additionally if the copy failed then the failure path was taken with preemption left disabled, leading to the kernel typically encountering further issues around sleeping whilst atomic. The change to where preemption is disabled avoids this issue. Fixes: e4aa1f153add "MIPS: MSA unaligned memory access support" Reported-by: James Hogan Signed-off-by: Paul Burton Reviewed-by: James Hogan Cc: Leonid Yegoshin Cc: Maciej W. Rozycki Cc: James Cowgill Cc: Markos Chandras Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/12345/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/unaligned.c | 49 +++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index 490cea569d57..5c62065cbf22 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -885,7 +885,7 @@ static void emulate_load_store_insn(struct pt_regs *regs, { union mips_instruction insn; unsigned long value; - unsigned int res; + unsigned int res, preempted; unsigned long origpc; unsigned long orig31; void __user *fault_addr = NULL; @@ -1226,27 +1226,36 @@ static void emulate_load_store_insn(struct pt_regs *regs, if (!access_ok(VERIFY_READ, addr, sizeof(*fpr))) goto sigbus; - /* - * Disable preemption to avoid a race between copying - * state from userland, migrating to another CPU and - * updating the hardware vector register below. - */ - preempt_disable(); + do { + /* + * If we have live MSA context keep track of + * whether we get preempted in order to avoid + * the register context we load being clobbered + * by the live context as it's saved during + * preemption. If we don't have live context + * then it can't be saved to clobber the value + * we load. + */ + preempted = test_thread_flag(TIF_USEDMSA); - res = __copy_from_user_inatomic(fpr, addr, - sizeof(*fpr)); - if (res) - goto fault; + res = __copy_from_user_inatomic(fpr, addr, + sizeof(*fpr)); + if (res) + goto fault; - /* - * Update the hardware register if it is in use by the - * task in this quantum, in order to avoid having to - * save & restore the whole vector context. - */ - if (test_thread_flag(TIF_USEDMSA)) - write_msa_wr(wd, fpr, df); - - preempt_enable(); + /* + * Update the hardware register if it is in use + * by the task in this quantum, in order to + * avoid having to save & restore the whole + * vector context. + */ + preempt_disable(); + if (test_thread_flag(TIF_USEDMSA)) { + write_msa_wr(wd, fpr, df); + preempted = 0; + } + preempt_enable(); + } while (preempted); break; case msa_st_op: From 01f083c7e37adf613dca8e7177b460fc3e0a3e56 Mon Sep 17 00:00:00 2001 From: Govindraj Raja Date: Fri, 4 Mar 2016 15:28:22 +0000 Subject: [PATCH 395/797] pinctrl: pistachio: fix mfio84-89 function description and pinmux. commit e9adb336d0bf391be23e820975ca5cd12c31d781 upstream. mfio 84 to 89 are described wrongly, fix it to describe the right pin and add them to right pin-mux group. The correct order is: pll1_lock => mips_pll -- MFIO_83 pll2_lock => audio_pll -- MFIO_84 pll3_lock => rpu_v_pll -- MFIO_85 pll4_lock => rpu_l_pll -- MFIO_86 pll5_lock => sys_pll -- MFIO_87 pll6_lock => wifi_pll -- MFIO_88 pll7_lock => bt_pll -- MFIO_89 Cc: linux-gpio@vger.kernel.org Cc: devicetree@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: James Hartley Fixes: cefc03e5995e("pinctrl: Add Pistachio SoC pin control driver") Signed-off-by: Govindraj Raja Acked-by: Andrew Bresticker Acked-by: Rob Herring Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- .../pinctrl/img,pistachio-pinctrl.txt | 12 +++++----- drivers/pinctrl/pinctrl-pistachio.c | 24 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt index 08a4a32c8eb0..0326154c7925 100644 --- a/Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt @@ -134,12 +134,12 @@ mfio80 ddr_debug, mips_trace_data, mips_debug mfio81 dreq0, mips_trace_data, eth_debug mfio82 dreq1, mips_trace_data, eth_debug mfio83 mips_pll_lock, mips_trace_data, usb_debug -mfio84 sys_pll_lock, mips_trace_data, usb_debug -mfio85 wifi_pll_lock, mips_trace_data, sdhost_debug -mfio86 bt_pll_lock, mips_trace_data, sdhost_debug -mfio87 rpu_v_pll_lock, dreq2, socif_debug -mfio88 rpu_l_pll_lock, dreq3, socif_debug -mfio89 audio_pll_lock, dreq4, dreq5 +mfio84 audio_pll_lock, mips_trace_data, usb_debug +mfio85 rpu_v_pll_lock, mips_trace_data, sdhost_debug +mfio86 rpu_l_pll_lock, mips_trace_data, sdhost_debug +mfio87 sys_pll_lock, dreq2, socif_debug +mfio88 wifi_pll_lock, dreq3, socif_debug +mfio89 bt_pll_lock, dreq4, dreq5 tck trstn tdi diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c index 85c9046c690e..6b1a47f8c096 100644 --- a/drivers/pinctrl/pinctrl-pistachio.c +++ b/drivers/pinctrl/pinctrl-pistachio.c @@ -469,27 +469,27 @@ static const char * const pistachio_mips_pll_lock_groups[] = { "mfio83", }; -static const char * const pistachio_sys_pll_lock_groups[] = { +static const char * const pistachio_audio_pll_lock_groups[] = { "mfio84", }; -static const char * const pistachio_wifi_pll_lock_groups[] = { +static const char * const pistachio_rpu_v_pll_lock_groups[] = { "mfio85", }; -static const char * const pistachio_bt_pll_lock_groups[] = { +static const char * const pistachio_rpu_l_pll_lock_groups[] = { "mfio86", }; -static const char * const pistachio_rpu_v_pll_lock_groups[] = { +static const char * const pistachio_sys_pll_lock_groups[] = { "mfio87", }; -static const char * const pistachio_rpu_l_pll_lock_groups[] = { +static const char * const pistachio_wifi_pll_lock_groups[] = { "mfio88", }; -static const char * const pistachio_audio_pll_lock_groups[] = { +static const char * const pistachio_bt_pll_lock_groups[] = { "mfio89", }; @@ -559,12 +559,12 @@ enum pistachio_mux_option { PISTACHIO_FUNCTION_DREQ4, PISTACHIO_FUNCTION_DREQ5, PISTACHIO_FUNCTION_MIPS_PLL_LOCK, + PISTACHIO_FUNCTION_AUDIO_PLL_LOCK, + PISTACHIO_FUNCTION_RPU_V_PLL_LOCK, + PISTACHIO_FUNCTION_RPU_L_PLL_LOCK, PISTACHIO_FUNCTION_SYS_PLL_LOCK, PISTACHIO_FUNCTION_WIFI_PLL_LOCK, PISTACHIO_FUNCTION_BT_PLL_LOCK, - PISTACHIO_FUNCTION_RPU_V_PLL_LOCK, - PISTACHIO_FUNCTION_RPU_L_PLL_LOCK, - PISTACHIO_FUNCTION_AUDIO_PLL_LOCK, PISTACHIO_FUNCTION_DEBUG_RAW_CCA_IND, PISTACHIO_FUNCTION_DEBUG_ED_SEC20_CCA_IND, PISTACHIO_FUNCTION_DEBUG_ED_SEC40_CCA_IND, @@ -620,12 +620,12 @@ static const struct pistachio_function pistachio_functions[] = { FUNCTION(dreq4), FUNCTION(dreq5), FUNCTION(mips_pll_lock), + FUNCTION(audio_pll_lock), + FUNCTION(rpu_v_pll_lock), + FUNCTION(rpu_l_pll_lock), FUNCTION(sys_pll_lock), FUNCTION(wifi_pll_lock), FUNCTION(bt_pll_lock), - FUNCTION(rpu_v_pll_lock), - FUNCTION(rpu_l_pll_lock), - FUNCTION(audio_pll_lock), FUNCTION(debug_raw_cca_ind), FUNCTION(debug_ed_sec20_cca_ind), FUNCTION(debug_ed_sec40_cca_ind), From 8536e37882600eaf011663aafb8abeb556bfca86 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 7 Mar 2016 19:40:57 +0100 Subject: [PATCH 396/797] pinctrl: sh-pfc: only use dummy states for non-DT platforms commit 0129801be4b87226bf502f18f5a9eabd356d1058 upstream. If pinctrl_provide_dummies() is used unconditionally, then the dummy state will be used even on DT platforms when the "init" state was intentionally left out. Instead of "default", the dummy "init" state will then be used during probe. Thus, when probing an I2C controller on cold boot, communication triggered by bus notifiers broke because the pins were not initialized. Do it like OMAP2: use the dummy state only for non-DT platforms. Fixes: ef0eebc05130 ("drivers/pinctrl: Add the concept of an "init" state") Reported-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang Acked-by: Sergei Shtylyov Tested-by: Geert Uytterhoeven Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/sh-pfc/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/sh-pfc/core.c b/drivers/pinctrl/sh-pfc/core.c index 181ea98a63b7..2b0d70217bbd 100644 --- a/drivers/pinctrl/sh-pfc/core.c +++ b/drivers/pinctrl/sh-pfc/core.c @@ -545,7 +545,9 @@ static int sh_pfc_probe(struct platform_device *pdev) return ret; } - pinctrl_provide_dummies(); + /* Enable dummy states for those platforms without pinctrl support */ + if (!of_have_populated_dt()) + pinctrl_provide_dummies(); ret = sh_pfc_init_ranges(pfc); if (ret < 0) From 6bf00fff3c7b0ee91ccb9257ffadeb2f3a335069 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 12 Mar 2016 19:44:57 +0100 Subject: [PATCH 397/797] pinctrl: sunxi: Fix A33 external interrupts not working commit 5e7515ba78fff2f5407eaa2f97c1d5c07801ac3d upstream. pinctrl-sun8i-a33.c (and the dts) declare only 2 interrupt banks, where as the closely related a23 has 3 banks. This matches with the datasheet for the A33 where only interrupt banks B and G are specified where as the A23 has banks A, B and G. However the A33 being the A23 derative it is means that the interrupt configure/status io-addresses for the 2 banks it has are not changed from the A23, iow they have the same address as if bank A was still present. Where as the sunxi pinctrl currently tries to use the A23 bank A addresses for bank B, since the pinctrl code does not know about the removed bank A. Add a irq_bank_base parameter and use this where appropriate to take the missing bank A into account. This fixes external interrupts not working on the A33 (tested with an i2c touchscreen controller which uses an external interrupt). Signed-off-by: Hans de Goede Acked-by: Maxime Ripard Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c | 1 + drivers/pinctrl/sunxi/pinctrl-sunxi.c | 17 ++++++++++------- drivers/pinctrl/sunxi/pinctrl-sunxi.h | 21 +++++++++++---------- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c index 00265f0435a7..8b381d69df86 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c @@ -485,6 +485,7 @@ static const struct sunxi_pinctrl_desc sun8i_a33_pinctrl_data = { .pins = sun8i_a33_pins, .npins = ARRAY_SIZE(sun8i_a33_pins), .irq_banks = 2, + .irq_bank_base = 1, }; static int sun8i_a33_pinctrl_probe(struct platform_device *pdev) diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index dead97daca35..a4a5b504c532 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -578,7 +578,7 @@ static void sunxi_pinctrl_irq_release_resources(struct irq_data *d) static int sunxi_pinctrl_irq_set_type(struct irq_data *d, unsigned int type) { struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d); - u32 reg = sunxi_irq_cfg_reg(d->hwirq); + u32 reg = sunxi_irq_cfg_reg(d->hwirq, pctl->desc->irq_bank_base); u8 index = sunxi_irq_cfg_offset(d->hwirq); unsigned long flags; u32 regval; @@ -625,7 +625,8 @@ static int sunxi_pinctrl_irq_set_type(struct irq_data *d, unsigned int type) static void sunxi_pinctrl_irq_ack(struct irq_data *d) { struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d); - u32 status_reg = sunxi_irq_status_reg(d->hwirq); + u32 status_reg = sunxi_irq_status_reg(d->hwirq, + pctl->desc->irq_bank_base); u8 status_idx = sunxi_irq_status_offset(d->hwirq); /* Clear the IRQ */ @@ -635,7 +636,7 @@ static void sunxi_pinctrl_irq_ack(struct irq_data *d) static void sunxi_pinctrl_irq_mask(struct irq_data *d) { struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d); - u32 reg = sunxi_irq_ctrl_reg(d->hwirq); + u32 reg = sunxi_irq_ctrl_reg(d->hwirq, pctl->desc->irq_bank_base); u8 idx = sunxi_irq_ctrl_offset(d->hwirq); unsigned long flags; u32 val; @@ -652,7 +653,7 @@ static void sunxi_pinctrl_irq_mask(struct irq_data *d) static void sunxi_pinctrl_irq_unmask(struct irq_data *d) { struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d); - u32 reg = sunxi_irq_ctrl_reg(d->hwirq); + u32 reg = sunxi_irq_ctrl_reg(d->hwirq, pctl->desc->irq_bank_base); u8 idx = sunxi_irq_ctrl_offset(d->hwirq); unsigned long flags; u32 val; @@ -744,7 +745,7 @@ static void sunxi_pinctrl_irq_handler(struct irq_desc *desc) if (bank == pctl->desc->irq_banks) return; - reg = sunxi_irq_status_reg_from_bank(bank); + reg = sunxi_irq_status_reg_from_bank(bank, pctl->desc->irq_bank_base); val = readl(pctl->membase + reg); if (val) { @@ -1023,9 +1024,11 @@ int sunxi_pinctrl_init(struct platform_device *pdev, for (i = 0; i < pctl->desc->irq_banks; i++) { /* Mask and clear all IRQs before registering a handler */ - writel(0, pctl->membase + sunxi_irq_ctrl_reg_from_bank(i)); + writel(0, pctl->membase + sunxi_irq_ctrl_reg_from_bank(i, + pctl->desc->irq_bank_base)); writel(0xffffffff, - pctl->membase + sunxi_irq_status_reg_from_bank(i)); + pctl->membase + sunxi_irq_status_reg_from_bank(i, + pctl->desc->irq_bank_base)); irq_set_chained_handler_and_data(pctl->irq[i], sunxi_pinctrl_irq_handler, diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.h b/drivers/pinctrl/sunxi/pinctrl-sunxi.h index e248e81a0f9e..0afce1ab12d0 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.h +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.h @@ -97,6 +97,7 @@ struct sunxi_pinctrl_desc { int npins; unsigned pin_base; unsigned irq_banks; + unsigned irq_bank_base; bool irq_read_needs_mux; }; @@ -233,12 +234,12 @@ static inline u32 sunxi_pull_offset(u16 pin) return pin_num * PULL_PINS_BITS; } -static inline u32 sunxi_irq_cfg_reg(u16 irq) +static inline u32 sunxi_irq_cfg_reg(u16 irq, unsigned bank_base) { u8 bank = irq / IRQ_PER_BANK; u8 reg = (irq % IRQ_PER_BANK) / IRQ_CFG_IRQ_PER_REG * 0x04; - return IRQ_CFG_REG + bank * IRQ_MEM_SIZE + reg; + return IRQ_CFG_REG + (bank_base + bank) * IRQ_MEM_SIZE + reg; } static inline u32 sunxi_irq_cfg_offset(u16 irq) @@ -247,16 +248,16 @@ static inline u32 sunxi_irq_cfg_offset(u16 irq) return irq_num * IRQ_CFG_IRQ_BITS; } -static inline u32 sunxi_irq_ctrl_reg_from_bank(u8 bank) +static inline u32 sunxi_irq_ctrl_reg_from_bank(u8 bank, unsigned bank_base) { - return IRQ_CTRL_REG + bank * IRQ_MEM_SIZE; + return IRQ_CTRL_REG + (bank_base + bank) * IRQ_MEM_SIZE; } -static inline u32 sunxi_irq_ctrl_reg(u16 irq) +static inline u32 sunxi_irq_ctrl_reg(u16 irq, unsigned bank_base) { u8 bank = irq / IRQ_PER_BANK; - return sunxi_irq_ctrl_reg_from_bank(bank); + return sunxi_irq_ctrl_reg_from_bank(bank, bank_base); } static inline u32 sunxi_irq_ctrl_offset(u16 irq) @@ -265,16 +266,16 @@ static inline u32 sunxi_irq_ctrl_offset(u16 irq) return irq_num * IRQ_CTRL_IRQ_BITS; } -static inline u32 sunxi_irq_status_reg_from_bank(u8 bank) +static inline u32 sunxi_irq_status_reg_from_bank(u8 bank, unsigned bank_base) { - return IRQ_STATUS_REG + bank * IRQ_MEM_SIZE; + return IRQ_STATUS_REG + (bank_base + bank) * IRQ_MEM_SIZE; } -static inline u32 sunxi_irq_status_reg(u16 irq) +static inline u32 sunxi_irq_status_reg(u16 irq, unsigned bank_base) { u8 bank = irq / IRQ_PER_BANK; - return sunxi_irq_status_reg_from_bank(bank); + return sunxi_irq_status_reg_from_bank(bank, bank_base); } static inline u32 sunxi_irq_status_offset(u16 irq) From e7c6abc464ac71ca734ea57a778872793d51ff29 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 24 Mar 2016 13:15:45 +0100 Subject: [PATCH 398/797] pinctrl: nomadik: fix pull debug print inversion commit 6ee334559324a55725e22463de633b99ad99fcad upstream. Pull up was reported as pull down and vice versa. Fix this. Fixes: 8f1774a2a971 "pinctrl: nomadik: improve GPIO debug prints" Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/nomadik/pinctrl-nomadik.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index eebfae0c9b7c..f844b4ae7f79 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -995,7 +995,7 @@ static void nmk_gpio_dbg_show_one(struct seq_file *s, int val; if (pull) - pullidx = data_out ? 1 : 2; + pullidx = data_out ? 2 : 1; seq_printf(s, " gpio-%-3d (%-20.20s) in %s %s", gpio, From 2e638de516983b9575c845e878e1c41f5bcd2509 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Wed, 9 Mar 2016 02:45:36 +0200 Subject: [PATCH 399/797] pinctrl: freescale: imx: fix bogus check of of_iomap() return value commit 9a4f424531dabd877259ae0071b8bcc4dede9eb5 upstream. On error path of_iomap() returns NULL, hence IS_ERR() check is invalid and may cause a NULL pointer dereference, the change fixes this problem. While we are here invert a device node check to simplify the code. Fixes: 26d8cde5260b ("pinctrl: freescale: imx: add shared input select reg support") Signed-off-by: Vladimir Zapolskiy Acked-by: Shawn Guo Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/freescale/pinctrl-imx.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c index a5bb93987378..1029aa7889b5 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx.c +++ b/drivers/pinctrl/freescale/pinctrl-imx.c @@ -726,19 +726,18 @@ int imx_pinctrl_probe(struct platform_device *pdev, if (of_property_read_bool(dev_np, "fsl,input-sel")) { np = of_parse_phandle(dev_np, "fsl,input-sel", 0); - if (np) { - ipctl->input_sel_base = of_iomap(np, 0); - if (IS_ERR(ipctl->input_sel_base)) { - of_node_put(np); - dev_err(&pdev->dev, - "iomuxc input select base address not found\n"); - return PTR_ERR(ipctl->input_sel_base); - } - } else { + if (!np) { dev_err(&pdev->dev, "iomuxc fsl,input-sel property not found\n"); return -EINVAL; } + + ipctl->input_sel_base = of_iomap(np, 0); of_node_put(np); + if (!ipctl->input_sel_base) { + dev_err(&pdev->dev, + "iomuxc input select base address not found\n"); + return -ENOMEM; + } } imx_pinctrl_desc.name = dev_name(&pdev->dev); From 5046b85eef40f725f509cae12a954f445c9c3e59 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 22 Mar 2016 01:04:05 -0300 Subject: [PATCH 400/797] au0828: fix au0828_v4l2_close() dev_state race condition commit ed940cd27416f9887864b95e1f8f8845aa9d6391 upstream. au0828_v4l2_close() check for dev_state == DEV_DISCONNECTED will fail to detect the device disconnected state correctly, if au0828_v4l2_open() runs to set the DEV_INITIALIZED bit. A loop test of bind/unbind found this bug by increasing the likelihood of au0828_v4l2_open() occurring while unbind is in progress. When au0828_v4l2_close() fails to detect that the device is in disconnect state, it attempts to power down the device and fails with the following general protection fault: [ 260.992962] Call Trace: [ 260.993008] [] ? xc5000_sleep+0x8f/0xd0 [xc5000] [ 260.993095] [] ? fe_standby+0x3c/0x50 [tuner] [ 260.993186] [] au0828_v4l2_close+0x53c/0x620 [au0828] [ 260.993298] [] v4l2_release+0xf0/0x210 [videodev] [ 260.993382] [] __fput+0x1fc/0x6c0 [ 260.993449] [] ____fput+0xe/0x10 [ 260.993519] [] task_work_run+0x133/0x1f0 [ 260.993602] [] exit_to_usermode_loop+0x140/0x170 [ 260.993681] [] syscall_return_slowpath+0x16a/0x1a0 [ 260.993754] [] entry_SYSCALL_64_fastpath+0xa6/0xa8 Signed-off-by: Shuah Khan Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/au0828/au0828-video.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/au0828/au0828-video.c b/drivers/media/usb/au0828/au0828-video.c index 45c622e234f7..36fde46ea2cf 100644 --- a/drivers/media/usb/au0828/au0828-video.c +++ b/drivers/media/usb/au0828/au0828-video.c @@ -1007,7 +1007,7 @@ static int au0828_v4l2_close(struct file *filp) del_timer_sync(&dev->vbi_timeout); } - if (dev->dev_state == DEV_DISCONNECTED) + if (dev->dev_state & DEV_DISCONNECTED) goto end; if (dev->users == 1) { From 797e21364c9323ccdb2e8e712d5ef66e7f277798 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 22 Mar 2016 09:21:57 -0300 Subject: [PATCH 401/797] au0828: Fix dev_state handling commit e8e3039f5b941f7825d335f8ca11c12a8104db11 upstream. The au0828 dev_state is actually a bit mask. It should not be checking with "==" but, instead, with a logic and. There are some places where it was doing it wrong. Fix that by replacing the dev_state set/clear/test with the bitops. As reviewed by Shuah: "Looks good. Tested running bind/unbind au0828 loop for 1000 times. Didn't see any problems and the v4l2_querycap() problem has been fixed with this patch. After the above test, ran bind/unbind snd_usb_audio 1000 times. Didn't see any problems. Generated media graph and the graph looks good." Reviewed-by: Shuah Khan Tested-by: Shuah Khan Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/au0828/au0828-core.c | 2 +- drivers/media/usb/au0828/au0828-input.c | 4 +- drivers/media/usb/au0828/au0828-video.c | 63 ++++++++++++------------- drivers/media/usb/au0828/au0828.h | 9 ++-- 4 files changed, 39 insertions(+), 39 deletions(-) diff --git a/drivers/media/usb/au0828/au0828-core.c b/drivers/media/usb/au0828/au0828-core.c index 0934024fb89d..d91ded795c93 100644 --- a/drivers/media/usb/au0828/au0828-core.c +++ b/drivers/media/usb/au0828/au0828-core.c @@ -159,7 +159,7 @@ static void au0828_usb_disconnect(struct usb_interface *interface) Set the status so poll routines can check and avoid access after disconnect. */ - dev->dev_state = DEV_DISCONNECTED; + set_bit(DEV_DISCONNECTED, &dev->dev_state); au0828_rc_unregister(dev); /* Digital TV */ diff --git a/drivers/media/usb/au0828/au0828-input.c b/drivers/media/usb/au0828/au0828-input.c index b0f067971979..3d6687f0407d 100644 --- a/drivers/media/usb/au0828/au0828-input.c +++ b/drivers/media/usb/au0828/au0828-input.c @@ -130,7 +130,7 @@ static int au0828_get_key_au8522(struct au0828_rc *ir) bool first = true; /* do nothing if device is disconnected */ - if (ir->dev->dev_state == DEV_DISCONNECTED) + if (test_bit(DEV_DISCONNECTED, &ir->dev->dev_state)) return 0; /* Check IR int */ @@ -260,7 +260,7 @@ static void au0828_rc_stop(struct rc_dev *rc) cancel_delayed_work_sync(&ir->work); /* do nothing if device is disconnected */ - if (ir->dev->dev_state != DEV_DISCONNECTED) { + if (!test_bit(DEV_DISCONNECTED, &ir->dev->dev_state)) { /* Disable IR */ au8522_rc_clear(ir, 0xe0, 1 << 4); } diff --git a/drivers/media/usb/au0828/au0828-video.c b/drivers/media/usb/au0828/au0828-video.c index 36fde46ea2cf..7b2fe1b56039 100644 --- a/drivers/media/usb/au0828/au0828-video.c +++ b/drivers/media/usb/au0828/au0828-video.c @@ -104,14 +104,13 @@ static inline void print_err_status(struct au0828_dev *dev, static int check_dev(struct au0828_dev *dev) { - if (dev->dev_state & DEV_DISCONNECTED) { + if (test_bit(DEV_DISCONNECTED, &dev->dev_state)) { pr_info("v4l2 ioctl: device not present\n"); return -ENODEV; } - if (dev->dev_state & DEV_MISCONFIGURED) { - pr_info("v4l2 ioctl: device is misconfigured; " - "close and open it again\n"); + if (test_bit(DEV_MISCONFIGURED, &dev->dev_state)) { + pr_info("v4l2 ioctl: device is misconfigured; close and open it again\n"); return -EIO; } return 0; @@ -519,8 +518,8 @@ static inline int au0828_isoc_copy(struct au0828_dev *dev, struct urb *urb) if (!dev) return 0; - if ((dev->dev_state & DEV_DISCONNECTED) || - (dev->dev_state & DEV_MISCONFIGURED)) + if (test_bit(DEV_DISCONNECTED, &dev->dev_state) || + test_bit(DEV_MISCONFIGURED, &dev->dev_state)) return 0; if (urb->status < 0) { @@ -766,10 +765,10 @@ static int au0828_stream_interrupt(struct au0828_dev *dev) int ret = 0; dev->stream_state = STREAM_INTERRUPT; - if (dev->dev_state == DEV_DISCONNECTED) + if (test_bit(DEV_DISCONNECTED, &dev->dev_state)) return -ENODEV; else if (ret) { - dev->dev_state = DEV_MISCONFIGURED; + set_bit(DEV_MISCONFIGURED, &dev->dev_state); dprintk(1, "%s device is misconfigured!\n", __func__); return ret; } @@ -958,7 +957,7 @@ static int au0828_v4l2_open(struct file *filp) int ret; dprintk(1, - "%s called std_set %d dev_state %d stream users %d users %d\n", + "%s called std_set %d dev_state %ld stream users %d users %d\n", __func__, dev->std_set_in_tuner_core, dev->dev_state, dev->streaming_users, dev->users); @@ -977,7 +976,7 @@ static int au0828_v4l2_open(struct file *filp) au0828_analog_stream_enable(dev); au0828_analog_stream_reset(dev); dev->stream_state = STREAM_OFF; - dev->dev_state |= DEV_INITIALIZED; + set_bit(DEV_INITIALIZED, &dev->dev_state); } dev->users++; mutex_unlock(&dev->lock); @@ -991,7 +990,7 @@ static int au0828_v4l2_close(struct file *filp) struct video_device *vdev = video_devdata(filp); dprintk(1, - "%s called std_set %d dev_state %d stream users %d users %d\n", + "%s called std_set %d dev_state %ld stream users %d users %d\n", __func__, dev->std_set_in_tuner_core, dev->dev_state, dev->streaming_users, dev->users); @@ -1007,7 +1006,7 @@ static int au0828_v4l2_close(struct file *filp) del_timer_sync(&dev->vbi_timeout); } - if (dev->dev_state & DEV_DISCONNECTED) + if (test_bit(DEV_DISCONNECTED, &dev->dev_state)) goto end; if (dev->users == 1) { @@ -1036,7 +1035,7 @@ static void au0828_init_tuner(struct au0828_dev *dev) .type = V4L2_TUNER_ANALOG_TV, }; - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); if (dev->std_set_in_tuner_core) @@ -1108,7 +1107,7 @@ static int vidioc_querycap(struct file *file, void *priv, struct video_device *vdev = video_devdata(file); struct au0828_dev *dev = video_drvdata(file); - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); strlcpy(cap->driver, "au0828", sizeof(cap->driver)); @@ -1151,7 +1150,7 @@ static int vidioc_g_fmt_vid_cap(struct file *file, void *priv, { struct au0828_dev *dev = video_drvdata(file); - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); f->fmt.pix.width = dev->width; @@ -1170,7 +1169,7 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv, { struct au0828_dev *dev = video_drvdata(file); - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); return au0828_set_format(dev, VIDIOC_TRY_FMT, f); @@ -1182,7 +1181,7 @@ static int vidioc_s_fmt_vid_cap(struct file *file, void *priv, struct au0828_dev *dev = video_drvdata(file); int rc; - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); rc = check_dev(dev); @@ -1204,7 +1203,7 @@ static int vidioc_s_std(struct file *file, void *priv, v4l2_std_id norm) { struct au0828_dev *dev = video_drvdata(file); - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); if (norm == dev->std) @@ -1236,7 +1235,7 @@ static int vidioc_g_std(struct file *file, void *priv, v4l2_std_id *norm) { struct au0828_dev *dev = video_drvdata(file); - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); *norm = dev->std; @@ -1259,7 +1258,7 @@ static int vidioc_enum_input(struct file *file, void *priv, [AU0828_VMUX_DEBUG] = "tv debug" }; - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); tmp = input->index; @@ -1289,7 +1288,7 @@ static int vidioc_g_input(struct file *file, void *priv, unsigned int *i) { struct au0828_dev *dev = video_drvdata(file); - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); *i = dev->ctrl_input; @@ -1300,7 +1299,7 @@ static void au0828_s_input(struct au0828_dev *dev, int index) { int i; - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); switch (AUVI_INPUT(index).type) { @@ -1385,7 +1384,7 @@ static int vidioc_g_audio(struct file *file, void *priv, struct v4l2_audio *a) { struct au0828_dev *dev = video_drvdata(file); - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); a->index = dev->ctrl_ainput; @@ -1405,7 +1404,7 @@ static int vidioc_s_audio(struct file *file, void *priv, const struct v4l2_audio if (a->index != dev->ctrl_ainput) return -EINVAL; - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); return 0; } @@ -1417,7 +1416,7 @@ static int vidioc_g_tuner(struct file *file, void *priv, struct v4l2_tuner *t) if (t->index != 0) return -EINVAL; - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); strcpy(t->name, "Auvitek tuner"); @@ -1437,7 +1436,7 @@ static int vidioc_s_tuner(struct file *file, void *priv, if (t->index != 0) return -EINVAL; - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); au0828_init_tuner(dev); @@ -1459,7 +1458,7 @@ static int vidioc_g_frequency(struct file *file, void *priv, if (freq->tuner != 0) return -EINVAL; - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); freq->frequency = dev->ctrl_freq; return 0; @@ -1474,7 +1473,7 @@ static int vidioc_s_frequency(struct file *file, void *priv, if (freq->tuner != 0) return -EINVAL; - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); au0828_init_tuner(dev); @@ -1500,7 +1499,7 @@ static int vidioc_g_fmt_vbi_cap(struct file *file, void *priv, { struct au0828_dev *dev = video_drvdata(file); - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); format->fmt.vbi.samples_per_line = dev->vbi_width; @@ -1526,7 +1525,7 @@ static int vidioc_cropcap(struct file *file, void *priv, if (cc->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) return -EINVAL; - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); cc->bounds.left = 0; @@ -1548,7 +1547,7 @@ static int vidioc_g_register(struct file *file, void *priv, { struct au0828_dev *dev = video_drvdata(file); - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); reg->val = au0828_read(dev, reg->reg); @@ -1561,7 +1560,7 @@ static int vidioc_s_register(struct file *file, void *priv, { struct au0828_dev *dev = video_drvdata(file); - dprintk(1, "%s called std_set %d dev_state %d\n", __func__, + dprintk(1, "%s called std_set %d dev_state %ld\n", __func__, dev->std_set_in_tuner_core, dev->dev_state); return au0828_writereg(dev, reg->reg, reg->val); diff --git a/drivers/media/usb/au0828/au0828.h b/drivers/media/usb/au0828/au0828.h index 60b59391ea2a..d1b6405a05a4 100644 --- a/drivers/media/usb/au0828/au0828.h +++ b/drivers/media/usb/au0828/au0828.h @@ -21,6 +21,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -122,9 +123,9 @@ enum au0828_stream_state { /* device state */ enum au0828_dev_state { - DEV_INITIALIZED = 0x01, - DEV_DISCONNECTED = 0x02, - DEV_MISCONFIGURED = 0x04 + DEV_INITIALIZED = 0, + DEV_DISCONNECTED = 1, + DEV_MISCONFIGURED = 2 }; struct au0828_dev; @@ -248,7 +249,7 @@ struct au0828_dev { int input_type; int std_set_in_tuner_core; unsigned int ctrl_input; - enum au0828_dev_state dev_state; + long unsigned int dev_state; /* defined at enum au0828_dev_state */; enum au0828_stream_state stream_state; wait_queue_head_t open; From 4d4b032bd7bb43650a71dd804eb9bbb589d3d818 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 26 Feb 2016 08:21:35 -0300 Subject: [PATCH 402/797] coda: fix error path in case of missing pdata on non-DT platform commit bc717d5e92c8c079280eb4acbe335c6f25041aa2 upstream. If we bail out this early, v4l2_device_register() has not been called yet, so no need to call v4l2_device_unregister(). Fixes: b7bd660a51f0 ("[media] coda: Call v4l2_device_unregister() from a single location") Reported-by: Michael Olbrich Signed-off-by: Philipp Zabel Reviewed-by: Fabio Estevam Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/coda/coda-common.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c index 15516a6e3a39..323aad3c89de 100644 --- a/drivers/media/platform/coda/coda-common.c +++ b/drivers/media/platform/coda/coda-common.c @@ -2119,14 +2119,12 @@ static int coda_probe(struct platform_device *pdev) pdev_id = of_id ? of_id->data : platform_get_device_id(pdev); - if (of_id) { + if (of_id) dev->devtype = of_id->data; - } else if (pdev_id) { + else if (pdev_id) dev->devtype = &coda_devdata[pdev_id->driver_data]; - } else { - ret = -EINVAL; - goto err_v4l2_register; - } + else + return -EINVAL; spin_lock_init(&dev->irqlock); INIT_LIST_HEAD(&dev->instances); From 5da980e3a596b34825678f5892d2e8a23626049b Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 9 Sep 2015 11:38:56 -0300 Subject: [PATCH 403/797] v4l: vsp1: Set the SRU CTRL0 register when starting the stream commit f6acfcdc5b8cdc9ddd53a459361820b9efe958c4 upstream. Commit 58f896d859ce ("[media] v4l: vsp1: sru: Make the intensity controllable during streaming") refactored the stream start code and removed the SRU CTRL0 register write by mistake. Add it back. Fixes: 58f896d859ce ("[media] v4l: vsp1: sru: Make the intensity controllable during streaming") Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/vsp1/vsp1_sru.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/vsp1/vsp1_sru.c b/drivers/media/platform/vsp1/vsp1_sru.c index 6310acab60e7..d41ae950d1a1 100644 --- a/drivers/media/platform/vsp1/vsp1_sru.c +++ b/drivers/media/platform/vsp1/vsp1_sru.c @@ -154,6 +154,7 @@ static int sru_s_stream(struct v4l2_subdev *subdev, int enable) mutex_lock(sru->ctrls.lock); ctrl0 |= vsp1_sru_read(sru, VI6_SRU_CTRL0) & (VI6_SRU_CTRL0_PARAM0_MASK | VI6_SRU_CTRL0_PARAM1_MASK); + vsp1_sru_write(sru, VI6_SRU_CTRL0, ctrl0); mutex_unlock(sru->ctrls.lock); vsp1_sru_write(sru, VI6_SRU_CTRL1, VI6_SRU_CTRL1_PARAM5); From e4f2138e3ce6dd2ab4f7cf0ebef8dd44320deedb Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Wed, 2 Mar 2016 10:34:43 +0100 Subject: [PATCH 404/797] pcmcia: db1xxx_ss: fix last irq_to_gpio user commit e34b6fcf9b09ec9d93503edd5f81489791ffd602 upstream. remove the usage of removed irq_to_gpio() function. On pre-DB1200 boards, pass the actual carddetect GPIO number instead of the IRQ, because we need the gpio to actually test card status (inserted or not) and can get the irq number with gpio_to_irq() instead. Tested on DB1300 and DB1500, this patch fixes PCMCIA on the DB1500, which used irq_to_gpio(). Fixes: 832f5dacfa0b ("MIPS: Remove all the uses of custom gpio.h") Signed-off-by: Manuel Lauss Acked-by: Arnd Bergmann Reviewed-by: Linus Walleij Cc: linux-pcmcia@lists.infradead.org Cc: Linux-MIPS Patchwork: https://patchwork.linux-mips.org/patch/12747/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/alchemy/devboards/db1000.c | 18 ++++++++---------- arch/mips/alchemy/devboards/db1550.c | 4 ++-- drivers/pcmcia/db1xxx_ss.c | 11 +++++++++-- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/arch/mips/alchemy/devboards/db1000.c b/arch/mips/alchemy/devboards/db1000.c index bdeed9d13c6f..433c4b9a9f0a 100644 --- a/arch/mips/alchemy/devboards/db1000.c +++ b/arch/mips/alchemy/devboards/db1000.c @@ -503,15 +503,15 @@ int __init db1000_dev_setup(void) if (board == BCSR_WHOAMI_DB1500) { c0 = AU1500_GPIO2_INT; c1 = AU1500_GPIO5_INT; - d0 = AU1500_GPIO0_INT; - d1 = AU1500_GPIO3_INT; + d0 = 0; /* GPIO number, NOT irq! */ + d1 = 3; /* GPIO number, NOT irq! */ s0 = AU1500_GPIO1_INT; s1 = AU1500_GPIO4_INT; } else if (board == BCSR_WHOAMI_DB1100) { c0 = AU1100_GPIO2_INT; c1 = AU1100_GPIO5_INT; - d0 = AU1100_GPIO0_INT; - d1 = AU1100_GPIO3_INT; + d0 = 0; /* GPIO number, NOT irq! */ + d1 = 3; /* GPIO number, NOT irq! */ s0 = AU1100_GPIO1_INT; s1 = AU1100_GPIO4_INT; @@ -545,15 +545,15 @@ int __init db1000_dev_setup(void) } else if (board == BCSR_WHOAMI_DB1000) { c0 = AU1000_GPIO2_INT; c1 = AU1000_GPIO5_INT; - d0 = AU1000_GPIO0_INT; - d1 = AU1000_GPIO3_INT; + d0 = 0; /* GPIO number, NOT irq! */ + d1 = 3; /* GPIO number, NOT irq! */ s0 = AU1000_GPIO1_INT; s1 = AU1000_GPIO4_INT; platform_add_devices(db1000_devs, ARRAY_SIZE(db1000_devs)); } else if ((board == BCSR_WHOAMI_PB1500) || (board == BCSR_WHOAMI_PB1500R2)) { c0 = AU1500_GPIO203_INT; - d0 = AU1500_GPIO201_INT; + d0 = 1; /* GPIO number, NOT irq! */ s0 = AU1500_GPIO202_INT; twosocks = 0; flashsize = 64; @@ -566,7 +566,7 @@ int __init db1000_dev_setup(void) */ } else if (board == BCSR_WHOAMI_PB1100) { c0 = AU1100_GPIO11_INT; - d0 = AU1100_GPIO9_INT; + d0 = 9; /* GPIO number, NOT irq! */ s0 = AU1100_GPIO10_INT; twosocks = 0; flashsize = 64; @@ -583,7 +583,6 @@ int __init db1000_dev_setup(void) } else return 0; /* unknown board, no further dev setup to do */ - irq_set_irq_type(d0, IRQ_TYPE_EDGE_BOTH); irq_set_irq_type(c0, IRQ_TYPE_LEVEL_LOW); irq_set_irq_type(s0, IRQ_TYPE_LEVEL_LOW); @@ -597,7 +596,6 @@ int __init db1000_dev_setup(void) c0, d0, /*s0*/0, 0, 0); if (twosocks) { - irq_set_irq_type(d1, IRQ_TYPE_EDGE_BOTH); irq_set_irq_type(c1, IRQ_TYPE_LEVEL_LOW); irq_set_irq_type(s1, IRQ_TYPE_LEVEL_LOW); diff --git a/arch/mips/alchemy/devboards/db1550.c b/arch/mips/alchemy/devboards/db1550.c index 5740bcfdfc7f..6c37b9326f41 100644 --- a/arch/mips/alchemy/devboards/db1550.c +++ b/arch/mips/alchemy/devboards/db1550.c @@ -514,7 +514,7 @@ static void __init db1550_devices(void) AU1000_PCMCIA_MEM_PHYS_ADDR + 0x000400000 - 1, AU1000_PCMCIA_IO_PHYS_ADDR, AU1000_PCMCIA_IO_PHYS_ADDR + 0x000010000 - 1, - AU1550_GPIO3_INT, AU1550_GPIO0_INT, + AU1550_GPIO3_INT, 0, /*AU1550_GPIO21_INT*/0, 0, 0); db1x_register_pcmcia_socket( @@ -524,7 +524,7 @@ static void __init db1550_devices(void) AU1000_PCMCIA_MEM_PHYS_ADDR + 0x004400000 - 1, AU1000_PCMCIA_IO_PHYS_ADDR + 0x004000000, AU1000_PCMCIA_IO_PHYS_ADDR + 0x004010000 - 1, - AU1550_GPIO5_INT, AU1550_GPIO1_INT, + AU1550_GPIO5_INT, 1, /*AU1550_GPIO22_INT*/0, 0, 1); platform_device_register(&db1550_nand_dev); diff --git a/drivers/pcmcia/db1xxx_ss.c b/drivers/pcmcia/db1xxx_ss.c index 4c2fa05b4589..944674ee3464 100644 --- a/drivers/pcmcia/db1xxx_ss.c +++ b/drivers/pcmcia/db1xxx_ss.c @@ -56,6 +56,7 @@ struct db1x_pcmcia_sock { int stschg_irq; /* card-status-change irq */ int card_irq; /* card irq */ int eject_irq; /* db1200/pb1200 have these */ + int insert_gpio; /* db1000 carddetect gpio */ #define BOARD_TYPE_DEFAULT 0 /* most boards */ #define BOARD_TYPE_DB1200 1 /* IRQs aren't gpios */ @@ -83,7 +84,7 @@ static int db1200_card_inserted(struct db1x_pcmcia_sock *sock) /* carddetect gpio: low-active */ static int db1000_card_inserted(struct db1x_pcmcia_sock *sock) { - return !gpio_get_value(irq_to_gpio(sock->insert_irq)); + return !gpio_get_value(sock->insert_gpio); } static int db1x_card_inserted(struct db1x_pcmcia_sock *sock) @@ -457,9 +458,15 @@ static int db1x_pcmcia_socket_probe(struct platform_device *pdev) r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "card"); sock->card_irq = r ? r->start : 0; - /* insert: irq which triggers on card insertion/ejection */ + /* insert: irq which triggers on card insertion/ejection + * BIG FAT NOTE: on DB1000/1100/1500/1550 we pass a GPIO here! + */ r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "insert"); sock->insert_irq = r ? r->start : -1; + if (sock->board_type == BOARD_TYPE_DEFAULT) { + sock->insert_gpio = r ? r->start : -1; + sock->insert_irq = r ? gpio_to_irq(r->start) : -1; + } /* stschg: irq which trigger on card status change (optional) */ r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "stschg"); From 06e2d7dd90cbafd2d911f86785b69cec1bcd3b02 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Tue, 5 Apr 2016 11:13:39 +0200 Subject: [PATCH 405/797] rbd: use GFP_NOIO consistently for request allocations commit 2224d879c7c0f85c14183ef82eb48bd875ceb599 upstream. As of 5a60e87603c4c533492c515b7f62578189b03c9c, RBD object request allocations are made via rbd_obj_request_create() with GFP_NOIO. However, subsequent OSD request allocations in rbd_osd_req_create*() use GFP_ATOMIC. With heavy page cache usage (e.g. OSDs running on same host as krbd client), rbd_osd_req_create() order-1 GFP_ATOMIC allocations have been observed to fail, where direct reclaim would have allowed GFP_NOIO allocations to succeed. Suggested-by: Vlastimil Babka Suggested-by: Neil Brown Signed-off-by: David Disseldorp Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 81ea69fee7ca..fbdddd6f94b8 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1955,7 +1955,7 @@ static struct ceph_osd_request *rbd_osd_req_create( osdc = &rbd_dev->rbd_client->client->osdc; osd_req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, - GFP_ATOMIC); + GFP_NOIO); if (!osd_req) return NULL; /* ENOMEM */ @@ -2004,7 +2004,7 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) rbd_dev = img_request->rbd_dev; osdc = &rbd_dev->rbd_client->client->osdc; osd_req = ceph_osdc_alloc_request(osdc, snapc, num_osd_ops, - false, GFP_ATOMIC); + false, GFP_NOIO); if (!osd_req) return NULL; /* ENOMEM */ @@ -2506,7 +2506,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, bio_chain_clone_range(&bio_list, &bio_offset, clone_size, - GFP_ATOMIC); + GFP_NOIO); if (!obj_request->bio_list) goto out_unwind; } else if (type == OBJ_REQUEST_PAGES) { From 5f6e35c2f9464c8b67b54ea5a919105c5f46f514 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 3 Apr 2016 15:23:37 +0300 Subject: [PATCH 406/797] virtio: virtio 1.0 cs04 spec compliance for reset commit 05dbcb430795b2e1fb1d5c757f8619d3dbed0a1c upstream. The spec says: after writing 0 to device_status, the driver MUST wait for a read of device_status to return 0 before reinitializing the device. Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_pci_modern.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 8e5cf194cc0b..4469202eaa8e 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -17,6 +17,7 @@ * */ +#include #define VIRTIO_PCI_NO_LEGACY #include "virtio_pci_common.h" @@ -271,9 +272,13 @@ static void vp_reset(struct virtio_device *vdev) struct virtio_pci_device *vp_dev = to_vp_device(vdev); /* 0 status means a reset. */ vp_iowrite8(0, &vp_dev->common->device_status); - /* Flush out the status write, and flush in device writes, - * including MSI-X interrupts, if any. */ - vp_ioread8(&vp_dev->common->device_status); + /* After writing 0 to device_status, the driver MUST wait for a read of + * device_status to return 0 before reinitializing the device. + * This will flush out the status write, and flush in device writes, + * including MSI-X interrupts, if any. + */ + while (vp_ioread8(&vp_dev->common->device_status)) + msleep(1); /* Flush pending VQ/configuration callbacks. */ vp_synchronize_vectors(vdev); } From 7de4ebacd23756a6862e5691f326b807ccc4da91 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 31 Mar 2016 17:22:45 +0200 Subject: [PATCH 407/797] mac80211: properly deal with station hashtable insert errors commit 62b14b241ca6f790a17ccd9dd9f62ce1b006d406 upstream. The original hand-implemented hash-table in mac80211 couldn't result in insertion errors, and while converting to rhashtable I evidently forgot to check the errors. This surfaced now only because Ben is adding many identical keys and that resulted in hidden insertion errors. Fixes: 7bedd0cfad4e1 ("mac80211: use rhashtable for station table") Reported-by: Ben Greear Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/sta_info.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f91d1873218c..4e2dbe52fd9c 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -256,11 +256,11 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) } /* Caller must hold local->sta_mtx */ -static void sta_info_hash_add(struct ieee80211_local *local, - struct sta_info *sta) +static int sta_info_hash_add(struct ieee80211_local *local, + struct sta_info *sta) { - rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, - sta_rht_params); + return rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void sta_deliver_ps_frames(struct work_struct *wk) @@ -503,7 +503,9 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) set_sta_flag(sta, WLAN_STA_BLOCK_BA); /* make the station visible */ - sta_info_hash_add(local, sta); + err = sta_info_hash_add(local, sta); + if (err) + goto out_drop_sta; list_add_tail_rcu(&sta->list, &local->sta_list); @@ -538,6 +540,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) out_remove: sta_info_hash_del(local, sta); list_del_rcu(&sta->list); + out_drop_sta: local->num_sta--; synchronize_net(); __cleanup_single_sta(sta); From 9da98ac378b2aaccdaa02ba10c87123eb997ff22 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 26 Jan 2016 23:05:31 +0100 Subject: [PATCH 408/797] mac80211: avoid excessive stack usage in sta_info commit 0ef049dc1167fe834d0ad5d63f89eddc5c70f6e4 upstream. When CONFIG_OPTIMIZE_INLINING is set, the sta_info_insert_finish function consumes more stack than normally, exceeding the 1024 byte limit on ARM: net/mac80211/sta_info.c: In function 'sta_info_insert_finish': net/mac80211/sta_info.c:561:1: error: the frame size of 1080 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] It turns out that there are two functions that put a 'struct station_info' on the stack: __sta_info_destroy_part2 and sta_info_insert_finish, and this structure alone requires up to 792 bytes. Hoping that both are called rarely enough, this replaces the on-stack structure with a dynamic allocation, which unfortunately requires some suboptimal error handling for out-of-memory. The __sta_info_destroy_part2 function is actually affected by the stack usage twice because it calls cfg80211_del_sta_sinfo(), which has another instance of struct station_info on its stack. Signed-off-by: Arnd Bergmann Fixes: 98b6218388e3 ("mac80211/cfg80211: add station events") Fixes: 6f7a8d26e266 ("mac80211: send statistics with delete station event") Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/sta_info.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 4e2dbe52fd9c..67066d048e6f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -484,11 +484,17 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; - struct station_info sinfo; + struct station_info *sinfo; int err = 0; lockdep_assert_held(&local->sta_mtx); + sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL); + if (!sinfo) { + err = -ENOMEM; + goto out_err; + } + /* check if STA exists already */ if (sta_info_get_bss(sdata, sta->sta.addr)) { err = -EEXIST; @@ -522,10 +528,9 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) ieee80211_sta_debugfs_add(sta); rate_control_add_sta_debugfs(sta); - memset(&sinfo, 0, sizeof(sinfo)); - sinfo.filled = 0; - sinfo.generation = local->sta_generation; - cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); + sinfo->generation = local->sta_generation; + cfg80211_new_sta(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); + kfree(sinfo); sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr); @@ -885,7 +890,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; - struct station_info sinfo = {}; + struct station_info *sinfo; int ret; /* @@ -923,8 +928,11 @@ static void __sta_info_destroy_part2(struct sta_info *sta) sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); - sta_set_sinfo(sta, &sinfo); - cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); + sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL); + if (sinfo) + sta_set_sinfo(sta, sinfo); + cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); + kfree(sinfo); rate_control_remove_sta_debugfs(sta); ieee80211_sta_debugfs_remove(sta); From 2c0824ac94b5bce8d07c2aa806e311ddfdcd46fe Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 25 Jan 2016 15:46:35 +0200 Subject: [PATCH 409/797] mac80211: fix ibss scan parameters commit d321cd014e51baab475efbdec468255b9e0ec822 upstream. When joining IBSS a full scan should be initiated in order to search for existing cell, unless the fixed_channel parameter was set. A default channel to create the IBSS on if no cell was found is provided as well. However - a scan is initiated only on the default channel provided regardless of whether ifibss->fixed_channel is set or not, with the obvious result of the cell not joining existing IBSS cell that is on another channel. Fixes: 76bed0f43b27 ("mac80211: IBSS fix scan request") Signed-off-by: Sara Sharon Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/ibss.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 6a12b0f5cac8..980e9e9b6684 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -7,6 +7,7 @@ * Copyright 2007, Michael Wu * Copyright 2009, Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1484,14 +1485,21 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) sdata_info(sdata, "Trigger new scan to find an IBSS to join\n"); - num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy, - &ifibss->chandef, - channels, - ARRAY_SIZE(channels)); scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef); - ieee80211_request_ibss_scan(sdata, ifibss->ssid, - ifibss->ssid_len, channels, num, - scan_width); + + if (ifibss->fixed_channel) { + num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy, + &ifibss->chandef, + channels, + ARRAY_SIZE(channels)); + ieee80211_request_ibss_scan(sdata, ifibss->ssid, + ifibss->ssid_len, channels, + num, scan_width); + } else { + ieee80211_request_ibss_scan(sdata, ifibss->ssid, + ifibss->ssid_len, NULL, + 0, scan_width); + } } else { int interval = IEEE80211_SCAN_INTERVAL; From 542afcb4e070c96ddf87436b49034a0c89ccd95b Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Mon, 25 Jan 2016 14:43:24 +0100 Subject: [PATCH 410/797] mac80211: fix unnecessary frame drops in mesh fwding commit cf44012810ccdd8fd947518e965cb04b7b8498be upstream. The ieee80211_queue_stopped() expects hw queue number but it was given raw WMM AC number instead. This could cause frame drops and problems with traffic in some cases - most notably if driver doesn't map AC numbers to queue numbers 1:1 and uses ieee80211_stop_queues() and ieee80211_wake_queue() only without ever calling ieee80211_wake_queues(). On ath10k it was possible to hit this problem in the following case: 1. wlan0 uses queue 0 (ath10k maps queues per vif) 2. offchannel uses queue 15 3. queues 1-14 are unused 4. ieee80211_stop_queues() 5. ieee80211_wake_queue(q=0) 6. ieee80211_wake_queue(q=15) (other queues are not woken up because both driver and mac80211 know other queues are unused) 7. ieee80211_rx_h_mesh_fwding() 8. ieee80211_select_queue_80211() returns 2 9. ieee80211_queue_stopped(q=2) returns true 10. frame is dropped (oops!) Fixes: d3c1597b8d1b ("mac80211: fix forwarded mesh frame queue mapping") Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 4cbf36cae806..a3bb8f7f5fc5 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2250,7 +2250,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - u16 q, hdrlen; + u16 ac, q, hdrlen; hdr = (struct ieee80211_hdr *) skb->data; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -2319,7 +2319,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) ether_addr_equal(sdata->vif.addr, hdr->addr3)) return RX_CONTINUE; - q = ieee80211_select_queue_80211(sdata, skb, hdr); + ac = ieee80211_select_queue_80211(sdata, skb, hdr); + q = sdata->vif.hw_queue[ac]; if (ieee80211_queue_stopped(&local->hw, q)) { IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion); return RX_DROP_MONITOR; From d70ab4ad8769944e2e01e678d1c04318d15a5332 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Thu, 21 Jan 2016 14:23:07 +0100 Subject: [PATCH 411/797] mac80211: fix txq queue related crashes commit 2a58d42c1e018ad514d4e23fd33fb2ded95d3ee6 upstream. The driver can access the queue simultanously while mac80211 tears down the interface. Without spinlock protection this could lead to corrupting sk_buff_head and subsequently to an invalid pointer dereference. Fixes: ba8c3d6f16a1 ("mac80211: add an intermediate software queue implementation") Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/iface.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index c9e325d2e120..7a2b7915093b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -977,7 +977,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, if (sdata->vif.txq) { struct txq_info *txqi = to_txq_info(sdata->vif.txq); + spin_lock_bh(&txqi->queue.lock); ieee80211_purge_tx_queue(&local->hw, &txqi->queue); + spin_unlock_bh(&txqi->queue.lock); + atomic_set(&sdata->txqs_len[txqi->txq.ac], 0); } From f10d159204b6cdfba4d111f52a2668e960f8dd10 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 10 Mar 2016 11:30:14 +0900 Subject: [PATCH 412/797] usb: renesas_usbhs: avoid NULL pointer derefernce in usbhsf_pkt_handler() commit 894f2fc44f2f3f48c36c973b1123f6ab298be160 upstream. When unexpected situation happened (e.g. tx/rx irq happened while DMAC is used), the usbhsf_pkt_handler() was possible to cause NULL pointer dereference like the followings: Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = c0004000 [00000000] *pgd=00000000 Internal error: Oops: 80000007 [#1] SMP ARM Modules linked in: usb_f_acm u_serial g_serial libcomposite CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.5.0-rc6-00842-gac57066-dirty #63 Hardware name: Generic R8A7790 (Flattened Device Tree) task: c0729c00 ti: c0724000 task.ti: c0724000 PC is at 0x0 LR is at usbhsf_pkt_handler+0xac/0x118 pc : [<00000000>] lr : [] psr: 60000193 sp : c0725db8 ip : 00000000 fp : c0725df4 r10: 00000001 r9 : 00000193 r8 : ef3ccab4 r7 : ef3cca10 r6 : eea4586c r5 : 00000000 r4 : ef19ceb4 r3 : 00000000 r2 : 0000009c r1 : c0725dc4 r0 : ef19ceb4 This patch adds a condition to avoid the dereference. Fixes: e73a989 ("usb: renesas_usbhs: add DMAEngine support") Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/fifo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index c0f5c652d272..32df73820726 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -190,7 +190,8 @@ static int usbhsf_pkt_handler(struct usbhs_pipe *pipe, int type) goto __usbhs_pkt_handler_end; } - ret = func(pkt, &is_done); + if (likely(func)) + ret = func(pkt, &is_done); if (is_done) __usbhsf_pkt_del(pkt); From 46e081b3015dd0d97e443babf0a7ec2058637d83 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 10 Mar 2016 11:30:15 +0900 Subject: [PATCH 413/797] usb: renesas_usbhs: disable TX IRQ before starting TX DMAC transfer commit 6490865c67825277b29638e839850882600b48ec upstream. This patch adds a code to surely disable TX IRQ of the pipe before starting TX DMAC transfer. Otherwise, a lot of unnecessary TX IRQs may happen in rare cases when DMAC is used. Fixes: e73a989 ("usb: renesas_usbhs: add DMAEngine support") Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/fifo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 32df73820726..f1893e08e51a 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -890,6 +890,7 @@ static int usbhsf_dma_prepare_push(struct usbhs_pkt *pkt, int *is_done) pkt->trans = len; + usbhsf_tx_irq_ctrl(pipe, 0); INIT_WORK(&pkt->work, xfer_work); schedule_work(&pkt->work); From 277882f94e1d2e82285eb3dc91944634174a9b18 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 4 Apr 2016 20:40:20 +0900 Subject: [PATCH 414/797] usb: renesas_usbhs: fix to avoid using a disabled ep in usbhsg_queue_done() commit 4fccb0767fdbdb781a9c5b5c15ee7b219443c89d upstream. This patch fixes an issue that usbhsg_queue_done() may cause kernel panic when dma callback is running and usb_ep_disable() is called by interrupt handler. (Especially, we can reproduce this issue using g_audio with usb-dmac driver.) For example of a flow: usbhsf_dma_complete (on tasklet) --> usbhsf_pkt_handler (on tasklet) --> usbhsg_queue_done (on tasklet) *** interrupt happened and usb_ep_disable() is called *** --> usbhsg_queue_pop (on tasklet) Then, oops happened. Fixes: e73a989 ("usb: renesas_usbhs: add DMAEngine support") Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/mod_gadget.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index 8f7a78e70975..fa14198daf77 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -158,10 +158,14 @@ static void usbhsg_queue_done(struct usbhs_priv *priv, struct usbhs_pkt *pkt) struct usbhs_pipe *pipe = pkt->pipe; struct usbhsg_uep *uep = usbhsg_pipe_to_uep(pipe); struct usbhsg_request *ureq = usbhsg_pkt_to_ureq(pkt); + unsigned long flags; ureq->req.actual = pkt->actual; - usbhsg_queue_pop(uep, ureq, 0); + usbhs_lock(priv, flags); + if (uep) + __usbhsg_queue_pop(uep, ureq, 0); + usbhs_unlock(priv, flags); } static void usbhsg_queue_push(struct usbhsg_uep *uep, From 01e647a4aec55a156efccdf542579830fd1516eb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 29 Mar 2016 22:27:27 +0200 Subject: [PATCH 415/797] iio: st_magn: always define ST_MAGN_TRIGGER_SET_STATE commit 9b090a98e95c2530ef0ce474e3b6218621b8ae25 upstream. When CONFIG_IIO_TRIGGER is enabled but CONFIG_IIO_BUFFER is not, we get a build error in the st_magn driver: drivers/iio/magnetometer/st_magn_core.c:573:23: error: 'ST_MAGN_TRIGGER_SET_STATE' undeclared here (not in a function) .set_trigger_state = ST_MAGN_TRIGGER_SET_STATE, ^~~~~~~~~~~~~~~~~~~~~~~~~ Apparently, this ST_MAGN_TRIGGER_SET_STATE macro was meant to be set to NULL when the definition is not available because st_magn_buffer.c is not compiled, but the alternative definition was not included in the original patch. This adds it. Signed-off-by: Arnd Bergmann Fixes: 74f5683f35fe ("iio: st_magn: Add irq trigger handling") Acked-by: Denis Ciocca Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/magnetometer/st_magn.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/magnetometer/st_magn.h b/drivers/iio/magnetometer/st_magn.h index 06a4d9c35581..9daca4681922 100644 --- a/drivers/iio/magnetometer/st_magn.h +++ b/drivers/iio/magnetometer/st_magn.h @@ -44,6 +44,7 @@ static inline int st_magn_allocate_ring(struct iio_dev *indio_dev) static inline void st_magn_deallocate_ring(struct iio_dev *indio_dev) { } +#define ST_MAGN_TRIGGER_SET_STATE NULL #endif /* CONFIG_IIO_BUFFER */ #endif /* ST_MAGN_H */ From 82bd14ade909c156f297ad34f0e6935931c6149f Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Tue, 29 Mar 2016 15:35:45 +0300 Subject: [PATCH 416/797] iio: accel: bmc150: fix endianness when reading axes commit 2215f31dc6f88634c1916362e922b1ecdce0a6b3 upstream. For big endian platforms, reading the axes will return invalid values. The device stores each axis value in a 16 bit little endian register. The driver uses regmap_read_bulk to get the axis value, resulting in a 16 bit little endian value. This needs to be converted to cpu endianness to work on big endian platforms. Fix endianness for big endian platforms by converting the values for the axes read from little endian to cpu. This is also partially fixed in commit b6fb9b6d6552 ("iio: accel: bmc150: optimize transfers in trigger handler"). Signed-off-by: Irina Tirdea Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/bmc150-accel-core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index 2d33f1e821db..291c61a41c9a 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -547,7 +547,7 @@ static int bmc150_accel_get_axis(struct bmc150_accel_data *data, { int ret; int axis = chan->scan_index; - unsigned int raw_val; + __le16 raw_val; mutex_lock(&data->mutex); ret = bmc150_accel_set_power_state(data, true); @@ -557,14 +557,14 @@ static int bmc150_accel_get_axis(struct bmc150_accel_data *data, } ret = regmap_bulk_read(data->regmap, BMC150_ACCEL_AXIS_TO_REG(axis), - &raw_val, 2); + &raw_val, sizeof(raw_val)); if (ret < 0) { dev_err(data->dev, "Error reading axis %d\n", axis); bmc150_accel_set_power_state(data, false); mutex_unlock(&data->mutex); return ret; } - *val = sign_extend32(raw_val >> chan->scan_type.shift, + *val = sign_extend32(le16_to_cpu(raw_val) >> chan->scan_type.shift, chan->scan_type.realbits - 1); ret = bmc150_accel_set_power_state(data, false); mutex_unlock(&data->mutex); @@ -988,6 +988,7 @@ static const struct iio_event_spec bmc150_accel_event = { .realbits = (bits), \ .storagebits = 16, \ .shift = 16 - (bits), \ + .endianness = IIO_LE, \ }, \ .event_spec = &bmc150_accel_event, \ .num_event_specs = 1 \ From 5a103e7d6bf05ff4d053ee73a001d255bbc8951a Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Mon, 28 Mar 2016 20:15:46 +0300 Subject: [PATCH 417/797] iio: gyro: bmg160: fix buffer read values commit b475c59b113db1e66eb9527ffdec3c5241c847e5 upstream. When reading gyroscope axes using iio buffers, the values returned are always 0. In the interrupt handler, the return value of the read operation is returned to the user instead of the value read. Return the value read to the user. This is also fixed in commit 82d8e5da1a33 ("iio: accel: bmg160: optimize transfers in trigger handler"). Signed-off-by: Irina Tirdea Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/gyro/bmg160_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c index 02ff789852a0..0d3edf97ec3a 100644 --- a/drivers/iio/gyro/bmg160_core.c +++ b/drivers/iio/gyro/bmg160_core.c @@ -780,7 +780,7 @@ static irqreturn_t bmg160_trigger_handler(int irq, void *p) mutex_unlock(&data->mutex); goto err; } - data->buffer[i++] = ret; + data->buffer[i++] = val; } mutex_unlock(&data->mutex); From 51789682df3111fa675c1bc0a8498e142fb97bdf Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Tue, 29 Mar 2016 15:37:30 +0300 Subject: [PATCH 418/797] iio: gyro: bmg160: fix endianness when reading axes commit 95e7ff034175db7d8aefabe7716c4d42bea24fde upstream. For big endian platforms, reading the axes will return invalid values. The device stores each axis value in a 16 bit little endian register. The driver uses regmap_read_bulk to get the axis value, resulting in a 16 bit little endian value. This needs to be converted to cpu endianness to work on big endian platforms. Fix endianness for big endian platforms by converting the values for the axes read from little endian to cpu. This is also partially fixed in commit 82d8e5da1a33 ("iio: accel: bmg160: optimize transfers in trigger handler"). Signed-off-by: Irina Tirdea Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/gyro/bmg160_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c index 0d3edf97ec3a..acb3b303d800 100644 --- a/drivers/iio/gyro/bmg160_core.c +++ b/drivers/iio/gyro/bmg160_core.c @@ -452,7 +452,7 @@ static int bmg160_get_temp(struct bmg160_data *data, int *val) static int bmg160_get_axis(struct bmg160_data *data, int axis, int *val) { int ret; - unsigned int raw_val; + __le16 raw_val; mutex_lock(&data->mutex); ret = bmg160_set_power_state(data, true); @@ -462,7 +462,7 @@ static int bmg160_get_axis(struct bmg160_data *data, int axis, int *val) } ret = regmap_bulk_read(data->regmap, BMG160_AXIS_TO_REG(axis), &raw_val, - 2); + sizeof(raw_val)); if (ret < 0) { dev_err(data->dev, "Error reading axis %d\n", axis); bmg160_set_power_state(data, false); @@ -470,7 +470,7 @@ static int bmg160_get_axis(struct bmg160_data *data, int axis, int *val) return ret; } - *val = sign_extend32(raw_val, 15); + *val = sign_extend32(le16_to_cpu(raw_val), 15); ret = bmg160_set_power_state(data, false); mutex_unlock(&data->mutex); if (ret < 0) @@ -733,6 +733,7 @@ static const struct iio_event_spec bmg160_event = { .sign = 's', \ .realbits = 16, \ .storagebits = 16, \ + .endianness = IIO_LE, \ }, \ .event_spec = &bmg160_event, \ .num_event_specs = 1 \ From a7ac655b62b8855116049726347a11d054efd01b Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 28 Mar 2016 21:18:56 -0400 Subject: [PATCH 419/797] sd: Fix excessive capacity printing on devices with blocks bigger than 512 bytes commit f08bb1e0dbdd0297258d0b8cd4dbfcc057e57b2a upstream. During revalidate we check whether device capacity has changed before we decide whether to output disk information or not. The check for old capacity failed to take into account that we scaled sdkp->capacity based on the reported logical block size. And therefore the capacity test would always fail for devices with sectors bigger than 512 bytes and we would print several copies of the same discovery information. Avoid scaling sdkp->capacity and instead adjust the value on the fly when setting the block device capacity and generating fake C/H/S geometry. Signed-off-by: Martin K. Petersen Reported-by: Hannes Reinecke Reviewed-by: Hannes Reinicke Reviewed-by: Ewan Milne Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 28 ++++++++-------------------- drivers/scsi/sd.h | 7 ++++++- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cc84ea7d09cc..0d7c6e86f149 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1275,18 +1275,19 @@ static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo) struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdp = sdkp->device; struct Scsi_Host *host = sdp->host; + sector_t capacity = logical_to_sectors(sdp, sdkp->capacity); int diskinfo[4]; /* default to most commonly used values */ - diskinfo[0] = 0x40; /* 1 << 6 */ - diskinfo[1] = 0x20; /* 1 << 5 */ - diskinfo[2] = sdkp->capacity >> 11; - + diskinfo[0] = 0x40; /* 1 << 6 */ + diskinfo[1] = 0x20; /* 1 << 5 */ + diskinfo[2] = capacity >> 11; + /* override with calculated, extended default, or driver values */ if (host->hostt->bios_param) - host->hostt->bios_param(sdp, bdev, sdkp->capacity, diskinfo); + host->hostt->bios_param(sdp, bdev, capacity, diskinfo); else - scsicam_bios_param(bdev, sdkp->capacity, diskinfo); + scsicam_bios_param(bdev, capacity, diskinfo); geo->heads = diskinfo[0]; geo->sectors = diskinfo[1]; @@ -2337,14 +2338,6 @@ sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer) if (sdkp->capacity > 0xffffffff) sdp->use_16_for_rw = 1; - /* Rescale capacity to 512-byte units */ - if (sector_size == 4096) - sdkp->capacity <<= 3; - else if (sector_size == 2048) - sdkp->capacity <<= 2; - else if (sector_size == 1024) - sdkp->capacity <<= 1; - blk_queue_physical_block_size(sdp->request_queue, sdkp->physical_block_size); sdkp->device->sector_size = sector_size; @@ -2812,11 +2805,6 @@ static int sd_try_extended_inquiry(struct scsi_device *sdp) return 0; } -static inline u32 logical_to_sectors(struct scsi_device *sdev, u32 blocks) -{ - return blocks << (ilog2(sdev->sector_size) - 9); -} - /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -2900,7 +2888,7 @@ static int sd_revalidate_disk(struct gendisk *disk) /* Combine with controller limits */ q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q)); - set_capacity(disk, sdkp->capacity); + set_capacity(disk, logical_to_sectors(sdp, sdkp->capacity)); sd_config_write_same(sdkp); kfree(buffer); diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 5f2a84aff29f..654630bb7d0e 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -65,7 +65,7 @@ struct scsi_disk { struct device dev; struct gendisk *disk; atomic_t openers; - sector_t capacity; /* size in 512-byte sectors */ + sector_t capacity; /* size in logical blocks */ u32 max_xfer_blocks; u32 opt_xfer_blocks; u32 max_ws_blocks; @@ -146,6 +146,11 @@ static inline int scsi_medium_access_command(struct scsi_cmnd *scmd) return 0; } +static inline sector_t logical_to_sectors(struct scsi_device *sdev, sector_t blocks) +{ + return blocks << (ilog2(sdev->sector_size) - 9); +} + /* * A DIF-capable target device can be formatted with different * protection schemes. Currently 0 through 3 are defined: From c452dfc33274832a0f23b80ff2829b6fae9dd95d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sat, 26 Mar 2016 16:14:37 -0400 Subject: [PATCH 420/797] fs: add file_dentry() commit d101a125954eae1d397adda94ca6319485a50493 upstream. This series fixes bugs in nfs and ext4 due to 4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and f_inode to the underlay"). Regular files opened on overlayfs will result in the file being opened on the underlying filesystem, while f_path points to the overlayfs mount/dentry. This confuses filesystems which get the dentry from struct file and assume it's theirs. Add a new helper, file_dentry() [*], to get the filesystem's own dentry from the file. This checks file->f_path.dentry->d_flags against DCACHE_OP_REAL, and returns file->f_path.dentry if DCACHE_OP_REAL is not set (this is the common, non-overlayfs case). In the uncommon case it will call into overlayfs's ->d_real() to get the underlying dentry, matching file_inode(file). The reason we need to check against the inode is that if the file is copied up while being open, d_real() would return the upper dentry, while the open file comes from the lower dentry. [*] If possible, it's better simply to use file_inode() instead. Signed-off-by: Miklos Szeredi Signed-off-by: Theodore Ts'o Tested-by: Goldwyn Rodrigues Reviewed-by: Trond Myklebust Cc: David Howells Cc: Al Viro Cc: Daniel Axtens Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 5 ++++- fs/overlayfs/super.c | 33 +++++++++++++++++++++++++++++++++ include/linux/dcache.h | 10 ++++++++++ include/linux/fs.h | 10 ++++++++++ 4 files changed, 57 insertions(+), 1 deletion(-) diff --git a/fs/dcache.c b/fs/dcache.c index 877bcbbd03ff..18effa378f97 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1666,7 +1666,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | DCACHE_OP_DELETE | - DCACHE_OP_SELECT_INODE)); + DCACHE_OP_SELECT_INODE | + DCACHE_OP_REAL)); dentry->d_op = op; if (!op) return; @@ -1684,6 +1685,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) dentry->d_flags |= DCACHE_OP_PRUNE; if (op->d_select_inode) dentry->d_flags |= DCACHE_OP_SELECT_INODE; + if (op->d_real) + dentry->d_flags |= DCACHE_OP_REAL; } EXPORT_SYMBOL(d_set_d_op); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 000b2ed05c29..a1acc6004a91 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -276,6 +276,37 @@ static void ovl_dentry_release(struct dentry *dentry) } } +static struct dentry *ovl_d_real(struct dentry *dentry, struct inode *inode) +{ + struct dentry *real; + + if (d_is_dir(dentry)) { + if (!inode || inode == d_inode(dentry)) + return dentry; + goto bug; + } + + real = ovl_dentry_upper(dentry); + if (real && (!inode || inode == d_inode(real))) + return real; + + real = ovl_dentry_lower(dentry); + if (!real) + goto bug; + + if (!inode || inode == d_inode(real)) + return real; + + /* Handle recursion */ + if (real->d_flags & DCACHE_OP_REAL) + return real->d_op->d_real(real, inode); + +bug: + WARN(1, "ovl_d_real(%pd4, %s:%lu\n): real dentry not found\n", dentry, + inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); + return dentry; +} + static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) { struct ovl_entry *oe = dentry->d_fsdata; @@ -320,11 +351,13 @@ static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) static const struct dentry_operations ovl_dentry_operations = { .d_release = ovl_dentry_release, .d_select_inode = ovl_d_select_inode, + .d_real = ovl_d_real, }; static const struct dentry_operations ovl_reval_dentry_operations = { .d_release = ovl_dentry_release, .d_select_inode = ovl_d_select_inode, + .d_real = ovl_d_real, .d_revalidate = ovl_dentry_revalidate, .d_weak_revalidate = ovl_dentry_weak_revalidate, }; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 8a2e009c8a5a..f513dd855cb2 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -161,6 +161,7 @@ struct dentry_operations { struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool); struct inode *(*d_select_inode)(struct dentry *, unsigned); + struct dentry *(*d_real)(struct dentry *, struct inode *); } ____cacheline_aligned; /* @@ -227,6 +228,7 @@ struct dentry_operations { #define DCACHE_MAY_FREE 0x00800000 #define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */ #define DCACHE_OP_SELECT_INODE 0x02000000 /* Unioned entry: dcache op selects inode */ +#define DCACHE_OP_REAL 0x08000000 extern seqlock_t rename_lock; @@ -582,4 +584,12 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper) return upper; } +static inline struct dentry *d_real(struct dentry *dentry) +{ + if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) + return dentry->d_op->d_real(dentry, NULL); + else + return dentry; +} + #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 22c5a0cf16e3..ab3d8d9bb3ef 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1207,6 +1207,16 @@ static inline struct inode *file_inode(const struct file *f) return f->f_inode; } +static inline struct dentry *file_dentry(const struct file *file) +{ + struct dentry *dentry = file->f_path.dentry; + + if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) + return dentry->d_op->d_real(dentry, file_inode(file)); + else + return dentry; +} + static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) { return locks_lock_inode_wait(file_inode(filp), fl); From fda9797a6aaad1a8044614fbbdb265dda4328c41 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sat, 26 Mar 2016 16:14:39 -0400 Subject: [PATCH 421/797] nfs: use file_dentry() commit be62a1a8fd116f5cd9e53726601f970e16e17558 upstream. NFS may be used as lower layer of overlayfs and accessing f_path.dentry can lead to a crash. Fix by replacing direct access of file->f_path.dentry with the file_dentry() accessor, which will always return a native object. Fixes: 4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and f_inode to the underlay") Signed-off-by: Miklos Szeredi Tested-by: Goldwyn Rodrigues Acked-by: Trond Myklebust Signed-off-by: Theodore Ts'o Cc: David Howells Cc: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/nfs/dir.c | 6 +++--- fs/nfs/inode.c | 2 +- fs/nfs/nfs4file.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ce5a21861074..5fc2162afb67 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -377,7 +377,7 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, again: timestamp = jiffies; gencount = nfs_inc_attr_generation_counter(); - error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, entry->cookie, pages, + error = NFS_PROTO(inode)->readdir(file_dentry(file), cred, entry->cookie, pages, NFS_SERVER(inode)->dtsize, desc->plus); if (error < 0) { /* We requested READDIRPLUS, but the server doesn't grok it */ @@ -560,7 +560,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en count++; if (desc->plus != 0) - nfs_prime_dcache(desc->file->f_path.dentry, entry); + nfs_prime_dcache(file_dentry(desc->file), entry); status = nfs_readdir_add_to_array(entry, page); if (status != 0) @@ -864,7 +864,7 @@ static bool nfs_dir_mapping_need_revalidate(struct inode *dir) */ static int nfs_readdir(struct file *file, struct dir_context *ctx) { - struct dentry *dentry = file->f_path.dentry; + struct dentry *dentry = file_dentry(file); struct inode *inode = d_inode(dentry); nfs_readdir_descriptor_t my_desc, *desc = &my_desc; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3e2071a177fd..f714b98cfd74 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -927,7 +927,7 @@ int nfs_open(struct inode *inode, struct file *filp) { struct nfs_open_context *ctx; - ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode); + ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode); if (IS_ERR(ctx)) return PTR_ERR(ctx); nfs_file_set_open_context(filp, ctx); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index db9b5fea5b3e..679e003818b1 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -26,7 +26,7 @@ static int nfs4_file_open(struct inode *inode, struct file *filp) { struct nfs_open_context *ctx; - struct dentry *dentry = filp->f_path.dentry; + struct dentry *dentry = file_dentry(filp); struct dentry *parent = NULL; struct inode *dir; unsigned openflags = filp->f_flags; @@ -57,7 +57,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) parent = dget_parent(dentry); dir = d_inode(parent); - ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode); + ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode); err = PTR_ERR(ctx); if (IS_ERR(ctx)) goto out; From df041ded89c13d75f1408b768980fed31eef8dea Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 30 Mar 2016 19:03:13 -0400 Subject: [PATCH 422/797] btrfs: fix crash/invalid memory access on fsync when using overlayfs commit de17e793b104d690e1d007dfc5cb6b4f649598ca upstream. If the lower or upper directory of an overlayfs mount belong to a btrfs file system and we fsync the file through the overlayfs' merged directory we ended up accessing an inode that didn't belong to btrfs as if it were a btrfs inode at btrfs_sync_file() resulting in a crash like the following: [ 7782.588845] BUG: unable to handle kernel NULL pointer dereference at 0000000000000544 [ 7782.590624] IP: [] btrfs_sync_file+0x11b/0x3e9 [btrfs] [ 7782.591931] PGD 4d954067 PUD 1e878067 PMD 0 [ 7782.592016] Oops: 0002 [#6] PREEMPT SMP DEBUG_PAGEALLOC [ 7782.592016] Modules linked in: btrfs overlay ppdev crc32c_generic evdev xor raid6_pq psmouse pcspkr sg serio_raw acpi_cpufreq parport_pc parport tpm_tis i2c_piix4 tpm i2c_core processor button loop autofs4 ext4 crc16 mbcache jbd2 sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix virtio_pci libata virtio_ring virtio scsi_mod e1000 floppy [last unloaded: btrfs] [ 7782.592016] CPU: 10 PID: 16437 Comm: xfs_io Tainted: G D 4.5.0-rc6-btrfs-next-26+ #1 [ 7782.592016] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014 [ 7782.592016] task: ffff88001b8d40c0 ti: ffff880137488000 task.ti: ffff880137488000 [ 7782.592016] RIP: 0010:[] [] btrfs_sync_file+0x11b/0x3e9 [btrfs] [ 7782.592016] RSP: 0018:ffff88013748be40 EFLAGS: 00010286 [ 7782.592016] RAX: 0000000080000000 RBX: ffff880133b30c88 RCX: 0000000000000001 [ 7782.592016] RDX: 0000000000000001 RSI: ffffffff8148fec0 RDI: 00000000ffffffff [ 7782.592016] RBP: ffff88013748bec0 R08: 0000000000000001 R09: 0000000000000000 [ 7782.624248] R10: ffff88013748be40 R11: 0000000000000246 R12: 0000000000000000 [ 7782.624248] R13: 0000000000000000 R14: 00000000009305a0 R15: ffff880015e3be40 [ 7782.624248] FS: 00007fa83b9cb700(0000) GS:ffff88023ed40000(0000) knlGS:0000000000000000 [ 7782.624248] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 7782.624248] CR2: 0000000000000544 CR3: 00000001fa652000 CR4: 00000000000006e0 [ 7782.624248] Stack: [ 7782.624248] ffffffff8108b5cc ffff88013748bec0 0000000000000246 ffff8800b005ded0 [ 7782.624248] ffff880133b30d60 8000000000000000 7fffffffffffffff 0000000000000246 [ 7782.624248] 0000000000000246 ffffffff81074f9b ffffffff8104357c ffff880015e3be40 [ 7782.624248] Call Trace: [ 7782.624248] [] ? arch_local_irq_save+0x9/0xc [ 7782.624248] [] ? ___might_sleep+0xce/0x217 [ 7782.624248] [] ? __do_page_fault+0x3c0/0x43a [ 7782.624248] [] vfs_fsync_range+0x8c/0x9e [ 7782.624248] [] vfs_fsync+0x1c/0x1e [ 7782.624248] [] do_fsync+0x31/0x4a [ 7782.624248] [] SyS_fsync+0x10/0x14 [ 7782.624248] [] entry_SYSCALL_64_fastpath+0x12/0x6b [ 7782.624248] Code: 85 c0 0f 85 e2 02 00 00 48 8b 45 b0 31 f6 4c 29 e8 48 ff c0 48 89 45 a8 48 8d 83 d8 00 00 00 48 89 c7 48 89 45 a0 e8 fc 43 18 e1 41 ff 84 24 44 05 00 00 48 8b 83 58 ff ff ff 48 c1 e8 07 83 [ 7782.624248] RIP [] btrfs_sync_file+0x11b/0x3e9 [btrfs] [ 7782.624248] RSP [ 7782.624248] CR2: 0000000000000544 [ 7782.661994] ---[ end trace 721e14960eb939bc ]--- This started happening since commit 4bacc9c9234 (overlayfs: Make f_path always point to the overlay and f_inode to the underlay) and even though after this change we could still access the btrfs inode through struct file->f_mapping->host or struct file->f_inode, we would end up resulting in more similar issues later on at check_parent_dirs_for_sync() because the dentry we got (from struct file->f_path.dentry) was from overlayfs and not from btrfs, that is, we had no way of getting the dentry that belonged to btrfs (we always got the dentry that belonged to overlayfs). The new patch from Miklos Szeredi, titled "vfs: add file_dentry()" and recently submitted to linux-fsdevel, adds a file_dentry() API that allows us to get the btrfs dentry from the input file and therefore being able to fsync when the upper and lower directories belong to btrfs filesystems. This issue has been reported several times by users in the mailing list and bugzilla. A test case for xfstests is being submitted as well. Fixes: 4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and f_inode to the underlay") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=101951 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=109791 Signed-off-by: Filipe Manana Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0f09526aa7d9..5e5db3687e34 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1885,7 +1885,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end) */ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { - struct dentry *dentry = file->f_path.dentry; + struct dentry *dentry = file_dentry(file); struct inode *inode = d_inode(dentry); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; From 321299a96e20cbc6aac615a4daae95f42235b467 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 1 Apr 2016 01:31:28 -0400 Subject: [PATCH 423/797] ext4: add lockdep annotations for i_data_sem commit daf647d2dd58cec59570d7698a45b98e580f2076 upstream. With the internal Quota feature, mke2fs creates empty quota inodes and quota usage tracking is enabled as soon as the file system is mounted. Since quotacheck is no longer preallocating all of the blocks in the quota inode that are likely needed to be written to, we are now seeing a lockdep false positive caused by needing to allocate a quota block from inside ext4_map_blocks(), while holding i_data_sem for a data inode. This results in this complaint: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&ei->i_data_sem); lock(&s->s_dquot.dqio_mutex); lock(&ei->i_data_sem); lock(&s->s_dquot.dqio_mutex); Google-Bug-Id: 27907753 Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 23 +++++++++++++++++++++++ fs/ext4/move_extent.c | 11 +++++++++-- fs/ext4/super.c | 25 +++++++++++++++++++++++-- 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index cc7ca4e87144..d4156e1c128d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -849,6 +849,29 @@ do { \ #include "extents_status.h" +/* + * Lock subclasses for i_data_sem in the ext4_inode_info structure. + * + * These are needed to avoid lockdep false positives when we need to + * allocate blocks to the quota inode during ext4_map_blocks(), while + * holding i_data_sem for a normal (non-quota) inode. Since we don't + * do quota tracking for the quota inode, this avoids deadlock (as + * well as infinite recursion, since it isn't turtles all the way + * down...) + * + * I_DATA_SEM_NORMAL - Used for most inodes + * I_DATA_SEM_OTHER - Used by move_inode.c for the second normal inode + * where the second inode has larger inode number + * than the first + * I_DATA_SEM_QUOTA - Used for quota inodes only + */ +enum { + I_DATA_SEM_NORMAL = 0, + I_DATA_SEM_OTHER, + I_DATA_SEM_QUOTA, +}; + + /* * fourth extended file system inode data in memory */ diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index e032a0423e35..9bdbf98240a0 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -60,10 +60,10 @@ ext4_double_down_write_data_sem(struct inode *first, struct inode *second) { if (first < second) { down_write(&EXT4_I(first)->i_data_sem); - down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); + down_write_nested(&EXT4_I(second)->i_data_sem, I_DATA_SEM_OTHER); } else { down_write(&EXT4_I(second)->i_data_sem); - down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING); + down_write_nested(&EXT4_I(first)->i_data_sem, I_DATA_SEM_OTHER); } } @@ -483,6 +483,13 @@ mext_check_arguments(struct inode *orig_inode, return -EBUSY; } + if (IS_NOQUOTA(orig_inode) || IS_NOQUOTA(donor_inode)) { + ext4_debug("ext4 move extent: The argument files should " + "not be quota files [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EBUSY; + } + /* Ext4 move extent supports only extent based file */ if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { ext4_debug("ext4 move extent: orig file is not extents " diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c9ab67da6e5a..b54cb0025646 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4936,6 +4936,20 @@ static int ext4_quota_on_mount(struct super_block *sb, int type) EXT4_SB(sb)->s_jquota_fmt, type); } +static void lockdep_set_quota_inode(struct inode *inode, int subclass) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + + /* The first argument of lockdep_set_subclass has to be + * *exactly* the same as the argument to init_rwsem() --- in + * this case, in init_once() --- or lockdep gets unhappy + * because the name of the lock is set using the + * stringification of the argument to init_rwsem(). + */ + (void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */ + lockdep_set_subclass(&ei->i_data_sem, subclass); +} + /* * Standard function to be called on quota_on */ @@ -4975,8 +4989,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, if (err) return err; } - - return dquot_quota_on(sb, type, format_id, path); + lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA); + err = dquot_quota_on(sb, type, format_id, path); + if (err) + lockdep_set_quota_inode(path->dentry->d_inode, + I_DATA_SEM_NORMAL); + return err; } static int ext4_quota_enable(struct super_block *sb, int type, int format_id, @@ -5002,8 +5020,11 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, /* Don't account quota for quota files to avoid recursion */ qf_inode->i_flags |= S_NOQUOTA; + lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA); err = dquot_enable(qf_inode, type, format_id, flags); iput(qf_inode); + if (err) + lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL); return err; } From ee8516a130918aa1421d426ec978985240a672ed Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 3 Apr 2016 17:03:37 -0400 Subject: [PATCH 424/797] ext4: ignore quota mount options if the quota feature is enabled commit c325a67c72903e1cc30e990a15ce745bda0dbfde upstream. Previously, ext4 would fail the mount if the file system had the quota feature enabled and quota mount options (used for the older quota setups) were present. This broke xfstests, since xfs silently ignores the usrquote and grpquota mount options if they are specified. This commit changes things so that we are consistent with xfs; having the mount options specified is harmless, so no sense break users by forbidding them. Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b54cb0025646..ba1cf0bf2f81 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1292,9 +1292,9 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) return -1; } if (ext4_has_feature_quota(sb)) { - ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options " - "when QUOTA feature is enabled"); - return -1; + ext4_msg(sb, KERN_INFO, "Journaled quota options " + "ignored when QUOTA feature is enabled"); + return 1; } qname = match_strdup(args); if (!qname) { @@ -1657,10 +1657,10 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, return -1; } if (ext4_has_feature_quota(sb)) { - ext4_msg(sb, KERN_ERR, - "Cannot set journaled quota options " + ext4_msg(sb, KERN_INFO, + "Quota format mount options ignored " "when QUOTA feature is enabled"); - return -1; + return 1; } sbi->s_jquota_fmt = m->mount_opt; #endif @@ -1721,11 +1721,11 @@ static int parse_options(char *options, struct super_block *sb, #ifdef CONFIG_QUOTA if (ext4_has_feature_quota(sb) && (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) { - ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA " - "feature is enabled"); - return 0; - } - if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { + ext4_msg(sb, KERN_INFO, "Quota feature enabled, usrquota and grpquota " + "mount options ignored."); + clear_opt(sb, USRQUOTA); + clear_opt(sb, GRPQUOTA); + } else if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) clear_opt(sb, USRQUOTA); From ab6c5069d1b14ac316b8c9f6aeda986dfe87fe47 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2016 15:47:48 +0200 Subject: [PATCH 425/797] iommu: Don't overwrite domain pointer when there is no default_domain commit eebb8034a5be8c2177cbf07ca2ecd2ff8a058958 upstream. IOMMU drivers that do not support default domains, but make use of the the group->domain pointer can get that pointer overwritten with NULL on device add/remove. Make sure this can't happen by only overwriting the domain pointer when it is NULL. Fixes: 1228236de5f9 ('iommu: Move default domain allocation to iommu_group_get_for_dev()') Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 0e3b0092ec92..515bb8b80952 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -848,7 +848,8 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (!group->default_domain) { group->default_domain = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_DMA); - group->domain = group->default_domain; + if (!group->domain) + group->domain = group->default_domain; } ret = iommu_group_add_device(group, dev); From 1653a3b0e9436c10eb307c318776cf91fe18ff08 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 30 Mar 2016 23:37:21 +0100 Subject: [PATCH 426/797] Btrfs: fix file/data loss caused by fsync after rename and new inode commit 56f23fdbb600e6087db7b009775b95ce07cc3195 upstream. If we rename an inode A (be it a file or a directory), create a new inode B with the old name of inode A and under the same parent directory, fsync inode B and then power fail, at log tree replay time we end up removing inode A completely. If inode A is a directory then all its files are gone too. Example scenarios where this happens: This is reproducible with the following steps, taken from a couple of test cases written for fstests which are going to be submitted upstream soon: # Scenario 1 mkfs.btrfs -f /dev/sdc mount /dev/sdc /mnt mkdir -p /mnt/a/x echo "hello" > /mnt/a/x/foo echo "world" > /mnt/a/x/bar sync mv /mnt/a/x /mnt/a/y mkdir /mnt/a/x xfs_io -c fsync /mnt/a/x The next time the fs is mounted, log tree replay happens and the directory "y" does not exist nor do the files "foo" and "bar" exist anywhere (neither in "y" nor in "x", nor the root nor anywhere). # Scenario 2 mkfs.btrfs -f /dev/sdc mount /dev/sdc /mnt mkdir /mnt/a echo "hello" > /mnt/a/foo sync mv /mnt/a/foo /mnt/a/bar echo "world" > /mnt/a/foo xfs_io -c fsync /mnt/a/foo The next time the fs is mounted, log tree replay happens and the file "bar" does not exists anymore. A file with the name "foo" exists and it matches the second file we created. Another related problem that does not involve file/data loss is when a new inode is created with the name of a deleted snapshot and we fsync it: mkfs.btrfs -f /dev/sdc mount /dev/sdc /mnt mkdir /mnt/testdir btrfs subvolume snapshot /mnt /mnt/testdir/snap btrfs subvolume delete /mnt/testdir/snap rmdir /mnt/testdir mkdir /mnt/testdir xfs_io -c fsync /mnt/testdir # or fsync some file inside /mnt/testdir The next time the fs is mounted the log replay procedure fails because it attempts to delete the snapshot entry (which has dir item key type of BTRFS_ROOT_ITEM_KEY) as if it were a regular (non-root) entry, resulting in the following error that causes mount to fail: [52174.510532] BTRFS info (device dm-0): failed to delete reference to snap, inode 257 parent 257 [52174.512570] ------------[ cut here ]------------ [52174.513278] WARNING: CPU: 12 PID: 28024 at fs/btrfs/inode.c:3986 __btrfs_unlink_inode+0x178/0x351 [btrfs]() [52174.514681] BTRFS: Transaction aborted (error -2) [52174.515630] Modules linked in: btrfs dm_flakey dm_mod overlay crc32c_generic ppdev xor raid6_pq acpi_cpufreq parport_pc tpm_tis sg parport tpm evdev i2c_piix4 proc [52174.521568] CPU: 12 PID: 28024 Comm: mount Tainted: G W 4.5.0-rc6-btrfs-next-27+ #1 [52174.522805] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014 [52174.524053] 0000000000000000 ffff8801df2a7710 ffffffff81264e93 ffff8801df2a7758 [52174.524053] 0000000000000009 ffff8801df2a7748 ffffffff81051618 ffffffffa03591cd [52174.524053] 00000000fffffffe ffff88015e6e5000 ffff88016dbc3c88 ffff88016dbc3c88 [52174.524053] Call Trace: [52174.524053] [] dump_stack+0x67/0x90 [52174.524053] [] warn_slowpath_common+0x99/0xb2 [52174.524053] [] ? __btrfs_unlink_inode+0x178/0x351 [btrfs] [52174.524053] [] warn_slowpath_fmt+0x48/0x50 [52174.524053] [] __btrfs_unlink_inode+0x178/0x351 [btrfs] [52174.524053] [] ? iput+0xb0/0x284 [52174.524053] [] btrfs_unlink_inode+0x1c/0x3d [btrfs] [52174.524053] [] check_item_in_log+0x1fe/0x29b [btrfs] [52174.524053] [] replay_dir_deletes+0x167/0x1cf [btrfs] [52174.524053] [] fixup_inode_link_count+0x289/0x2aa [btrfs] [52174.524053] [] fixup_inode_link_counts+0xcb/0x105 [btrfs] [52174.524053] [] btrfs_recover_log_trees+0x258/0x32c [btrfs] [52174.524053] [] ? replay_one_extent+0x511/0x511 [btrfs] [52174.524053] [] open_ctree+0x1dd4/0x21b9 [btrfs] [52174.524053] [] btrfs_mount+0x97e/0xaed [btrfs] [52174.524053] [] ? trace_hardirqs_on+0xd/0xf [52174.524053] [] mount_fs+0x67/0x131 [52174.524053] [] vfs_kern_mount+0x6c/0xde [52174.524053] [] btrfs_mount+0x1ac/0xaed [btrfs] [52174.524053] [] ? trace_hardirqs_on+0xd/0xf [52174.524053] [] ? lockdep_init_map+0xb9/0x1b3 [52174.524053] [] mount_fs+0x67/0x131 [52174.524053] [] vfs_kern_mount+0x6c/0xde [52174.524053] [] do_mount+0x8a6/0x9e8 [52174.524053] [] ? strndup_user+0x3f/0x59 [52174.524053] [] SyS_mount+0x77/0x9f [52174.524053] [] entry_SYSCALL_64_fastpath+0x12/0x6b [52174.561288] ---[ end trace 6b53049efb1a3ea6 ]--- Fix this by forcing a transaction commit when such cases happen. This means we check in the commit root of the subvolume tree if there was any other inode with the same reference when the inode we are fsync'ing is a new inode (created in the current transaction). Test cases for fstests, covering all the scenarios given above, were submitted upstream for fstests: * fstests: generic test for fsync after renaming directory https://patchwork.kernel.org/patch/8694281/ * fstests: generic test for fsync after renaming file https://patchwork.kernel.org/patch/8694301/ * fstests: add btrfs test for fsync after snapshot deletion https://patchwork.kernel.org/patch/8670671/ Signed-off-by: Filipe Manana Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 137 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 323e12cc9d2f..0e044d7ee721 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4406,6 +4406,127 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, return ret; } +/* + * When we are logging a new inode X, check if it doesn't have a reference that + * matches the reference from some other inode Y created in a past transaction + * and that was renamed in the current transaction. If we don't do this, then at + * log replay time we can lose inode Y (and all its files if it's a directory): + * + * mkdir /mnt/x + * echo "hello world" > /mnt/x/foobar + * sync + * mv /mnt/x /mnt/y + * mkdir /mnt/x # or touch /mnt/x + * xfs_io -c fsync /mnt/x + * + * mount fs, trigger log replay + * + * After the log replay procedure, we would lose the first directory and all its + * files (file foobar). + * For the case where inode Y is not a directory we simply end up losing it: + * + * echo "123" > /mnt/foo + * sync + * mv /mnt/foo /mnt/bar + * echo "abc" > /mnt/foo + * xfs_io -c fsync /mnt/foo + * + * + * We also need this for cases where a snapshot entry is replaced by some other + * entry (file or directory) otherwise we end up with an unreplayable log due to + * attempts to delete the snapshot entry (entry of type BTRFS_ROOT_ITEM_KEY) as + * if it were a regular entry: + * + * mkdir /mnt/x + * btrfs subvolume snapshot /mnt /mnt/x/snap + * btrfs subvolume delete /mnt/x/snap + * rmdir /mnt/x + * mkdir /mnt/x + * fsync /mnt/x or fsync some new file inside it + * + * + * The snapshot delete, rmdir of x, mkdir of a new x and the fsync all happen in + * the same transaction. + */ +static int btrfs_check_ref_name_override(struct extent_buffer *eb, + const int slot, + const struct btrfs_key *key, + struct inode *inode) +{ + int ret; + struct btrfs_path *search_path; + char *name = NULL; + u32 name_len = 0; + u32 item_size = btrfs_item_size_nr(eb, slot); + u32 cur_offset = 0; + unsigned long ptr = btrfs_item_ptr_offset(eb, slot); + + search_path = btrfs_alloc_path(); + if (!search_path) + return -ENOMEM; + search_path->search_commit_root = 1; + search_path->skip_locking = 1; + + while (cur_offset < item_size) { + u64 parent; + u32 this_name_len; + u32 this_len; + unsigned long name_ptr; + struct btrfs_dir_item *di; + + if (key->type == BTRFS_INODE_REF_KEY) { + struct btrfs_inode_ref *iref; + + iref = (struct btrfs_inode_ref *)(ptr + cur_offset); + parent = key->offset; + this_name_len = btrfs_inode_ref_name_len(eb, iref); + name_ptr = (unsigned long)(iref + 1); + this_len = sizeof(*iref) + this_name_len; + } else { + struct btrfs_inode_extref *extref; + + extref = (struct btrfs_inode_extref *)(ptr + + cur_offset); + parent = btrfs_inode_extref_parent(eb, extref); + this_name_len = btrfs_inode_extref_name_len(eb, extref); + name_ptr = (unsigned long)&extref->name; + this_len = sizeof(*extref) + this_name_len; + } + + if (this_name_len > name_len) { + char *new_name; + + new_name = krealloc(name, this_name_len, GFP_NOFS); + if (!new_name) { + ret = -ENOMEM; + goto out; + } + name_len = this_name_len; + name = new_name; + } + + read_extent_buffer(eb, name, name_ptr, this_name_len); + di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root, + search_path, parent, + name, this_name_len, 0); + if (di && !IS_ERR(di)) { + ret = 1; + goto out; + } else if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; + } + btrfs_release_path(search_path); + + cur_offset += this_len; + } + ret = 0; +out: + btrfs_free_path(search_path); + kfree(name); + return ret; +} + /* log a single inode in the tree log. * At least one parent directory for this inode must exist in the tree * or be logged already. @@ -4578,6 +4699,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (min_key.type == BTRFS_INODE_ITEM_KEY) need_log_inode_item = false; + if ((min_key.type == BTRFS_INODE_REF_KEY || + min_key.type == BTRFS_INODE_EXTREF_KEY) && + BTRFS_I(inode)->generation == trans->transid) { + ret = btrfs_check_ref_name_override(path->nodes[0], + path->slots[0], + &min_key, inode); + if (ret < 0) { + err = ret; + goto out_unlock; + } else if (ret > 0) { + err = 1; + btrfs_set_log_full_commit(root->fs_info, trans); + goto out_unlock; + } + } + /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */ if (min_key.type == BTRFS_XATTR_ITEM_KEY) { if (ins_nr == 0) From f6dffe77180ba8ac38e94247cf2a323614f2e876 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Mon, 8 Feb 2016 14:49:24 -0800 Subject: [PATCH 427/797] arm64: replace read_lock to rcu lock in call_step_hook commit cf0a25436f05753aca5151891aea4fd130556e2a upstream. BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:917 in_atomic(): 1, irqs_disabled(): 128, pid: 383, name: sh Preemption disabled at:[] kgdb_cpu_enter+0x158/0x6b8 CPU: 3 PID: 383 Comm: sh Tainted: G W 4.1.13-rt13 #2 Hardware name: Freescale Layerscape 2085a RDB Board (DT) Call trace: [] dump_backtrace+0x0/0x128 [] show_stack+0x24/0x30 [] dump_stack+0x80/0xa0 [] ___might_sleep+0x18c/0x1a0 [] __rt_spin_lock+0x2c/0x40 [] rt_read_lock+0x40/0x58 [] single_step_handler+0x38/0xd8 [] do_debug_exception+0x58/0xb8 Exception stack(0xffff80834a1e7c80 to 0xffff80834a1e7da0) 7c80: ffffff9c ffffffff 92c23ba0 0000ffff 4a1e7e40 ffff8083 001bfcc4 ffff8000 7ca0: f2000400 00000000 00000000 00000000 4a1e7d80 ffff8083 0049501c ffff8000 7cc0: 00005402 00000000 00aaa210 ffff8000 4a1e7ea0 ffff8083 000833f4 ffff8000 7ce0: ffffff9c ffffffff 92c23ba0 0000ffff 4a1e7ea0 ffff8083 001bfcc0 ffff8000 7d00: 4a0fc400 ffff8083 00005402 00000000 4a1e7d40 ffff8083 00490324 ffff8000 7d20: ffffff9c 00000000 92c23ba0 0000ffff 000a0000 00000000 00000000 00000000 7d40: 00000008 00000000 00080000 00000000 92c23b8b 0000ffff 92c23b8e 0000ffff 7d60: 00000038 00000000 00001cb2 00000000 00000005 00000000 92d7b498 0000ffff 7d80: 01010101 01010101 92be9000 0000ffff 00000000 00000000 00000030 00000000 [] el1_dbg+0x18/0x6c This issue is similar with 62c6c61("arm64: replace read_lock to rcu lock in call_break_hook"), but comes to single_step_handler. This also solves kgdbts boot test silent hang issue on 4.4 -rt kernel. Signed-off-by: Yang Shi Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/debug-monitors.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 8aee3aeec3e6..c1492ba1f6d1 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -186,20 +186,21 @@ static void clear_regs_spsr_ss(struct pt_regs *regs) /* EL1 Single Step Handler hooks */ static LIST_HEAD(step_hook); -static DEFINE_RWLOCK(step_hook_lock); +static DEFINE_SPINLOCK(step_hook_lock); void register_step_hook(struct step_hook *hook) { - write_lock(&step_hook_lock); - list_add(&hook->node, &step_hook); - write_unlock(&step_hook_lock); + spin_lock(&step_hook_lock); + list_add_rcu(&hook->node, &step_hook); + spin_unlock(&step_hook_lock); } void unregister_step_hook(struct step_hook *hook) { - write_lock(&step_hook_lock); - list_del(&hook->node); - write_unlock(&step_hook_lock); + spin_lock(&step_hook_lock); + list_del_rcu(&hook->node); + spin_unlock(&step_hook_lock); + synchronize_rcu(); } /* @@ -213,15 +214,15 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr) struct step_hook *hook; int retval = DBG_HOOK_ERROR; - read_lock(&step_hook_lock); + rcu_read_lock(); - list_for_each_entry(hook, &step_hook, node) { + list_for_each_entry_rcu(hook, &step_hook, node) { retval = hook->fn(regs, esr); if (retval == DBG_HOOK_HANDLED) break; } - read_unlock(&step_hook_lock); + rcu_read_unlock(); return retval; } From 3c1a5d344e9721bed684382aab375ca5a143ef92 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Feb 2016 18:45:41 +0100 Subject: [PATCH 428/797] perf: Do not double free commit 130056275ade730e7a79c110212c8815202773ee upstream. In case of: err_file: fput(event_file), we'll end up calling perf_release() which in turn will free the event. Do not then free the event _again_. Tested-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dvyukov@google.com Cc: eranian@google.com Cc: oleg@redhat.com Cc: panand@redhat.com Cc: sasha.levin@oracle.com Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160224174947.697350349@infradead.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index faf2067fc8e2..060c66ea61b6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8583,7 +8583,12 @@ SYSCALL_DEFINE5(perf_event_open, perf_unpin_context(ctx); put_ctx(ctx); err_alloc: - free_event(event); + /* + * If event_file is set, the fput() above will have called ->release() + * and that will take care of freeing the event. + */ + if (!event_file) + free_event(event); err_cpus: put_online_cpus(); err_task: From 695ca6389e0949e44ce1bdbcd422b37e5eb38f4c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Feb 2016 18:45:46 +0100 Subject: [PATCH 429/797] perf: Cure event->pending_disable race commit 28a967c3a2f99fa3b5f762f25cb2a319d933571b upstream. Because event_sched_out() checks event->pending_disable _before_ actually disabling the event, it can happen that the event fires after it checks but before it gets disabled. This would leave event->pending_disable set and the queued irq_work will try and process it. However, if the event trigger was during schedule(), the event might have been de-scheduled by the time the irq_work runs, and perf_event_disable_local() will fail. Fix this by checking event->pending_disable _after_ we call event->pmu->del(). This depends on the latter being a compiler barrier, such that the compiler does not lift the load and re-creates the problem. Tested-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dvyukov@google.com Cc: eranian@google.com Cc: oleg@redhat.com Cc: panand@redhat.com Cc: sasha.levin@oracle.com Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160224174948.040469884@infradead.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 060c66ea61b6..1e889a078dbc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1580,14 +1580,14 @@ event_sched_out(struct perf_event *event, perf_pmu_disable(event->pmu); + event->tstamp_stopped = tstamp; + event->pmu->del(event, 0); + event->oncpu = -1; event->state = PERF_EVENT_STATE_INACTIVE; if (event->pending_disable) { event->pending_disable = 0; event->state = PERF_EVENT_STATE_OFF; } - event->tstamp_stopped = tstamp; - event->pmu->del(event, 0); - event->oncpu = -1; if (!is_software_event(event)) cpuctx->active_oncpu--; From e16b94ab4ccd5b31fa160978c601206a169de2bc Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 4 Apr 2016 12:40:37 +0300 Subject: [PATCH 430/797] mmc: sdhci-pci: Add support and PCI IDs for more Broxton host controllers commit 01d6b2a40a0fa73c90e05b1033f181a51fec9292 upstream. Add support and PCI IDs for more Broxton host controllers Other BXT IDs were added in v4.4 so cc'ing stable. This patch is dependent on commit 163cbe31e516 ("mmc: sdhci-pci: Fix card detect race for Intel BXT/APL") but that is already in stable since v4.4.4. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-core.c | 25 +++++++++++++++++++++++++ drivers/mmc/host/sdhci-pci.h | 3 +++ 2 files changed, 28 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 45ee07d3a761..610154836d79 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -390,6 +390,7 @@ static int byt_sd_probe_slot(struct sdhci_pci_slot *slot) slot->cd_idx = 0; slot->cd_override_level = true; if (slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BXT_SD || + slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BXTM_SD || slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_APL_SD) slot->host->mmc_host_ops.get_cd = bxt_get_cd; @@ -1171,6 +1172,30 @@ static const struct pci_device_id pci_ids[] = { .driver_data = (kernel_ulong_t)&sdhci_intel_byt_sd, }, + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_BXTM_EMMC, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_byt_emmc, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_BXTM_SDIO, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_byt_sdio, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_BXTM_SD, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_byt_sd, + }, + { .vendor = PCI_VENDOR_ID_INTEL, .device = PCI_DEVICE_ID_INTEL_APL_EMMC, diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h index d1a0b4db60db..89e7151684a1 100644 --- a/drivers/mmc/host/sdhci-pci.h +++ b/drivers/mmc/host/sdhci-pci.h @@ -28,6 +28,9 @@ #define PCI_DEVICE_ID_INTEL_BXT_SD 0x0aca #define PCI_DEVICE_ID_INTEL_BXT_EMMC 0x0acc #define PCI_DEVICE_ID_INTEL_BXT_SDIO 0x0ad0 +#define PCI_DEVICE_ID_INTEL_BXTM_SD 0x1aca +#define PCI_DEVICE_ID_INTEL_BXTM_EMMC 0x1acc +#define PCI_DEVICE_ID_INTEL_BXTM_SDIO 0x1ad0 #define PCI_DEVICE_ID_INTEL_APL_SD 0x5aca #define PCI_DEVICE_ID_INTEL_APL_EMMC 0x5acc #define PCI_DEVICE_ID_INTEL_APL_SDIO 0x5ad0 From 39bd2591a80eb0ccd291a5a97456e667083bbcd0 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Thu, 25 Feb 2016 09:37:05 +0100 Subject: [PATCH 431/797] ALSA: hda - Fixup speaker pass-through control for nid 0x14 on ALC225 commit 2ae955774f29bbd7d16149cb0ae8d0319bf2ecc4 upstream. On one of the machines we enable, we found that the actual speaker volume did not always correspond to the volume set in alsamixer. This patch fixes that problem. This patch was orginally written by Kailang @ Realtek, I've rebased it to fit sound git master. BugLink: https://bugs.launchpad.net/bugs/1549660 Co-Authored-By: Kailang Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3671eb89dd28..c709efc68a0c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3801,6 +3801,10 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin, static void alc_headset_mode_default(struct hda_codec *codec) { + static struct coef_fw coef0225[] = { + UPDATE_COEF(0x45, 0x3f<<10, 0x34<<10), + {} + }; static struct coef_fw coef0255[] = { WRITE_COEF(0x45, 0xc089), WRITE_COEF(0x45, 0xc489), @@ -3842,6 +3846,9 @@ static void alc_headset_mode_default(struct hda_codec *codec) }; switch (codec->core.vendor_id) { + case 0x10ec0225: + alc_process_coef_fw(codec, coef0225); + break; case 0x10ec0255: case 0x10ec0256: alc_process_coef_fw(codec, coef0255); @@ -4750,6 +4757,7 @@ enum { ALC293_FIXUP_LENOVO_SPK_NOISE, ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, ALC255_FIXUP_DELL_SPK_NOISE, + ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, }; static const struct hda_fixup alc269_fixups[] = { @@ -5375,6 +5383,17 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC225_FIXUP_DELL1_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* Disable pass-through path for FRONT 14h */ + { 0x20, AC_VERB_SET_COEF_INDEX, 0x36 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x57d7 }, + {} + }, + .chained = true, + .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5648,10 +5667,10 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {0x21, 0x03211020} static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { - SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, + SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, ALC225_STANDARD_PINS, {0x14, 0x901701a0}), - SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, + SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, ALC225_STANDARD_PINS, {0x14, 0x901701b0}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE, From 478c9f35a987a879189afedc1de1c6603487791a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 25 Feb 2016 14:31:59 +0100 Subject: [PATCH 432/797] ALSA: hda - Fix headset support and noise on HP EliteBook 755 G2 commit f883982dc1b117f04579f0896821cd9f2e397f94 upstream. HP EliteBook 755 G2 with ALC3228 (ALC280) codec [103c:221c] requires the known fixup (ALC269_FIXUP_HEADSET_MIC) for making the headset mic working. Also, it suffers from the loopback noise problem, so we should disable aamix path as well. Reported-by: Derick Eddington Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c709efc68a0c..73978c79981f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4758,6 +4758,7 @@ enum { ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, ALC255_FIXUP_DELL_SPK_NOISE, ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC280_FIXUP_HP_HEADSET_MIC, }; static const struct hda_fixup alc269_fixups[] = { @@ -5394,6 +5395,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC280_FIXUP_HP_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MIC, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5498,6 +5505,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x221c, "HP EliteBook 755 G2", ALC280_FIXUP_HP_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From c362f778f725a8d606e91403e830bc9d7fa10f57 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 1 Apr 2016 11:00:15 +0800 Subject: [PATCH 433/797] ALSA: hda - fix front mic problem for a HP desktop commit e549d190f7b5f94e9ab36bd965028112914d010d upstream. The front mic jack (pink color) can't detect any plug or unplug. After applying this fix, both detecting function and recording function work well. BugLink: https://bugs.launchpad.net/bugs/1564712 Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 73978c79981f..fefe83f2beab 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4759,6 +4759,7 @@ enum { ALC255_FIXUP_DELL_SPK_NOISE, ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, ALC280_FIXUP_HP_HEADSET_MIC, + ALC221_FIXUP_HP_FRONT_MIC, }; static const struct hda_fixup alc269_fixups[] = { @@ -5401,6 +5402,13 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MIC, }, + [ALC221_FIXUP_HP_FRONT_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x02a19020 }, /* Front Mic */ + { } + }, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5506,6 +5514,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x221c, "HP EliteBook 755 G2", ALC280_FIXUP_HP_HEADSET_MIC), + SND_PCI_QUIRK(0x103c, 0x8256, "HP", ALC221_FIXUP_HP_FRONT_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From f0cf5ccef6fc0390aa42987e822dd63732d4d2da Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 11 Apr 2016 16:55:26 +0200 Subject: [PATCH 434/797] ALSA: hda/realtek - Enable the ALC292 dock fixup on the Thinkpad T460s commit c636b95ec5980345674ad7960a3c67135a84b687 upstream. The Lenovo Thinkpad T460s requires the alc_fixup_tpt440_dock as well in order to get working sound output on the docking stations headphone jack. Patch tested on a Thinkpad T460s (20F9CT01WW) using a ThinkPad Ultradock on kernel 4.4.6. Signed-off-by: Sven Eckelmann Tested-by: Simon Wunderlich Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fefe83f2beab..1402ba954b3d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4760,6 +4760,7 @@ enum { ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, ALC280_FIXUP_HP_HEADSET_MIC, ALC221_FIXUP_HP_FRONT_MIC, + ALC292_FIXUP_TPT460, }; static const struct hda_fixup alc269_fixups[] = { @@ -5409,6 +5410,12 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, + [ALC292_FIXUP_TPT460] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_tpt440_dock, + .chained = true, + .chain_id = ALC293_FIXUP_LENOVO_SPK_NOISE, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5563,7 +5570,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK), - SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE), + SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), @@ -5658,6 +5665,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC283_FIXUP_SENSE_COMBO_JACK, .name = "alc283-sense-combo"}, {.id = ALC292_FIXUP_TPT440_DOCK, .name = "tpt440-dock"}, {.id = ALC292_FIXUP_TPT440, .name = "tpt440"}, + {.id = ALC292_FIXUP_TPT460, .name = "tpt460"}, {} }; #define ALC225_STANDARD_PINS \ From 77ffc8a9e9b36c2311ee1443a9dc5f1a5cf534d3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 4 Apr 2016 11:47:50 +0200 Subject: [PATCH 435/797] ALSA: usb-audio: Add a sample rate quirk for Phoenix Audio TMX320 commit f03b24a851d32ca85dacab01785b24a7ee717d37 upstream. Phoenix Audio TMX320 gives the similar error when the sample rate is asked: usb 2-1.3: 2:1: cannot get freq at ep 0x85 usb 2-1.3: 1:1: cannot get freq at ep 0x2 .... Add the corresponding USB-device ID (1de7:0014) to snd_usb_get_sample_rate_quirk() list. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=110221 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index cd7eac28edee..4f2dedfa7645 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1138,6 +1138,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ + case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */ case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */ return true; } From 157fb2daebc0c42901432b8028488c5ebb4509b1 Mon Sep 17 00:00:00 2001 From: Dennis Kadioglu Date: Wed, 6 Apr 2016 08:39:01 +0200 Subject: [PATCH 436/797] ALSA: usb-audio: Add a quirk for Plantronics BT300 commit b4203ff5464da00b7812e7b480192745b0d66bbf upstream. Plantronics BT300 does not support reading the sample rate which leads to many lines of "cannot get freq at ep 0x1". This patch adds the USB ID of the BT300 to quirks.c and avoids those error messages. Signed-off-by: Dennis Kadioglu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 4f2dedfa7645..001fb4dc0722 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1135,6 +1135,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */ case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */ case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */ + case USB_ID(0x047F, 0x0415): /* Plantronics BT-300 */ case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ From d622dad32621a1a33ba74b1b29a19cfd5abe069f Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 12 Apr 2016 10:55:03 +0800 Subject: [PATCH 437/797] ALSA: usb-audio: Skip volume controls triggers hangup on Dell USB Dock commit adcdd0d5a1cb779f6d455ae70882c19c527627a8 upstream. This is Dell usb dock audio workaround. It was fixed the master volume keep lower. [Some background: the patch essentially skips the controls of a couple of FU volumes. Although the firmware exposes the dB and the value information via the usb descriptor, changing the values (we set the min volume as default) screws up the device. Although this has been fixed in the newer firmware, the devices are shipped with the old firmware, thus we need the workaround in the driver side. -- tiwai] Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer_maps.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index ddca6547399b..1f8fb0d904e0 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -348,6 +348,16 @@ static struct usbmix_name_map bose_companion5_map[] = { { 0 } /* terminator */ }; +/* + * Dell usb dock with ALC4020 codec had a firmware problem where it got + * screwed up when zero volume is passed; just skip it as a workaround + */ +static const struct usbmix_name_map dell_alc4020_map[] = { + { 16, NULL }, + { 19, NULL }, + { 0 } +}; + /* * Control map entries */ @@ -430,6 +440,10 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x0ccd, 0x0028), .map = aureon_51_2_map, }, + { + .id = USB_ID(0x0bda, 0x4014), + .map = dell_alc4020_map, + }, { .id = USB_ID(0x0dba, 0x1000), .map = mbox1_map, From f174a1fd2c0e577646a1263f7c7d01e588ca28bd Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 25 Mar 2016 15:26:55 +0100 Subject: [PATCH 438/797] HID: wacom: fix Bamboo ONE oops commit 580549ef6b3e3fb3b958de490ca99f43a089a2cf upstream. Looks like recent changes in the Wacom driver made the Bamboo ONE crashes. The tablet behaves as if it was a regular Bamboo device with pen, touch and pad, but there is no physical pad connected to it. The weird part is that the pad is still sending events and given that there is no input node connected to it, we get anull pointer exception. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1317116 Signed-off-by: Benjamin Tissoires Acked-by: Ping Cheng Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 01a4f05c1642..3c0f47ac8e53 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2492,6 +2492,17 @@ void wacom_setup_device_quirks(struct wacom *wacom) } } + /* + * Hack for the Bamboo One: + * the device presents a PAD/Touch interface as most Bamboos and even + * sends ghosts PAD data on it. However, later, we must disable this + * ghost interface, and we can not detect it unless we set it here + * to WACOM_DEVICETYPE_PAD or WACOM_DEVICETYPE_TOUCH. + */ + if (features->type == BAMBOO_PEN && + features->pktlen == WACOM_PKGLEN_BBTOUCH3) + features->device_type |= WACOM_DEVICETYPE_PAD; + /* * Raw Wacom-mode pen and touch events both come from interface * 0, whose HID descriptor has an application usage of 0xFF0D From b66a7a3a0947ced3b8dff340e815b708cc4a98a7 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 23 Mar 2016 12:17:09 -0400 Subject: [PATCH 439/797] HID: usbhid: fix inconsistent reset/resume/reset-resume behavior commit 972e6a993f278b416a8ee3ec65475724fc36feb2 upstream. The usbhid driver has inconsistently duplicated code in its post-reset, resume, and reset-resume pathways. reset-resume doesn't check HID_STARTED before trying to restart the I/O queues. resume fails to clear the HID_SUSPENDED flag if HID_STARTED isn't set. resume calls usbhid_restart_queues() with usbhid->lock held and the others call it without holding the lock. The first item in particular causes a problem following a reset-resume if the driver hasn't started up its I/O. URB submission fails because usbhid->urbin is NULL, and this triggers an unending reset-retry loop. This patch fixes the problem by creating a new subroutine, hid_restart_io(), to carry out all the common activities. It also adds some checks that were missing in the original code: After a reset, there's no need to clear any halted endpoints. After a resume, if a reset is pending there's no need to restart any I/O until the reset is finished. After a resume, if the interrupt-IN endpoint is halted there's no need to submit the input URB until the halt has been cleared. Signed-off-by: Alan Stern Reported-by: Daniel Fraga Tested-by: Daniel Fraga Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/usbhid/hid-core.c | 73 ++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 5dd426fee8cc..0df32fe0e345 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -951,14 +951,6 @@ static int usbhid_output_report(struct hid_device *hid, __u8 *buf, size_t count) return ret; } -static void usbhid_restart_queues(struct usbhid_device *usbhid) -{ - if (usbhid->urbout && !test_bit(HID_OUT_RUNNING, &usbhid->iofl)) - usbhid_restart_out_queue(usbhid); - if (!test_bit(HID_CTRL_RUNNING, &usbhid->iofl)) - usbhid_restart_ctrl_queue(usbhid); -} - static void hid_free_buffers(struct usb_device *dev, struct hid_device *hid) { struct usbhid_device *usbhid = hid->driver_data; @@ -1404,6 +1396,37 @@ static void hid_cease_io(struct usbhid_device *usbhid) usb_kill_urb(usbhid->urbout); } +static void hid_restart_io(struct hid_device *hid) +{ + struct usbhid_device *usbhid = hid->driver_data; + int clear_halt = test_bit(HID_CLEAR_HALT, &usbhid->iofl); + int reset_pending = test_bit(HID_RESET_PENDING, &usbhid->iofl); + + spin_lock_irq(&usbhid->lock); + clear_bit(HID_SUSPENDED, &usbhid->iofl); + usbhid_mark_busy(usbhid); + + if (clear_halt || reset_pending) + schedule_work(&usbhid->reset_work); + usbhid->retry_delay = 0; + spin_unlock_irq(&usbhid->lock); + + if (reset_pending || !test_bit(HID_STARTED, &usbhid->iofl)) + return; + + if (!clear_halt) { + if (hid_start_in(hid) < 0) + hid_io_error(hid); + } + + spin_lock_irq(&usbhid->lock); + if (usbhid->urbout && !test_bit(HID_OUT_RUNNING, &usbhid->iofl)) + usbhid_restart_out_queue(usbhid); + if (!test_bit(HID_CTRL_RUNNING, &usbhid->iofl)) + usbhid_restart_ctrl_queue(usbhid); + spin_unlock_irq(&usbhid->lock); +} + /* Treat USB reset pretty much the same as suspend/resume */ static int hid_pre_reset(struct usb_interface *intf) { @@ -1453,14 +1476,14 @@ static int hid_post_reset(struct usb_interface *intf) return 1; } + /* No need to do another reset or clear a halted endpoint */ spin_lock_irq(&usbhid->lock); clear_bit(HID_RESET_PENDING, &usbhid->iofl); + clear_bit(HID_CLEAR_HALT, &usbhid->iofl); spin_unlock_irq(&usbhid->lock); hid_set_idle(dev, intf->cur_altsetting->desc.bInterfaceNumber, 0, 0); - status = hid_start_in(hid); - if (status < 0) - hid_io_error(hid); - usbhid_restart_queues(usbhid); + + hid_restart_io(hid); return 0; } @@ -1483,25 +1506,9 @@ void usbhid_put_power(struct hid_device *hid) #ifdef CONFIG_PM static int hid_resume_common(struct hid_device *hid, bool driver_suspended) { - struct usbhid_device *usbhid = hid->driver_data; - int status; - - spin_lock_irq(&usbhid->lock); - clear_bit(HID_SUSPENDED, &usbhid->iofl); - usbhid_mark_busy(usbhid); - - if (test_bit(HID_CLEAR_HALT, &usbhid->iofl) || - test_bit(HID_RESET_PENDING, &usbhid->iofl)) - schedule_work(&usbhid->reset_work); - usbhid->retry_delay = 0; - - usbhid_restart_queues(usbhid); - spin_unlock_irq(&usbhid->lock); - - status = hid_start_in(hid); - if (status < 0) - hid_io_error(hid); + int status = 0; + hid_restart_io(hid); if (driver_suspended && hid->driver && hid->driver->resume) status = hid->driver->resume(hid); return status; @@ -1570,12 +1577,8 @@ static int hid_suspend(struct usb_interface *intf, pm_message_t message) static int hid_resume(struct usb_interface *intf) { struct hid_device *hid = usb_get_intfdata (intf); - struct usbhid_device *usbhid = hid->driver_data; int status; - if (!test_bit(HID_STARTED, &usbhid->iofl)) - return 0; - status = hid_resume_common(hid, true); dev_dbg(&intf->dev, "resume status %d\n", status); return 0; @@ -1584,10 +1587,8 @@ static int hid_resume(struct usb_interface *intf) static int hid_reset_resume(struct usb_interface *intf) { struct hid_device *hid = usb_get_intfdata(intf); - struct usbhid_device *usbhid = hid->driver_data; int status; - clear_bit(HID_SUSPENDED, &usbhid->iofl); status = hid_post_reset(intf); if (status >= 0 && hid->driver && hid->driver->reset_resume) { int ret = hid->driver->reset_resume(hid); From fc9683f24bc4dce3ac67f78f8b393f3c4159319c Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 17 Feb 2016 12:26:33 -0600 Subject: [PATCH 440/797] Revert "x86/PCI: Don't alloc pcibios-irq when MSI is enabled" commit fe25d078874f2c29c38f4160467d74f5756537c9 upstream. Revert 8affb487d4a4 ("x86/PCI: Don't alloc pcibios-irq when MSI is enabled"). This is part of reverting 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()") to fix regressions it introduced. Link: https://bugzilla.kernel.org/show_bug.cgi?id=111211 Fixes: 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()") Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki CC: Jiang Liu CC: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/common.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index eccd4d99e6a4..dc78a4a9a466 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -675,14 +675,6 @@ int pcibios_add_device(struct pci_dev *dev) int pcibios_alloc_irq(struct pci_dev *dev) { - /* - * If the PCI device was already claimed by core code and has - * MSI enabled, probing of the pcibios IRQ will overwrite - * dev->irq. So bail out if MSI is already enabled. - */ - if (pci_dev_msi_enabled(dev)) - return -EBUSY; - return pcibios_enable_irq(dev); } From 2d0d0011ff48f000ec789f9b7e3378886225ec68 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 17 Feb 2016 12:26:38 -0600 Subject: [PATCH 441/797] Revert "PCI: Add helpers to manage pci_dev->irq and pci_dev->irq_managed" commit 67b4eab91caf2ad574cab1b17ae09180ea2e116e upstream. Revert 811a4e6fce09 ("PCI: Add helpers to manage pci_dev->irq and pci_dev->irq_managed"). This is part of reverting 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()") to fix regressions it introduced. Link: https://bugzilla.kernel.org/show_bug.cgi?id=111211 Fixes: 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()") Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki CC: Jiang Liu Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/intel_mid_pci.c | 4 ++-- arch/x86/pci/irq.c | 10 ++++++---- drivers/acpi/pci_irq.c | 10 ++++++---- include/linux/pci.h | 17 ----------------- 4 files changed, 14 insertions(+), 27 deletions(-) diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 0d24e7c10145..8826ff593ebc 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -215,7 +215,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) int polarity; int ret; - if (pci_has_managed_irq(dev)) + if (dev->irq_managed && dev->irq > 0) return 0; switch (intel_mid_identify_cpu()) { @@ -256,7 +256,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) static void intel_mid_pci_irq_disable(struct pci_dev *dev) { - if (pci_has_managed_irq(dev)) { + if (dev->irq_managed && dev->irq > 0) { mp_unmap_irq(dev->irq); dev->irq_managed = 0; /* diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 32e70343e6fd..72108f0b66b1 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -1202,7 +1202,7 @@ static int pirq_enable_irq(struct pci_dev *dev) struct pci_dev *temp_dev; int irq; - if (pci_has_managed_irq(dev)) + if (dev->irq_managed && dev->irq > 0) return 0; irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, @@ -1230,7 +1230,8 @@ static int pirq_enable_irq(struct pci_dev *dev) } dev = temp_dev; if (irq >= 0) { - pci_set_managed_irq(dev, irq); + dev->irq_managed = 1; + dev->irq = irq; dev_info(&dev->dev, "PCI->APIC IRQ transform: " "INT %c -> IRQ %d\n", 'A' + pin - 1, irq); return 0; @@ -1258,8 +1259,9 @@ static int pirq_enable_irq(struct pci_dev *dev) static void pirq_disable_irq(struct pci_dev *dev) { - if (io_apic_assign_pci_irqs && pci_has_managed_irq(dev)) { + if (io_apic_assign_pci_irqs && dev->irq_managed && dev->irq) { mp_unmap_irq(dev->irq); - pci_reset_managed_irq(dev); + dev->irq = 0; + dev->irq_managed = 0; } } diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index c9336751e5e3..172b74df0fa7 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -409,7 +409,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev) return 0; } - if (pci_has_managed_irq(dev)) + if (dev->irq_managed && dev->irq > 0) return 0; entry = acpi_pci_irq_lookup(dev, pin); @@ -454,7 +454,8 @@ int acpi_pci_irq_enable(struct pci_dev *dev) kfree(entry); return rc; } - pci_set_managed_irq(dev, rc); + dev->irq = rc; + dev->irq_managed = 1; if (link) snprintf(link_desc, sizeof(link_desc), " -> Link[%s]", link); @@ -477,7 +478,7 @@ void acpi_pci_irq_disable(struct pci_dev *dev) u8 pin; pin = dev->pin; - if (!pin || !pci_has_managed_irq(dev)) + if (!pin || !dev->irq_managed || dev->irq <= 0) return; entry = acpi_pci_irq_lookup(dev, pin); @@ -499,6 +500,7 @@ void acpi_pci_irq_disable(struct pci_dev *dev) dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin)); if (gsi >= 0) { acpi_unregister_gsi(gsi); - pci_reset_managed_irq(dev); + dev->irq_managed = 0; + dev->irq = 0; } } diff --git a/include/linux/pci.h b/include/linux/pci.h index 4e554bfff129..e89c7ee7e803 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -989,23 +989,6 @@ static inline int pci_is_managed(struct pci_dev *pdev) return pdev->is_managed; } -static inline void pci_set_managed_irq(struct pci_dev *pdev, unsigned int irq) -{ - pdev->irq = irq; - pdev->irq_managed = 1; -} - -static inline void pci_reset_managed_irq(struct pci_dev *pdev) -{ - pdev->irq = 0; - pdev->irq_managed = 0; -} - -static inline bool pci_has_managed_irq(struct pci_dev *pdev) -{ - return pdev->irq_managed && pdev->irq > 0; -} - void pci_disable_device(struct pci_dev *dev); extern unsigned int pcibios_max_latency; From c1491657c533307ac2f341e1b7ecdf156de3f647 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 17 Feb 2016 12:26:42 -0600 Subject: [PATCH 442/797] Revert "PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6c777e8799a93e3bdb67bec622429e1b48dc90fb upstream. 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()") appeared in v4.3 and helps support IOAPIC hotplug. Олег reported that the Elcus-1553 TA1-PCI driver worked in v4.2 but not v4.3 and bisected it to 991de2e59090. Sunjin reported that the RocketRAID 272x driver worked in v4.2 but not v4.3. In both cases booting with "pci=routirq" is a workaround. I think the problem is that after 991de2e59090, we no longer call pcibios_enable_irq() for upstream bridges. Prior to 991de2e59090, when a driver called pci_enable_device(), we recursively called pcibios_enable_irq() for upstream bridges via pci_enable_bridge(). After 991de2e59090, we call pcibios_enable_irq() from pci_device_probe() instead of the pci_enable_device() path, which does *not* call pcibios_enable_irq() for upstream bridges. Revert 991de2e59090 to fix these driver regressions. Link: https://bugzilla.kernel.org/show_bug.cgi?id=111211 Fixes: 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()") Reported-and-tested-by: Олег Мороз Reported-by: Sunjin Yang Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki CC: Jiang Liu Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pci_x86.h | 2 ++ arch/x86/pci/common.c | 26 ++++++++++++++------------ arch/x86/pci/intel_mid_pci.c | 7 ++----- arch/x86/pci/irq.c | 15 ++++++++++++++- drivers/acpi/pci_irq.c | 9 ++++++++- 5 files changed, 40 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index fa1195dae425..164e3f8d3c3d 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock; extern int (*pcibios_enable_irq)(struct pci_dev *dev); extern void (*pcibios_disable_irq)(struct pci_dev *dev); +extern bool mp_should_keep_irq(struct device *dev); + struct pci_raw_ops { int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val); diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index dc78a4a9a466..8fd6f44aee83 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -673,20 +673,22 @@ int pcibios_add_device(struct pci_dev *dev) return 0; } -int pcibios_alloc_irq(struct pci_dev *dev) -{ - return pcibios_enable_irq(dev); -} - -void pcibios_free_irq(struct pci_dev *dev) -{ - if (pcibios_disable_irq) - pcibios_disable_irq(dev); -} - int pcibios_enable_device(struct pci_dev *dev, int mask) { - return pci_enable_resources(dev, mask); + int err; + + if ((err = pci_enable_resources(dev, mask)) < 0) + return err; + + if (!pci_dev_msi_enabled(dev)) + return pcibios_enable_irq(dev); + return 0; +} + +void pcibios_disable_device (struct pci_dev *dev) +{ + if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq) + pcibios_disable_irq(dev); } int pci_ext_cfg_avail(void) diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 8826ff593ebc..8b93e634af84 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -256,13 +256,10 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) static void intel_mid_pci_irq_disable(struct pci_dev *dev) { - if (dev->irq_managed && dev->irq > 0) { + if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed && + dev->irq > 0) { mp_unmap_irq(dev->irq); dev->irq_managed = 0; - /* - * Don't reset dev->irq here, otherwise - * intel_mid_pci_irq_enable() will fail on next call. - */ } } diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 72108f0b66b1..9bd115484745 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -1257,9 +1257,22 @@ static int pirq_enable_irq(struct pci_dev *dev) return 0; } +bool mp_should_keep_irq(struct device *dev) +{ + if (dev->power.is_prepared) + return true; +#ifdef CONFIG_PM + if (dev->power.runtime_status == RPM_SUSPENDING) + return true; +#endif + + return false; +} + static void pirq_disable_irq(struct pci_dev *dev) { - if (io_apic_assign_pci_irqs && dev->irq_managed && dev->irq) { + if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) && + dev->irq_managed && dev->irq) { mp_unmap_irq(dev->irq); dev->irq = 0; dev->irq_managed = 0; diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 172b74df0fa7..8a10a7ae6a8a 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -481,6 +481,14 @@ void acpi_pci_irq_disable(struct pci_dev *dev) if (!pin || !dev->irq_managed || dev->irq <= 0) return; + /* Keep IOAPIC pin configuration when suspending */ + if (dev->dev.power.is_prepared) + return; +#ifdef CONFIG_PM + if (dev->dev.power.runtime_status == RPM_SUSPENDING) + return; +#endif + entry = acpi_pci_irq_lookup(dev, pin); if (!entry) return; @@ -501,6 +509,5 @@ void acpi_pci_irq_disable(struct pci_dev *dev) if (gsi >= 0) { acpi_unregister_gsi(gsi); dev->irq_managed = 0; - dev->irq = 0; } } From 440e9a240ca22cbed85bca3d6950cd75e1349775 Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Thu, 21 Jan 2016 11:57:47 +0000 Subject: [PATCH 443/797] staging: android: ion: Set the length of the DMA sg entries in buffer commit 70bc916b2c80913753fb188d4daee50a64d21ba0 upstream. ion_buffer_create() will allocate a buffer and then create a DMA mapping for it, but it forgot to set the length of the page entries. Signed-off-by: Liviu Dudau Signed-off-by: Jon Medhurst Acked-by: Laura Abbott Cc: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index e237e9f3312d..df560216d702 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -251,8 +251,10 @@ static struct ion_buffer *ion_buffer_create(struct ion_heap *heap, * memory coming from the heaps is ready for dma, ie if it has a * cached mapping that mapping has been invalidated */ - for_each_sg(buffer->sg_table->sgl, sg, buffer->sg_table->nents, i) + for_each_sg(buffer->sg_table->sgl, sg, buffer->sg_table->nents, i) { sg_dma_address(sg) = sg_phys(sg); + sg_dma_len(sg) = sg->length; + } mutex_lock(&dev->buffer_lock); ion_buffer_add(dev, buffer); mutex_unlock(&dev->buffer_lock); From 0d8c1f17e86919fbc645ae9283304738476dd67c Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Mon, 16 Nov 2015 15:55:11 -0200 Subject: [PATCH 444/797] usbvision: fix crash on detecting device with invalid configuration commit fa52bd506f274b7619955917abfde355e3d19ffe upstream. The usbvision driver crashes when a specially crafted usb device with invalid number of interfaces or endpoints is detected. This fix adds checks that the device has proper configuration expected by the driver. Reported-by: Ralf Spenneberg Signed-off-by: Vladis Dronov Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/usbvision/usbvision-video.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/usbvision/usbvision-video.c b/drivers/media/usb/usbvision/usbvision-video.c index b693206f66dd..d1dc1a198e3e 100644 --- a/drivers/media/usb/usbvision/usbvision-video.c +++ b/drivers/media/usb/usbvision/usbvision-video.c @@ -1463,9 +1463,23 @@ static int usbvision_probe(struct usb_interface *intf, if (usbvision_device_data[model].interface >= 0) interface = &dev->actconfig->interface[usbvision_device_data[model].interface]->altsetting[0]; - else + else if (ifnum < dev->actconfig->desc.bNumInterfaces) interface = &dev->actconfig->interface[ifnum]->altsetting[0]; + else { + dev_err(&intf->dev, "interface %d is invalid, max is %d\n", + ifnum, dev->actconfig->desc.bNumInterfaces - 1); + ret = -ENODEV; + goto err_usb; + } + + if (interface->desc.bNumEndpoints < 2) { + dev_err(&intf->dev, "interface %d has %d endpoints, but must" + " have minimum 2\n", ifnum, interface->desc.bNumEndpoints); + ret = -ENODEV; + goto err_usb; + } endpoint = &interface->endpoint[1].desc; + if (!usb_endpoint_xfer_isoc(endpoint)) { dev_err(&intf->dev, "%s: interface %d. has non-ISO endpoint!\n", __func__, ifnum); From 4b59a38da5983852008270e81140f611df6f0bfd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 20 Feb 2016 14:19:34 -0800 Subject: [PATCH 445/797] Revert "usb: hub: do not clear BOS field during reset device" commit e5bdfd50d6f76077bf8441d130c606229e100d40 upstream. This reverts commit d8f00cd685f5c8e0def8593e520a7fef12c22407. Tony writes: This upstream commit is causing an oops: d8f00cd685f5 ("usb: hub: do not clear BOS field during reset device") This patch has already been included in several -stable kernels. Here are the affected kernels: 4.5.0-rc4 (current git) 4.4.2 4.3.6 (currently in review) 4.1.18 3.18.27 3.14.61 How to reproduce the problem: Boot kernel with slub debugging enabled (otherwise memory corruption will cause random oopses later instead of immediately) Plug in USB 3.0 disk to xhci USB 3.0 port dd if=/dev/sdc of=/dev/null bs=65536 (where /dev/sdc is the USB 3.0 disk) Unplug USB cable while dd is still going Oops is immediate: Reported-by: Tony Battersby Cc: Du, Changbin Cc: Roger Quadros Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 2a274884c7ea..84df093639ac 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -5392,6 +5392,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev) } bos = udev->bos; + udev->bos = NULL; for (i = 0; i < SET_CONFIG_TRIES; ++i) { @@ -5484,11 +5485,8 @@ static int usb_reset_and_verify_device(struct usb_device *udev) usb_set_usb2_hardware_lpm(udev, 1); usb_unlocked_enable_lpm(udev); usb_enable_ltm(udev); - /* release the new BOS descriptor allocated by hub_port_init() */ - if (udev->bos != bos) { - usb_release_bos_descriptor(udev); - udev->bos = bos; - } + usb_release_bos_descriptor(udev); + udev->bos = bos; return 0; re_enumerate: From 8c9aef03d3b540b6885e7534a885ea25f62dd9ed Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 20 Apr 2016 15:44:02 +0900 Subject: [PATCH 446/797] Linux 4.4.8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5a493e785aca..1928fcd539cc 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 7 +SUBLEVEL = 8 EXTRAVERSION = NAME = Blurry Fish Butt From 5b616a05de88d4be0136156a26fae9da855939f6 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 30 Mar 2016 08:46:31 +0800 Subject: [PATCH 447/797] block: partition: initialize percpuref before sending out KOBJ_ADD commit b30a337ca27c4f40439e4bfb290cba5f88d73bb7 upstream. The initialization of partition's percpu_ref should have been done before sending out KOBJ_ADD uevent, which may cause userspace to read partition table. So the uninitialized percpu_ref may be accessed in data path. This patch fixes this issue reported by Naveen. Reported-by: Naveen Kaje Tested-by: Naveen Kaje Fixes: 6c71013ecb7e2(block: partition: convert percpu ref) Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/partition-generic.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/block/partition-generic.c b/block/partition-generic.c index 746935a5973c..a241e3900bc9 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -349,15 +349,20 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, goto out_del; } + err = hd_ref_init(p); + if (err) { + if (flags & ADDPART_FLAG_WHOLEDISK) + goto out_remove_file; + goto out_del; + } + /* everything is up and running, commence */ rcu_assign_pointer(ptbl->part[partno], p); /* suppress uevent if the disk suppresses it */ if (!dev_get_uevent_suppress(ddev)) kobject_uevent(&pdev->kobj, KOBJ_ADD); - - if (!hd_ref_init(p)) - return p; + return p; out_free_info: free_part_info(p); @@ -366,6 +371,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, out_free: kfree(p); return ERR_PTR(err); +out_remove_file: + device_remove_file(pdev, &dev_attr_whole_disk); out_del: kobject_put(p->holder_dir); device_del(pdev); From 9fed24fe30c1217c640d2b38403034c2c7fdce12 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Apr 2016 18:51:28 +0800 Subject: [PATCH 448/797] block: loop: fix filesystem corruption in case of aio/dio commit a7297a6a3a3322b054592e8e988981d2f5f29cc4 upstream. Starting from commit e36f620428(block: split bios to max possible length), block core starts to split bio in the middle of bvec. Unfortunately loop dio/aio doesn't consider this situation, and always treat 'iter.iov_offset' as zero. Then filesystem corruption is observed. This patch figures out the offset of the base bvevc via 'bio->bi_iter.bi_bvec_done' and fixes the issue by passing the offset to iov iterator. Fixes: e36f6204288088f (block: split bios to max possible length) Cc: Keith Busch Cc: Al Viro Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 423f4ca7d712..80cf8add46ff 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -488,6 +488,12 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); iov_iter_bvec(&iter, ITER_BVEC | rw, bvec, bio_segments(bio), blk_rq_bytes(cmd->rq)); + /* + * This bio may be started from the middle of the 'bvec' + * because of bio splitting, so offset from the bvec must + * be passed to iov iterator + */ + iter.iov_offset = bio->bi_iter.bi_bvec_done; cmd->iocb.ki_pos = pos; cmd->iocb.ki_filp = file; From adbe236b953f4537f9e5ce86d1c7ace613dec38c Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 6 Apr 2016 10:05:16 +0200 Subject: [PATCH 449/797] x86/mce: Avoid using object after free in genpool commit a3125494cff084b098c80bb36fbe2061ffed9d52 upstream. When we loop over all queued machine check error records to pass them to the registered notifiers we use llist_for_each_entry(). But the loop calls gen_pool_free() for the entry in the body of the loop - and then the iterator looks at node->next after the free. Use llist_for_each_entry_safe() instead. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: Gong Chen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-edac Link: http://lkml.kernel.org/r/0205920@agluck-desk.sc.intel.com Link: http://lkml.kernel.org/r/1459929916-12852-4-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce-genpool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c index 0a850100c594..2658e2af74ec 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c +++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c @@ -29,7 +29,7 @@ static char gen_pool_buf[MCE_POOLSZ]; void mce_gen_pool_process(void) { struct llist_node *head; - struct mce_evt_llist *node; + struct mce_evt_llist *node, *tmp; struct mce *mce; head = llist_del_all(&mce_event_llist); @@ -37,7 +37,7 @@ void mce_gen_pool_process(void) return; head = llist_reverse_order(head); - llist_for_each_entry(node, head, llnode) { + llist_for_each_entry_safe(node, tmp, head, llnode) { mce = &node->mce; atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node)); From 1c8497d2035d95e4e26bdf6cec34150bfb972776 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 30 Mar 2016 12:24:47 -0700 Subject: [PATCH 450/797] kvm: x86: do not leak guest xcr0 into host interrupt handlers commit fc5b7f3bf1e1414bd4e91db6918c85ace0c873a5 upstream. An interrupt handler that uses the fpu can kill a KVM VM, if it runs under the following conditions: - the guest's xcr0 register is loaded on the cpu - the guest's fpu context is not loaded - the host is using eagerfpu Note that the guest's xcr0 register and fpu context are not loaded as part of the atomic world switch into "guest mode". They are loaded by KVM while the cpu is still in "host mode". Usage of the fpu in interrupt context is gated by irq_fpu_usable(). The interrupt handler will look something like this: if (irq_fpu_usable()) { kernel_fpu_begin(); [... code that uses the fpu ...] kernel_fpu_end(); } As long as the guest's fpu is not loaded and the host is using eager fpu, irq_fpu_usable() returns true (interrupted_kernel_fpu_idle() returns true). The interrupt handler proceeds to use the fpu with the guest's xcr0 live. kernel_fpu_begin() saves the current fpu context. If this uses XSAVE[OPT], it may leave the xsave area in an undesirable state. According to the SDM, during XSAVE bit i of XSTATE_BV is not modified if bit i is 0 in xcr0. So it's possible that XSTATE_BV[i] == 1 and xcr0[i] == 0 following an XSAVE. kernel_fpu_end() restores the fpu context. Now if any bit i in XSTATE_BV == 1 while xcr0[i] == 0, XRSTOR generates a #GP. The fault is trapped and SIGSEGV is delivered to the current process. Only pre-4.2 kernels appear to be vulnerable to this sequence of events. Commit 653f52c ("kvm,x86: load guest FPU context more eagerly") from 4.2 forces the guest's fpu to always be loaded on eagerfpu hosts. This patch fixes the bug by keeping the host's xcr0 loaded outside of the interrupts-disabled region where KVM switches into guest mode. Suggested-by: Andy Lutomirski Signed-off-by: David Matlack [Move load after goto cancel_injection. - Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7eb4ebd3ebea..605cea75eb0d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -697,7 +697,6 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512) return 1; } - kvm_put_guest_xcr0(vcpu); vcpu->arch.xcr0 = xcr0; if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND) @@ -6495,8 +6494,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops->prepare_guest_switch(vcpu); if (vcpu->fpu_active) kvm_load_guest_fpu(vcpu); - kvm_load_guest_xcr0(vcpu); - vcpu->mode = IN_GUEST_MODE; srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); @@ -6519,6 +6516,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto cancel_injection; } + kvm_load_guest_xcr0(vcpu); + if (req_immediate_exit) smp_send_reschedule(vcpu->cpu); @@ -6568,6 +6567,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); + kvm_put_guest_xcr0(vcpu); + /* Interrupt is enabled by handle_external_intr() */ kvm_x86_ops->handle_external_intr(vcpu); @@ -7215,7 +7216,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) * and assume host would use all available bits. * Guest xcr0 would be loaded later. */ - kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; __kernel_fpu_begin(); __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state); @@ -7224,8 +7224,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - kvm_put_guest_xcr0(vcpu); - if (!vcpu->guest_fpu_loaded) { vcpu->fpu_counter = 0; return; From 5716a93fef70b4d305e9b3afea50c3027d22cc3c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Apr 2016 09:37:22 +0100 Subject: [PATCH 451/797] KVM: arm/arm64: Handle forward time correction gracefully commit 1c5631c73fc2261a5df64a72c155cb53dcdc0c45 upstream. On a host that runs NTP, corrections can have a direct impact on the background timer that we program on the behalf of a vcpu. In particular, NTP performing a forward correction will result in a timer expiring sooner than expected from a guest point of view. Not a big deal, we kick the vcpu anyway. But on wake-up, the vcpu thread is going to perform a check to find out whether or not it should block. And at that point, the timer check is going to say "timer has not expired yet, go back to sleep". This results in the timer event being lost forever. There are multiple ways to handle this. One would be record that the timer has expired and let kvm_cpu_has_pending_timer return true in that case, but that would be fairly invasive. Another is to check for the "short sleep" condition in the hrtimer callback, and restart the timer for the remaining time when the condition is detected. This patch implements the latter, with a bit of refactoring in order to avoid too much code duplication. Reported-by: Alexander Graf Reviewed-by: Alexander Graf Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/arch_timer.c | 49 +++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index ea6064696fe4..a7b9022b5c8f 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -86,6 +86,8 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); vcpu->arch.timer_cpu.armed = false; + WARN_ON(!kvm_timer_should_fire(vcpu)); + /* * If the vcpu is blocked we want to wake it up so that it will see * the timer has expired when entering the guest. @@ -93,10 +95,46 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) kvm_vcpu_kick(vcpu); } +static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu) +{ + cycle_t cval, now; + + cval = vcpu->arch.timer_cpu.cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + + if (now < cval) { + u64 ns; + + ns = cyclecounter_cyc2ns(timecounter->cc, + cval - now, + timecounter->mask, + &timecounter->frac); + return ns; + } + + return 0; +} + static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) { struct arch_timer_cpu *timer; + struct kvm_vcpu *vcpu; + u64 ns; + timer = container_of(hrt, struct arch_timer_cpu, timer); + vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); + + /* + * Check that the timer has really expired from the guest's + * PoV (NTP on the host may have forced it to expire + * early). If we should have slept longer, restart it. + */ + ns = kvm_timer_compute_delta(vcpu); + if (unlikely(ns)) { + hrtimer_forward_now(hrt, ns_to_ktime(ns)); + return HRTIMER_RESTART; + } + queue_work(wqueue, &timer->expired); return HRTIMER_NORESTART; } @@ -170,8 +208,6 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu) void kvm_timer_schedule(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - u64 ns; - cycle_t cval, now; BUG_ON(timer_is_armed(timer)); @@ -191,14 +227,7 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu) return; /* The timer has not yet expired, schedule a background timer */ - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - - ns = cyclecounter_cyc2ns(timecounter->cc, - cval - now, - timecounter->mask, - &timecounter->frac); - timer_arm(timer, ns); + timer_arm(timer, kvm_timer_compute_delta(vcpu)); } void kvm_timer_unschedule(struct kvm_vcpu *vcpu) From 4bb48b5f95a9e40451e259e295d03cd301740440 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Tue, 8 Mar 2016 12:24:35 +0530 Subject: [PATCH 452/797] ARM: dts: AM43x-epos: Fix clk parent for synctimer commit cfe1580a6415bc37fd62d79eb8102a618f7650b2 upstream. commit 55ee7017ee31 ("arm: omap2: board-generic: use omap4_local_timer_init for AM437x") makes synctimer32k as the clocksource on AM43xx. By default the synctimer32k is clocked by 32K RTC OSC on AM43xx. But this 32K RTC OSC is not available on epos boards which makes it fail to boot. Synctimer32k can also be clocked by a peripheral PLL, so making this as clock parent for synctimer3k on epos boards. Fixes: 55ee7017ee31 ("arm: omap2: board-generic: use omap4_local_timer_init for AM437x") Reported-by: Nishanth Menon Signed-off-by: Lokesh Vutla Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/am43x-epos-evm.dts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index 47954ed990f8..00707aac72fc 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -792,3 +792,8 @@ &mcasp1 { tx-num-evt = <32>; rx-num-evt = <32>; }; + +&synctimer_32kclk { + assigned-clocks = <&mux_synctimer32k_ck>; + assigned-clock-parents = <&clkdiv32k_ick>; +}; From c6cf3b71df047f61b568795fe926f107806eae82 Mon Sep 17 00:00:00 2001 From: Patrick Uiterwijk Date: Tue, 29 Mar 2016 16:57:40 +0000 Subject: [PATCH 453/797] ARM: mvebu: Correct unit address for linksys commit 199831c77c50e6913e893b6bc268ba9f4a9a2bf8 upstream. The USB2 port for Armada 38x is defined to be at 58000, not at 50000. Fixes: 2d0a7addbd10 ("ARM: Kirkwood: Add support for many Synology NAS devices") Signed-off-by: Patrick Uiterwijk Acked-by: Imre Kaloz Signed-off-by: Gregory CLEMENT Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/armada-385-linksys.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi index 3710755c6d76..85d2c377c332 100644 --- a/arch/arm/boot/dts/armada-385-linksys.dtsi +++ b/arch/arm/boot/dts/armada-385-linksys.dtsi @@ -117,7 +117,7 @@ sata@a8000 { }; /* USB part of the eSATA/USB 2.0 port */ - usb@50000 { + usb@58000 { status = "okay"; }; From 81b5ed00246258100df11c87f118a27f0ebceba3 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 11 Mar 2016 10:12:28 -0600 Subject: [PATCH 454/797] ARM: OMAP2: Fix up interconnect barrier initialization for DRA7 commit 456e8d53482537616899a146b706eccd095404e6 upstream. The following commits: commit 3fa609755c11 ("ARM: omap2: restore OMAP4 barrier behaviour") commit f746929ffdc8 ("Revert "ARM: OMAP4: remove dead kconfig option OMAP4_ERRATA_I688"") and commit ea827ad5ffbb ("ARM: DRA7: Provide proper IO map table") came in around the same time, unfortunately this seem to have missed initializing the barrier for DRA7 platforms - omap5_map_io was reused for dra7 till it was split out by the last patch. barrier_init needs to be hence carried forward as it is valid for DRA7 family of processors as they are for OMAP5. Fixes: ea827ad5ffbb7 ("ARM: DRA7: Provide proper IO map table") Reported-by: Laurent Pinchart Reported-by: Tomi Valkeinen Cc: Russell King Signed-off-by: Nishanth Menon Reviewed-by: Laurent Pinchart Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 3eaeaca5da05..3a911d8dea8b 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -368,6 +368,7 @@ void __init omap5_map_io(void) void __init dra7xx_map_io(void) { iotable_init(dra7xx_io_desc, ARRAY_SIZE(dra7xx_io_desc)); + omap_barriers_init(); } #endif /* From 882e790b572a7dadf5323e373d0139bfbc6dce15 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Sat, 26 Mar 2016 23:08:55 -0600 Subject: [PATCH 455/797] ARM: OMAP2+: hwmod: Fix updating of sysconfig register commit 3ca4a238106dedc285193ee47f494a6584b6fd2f upstream. Commit 127500ccb766f ("ARM: OMAP2+: Only write the sysconfig on idle when necessary") talks about verification of sysconfig cache value before updating it, only during idle path. But the patch is adding the verification in the enable path. So, adding the check in a proper place as per the commit description. Not keeping this check during enable path as there is a chance of losing context and it is safe to do on idle as the context of the register will never be lost while the device is active. Signed-off-by: Lokesh Vutla Acked-by: Tero Kristo Cc: Jon Hunter Fixes: commit 127500ccb766 "ARM: OMAP2+: Only write the sysconfig on idle when necessary" [paul@pwsan.com: appears to have been caused by my own mismerge of the originally posted patch] Signed-off-by: Paul Walmsley Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/omap_hwmod.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 8e0bd5939e5a..147c90e70b2e 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -1416,9 +1416,7 @@ static void _enable_sysc(struct omap_hwmod *oh) (sf & SYSC_HAS_CLOCKACTIVITY)) _set_clockactivity(oh, oh->class->sysc->clockact, &v); - /* If the cached value is the same as the new value, skip the write */ - if (oh->_sysc_cache != v) - _write_sysconfig(v, oh); + _write_sysconfig(v, oh); /* * Set the autoidle bit only after setting the smartidle bit @@ -1481,7 +1479,9 @@ static void _idle_sysc(struct omap_hwmod *oh) _set_master_standbymode(oh, idlemode, &v); } - _write_sysconfig(v, oh); + /* If the cached value is the same as the new value, skip the write */ + if (oh->_sysc_cache != v) + _write_sysconfig(v, oh); } /** From 6905c7a4aa1ef675825bc2ab56fd965a573ffb74 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Wed, 6 Apr 2016 14:06:48 +0100 Subject: [PATCH 456/797] assoc_array: don't call compare_object() on a node commit 8d4a2ec1e0b41b0cf9a0c5cd4511da7f8e4f3de2 upstream. Changes since V1: fixed the description and added KASan warning. In assoc_array_insert_into_terminal_node(), we call the compare_object() method on all non-empty slots, even when they're not leaves, passing a pointer to an unexpected structure to compare_object(). Currently it causes an out-of-bound read access in keyring_compare_object detected by KASan (see below). The issue is easily reproduced with keyutils testsuite. Only call compare_object() when the slot is a leave. KASan warning: ================================================================== BUG: KASAN: slab-out-of-bounds in keyring_compare_object+0x213/0x240 at addr ffff880060a6f838 Read of size 8 by task keyctl/1655 ============================================================================= BUG kmalloc-192 (Not tainted): kasan: bad access detected ----------------------------------------------------------------------------- Disabling lock debugging due to kernel taint INFO: Allocated in assoc_array_insert+0xfd0/0x3a60 age=69 cpu=1 pid=1647 ___slab_alloc+0x563/0x5c0 __slab_alloc+0x51/0x90 kmem_cache_alloc_trace+0x263/0x300 assoc_array_insert+0xfd0/0x3a60 __key_link_begin+0xfc/0x270 key_create_or_update+0x459/0xaf0 SyS_add_key+0x1ba/0x350 entry_SYSCALL_64_fastpath+0x12/0x76 INFO: Slab 0xffffea0001829b80 objects=16 used=8 fp=0xffff880060a6f550 flags=0x3fff8000004080 INFO: Object 0xffff880060a6f740 @offset=5952 fp=0xffff880060a6e5d1 Bytes b4 ffff880060a6f730: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffff880060a6f740: d1 e5 a6 60 00 88 ff ff 0e 00 00 00 00 00 00 00 ...`............ Object ffff880060a6f750: 02 cf 8e 60 00 88 ff ff 02 c0 8e 60 00 88 ff ff ...`.......`.... Object ffff880060a6f760: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffff880060a6f770: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffff880060a6f780: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffff880060a6f790: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffff880060a6f7a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffff880060a6f7b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffff880060a6f7c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffff880060a6f7d0: 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffff880060a6f7e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffff880060a6f7f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ CPU: 0 PID: 1655 Comm: keyctl Tainted: G B 4.5.0-rc4-kasan+ #291 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 0000000000000000 000000001b2800b4 ffff880060a179e0 ffffffff81b60491 ffff88006c802900 ffff880060a6f740 ffff880060a17a10 ffffffff815e2969 ffff88006c802900 ffffea0001829b80 ffff880060a6f740 ffff880060a6e650 Call Trace: [] dump_stack+0x85/0xc4 [] print_trailer+0xf9/0x150 [] object_err+0x34/0x40 [] kasan_report_error+0x230/0x550 [] ? keyring_get_key_chunk+0x13e/0x210 [] __asan_report_load_n_noabort+0x5d/0x70 [] ? keyring_compare_object+0x213/0x240 [] keyring_compare_object+0x213/0x240 [] assoc_array_insert+0x86c/0x3a60 [] ? assoc_array_cancel_edit+0x70/0x70 [] ? __key_link_begin+0x20d/0x270 [] __key_link_begin+0xfc/0x270 [] key_create_or_update+0x459/0xaf0 [] ? trace_hardirqs_on+0xd/0x10 [] ? key_type_lookup+0xc0/0xc0 [] ? lookup_user_key+0x13d/0xcd0 [] ? memdup_user+0x53/0x80 [] SyS_add_key+0x1ba/0x350 [] ? key_get_type_from_user.constprop.6+0xa0/0xa0 [] ? retint_user+0x18/0x23 [] ? trace_hardirqs_on_caller+0x3fe/0x580 [] ? trace_hardirqs_on_thunk+0x17/0x19 [] entry_SYSCALL_64_fastpath+0x12/0x76 Memory state around the buggy address: ffff880060a6f700: fc fc fc fc fc fc fc fc 00 00 00 00 00 00 00 00 ffff880060a6f780: 00 00 00 00 00 00 00 00 00 00 00 fc fc fc fc fc >ffff880060a6f800: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff880060a6f880: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff880060a6f900: fc fc fc fc fc fc 00 00 00 00 00 00 00 00 00 00 ================================================================== Signed-off-by: Jerome Marchand Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- lib/assoc_array.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 03dd576e6773..59fd7c0b119c 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -524,7 +524,9 @@ static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, free_slot = i; continue; } - if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) { + if (assoc_array_ptr_is_leaf(ptr) && + ops->compare_object(assoc_array_ptr_to_leaf(ptr), + index_key)) { pr_devel("replace in slot %d\n", i); edit->leaf_p = &node->slots[i]; edit->dead_leaf = node->slots[i]; From 6a20c0a043a73e39b5cd952d7eaf7fd7831e73ac Mon Sep 17 00:00:00 2001 From: Rafal Redzimski Date: Fri, 8 Apr 2016 16:25:05 +0300 Subject: [PATCH 457/797] usb: xhci: applying XHCI_PME_STUCK_QUIRK to Intel BXT B0 host commit 0d46faca6f887a849efb07c1655b5a9f7c288b45 upstream. Broxton B0 also requires XHCI_PME_STUCK_QUIRK. Adding PCI device ID for Broxton B and adding to quirk. Signed-off-by: Rafal Redzimski Signed-off-by: Robert Dobrowolski Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index c2d65206ec6c..6a120a71ca3d 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -48,6 +48,7 @@ #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI 0xa12f #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI 0x9d2f #define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI 0x0aa8 +#define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI 0x1aa8 static const char hcd_name[] = "xhci_hcd"; @@ -156,7 +157,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) (pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_ETRON && From ba7aa9a970dc12054252042e2b30e1dedcdc5968 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 8 Apr 2016 16:25:06 +0300 Subject: [PATCH 458/797] xhci: resume USB 3 roothub first commit 671ffdff5b13314b1fc65d62cf7604b873fb5dc4 upstream. Give USB3 devices a better chance to enumerate at USB 3 speeds if they are connected to a suspended host. Solves an issue with NEC uPD720200 host hanging when partially enumerating a USB3 device as USB2 after host controller runtime resume. Tested-by: Mike Murdoch Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 776d59c32bc5..0be5beaf3f85 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1103,8 +1103,8 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) /* Resume root hubs only when have pending events. */ status = readl(&xhci->op_regs->status); if (status & STS_EINT) { - usb_hcd_resume_root_hub(hcd); usb_hcd_resume_root_hub(xhci->shared_hcd); + usb_hcd_resume_root_hub(hcd); } } @@ -1119,10 +1119,10 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) /* Re-enable port polling. */ xhci_dbg(xhci, "%s: starting port polling.\n", __func__); - set_bit(HCD_FLAG_POLL_RH, &hcd->flags); - usb_hcd_poll_rh_status(hcd); set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags); usb_hcd_poll_rh_status(xhci->shared_hcd); + set_bit(HCD_FLAG_POLL_RH, &hcd->flags); + usb_hcd_poll_rh_status(hcd); return retval; } From bb6adb50beb03da007c63e86866f6be81d671075 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 8 Apr 2016 16:25:09 +0300 Subject: [PATCH 459/797] usb: xhci: fix wild pointers in xhci_mem_cleanup commit 71504062a7c34838c3fccd92c447f399d3cb5797 upstream. This patch fixes some wild pointers produced by xhci_mem_cleanup. These wild pointers will cause system crash if xhci_mem_cleanup() is called twice. Reported-and-tested-by: Pengcheng Li Signed-off-by: Lu Baolu Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index c48cbe731356..d8dbd7e5194b 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1875,6 +1875,12 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) kfree(xhci->rh_bw); kfree(xhci->ext_caps); + xhci->usb2_ports = NULL; + xhci->usb3_ports = NULL; + xhci->port_array = NULL; + xhci->rh_bw = NULL; + xhci->ext_caps = NULL; + xhci->page_size = 0; xhci->page_shift = 0; xhci->bus_state[0].bus_suspended = 0; From 0eb1e16bf9feb36441440b0bd9fb0ced0fcdfdb6 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 8 Apr 2016 16:25:10 +0300 Subject: [PATCH 460/797] xhci: fix 10 second timeout on removal of PCI hotpluggable xhci controllers commit 98d74f9ceaefc2b6c4a6440050163a83be0abede upstream. PCI hotpluggable xhci controllers such as some Alpine Ridge solutions will remove the xhci controller from the PCI bus when the last USB device is disconnected. Add a flag to indicate that the host is being removed to avoid queueing configure_endpoint commands for the dropped endpoints. For PCI hotplugged controllers this will prevent 5 second command timeouts For static xhci controllers the configure_endpoint command is not needed in the removal case as everything will be returned, freed, and the controller is reset. For now the flag is only set for PCI connected host controllers. Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 1 + drivers/usb/host/xhci-ring.c | 3 ++- drivers/usb/host/xhci.c | 8 +++++--- drivers/usb/host/xhci.h | 1 + 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 6a120a71ca3d..ea4fb4b0cd44 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -301,6 +301,7 @@ static void xhci_pci_remove(struct pci_dev *dev) struct xhci_hcd *xhci; xhci = hcd_to_xhci(pci_get_drvdata(dev)); + xhci->xhc_state |= XHCI_STATE_REMOVING; if (xhci->shared_hcd) { usb_remove_hcd(xhci->shared_hcd); usb_put_hcd(xhci->shared_hcd); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index db0f0831b94f..2b63969c2bbf 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -4008,7 +4008,8 @@ static int queue_command(struct xhci_hcd *xhci, struct xhci_command *cmd, int reserved_trbs = xhci->cmd_ring_reserved_trbs; int ret; - if (xhci->xhc_state) { + if ((xhci->xhc_state & XHCI_STATE_DYING) || + (xhci->xhc_state & XHCI_STATE_HALTED)) { xhci_dbg(xhci, "xHCI dying or halted, can't queue_command\n"); return -ESHUTDOWN; } diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0be5beaf3f85..ec9e758d5fcd 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -146,7 +146,8 @@ static int xhci_start(struct xhci_hcd *xhci) "waited %u microseconds.\n", XHCI_MAX_HALT_USEC); if (!ret) - xhci->xhc_state &= ~(XHCI_STATE_HALTED | XHCI_STATE_DYING); + /* clear state flags. Including dying, halted or removing */ + xhci->xhc_state = 0; return ret; } @@ -2753,7 +2754,8 @@ int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) if (ret <= 0) return ret; xhci = hcd_to_xhci(hcd); - if (xhci->xhc_state & XHCI_STATE_DYING) + if ((xhci->xhc_state & XHCI_STATE_DYING) || + (xhci->xhc_state & XHCI_STATE_REMOVING)) return -ENODEV; xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev); @@ -3800,7 +3802,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_lock(&xhci->mutex); - if (xhci->xhc_state) /* dying or halted */ + if (xhci->xhc_state) /* dying, removing or halted */ goto out; if (!udev->slot_id) { diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 0b9451250e33..99ac2289dbf3 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1596,6 +1596,7 @@ struct xhci_hcd { */ #define XHCI_STATE_DYING (1 << 0) #define XHCI_STATE_HALTED (1 << 1) +#define XHCI_STATE_REMOVING (1 << 2) /* Statistics */ int error_bitmask; unsigned int quirks; From 95b9219e05dafdb76b0707e815e5314cc0cf91af Mon Sep 17 00:00:00 2001 From: Robert Dobrowolski Date: Thu, 24 Mar 2016 03:30:07 -0700 Subject: [PATCH 461/797] usb: hcd: out of bounds access in for_each_companion commit e86103a75705c7c530768f4ffaba74cf382910f2 upstream. On BXT platform Host Controller and Device Controller figure as same PCI device but with different device function. HCD should not pass data to Device Controller but only to Host Controllers. Checking if companion device is Host Controller, otherwise skip. Signed-off-by: Robert Dobrowolski Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd-pci.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index 9eb1cff28bd4..b8b580e5ae6e 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -74,6 +74,15 @@ static void for_each_companion(struct pci_dev *pdev, struct usb_hcd *hcd, if (companion->bus != pdev->bus || PCI_SLOT(companion->devfn) != slot) continue; + + /* + * Companion device should be either UHCI,OHCI or EHCI host + * controller, otherwise skip. + */ + if (companion->class != CL_UHCI && companion->class != CL_OHCI && + companion->class != CL_EHCI) + continue; + companion_hcd = pci_get_drvdata(companion); if (!companion_hcd || !companion_hcd->self.root_hub) continue; From e4c7ab76586146820b394e0176f286f5a2e70cb3 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 14 Apr 2016 17:01:17 +0200 Subject: [PATCH 462/797] usb: gadget: f_fs: Fix use-after-free commit 38740a5b87d53ceb89eb2c970150f6e94e00373a upstream. When using asynchronous read or write operations on the USB endpoints the issuer of the IO request is notified by calling the ki_complete() callback of the submitted kiocb when the URB has been completed. Calling this ki_complete() callback will free kiocb. Make sure that the structure is no longer accessed beyond that point, otherwise undefined behaviour might occur. Fixes: 2e4c7553cd6f ("usb: gadget: f_fs: add aio support") Signed-off-by: Lars-Peter Clausen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index cf43e9e18368..79d895c2dd71 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -646,6 +646,7 @@ static void ffs_user_copy_worker(struct work_struct *work) work); int ret = io_data->req->status ? io_data->req->status : io_data->req->actual; + bool kiocb_has_eventfd = io_data->kiocb->ki_flags & IOCB_EVENTFD; if (io_data->read && ret > 0) { use_mm(io_data->mm); @@ -657,13 +658,11 @@ static void ffs_user_copy_worker(struct work_struct *work) io_data->kiocb->ki_complete(io_data->kiocb, ret, ret); - if (io_data->ffs->ffs_eventfd && - !(io_data->kiocb->ki_flags & IOCB_EVENTFD)) + if (io_data->ffs->ffs_eventfd && !kiocb_has_eventfd) eventfd_signal(io_data->ffs->ffs_eventfd, 1); usb_ep_free_request(io_data->ep, io_data->req); - io_data->kiocb->private = NULL; if (io_data->read) kfree(io_data->to_free); kfree(io_data->buf); From 9d58f322ee18ffaca1e0b67d90ab811ad75e62a6 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 12 Apr 2016 12:14:46 -0400 Subject: [PATCH 463/797] dm cache metadata: fix READ_LOCK macros and cleanup WRITE_LOCK macros commit 9567366fefddeaea4ed1d713270535d93a3b3c76 upstream. The READ_LOCK macro was incorrectly returning -EINVAL if dm_bm_is_read_only() was true -- it will always be true once the cache metadata transitions to read-only by dm_cache_metadata_set_read_only(). Wrap READ_LOCK and WRITE_LOCK multi-statement macros in do {} while(0). Also, all accesses of the 'cmd' argument passed to these related macros are now encapsulated in parenthesis. A follow-up patch can be developed to eliminate the use of macros in favor of pure C code. Avoiding that now given that this needs to apply to stable@. Reported-by: Ben Hutchings Signed-off-by: Mike Snitzer Fixes: d14fcf3dd79 ("dm cache: make sure every metadata function checks fail_io") Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-metadata.c | 64 +++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 27f2ef300f8b..65ce6985f87a 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -867,39 +867,55 @@ static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd, return 0; } -#define WRITE_LOCK(cmd) \ - down_write(&cmd->root_lock); \ - if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \ - up_write(&cmd->root_lock); \ - return -EINVAL; \ +static bool cmd_write_lock(struct dm_cache_metadata *cmd) +{ + down_write(&cmd->root_lock); + if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { + up_write(&cmd->root_lock); + return false; } + return true; +} -#define WRITE_LOCK_VOID(cmd) \ - down_write(&cmd->root_lock); \ - if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \ - up_write(&cmd->root_lock); \ - return; \ - } +#define WRITE_LOCK(cmd) \ + do { \ + if (!cmd_write_lock((cmd))) \ + return -EINVAL; \ + } while(0) + +#define WRITE_LOCK_VOID(cmd) \ + do { \ + if (!cmd_write_lock((cmd))) \ + return; \ + } while(0) #define WRITE_UNLOCK(cmd) \ - up_write(&cmd->root_lock) + up_write(&(cmd)->root_lock) -#define READ_LOCK(cmd) \ - down_read(&cmd->root_lock); \ - if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \ - up_read(&cmd->root_lock); \ - return -EINVAL; \ +static bool cmd_read_lock(struct dm_cache_metadata *cmd) +{ + down_write(&cmd->root_lock); + if (cmd->fail_io) { + up_write(&cmd->root_lock); + return false; } + return true; +} -#define READ_LOCK_VOID(cmd) \ - down_read(&cmd->root_lock); \ - if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \ - up_read(&cmd->root_lock); \ - return; \ - } +#define READ_LOCK(cmd) \ + do { \ + if (!cmd_read_lock((cmd))) \ + return -EINVAL; \ + } while(0) + +#define READ_LOCK_VOID(cmd) \ + do { \ + if (!cmd_read_lock((cmd))) \ + return; \ + } while(0) #define READ_UNLOCK(cmd) \ - up_read(&cmd->root_lock) + up_read(&(cmd)->root_lock) int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size) { From be5cbaf31cd318f8aaeeff901f6d27232dfa965f Mon Sep 17 00:00:00 2001 From: Ahmed Samy Date: Sun, 17 Apr 2016 05:37:09 +0000 Subject: [PATCH 464/797] dm cache metadata: fix cmd_read_lock() acquiring write lock commit 6545b60baaf880b0cd29a5e89dbe745a06027e89 upstream. Commit 9567366fefdd ("dm cache metadata: fix READ_LOCK macros and cleanup WRITE_LOCK macros") uses down_write() instead of down_read() in cmd_read_lock(), yet up_read() is used to release the lock in READ_UNLOCK(). Fix it. Fixes: 9567366fefdd ("dm cache metadata: fix READ_LOCK macros and cleanup WRITE_LOCK macros") Signed-off-by: Ahmed Samy Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-metadata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 65ce6985f87a..3970cda10080 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -894,9 +894,9 @@ static bool cmd_write_lock(struct dm_cache_metadata *cmd) static bool cmd_read_lock(struct dm_cache_metadata *cmd) { - down_write(&cmd->root_lock); + down_read(&cmd->root_lock); if (cmd->fail_io) { - up_write(&cmd->root_lock); + up_read(&cmd->root_lock); return false; } return true; From ed643d220692bfc2bfec9fe618d102f13a5dae9f Mon Sep 17 00:00:00 2001 From: Rui Salvaterra Date: Sat, 9 Apr 2016 22:05:34 +0100 Subject: [PATCH 465/797] lib: lz4: fixed zram with lz4 on big endian machines commit 3e26a691fe3fe1e02a76e5bab0c143ace4b137b4 upstream. Based on Sergey's test patch [1], this fixes zram with lz4 compression on big endian cpus. Note that the 64-bit preprocessor test is not a cleanup, it's part of the fix, since those identifiers are bogus (for example, __ppc64__ isn't defined anywhere else in the kernel, which means we'd fall into the 32-bit definitions on ppc64). Tested on ppc64 with no regression on x86_64. [1] http://marc.info/?l=linux-kernel&m=145994470805853&w=4 Suggested-by: Sergey Senozhatsky Signed-off-by: Rui Salvaterra Reviewed-by: Sergey Senozhatsky Signed-off-by: Greg Kroah-Hartman --- lib/lz4/lz4defs.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index abcecdc2d0f2..0710a62ad2f6 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -11,8 +11,7 @@ /* * Detects 64 bits mode */ -#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) \ - || defined(__ppc64__) || defined(__LP64__)) +#if defined(CONFIG_64BIT) #define LZ4_ARCH64 1 #else #define LZ4_ARCH64 0 @@ -35,6 +34,10 @@ typedef struct _U64_S { u64 v; } U64_S; #define PUT4(s, d) (A32(d) = A32(s)) #define PUT8(s, d) (A64(d) = A64(s)) + +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ + (d = s - A16(p)) + #define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ do { \ A16(p) = v; \ @@ -51,10 +54,13 @@ typedef struct _U64_S { u64 v; } U64_S; #define PUT8(s, d) \ put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) -#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ - do { \ - put_unaligned(v, (u16 *)(p)); \ - p += 2; \ +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ + (d = s - get_unaligned_le16(p)) + +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + put_unaligned_le16(v, (u16 *)(p)); \ + p += 2; \ } while (0) #endif @@ -140,9 +146,6 @@ typedef struct _U64_S { u64 v; } U64_S; #endif -#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ - (d = s - get_unaligned_le16(p)) - #define LZ4_WILDCOPY(s, d, e) \ do { \ LZ4_COPYPACKET(s, d); \ From 9b3bd581a0492bdfe788539ca65a14da570faad1 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Wed, 9 Mar 2016 09:18:07 -0600 Subject: [PATCH 466/797] debugfs: Make automount point inodes permanently empty commit 87243deb88671f70def4c52dfa7ca7830707bd31 upstream. Starting with 4.1 the tracing subsystem has its own filesystem which is automounted in the tracing subdirectory of debugfs. Prior to this debugfs could be bind mounted in a cloned mount namespace, but if tracefs has been mounted under debugfs this now fails because there is a locked child mount. This creates a regression for container software which bind mounts debugfs to satisfy the assumption of some userspace software. In other pseudo filesystems such as proc and sysfs we're already creating mountpoints like this in such a way that no dirents can be created in the directories, allowing them to be exceptions to some MNT_LOCKED tests. In fact we're already do this for the tracefs mountpoint in sysfs. Do the same in debugfs_create_automount(), since the intention here is clearly to create a mountpoint. This fixes the regression, as locked child mounts on permanently empty directories do not cause a bind mount to fail. Signed-off-by: Seth Forshee Acked-by: Serge Hallyn Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b7fcc0de0b2f..0f5d05bf2131 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -457,7 +457,7 @@ struct dentry *debugfs_create_automount(const char *name, if (unlikely(!inode)) return failed_creating(dentry); - inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + make_empty_dir_inode(inode); inode->i_flags |= S_AUTOMOUNT; inode->i_private = data; dentry->d_fsdata = (void *)f; From 42e6f01a44fe4aab28819b5efa48fbe9da3059e5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 8 Apr 2016 16:22:17 +0300 Subject: [PATCH 467/797] dmaengine: dw: fix master selection commit 3fe6409c23e2bee4b2b1b6d671d2da8daa15271c upstream. The commit 895005202987 ("dmaengine: dw: apply both HS interfaces and remove slave_id usage") cleaned up the code to avoid usage of depricated slave_id member of generic slave configuration. Meanwhile it broke the master selection by removing important call to dwc_set_masters() in ->device_alloc_chan_resources() which copied masters from custom slave configuration to the internal channel structure. Everything works until now since there is no customized connection of DesignWare DMA IP to the bus, i.e. one bus and one or more masters are in use. The configurations where 2 masters are connected to the different masters are not working anymore. We are expecting one user of such configuration and need to select masters properly. Besides that it is obviously a performance regression since only one master is in use in multi-master configuration. Select masters in accordance with what user asked for. Keep this patch in a form more suitable for back porting. We are safe to take necessary data in ->device_alloc_chan_resources() because we don't support generic slave configuration embedded into custom one, and thus the only way to provide such is to use the parameter to a filter function which is called exactly before channel resource allocation. While here, replase BUG_ON to less noisy dev_warn() and prevent channel allocation in case of error. Fixes: 895005202987 ("dmaengine: dw: apply both HS interfaces and remove slave_id usage") Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dw/core.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 4f099ea29f83..c66133b5e852 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -130,26 +130,14 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) static void dwc_initialize(struct dw_dma_chan *dwc) { struct dw_dma *dw = to_dw_dma(dwc->chan.device); - struct dw_dma_slave *dws = dwc->chan.private; u32 cfghi = DWC_CFGH_FIFO_MODE; u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); if (dwc->initialized == true) return; - if (dws) { - /* - * We need controller-specific data to set up slave - * transfers. - */ - BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev); - - cfghi |= DWC_CFGH_DST_PER(dws->dst_id); - cfghi |= DWC_CFGH_SRC_PER(dws->src_id); - } else { - cfghi |= DWC_CFGH_DST_PER(dwc->dst_id); - cfghi |= DWC_CFGH_SRC_PER(dwc->src_id); - } + cfghi |= DWC_CFGH_DST_PER(dwc->dst_id); + cfghi |= DWC_CFGH_SRC_PER(dwc->src_id); channel_writel(dwc, CFG_LO, cfglo); channel_writel(dwc, CFG_HI, cfghi); @@ -936,7 +924,7 @@ bool dw_dma_filter(struct dma_chan *chan, void *param) struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma_slave *dws = param; - if (!dws || dws->dma_dev != chan->device->dev) + if (dws->dma_dev != chan->device->dev) return false; /* We have to copy data since dws can be temporary storage */ @@ -1160,6 +1148,14 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) * doesn't mean what you think it means), and status writeback. */ + /* + * We need controller-specific data to set up slave transfers. + */ + if (chan->private && !dw_dma_filter(chan, chan->private)) { + dev_warn(chan2dev(chan), "Wrong controller-specific data\n"); + return -EINVAL; + } + /* Enable controller here if needed */ if (!dw->in_use) dw_dma_on(dw); @@ -1221,6 +1217,14 @@ static void dwc_free_chan_resources(struct dma_chan *chan) spin_lock_irqsave(&dwc->lock, flags); list_splice_init(&dwc->free_list, &list); dwc->descs_allocated = 0; + + /* Clear custom channel configuration */ + dwc->src_id = 0; + dwc->dst_id = 0; + + dwc->src_master = 0; + dwc->dst_master = 0; + dwc->initialized = false; /* Disable interrupts */ From 34c1b030296c6815c05e416c7a647b68e695004a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 18 Mar 2016 14:26:32 +0200 Subject: [PATCH 468/797] dmaengine: hsu: correct use of channel status register commit 4f4bc0abff79dc9d7ccbd3143adbf8ad1f4fe6ab upstream. There is a typo in documentation regarding to descriptor empty bit (DESCE) which is set to 1 when descriptor is empty. Thus, status register at the end of a transfer usually returns all DESCE bits set and thus it will never be zero. Moreover, there are 2 bits (CDESC) that encode current descriptor, on which interrupt has been asserted. In case when we have few descriptors programmed we might have non-zero value. Remove DESCE and CDESC bits from DMA channel status register (HSU_CH_SR) when reading it. Fixes: 2b49e0c56741 ("dmaengine: append hsu DMA driver") Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/hsu/hsu.c | 2 +- drivers/dma/hsu/hsu.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c index 823ad728aecf..efc02b98e6ba 100644 --- a/drivers/dma/hsu/hsu.c +++ b/drivers/dma/hsu/hsu.c @@ -135,7 +135,7 @@ static u32 hsu_dma_chan_get_sr(struct hsu_dma_chan *hsuc) sr = hsu_chan_readl(hsuc, HSU_CH_SR); spin_unlock_irqrestore(&hsuc->vchan.lock, flags); - return sr; + return sr & ~(HSU_CH_SR_DESCE_ANY | HSU_CH_SR_CDESC_ANY); } irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr) diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h index f06579c6d548..26da2865b025 100644 --- a/drivers/dma/hsu/hsu.h +++ b/drivers/dma/hsu/hsu.h @@ -41,6 +41,9 @@ #define HSU_CH_SR_DESCTO(x) BIT(8 + (x)) #define HSU_CH_SR_DESCTO_ANY (BIT(11) | BIT(10) | BIT(9) | BIT(8)) #define HSU_CH_SR_CHE BIT(15) +#define HSU_CH_SR_DESCE(x) BIT(16 + (x)) +#define HSU_CH_SR_DESCE_ANY (BIT(19) | BIT(18) | BIT(17) | BIT(16)) +#define HSU_CH_SR_CDESC_ANY (BIT(31) | BIT(30)) /* Bits in HSU_CH_CR */ #define HSU_CH_CR_CHA BIT(0) From 94d75e190f199dfce1094496927418cb00810683 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Mon, 15 Feb 2016 21:57:48 +0100 Subject: [PATCH 469/797] dmaengine: pxa_dma: fix the maximum requestor line commit 6bab1c6afdca0371cfa957079b36b78d12dd2cf5 upstream. The current number of requestor lines is limited to 31. This was an error of a previous commit, as this number is platform dependent, and is actually : - for pxa25x: 40 requestor lines - for pxa27x: 75 requestor lines - for pxa3xx: 100 requestor lines The previous testing did not reveal the faulty constant as on pxa[23]xx platforms, only camera, MSL and USB are above requestor 32, and in these only the camera has a driver using dma. Fixes: e87ffbdf0697 ("dmaengine: pxa_dma: fix the no-requestor case") Signed-off-by: Robert Jarzmik Acked-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/pxa_dma.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index a59061e4221a..55f5d33f6dc7 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -122,6 +122,7 @@ struct pxad_chan { struct pxad_device { struct dma_device slave; int nr_chans; + int nr_requestors; void __iomem *base; struct pxad_phy *phys; spinlock_t phy_lock; /* Phy association */ @@ -473,7 +474,7 @@ static void pxad_free_phy(struct pxad_chan *chan) return; /* clear the channel mapping in DRCMR */ - if (chan->drcmr <= DRCMR_CHLNUM) { + if (chan->drcmr <= pdev->nr_requestors) { reg = pxad_drcmr(chan->drcmr); writel_relaxed(0, chan->phy->base + reg); } @@ -509,6 +510,7 @@ static bool is_running_chan_misaligned(struct pxad_chan *chan) static void phy_enable(struct pxad_phy *phy, bool misaligned) { + struct pxad_device *pdev; u32 reg, dalgn; if (!phy->vchan) @@ -518,7 +520,8 @@ static void phy_enable(struct pxad_phy *phy, bool misaligned) "%s(); phy=%p(%d) misaligned=%d\n", __func__, phy, phy->idx, misaligned); - if (phy->vchan->drcmr <= DRCMR_CHLNUM) { + pdev = to_pxad_dev(phy->vchan->vc.chan.device); + if (phy->vchan->drcmr <= pdev->nr_requestors) { reg = pxad_drcmr(phy->vchan->drcmr); writel_relaxed(DRCMR_MAPVLD | phy->idx, phy->base + reg); } @@ -914,6 +917,7 @@ static void pxad_get_config(struct pxad_chan *chan, { u32 maxburst = 0, dev_addr = 0; enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED; + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); *dcmd = 0; if (dir == DMA_DEV_TO_MEM) { @@ -922,7 +926,7 @@ static void pxad_get_config(struct pxad_chan *chan, dev_addr = chan->cfg.src_addr; *dev_src = dev_addr; *dcmd |= PXA_DCMD_INCTRGADDR; - if (chan->drcmr <= DRCMR_CHLNUM) + if (chan->drcmr <= pdev->nr_requestors) *dcmd |= PXA_DCMD_FLOWSRC; } if (dir == DMA_MEM_TO_DEV) { @@ -931,7 +935,7 @@ static void pxad_get_config(struct pxad_chan *chan, dev_addr = chan->cfg.dst_addr; *dev_dst = dev_addr; *dcmd |= PXA_DCMD_INCSRCADDR; - if (chan->drcmr <= DRCMR_CHLNUM) + if (chan->drcmr <= pdev->nr_requestors) *dcmd |= PXA_DCMD_FLOWTRG; } if (dir == DMA_MEM_TO_MEM) @@ -1341,13 +1345,15 @@ static struct dma_chan *pxad_dma_xlate(struct of_phandle_args *dma_spec, static int pxad_init_dmadev(struct platform_device *op, struct pxad_device *pdev, - unsigned int nr_phy_chans) + unsigned int nr_phy_chans, + unsigned int nr_requestors) { int ret; unsigned int i; struct pxad_chan *c; pdev->nr_chans = nr_phy_chans; + pdev->nr_requestors = nr_requestors; INIT_LIST_HEAD(&pdev->slave.channels); pdev->slave.device_alloc_chan_resources = pxad_alloc_chan_resources; pdev->slave.device_free_chan_resources = pxad_free_chan_resources; @@ -1382,7 +1388,7 @@ static int pxad_probe(struct platform_device *op) const struct of_device_id *of_id; struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev); struct resource *iores; - int ret, dma_channels = 0; + int ret, dma_channels = 0, nb_requestors = 0; const enum dma_slave_buswidth widths = DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES | DMA_SLAVE_BUSWIDTH_4_BYTES; @@ -1399,13 +1405,23 @@ static int pxad_probe(struct platform_device *op) return PTR_ERR(pdev->base); of_id = of_match_device(pxad_dt_ids, &op->dev); - if (of_id) + if (of_id) { of_property_read_u32(op->dev.of_node, "#dma-channels", &dma_channels); - else if (pdata && pdata->dma_channels) + ret = of_property_read_u32(op->dev.of_node, "#dma-requests", + &nb_requestors); + if (ret) { + dev_warn(pdev->slave.dev, + "#dma-requests set to default 32 as missing in OF: %d", + ret); + nb_requestors = 32; + }; + } else if (pdata && pdata->dma_channels) { dma_channels = pdata->dma_channels; - else + nb_requestors = pdata->nb_requestors; + } else { dma_channels = 32; /* default 32 channel */ + } dma_cap_set(DMA_SLAVE, pdev->slave.cap_mask); dma_cap_set(DMA_MEMCPY, pdev->slave.cap_mask); @@ -1422,7 +1438,7 @@ static int pxad_probe(struct platform_device *op) pdev->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; pdev->slave.dev = &op->dev; - ret = pxad_init_dmadev(op, pdev, dma_channels); + ret = pxad_init_dmadev(op, pdev, dma_channels, nb_requestors); if (ret) { dev_err(pdev->slave.dev, "unable to register\n"); return ret; @@ -1441,7 +1457,8 @@ static int pxad_probe(struct platform_device *op) platform_set_drvdata(op, pdev); pxad_init_debugfs(pdev); - dev_info(pdev->slave.dev, "initialized %d channels\n", dma_channels); + dev_info(pdev->slave.dev, "initialized %d channels on %d requestors\n", + dma_channels, nb_requestors); return 0; } From c0944355a74bc9c2b5b3cc5b627efe0c73e30bd9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Mar 2016 16:22:45 +0100 Subject: [PATCH 470/797] sched/cgroup: Fix/cleanup cgroup teardown/init commit 2f5177f0fd7e531b26d54633be62d1d4cb94621c upstream. The CPU controller hasn't kept up with the various changes in the whole cgroup initialization / destruction sequence, and commit: 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups") caused it to explode. The reason for this is that zombies do not inhibit css_offline() from being called, but do stall css_released(). Now we tear down the cfs_rq structures on css_offline() but zombies can run after that, leading to use-after-free issues. The solution is to move the tear-down to css_released(), which guarantees nobody (including no zombies) is still using our cgroup. Furthermore, a few simple cleanups are possible too. There doesn't appear to be any point to us using css_online() (anymore?) so fold that in css_alloc(). And since cgroup code guarantees an RCU grace period between css_released() and css_free() we can forgo using call_rcu() and free the stuff immediately. Suggested-by: Tejun Heo Reported-by: Kazuki Yamaguchi Reported-by: Niklas Cassel Tested-by: Niklas Cassel Signed-off-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups") Link: http://lkml.kernel.org/r/20160316152245.GY6344@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 70e5e09341f1..55bebf924946 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7693,7 +7693,7 @@ void set_curr_task(int cpu, struct task_struct *p) /* task_group_lock serializes the addition/removal of task groups */ static DEFINE_SPINLOCK(task_group_lock); -static void free_sched_group(struct task_group *tg) +static void sched_free_group(struct task_group *tg) { free_fair_sched_group(tg); free_rt_sched_group(tg); @@ -7719,7 +7719,7 @@ struct task_group *sched_create_group(struct task_group *parent) return tg; err: - free_sched_group(tg); + sched_free_group(tg); return ERR_PTR(-ENOMEM); } @@ -7739,17 +7739,16 @@ void sched_online_group(struct task_group *tg, struct task_group *parent) } /* rcu callback to free various structures associated with a task group */ -static void free_sched_group_rcu(struct rcu_head *rhp) +static void sched_free_group_rcu(struct rcu_head *rhp) { /* now it should be safe to free those cfs_rqs */ - free_sched_group(container_of(rhp, struct task_group, rcu)); + sched_free_group(container_of(rhp, struct task_group, rcu)); } -/* Destroy runqueue etc associated with a task group */ void sched_destroy_group(struct task_group *tg) { /* wait for possible concurrent references to cfs_rqs complete */ - call_rcu(&tg->rcu, free_sched_group_rcu); + call_rcu(&tg->rcu, sched_free_group_rcu); } void sched_offline_group(struct task_group *tg) @@ -8210,31 +8209,26 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (IS_ERR(tg)) return ERR_PTR(-ENOMEM); + sched_online_group(tg, parent); + return &tg->css; } -static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) +static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) { struct task_group *tg = css_tg(css); - struct task_group *parent = css_tg(css->parent); - if (parent) - sched_online_group(tg, parent); - return 0; + sched_offline_group(tg); } static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) { struct task_group *tg = css_tg(css); - sched_destroy_group(tg); -} - -static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css) -{ - struct task_group *tg = css_tg(css); - - sched_offline_group(tg); + /* + * Relies on the RCU grace period between css_released() and this. + */ + sched_free_group(tg); } static void cpu_cgroup_fork(struct task_struct *task, void *private) @@ -8594,9 +8588,8 @@ static struct cftype cpu_files[] = { struct cgroup_subsys cpu_cgrp_subsys = { .css_alloc = cpu_cgroup_css_alloc, + .css_released = cpu_cgroup_css_released, .css_free = cpu_cgroup_css_free, - .css_online = cpu_cgroup_css_online, - .css_offline = cpu_cgroup_css_offline, .fork = cpu_cgroup_fork, .can_attach = cpu_cgroup_can_attach, .attach = cpu_cgroup_attach, From bdb0618ad1b9ea6ec6926450c687d133ccddf28c Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 7 Jan 2016 16:07:20 +0000 Subject: [PATCH 471/797] arm64: Honour !PTE_WRITE in set_pte_at() for kernel mappings commit ac15bd63bbb24238f763ec5b24ee175ec301e8cd upstream. Currently, set_pte_at() only checks the software PTE_WRITE bit for user mappings when it sets or clears the hardware PTE_RDONLY accordingly. The kernel ptes are written directly without any modification, relying solely on the protection bits in macros like PAGE_KERNEL. However, modifying kernel pte attributes via pte_wrprotect() would be ignored by set_pte_at(). Since pte_wrprotect() does not set PTE_RDONLY (it only clears PTE_WRITE), the new permission is not taken into account. This patch changes set_pte_at() to adjust the read-only permission for kernel ptes as well. As a side effect, existing PROT_* definitions used for kernel ioremap*() need to include PTE_DIRTY | PTE_WRITE. (additionally, white space fix for PTE_KERNEL_ROX) Acked-by: Andrey Ryabinin Tested-by: Ard Biesheuvel Signed-off-by: Catalin Marinas Reported-by: Ard Biesheuvel Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/pgtable.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index eaa9cabf4066..298474933ef3 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -69,11 +69,11 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) -#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC)) -#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT)) -#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL)) +#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) @@ -83,7 +83,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) -#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY) +#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) @@ -155,6 +155,7 @@ extern struct page *empty_zero_page; #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) +#define pte_user(pte) (!!(pte_val(pte) & PTE_USER)) #ifdef CONFIG_ARM64_HW_AFDBM #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) @@ -165,8 +166,6 @@ extern struct page *empty_zero_page; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) -#define pte_valid_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) #define pte_valid_not_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) @@ -264,13 +263,13 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - if (pte_valid_user(pte)) { - if (!pte_special(pte) && pte_exec(pte)) - __sync_icache_dcache(pte, addr); + if (pte_valid(pte)) { if (pte_sw_dirty(pte) && pte_write(pte)) pte_val(pte) &= ~PTE_RDONLY; else pte_val(pte) |= PTE_RDONLY; + if (pte_user(pte) && pte_exec(pte) && !pte_special(pte)) + __sync_icache_dcache(pte, addr); } /* From 70d65587f0a82f50952cb29af133a5b6b8538611 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 9 Mar 2016 16:31:29 +0000 Subject: [PATCH 472/797] arm64: Update PTE_RDONLY in set_pte_at() for PROT_NONE permission commit fdc69e7df3cb24f18a93192641786e5b7ecd1dfe upstream. The set_pte_at() function must update the hardware PTE_RDONLY bit depending on the state of the PTE_WRITE and PTE_DIRTY bits of the given entry value. However, it currently only performs this for pte_valid() entries, ignoring PTE_PROT_NONE. The side-effect is that PROT_NONE mappings would not have the PTE_RDONLY bit set. Without CONFIG_ARM64_HW_AFDBM, this is not an issue since such PROT_NONE pages are not accessible anyway. With commit 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits"), the ptep_set_wrprotect() function was re-written to cope with automatic hardware updates of the dirty state. As an optimisation, only PTE_RDONLY is checked to assess the "dirty" status. Since set_pte_at() does not set this bit for PROT_NONE mappings, such pages may be considered "dirty" as a result of ptep_set_wrprotect(). This patch updates the pte_valid() check to pte_present() in set_pte_at(). It also adds PTE_PROT_NONE to the swap entry bits comment. Fixes: 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits") Signed-off-by: Catalin Marinas Reported-by: Ganapatrao Kulkarni Tested-by: Ganapatrao Kulkarni Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/pgtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 298474933ef3..c63868ae9a4a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -263,7 +263,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - if (pte_valid(pte)) { + if (pte_present(pte)) { if (pte_sw_dirty(pte) && pte_write(pte)) pte_val(pte) &= ~PTE_RDONLY; else @@ -640,6 +640,7 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; * bits 0-1: present (must be zero) * bits 2-7: swap type * bits 8-57: swap offset + * bit 58: PTE_PROT_NONE (must be zero) */ #define __SWP_TYPE_SHIFT 2 #define __SWP_TYPE_BITS 6 From 27b3cc048a5275c53e26c15ffcab3fcf9a03cda0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 21 Apr 2016 00:27:04 -0600 Subject: [PATCH 473/797] x86/mm/xen: Suppress hugetlbfs in PV guests commit 103f6112f253017d7062cd74d17f4a514ed4485c upstream. Huge pages are not normally available to PV guests. Not suppressing hugetlbfs use results in an endless loop of page faults when user mode code tries to access a hugetlbfs mapped area (since the hypervisor denies such PTEs to be created, but error indications can't be propagated out of xen_set_pte_at(), just like for various of its siblings), and - once killed in an oops like this: kernel BUG at .../fs/hugetlbfs/inode.c:428! invalid opcode: 0000 [#1] SMP ... RIP: e030:[] [] remove_inode_hugepages+0x25b/0x320 ... Call Trace: [] hugetlbfs_evict_inode+0x15/0x40 [] evict+0xbd/0x1b0 [] __dentry_kill+0x19a/0x1f0 [] dput+0x1fe/0x220 [] __fput+0x155/0x200 [] task_work_run+0x60/0xa0 [] do_exit+0x160/0x400 [] do_group_exit+0x3b/0xa0 [] get_signal+0x1ed/0x470 [] do_signal+0x14/0x110 [] prepare_exit_to_usermode+0xe9/0xf0 [] retint_user+0x8/0x13 This is CVE-2016-3961 / XSA-174. Reported-by: Vitaly Kuznetsov Signed-off-by: Jan Beulich Cc: Andrew Morton Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Vrabel Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: xen-devel Link: http://lkml.kernel.org/r/57188ED802000078000E431C@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/hugetlb.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index f8a29d2c97b0..e6a8613fbfb0 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -4,6 +4,7 @@ #include #include +#define hugepages_supported() cpu_has_pse static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, From 5582eb00f5b2362234cccf542232101db61ffc8b Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 14 Apr 2016 10:21:52 -0700 Subject: [PATCH 474/797] x86 EDAC, sb_edac.c: Repair damage introduced when "fixing" channel address commit ff15e95c82768d589957dbb17d7eb7dba7904659 upstream. In commit: eb1af3b71f9d ("Fix computation of channel address") I switched the "sck_way" variable from holding the log2 value read from the h/w to instead be the actual number. Unfortunately it is needed in log2 form when used to shift the address. Tested-by: Patrick Geary Signed-off-by: Tony Luck Acked-by: Mauro Carvalho Chehab Cc: Aristeu Rozanski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-edac@vger.kernel.org Fixes: eb1af3b71f9d ("Fix computation of channel address") Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/edac/sb_edac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index cbee3179ec08..90c3fe99c786 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -1396,7 +1396,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, } ch_way = TAD_CH(reg) + 1; - sck_way = 1 << TAD_SOCK(reg); + sck_way = TAD_SOCK(reg); if (ch_way == 3) idx = addr >> 6; @@ -1435,7 +1435,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, switch(ch_way) { case 2: case 4: - sck_xch = 1 << sck_way * (ch_way >> 1); + sck_xch = (1 << sck_way) * (ch_way >> 1); break; default: sprintf(msg, "Invalid mirror set. Can't decode addr"); @@ -1471,7 +1471,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, ch_addr = addr - offset; ch_addr >>= (6 + shiftup); - ch_addr /= ch_way * sck_way; + ch_addr /= sck_xch; ch_addr <<= (6 + shiftup); ch_addr |= addr & ((1 << (6 + shiftup)) - 1); From 66b7be5743d88c9b8fa69e5ba7d06d33d14de8c7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 17 Apr 2016 09:39:41 +0200 Subject: [PATCH 475/797] ALSA: hda - Don't trust the reported actual power state commit 50fd4987c4f3c3ebf0ce94d932732011bbdc7c71 upstream. We've got a regression report that the recording on Mac with a cirrus codec doesn't work any longer. This turned out to be the missing power up to D0 by power_save_node enablement. After analyzing the traces, we found out that the culprit is that the codec advertises the "actual" power state of a few nodes to be D0 while the "target" power state is D3. This inconsistency is usually OK, as it implies the power transition. But in the case of cirrus codec, this seems to be stuck to D3 while it's not actually D0. This patch addresses the issue by checking the power state difference more strictly. It sends the power-state change verb unless both the target and the actual power states show the given value. We may introduce yet another flag indicating the possible broken hardware power state, but it's anyway safer to set the proper power state even in a transition (at least it's harmless as long as the target state is same). So this simpler change was applied now. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=116171 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_generic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 5c4fa8eba1d0..367dbf0d285e 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -843,7 +843,7 @@ static hda_nid_t path_power_update(struct hda_codec *codec, bool allow_powerdown) { hda_nid_t nid, changed = 0; - int i, state; + int i, state, power; for (i = 0; i < path->depth; i++) { nid = path->path[i]; @@ -855,7 +855,9 @@ static hda_nid_t path_power_update(struct hda_codec *codec, state = AC_PWRST_D0; else state = AC_PWRST_D3; - if (!snd_hda_check_power_state(codec, nid, state)) { + power = snd_hda_codec_read(codec, nid, 0, + AC_VERB_GET_POWER_STATE, 0); + if (power != (state | (state << 4))) { snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_POWER_STATE, state); changed = nid; From a38ae6bb1473a02c5e7501fefe2a5ff42ad736c5 Mon Sep 17 00:00:00 2001 From: Bastien Nocera Date: Mon, 18 Apr 2016 11:10:42 +0200 Subject: [PATCH 476/797] ALSA: hda/realtek - Add ALC3234 headset mode for Optiplex 9020m commit afecb146d8d8a60a1dde9cdf570c278649617fde upstream. The Optiplex 9020m with Haswell-DT processor needs a quirk for the headset jack at the front of the machine to be able to use microphones. A quirk for this model was originally added in 3127899, but c77900e removed it in favour of a more generic version. Unfortunately, pin configurations can changed based on firmware/BIOS versions, and the generic version doesn't have any effect on newer versions of the machine/firmware anymore. With help from David Henningsson Signed-off-by: Bastien Nocera Tested-by: Bastien Nocera Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1402ba954b3d..810bceee4fd2 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5449,6 +5449,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC288_FIXUP_DELL_XPS_13), + SND_PCI_QUIRK(0x1028, 0x0669, "Dell Optiplex 9020m", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x069a, "Dell Vostro 5480", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), From 0df9987a2ec6bc37440b9c4fa176f360039a8b8e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 19 Apr 2016 22:07:50 +0200 Subject: [PATCH 477/797] ALSA: hda - Keep powering up ADCs on Cirrus codecs commit de3df8a986b635082a1d94bae2c361d043c57106 upstream. Although one weird behavior about the input path (inconsistent D0/D3 switch) on Cirrus CS420x codecs was fixed in the previous commit, there is still an issue on some Mac machines: the capture stream stalls when switching the ADCs on the fly. More badly, this keeps stuck until the next reboot. The dynamic ADC switching is already a bit fragile and assuming optimistically that the chip accepts the frequent power changes. On Cirrus codecs, this doesn't seem applicable. As a quick workaround, we pin down the ADCs to keep up in D0 when spec->dyn_adc_switch is set. In this way, the ADCs are kept up only for the system that were confirmed to be broken. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=116171 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_cirrus.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index a47e8ae0eb30..80bbadc83721 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -361,6 +361,7 @@ static int cs_parse_auto_config(struct hda_codec *codec) { struct cs_spec *spec = codec->spec; int err; + int i; err = snd_hda_parse_pin_defcfg(codec, &spec->gen.autocfg, NULL, 0); if (err < 0) @@ -370,6 +371,19 @@ static int cs_parse_auto_config(struct hda_codec *codec) if (err < 0) return err; + /* keep the ADCs powered up when it's dynamically switchable */ + if (spec->gen.dyn_adc_switch) { + unsigned int done = 0; + for (i = 0; i < spec->gen.input_mux.num_items; i++) { + int idx = spec->gen.dyn_adc_idx[i]; + if (done & (1 << idx)) + continue; + snd_hda_gen_fix_pin_power(codec, + spec->gen.adc_nids[idx]); + done |= 1 << idx; + } + } + return 0; } From 79dc55bd02a8dc0b6adf7598c4f8a7356594c363 Mon Sep 17 00:00:00 2001 From: "Lu, Han" Date: Wed, 20 Apr 2016 10:08:43 +0800 Subject: [PATCH 478/797] ALSA: hda - add PCI ID for Intel Broxton-T commit 9859a971ca228725425238756ee89c6133306ec8 upstream. Add HD Audio Device PCI ID for the Intel Broxton-T platform. It is an HDA Intel PCH controller. Signed-off-by: Lu, Han Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 2ff692dd2c5f..411630e9c034 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2207,6 +2207,9 @@ static const struct pci_device_id azx_ids[] = { /* Broxton-P(Apollolake) */ { PCI_DEVICE(0x8086, 0x5a98), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON }, + /* Broxton-T */ + { PCI_DEVICE(0x8086, 0x1a98), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON }, /* Haswell */ { PCI_DEVICE(0x8086, 0x0a0c), .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_HASWELL }, From 519aef523513a58f958e0aa432855e7b2a57a611 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 21 Apr 2016 17:37:54 +0200 Subject: [PATCH 479/797] ALSA: pcxhr: Fix missing mutex unlock commit 67f3754b51f22b18c4820fb84062f658c30e8644 upstream. The commit [9bef72bdb26e: ALSA: pcxhr: Use nonatomic PCM ops] converted to non-atomic PCM ops, but shamelessly with an unbalanced mutex locking, which leads to the hangup easily. Fix it. Fixes: 9bef72bdb26e ('ALSA: pcxhr: Use nonatomic PCM ops') Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=116441 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/pcxhr/pcxhr_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/pcxhr/pcxhr_core.c b/sound/pci/pcxhr/pcxhr_core.c index c5194f5b150a..d7e71f309299 100644 --- a/sound/pci/pcxhr/pcxhr_core.c +++ b/sound/pci/pcxhr/pcxhr_core.c @@ -1341,5 +1341,6 @@ irqreturn_t pcxhr_threaded_irq(int irq, void *dev_id) } pcxhr_msg_thread(mgr); + mutex_unlock(&mgr->lock); return IRQ_HANDLED; } From 8dd069c221e299db24ea5937c6130433109d6499 Mon Sep 17 00:00:00 2001 From: Conrad Kostecki Date: Tue, 26 Apr 2016 10:08:10 +0200 Subject: [PATCH 480/797] ALSA: hda - Add dock support for ThinkPad X260 commit 037e119738120c1cdc460c6ae33871c3000531f3 upstream. Fixes audio output on a ThinkPad X260, when using Lenovo CES 2013 docking station series (basic, pro, ultra). Signed-off-by: Conrad Kostecki Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 810bceee4fd2..ac4490a96863 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5584,6 +5584,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x5034, "Thinkpad T450", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x5036, "Thinkpad T450s", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x503c, "Thinkpad L450", ALC292_FIXUP_TPT440_DOCK), + SND_PCI_QUIRK(0x17aa, 0x504a, "ThinkPad X260", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x504b, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), From 4fad26a279caad671287e502e9b6d2487a56b270 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Thu, 14 Apr 2016 15:36:03 +0200 Subject: [PATCH 481/797] asm-generic/futex: Re-enable preemption in futex_atomic_cmpxchg_inatomic() commit fba7cd681b6155e2d93e7862fcd6f970336b83c3 upstream. The recent decoupling of pagefault disable and preempt disable added an explicit preempt_disable/enable() pair to the futex_atomic_cmpxchg_inatomic() implementation in asm-generic/futex.h. But it forgot to add preempt_enable() calls to the error handling code pathes, which results in a preemption count imbalance. This is observable on boot when the test for atomic_cmpxchg() is calling futex_atomic_cmpxchg_inatomic() on a NULL pointer. Add the missing preempt_enable() calls to the error handling code pathes. [ tglx: Massaged changelog ] Fixes: d9b9ff8c1889 ("sched/preempt, futex: Disable preemption in UP futex_atomic_cmpxchg_inatomic() explicitly") Signed-off-by: Romain Perier Cc: linux-arch@vger.kernel.org Cc: Thomas Petazzoni Cc: Arnd Bergmann Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1460640963-690-1-git-send-email-romain.perier@free-electrons.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/futex.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index e56272c919b5..bf2d34c9d804 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -108,11 +108,15 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 val; preempt_disable(); - if (unlikely(get_user(val, uaddr) != 0)) + if (unlikely(get_user(val, uaddr) != 0)) { + preempt_enable(); return -EFAULT; + } - if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) + if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) { + preempt_enable(); return -EFAULT; + } *uval = val; preempt_enable(); From 61fc0ae42c498f8eb782733065d93da6817d28b4 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 15 Apr 2016 14:35:39 +0200 Subject: [PATCH 482/797] futex: Handle unlock_pi race gracefully commit 89e9e66ba1b3bde9d8ea90566c2aee20697ad681 upstream. If userspace calls UNLOCK_PI unconditionally without trying the TID -> 0 transition in user space first then the user space value might not have the waiters bit set. This opens the following race: CPU0 CPU1 uval = get_user(futex) lock(hb) lock(hb) futex |= FUTEX_WAITERS .... unlock(hb) cmpxchg(futex, uval, newval) So the cmpxchg fails and returns -EINVAL to user space, which is wrong because the futex value is valid. To handle this (yes, yet another) corner case gracefully, check for a flag change and retry. [ tglx: Massaged changelog and slightly reworked implementation ] Fixes: ccf9e6a80d9e ("futex: Make unlock_pi more robust") Signed-off-by: Sebastian Andrzej Siewior Cc: Davidlohr Bueso Cc: Darren Hart Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1460723739-5195-1-git-send-email-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 461c72b2dac2..eaa3a8dfd345 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1244,10 +1244,20 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, if (unlikely(should_fail_futex(true))) ret = -EFAULT; - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) { ret = -EFAULT; - else if (curval != uval) - ret = -EINVAL; + } else if (curval != uval) { + /* + * If a unconditional UNLOCK_PI operation (user space did not + * try the TID->0 transition) raced with a waiter setting the + * FUTEX_WAITERS flag between get_user() and locking the hash + * bucket lock, retry the operation. + */ + if ((FUTEX_TID_MASK & curval) == uval) + ret = -EAGAIN; + else + ret = -EINVAL; + } if (ret) { raw_spin_unlock(&pi_state->pi_mutex.wait_lock); return ret; @@ -2537,6 +2547,15 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) */ if (ret == -EFAULT) goto pi_faulted; + /* + * A unconditional UNLOCK_PI op raced against a waiter + * setting the FUTEX_WAITERS bit. Try again. + */ + if (ret == -EAGAIN) { + spin_unlock(&hb->lock); + put_futex_key(&key); + goto retry; + } /* * wake_futex_pi has detected invalid state. Tell user * space. From ad4b209d192624e8587f4988171d624346913ddd Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 20 Apr 2016 20:09:24 -0700 Subject: [PATCH 483/797] futex: Acknowledge a new waiter in counter before plist commit fe1bce9e2107ba3a8faffe572483b6974201a0e6 upstream. Otherwise an incoming waker on the dest hash bucket can miss the waiter adding itself to the plist during the lockless check optimization (small window but still the correct way of doing this); similarly to the decrement counterpart. Suggested-by: Peter Zijlstra Signed-off-by: Davidlohr Bueso Cc: Davidlohr Bueso Cc: bigeasy@linutronix.de Cc: dvhart@infradead.org Link: http://lkml.kernel.org/r/1461208164-29150-1-git-send-email-dave@stgolabs.net Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/futex.c b/kernel/futex.c index eaa3a8dfd345..9d8163afd87c 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1484,8 +1484,8 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, if (likely(&hb1->chain != &hb2->chain)) { plist_del(&q->list, &hb1->chain); hb_waiters_dec(hb1); - plist_add(&q->list, &hb2->chain); hb_waiters_inc(hb2); + plist_add(&q->list, &hb2->chain); q->lock_ptr = &hb2->lock; } get_futex_key_refs(key2); From 06e38eaf1a24332b15748f33039d5bf15799c5cb Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 6 Mar 2016 16:06:06 -0500 Subject: [PATCH 484/797] drm/nouveau/core: use vzalloc for allocating ramht commit 78a121d82da8aff3aca2a6a1c40f5061081760f0 upstream. Most calls to nvkm_ramht_new use 0x8000 as the size. This results in a fairly sizeable chunk of memory to be allocated, which may not be available with kzalloc. Since this is done fairly rarely (once per channel), use vzalloc instead. Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs Cc: Sven Joachim Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/core/ramht.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ramht.c b/drivers/gpu/drm/nouveau/nvkm/core/ramht.c index 3216e157a8a0..89da47234016 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/ramht.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/ramht.c @@ -131,7 +131,7 @@ nvkm_ramht_del(struct nvkm_ramht **pramht) struct nvkm_ramht *ramht = *pramht; if (ramht) { nvkm_gpuobj_del(&ramht->gpuobj); - kfree(*pramht); + vfree(*pramht); *pramht = NULL; } } @@ -143,8 +143,8 @@ nvkm_ramht_new(struct nvkm_device *device, u32 size, u32 align, struct nvkm_ramht *ramht; int ret, i; - if (!(ramht = *pramht = kzalloc(sizeof(*ramht) + (size >> 3) * - sizeof(*ramht->data), GFP_KERNEL))) + if (!(ramht = *pramht = vzalloc(sizeof(*ramht) + + (size >> 3) * sizeof(*ramht->data)))) return -ENOMEM; ramht->device = device; From 20fd4b1bbfbea603fff1d756b39cffc67048aec5 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Wed, 18 Nov 2015 11:17:25 +0000 Subject: [PATCH 485/797] drm/qxl: fix cursor position with non-zero hotspot commit d59a1f71ff1aeda4b4630df92d3ad4e3b1dfc885 upstream. The SPICE protocol considers the position of a cursor to be the location of its active pixel on the display, so the cursor is drawn with its top-left corner at "(x - hot_spot_x, y - hot_spot_y)" but the DRM cursor position gives the location where the top-left corner should be drawn, with the hotspot being a hint for drivers that need it. This fixes the location of the window resize cursors when using Fluxbox with the QXL DRM driver and both the QXL and modesetting X drivers. Signed-off-by: John Keeping Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1447845445-2116-1-git-send-email-john@metanate.com Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/qxl/qxl_display.c | 13 +++++++++---- drivers/gpu/drm/qxl/qxl_drv.h | 2 ++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 183aea1abebc..5edebf495c07 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -375,10 +375,15 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc, qxl_bo_kunmap(user_bo); + qcrtc->cur_x += qcrtc->hot_spot_x - hot_x; + qcrtc->cur_y += qcrtc->hot_spot_y - hot_y; + qcrtc->hot_spot_x = hot_x; + qcrtc->hot_spot_y = hot_y; + cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_CURSOR_SET; - cmd->u.set.position.x = qcrtc->cur_x; - cmd->u.set.position.y = qcrtc->cur_y; + cmd->u.set.position.x = qcrtc->cur_x + qcrtc->hot_spot_x; + cmd->u.set.position.y = qcrtc->cur_y + qcrtc->hot_spot_y; cmd->u.set.shape = qxl_bo_physical_address(qdev, cursor_bo, 0); @@ -441,8 +446,8 @@ static int qxl_crtc_cursor_move(struct drm_crtc *crtc, cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_CURSOR_MOVE; - cmd->u.position.x = qcrtc->cur_x; - cmd->u.position.y = qcrtc->cur_y; + cmd->u.position.x = qcrtc->cur_x + qcrtc->hot_spot_x; + cmd->u.position.y = qcrtc->cur_y + qcrtc->hot_spot_y; qxl_release_unmap(qdev, release, &cmd->release_info); qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 01a86948eb8c..3ab90179e9ab 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -135,6 +135,8 @@ struct qxl_crtc { int index; int cur_x; int cur_y; + int hot_spot_x; + int hot_spot_y; }; struct qxl_output { From 67df493c5557c6f34f33c4a3d52784d7ba46312a Mon Sep 17 00:00:00 2001 From: Lyude Date: Wed, 16 Mar 2016 15:18:04 -0400 Subject: [PATCH 486/797] drm/i915: Fix race condition in intel_dp_destroy_mst_connector() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9e60290dbafdf577766e5fc5f2fdb3be450cf9a6 upstream. After unplugging a DP MST display from the system, we have to go through and destroy all of the DRM connectors associated with it since none of them are valid anymore. Unfortunately, intel_dp_destroy_mst_connector() doesn't do a good enough job of ensuring that throughout the destruction process that no modesettings can be done with the connectors. As it is right now, intel_dp_destroy_mst_connector() works like this: * Take all modeset locks * Clear the configuration of the crtc on the connector, if there is one * Drop all modeset locks, this is required because of circular dependency issues that arise with trying to remove the connector from sysfs with modeset locks held * Unregister the connector * Take all modeset locks, again * Do the rest of the required cleaning for destroying the connector * Finally drop all modeset locks for good This only works sometimes. During the destruction process, it's very possible that a userspace application will attempt to do a modesetting using the connector. When we drop the modeset locks, an ioctl handler such as drm_mode_setcrtc has the oppurtunity to take all of the modeset locks from us. When this happens, one thing leads to another and eventually we end up committing a mode with the non-existent connector: [drm:intel_dp_link_training_clock_recovery [i915]] *ERROR* failed to enable link training [drm:intel_dp_aux_ch] dp_aux_ch timeout status 0x7cf0001f [drm:intel_dp_start_link_train [i915]] *ERROR* failed to start channel equalization [drm:intel_dp_aux_ch] dp_aux_ch timeout status 0x7cf0001f [drm:intel_mst_pre_enable_dp [i915]] *ERROR* failed to allocate vcpi And in some cases, such as with the T460s using an MST dock, this results in breaking modesetting and/or panicking the system. To work around this, we now unregister the connector at the very beginning of intel_dp_destroy_mst_connector(), grab all the modesetting locks, and then hold them until we finish the rest of the function. Signed-off-by: Lyude Signed-off-by: Rob Clark Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1458155884-13877-1-git-send-email-cpaul@redhat.com (cherry picked from commit 1f7717552ef1306be3b7ed28c66c6eff550e3a23) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dp_mst.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 0639275fc471..06bd9257acdc 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -477,6 +477,8 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_device *dev = connector->dev; + intel_connector->unregister(intel_connector); + /* need to nuke the connector */ drm_modeset_lock_all(dev); if (connector->state->crtc) { @@ -490,11 +492,7 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, WARN(ret, "Disabling mst crtc failed with %i\n", ret); } - drm_modeset_unlock_all(dev); - intel_connector->unregister(intel_connector); - - drm_modeset_lock_all(dev); intel_connector_remove_from_fbdev(intel_connector); drm_connector_cleanup(connector); drm_modeset_unlock_all(dev); From 67fb098f6f23ebab7b47ae517c161032dc161cd9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 18 Apr 2016 11:19:19 -0400 Subject: [PATCH 487/797] Revert "drm/radeon: disable runtime pm on PX laptops without dGPU power control" commit bfaddd9fc8ac048b99475f000dbef6f08297417f upstream. This reverts commit e64c952efb8e0c15ae82cec8e455ab4910690ef1. ATPX is the ACPI method for controlling AMD PowerXpress laptops. There are flags to indicate which methods are supported. If the dGPU power down flag is not supported, the driver needs to implement the dGPU power down manually. We had previously always forced the driver to assume the ATPX dGPU power down was present, but this causes problems on boards where it is not, leading to GPU hangs when attempting to power down the dGPU. Manual dGPU power down is not currently supported in the Linux driver. Some laptops indicate that the ATPX dGPU power down method is not present, but it actually apparently is. I'm not sure if this is a bios bug and it should be set or if there is a reason it was unset and the method should not be used. This is not an issue on other OSes since both the ATPX and the manual driver power down methods are supported. This is apparently fairly widespread, so just revert for now. bugs: https://bugzilla.kernel.org/show_bug.cgi?id=115321 https://bugzilla.kernel.org/show_bug.cgi?id=116581 https://bugzilla.kernel.org/show_bug.cgi?id=116251 Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_atpx_handler.c | 8 ++++---- drivers/gpu/drm/radeon/radeon_device.c | 8 +------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index 9bc408c9f9f6..c4b4f298a283 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -62,10 +62,6 @@ bool radeon_has_atpx(void) { return radeon_atpx_priv.atpx_detected; } -bool radeon_has_atpx_dgpu_power_cntl(void) { - return radeon_atpx_priv.atpx.functions.power_cntl; -} - /** * radeon_atpx_call - call an ATPX method * @@ -145,6 +141,10 @@ static void radeon_atpx_parse_functions(struct radeon_atpx_functions *f, u32 mas */ static int radeon_atpx_validate(struct radeon_atpx *atpx) { + /* make sure required functions are enabled */ + /* dGPU power control is required */ + atpx->functions.power_cntl = true; + if (atpx->functions.px_params) { union acpi_object *info; struct atpx_px_params output; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index f78f111e68de..c566993a2ec3 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -103,12 +103,6 @@ static const char radeon_family_name[][16] = { "LAST", }; -#if defined(CONFIG_VGA_SWITCHEROO) -bool radeon_has_atpx_dgpu_power_cntl(void); -#else -static inline bool radeon_has_atpx_dgpu_power_cntl(void) { return false; } -#endif - #define RADEON_PX_QUIRK_DISABLE_PX (1 << 0) #define RADEON_PX_QUIRK_LONG_WAKEUP (1 << 1) @@ -1439,7 +1433,7 @@ int radeon_device_init(struct radeon_device *rdev, * ignore it */ vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode); - if ((rdev->flags & RADEON_IS_PX) && radeon_has_atpx_dgpu_power_cntl()) + if (rdev->flags & RADEON_IS_PX) runtime = true; vga_switcheroo_register_client(rdev->pdev, &radeon_switcheroo_ops, runtime); if (runtime) From 54aeb5854ec03315a721268b8c207fcdcd7f298f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 25 Apr 2016 13:12:18 -0400 Subject: [PATCH 488/797] Revert "drm/amdgpu: disable runtime pm on PX laptops without dGPU power control" commit e9bef455af8eb0e837e179aab8988ae2649fd8d3 upstream. This reverts commit bedf2a65c1aa8fb29ba8527fd00c0f68ec1f55f1. See the radeon revert for an extended description. Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 +------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 8ac49812a716..5a8fbadbd27b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -63,10 +63,6 @@ bool amdgpu_has_atpx(void) { return amdgpu_atpx_priv.atpx_detected; } -bool amdgpu_has_atpx_dgpu_power_cntl(void) { - return amdgpu_atpx_priv.atpx.functions.power_cntl; -} - /** * amdgpu_atpx_call - call an ATPX method * @@ -146,6 +142,10 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas */ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx) { + /* make sure required functions are enabled */ + /* dGPU power control is required */ + atpx->functions.power_cntl = true; + if (atpx->functions.px_params) { union acpi_object *info; struct atpx_px_params output; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9d88023df836..c961fe093e12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -61,12 +61,6 @@ static const char *amdgpu_asic_name[] = { "LAST", }; -#if defined(CONFIG_VGA_SWITCHEROO) -bool amdgpu_has_atpx_dgpu_power_cntl(void); -#else -static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; } -#endif - bool amdgpu_device_is_px(struct drm_device *dev) { struct amdgpu_device *adev = dev->dev_private; @@ -1475,7 +1469,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (amdgpu_runtime_pm == 1) runtime = true; - if (amdgpu_device_is_px(ddev) && amdgpu_has_atpx_dgpu_power_cntl()) + if (amdgpu_device_is_px(ddev)) runtime = true; vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, runtime); if (runtime) From 61fe67520c4394c90f688c61c5c16dd63824cd42 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 22 Apr 2016 19:53:59 -0700 Subject: [PATCH 489/797] cpufreq: intel_pstate: Fix processing for turbo activation ratio commit 1becf03545a0859ceaaf9e8c2d9861882a71cb01 upstream. When the config TDP level is not nominal (level = 0), the MSR values for reading level 1 and level 2 ratios contain power in low 14 bits and actual ratio bits are at bits [23:16]. The current processing for level 1 and level 2 is wrong as there is no shift done to get actual ratio. Fixes: 6a35fc2d6c22 (cpufreq: intel_pstate: get P1 from TAR when available) Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/intel_pstate.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 98fb8821382d..f53b02a6bc05 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -667,6 +667,11 @@ static int core_get_max_pstate(void) if (err) goto skip_tar; + /* For level 1 and 2, bits[23:16] contain the ratio */ + if (tdp_ctrl) + tdp_ratio >>= 16; + + tdp_ratio &= 0xff; /* ratios are only 8 bits long */ if (tdp_ratio - 1 == tar) { max_pstate = tar; pr_debug("max_pstate=TAC %x\n", max_pstate); From d22ac3a9403bc2a60662ec117dc83f72564d61f9 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 31 Mar 2016 11:48:31 +0200 Subject: [PATCH 490/797] s390/pci: add extra padding to function measurement block commit 9d89d9e61d361f3adb75e1aebe4bb367faf16cfa upstream. Newer machines might use a different (larger) format for function measurement blocks. To ensure that we comply with the alignment requirement on these machines and prevent memory corruption (when firmware writes more data than we expect) add 16 padding bytes at the end of the fmb. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/pci.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 2b2ced9dc00a..6dafabb6ae1a 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -45,7 +45,8 @@ struct zpci_fmb { u64 rpcit_ops; u64 dma_rbytes; u64 dma_wbytes; -} __packed __aligned(64); + u64 pad[2]; +} __packed __aligned(128); enum zpci_state { ZPCI_FN_STATE_RESERVED, From 0dec867402c0ce4eee7ca0055a99d634fe32a72b Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 10 Mar 2016 13:07:17 +0200 Subject: [PATCH 491/797] iwlwifi: pcie: lower the debug level for RSA semaphore access commit 9fc515bc9e735c10cd327f05c20f5ef69474188d upstream. IWL_INFO is not an error but still printed by default. "can't access the RSA semaphore it is write protected" seems worrisome but it is not really a problem. Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/pcie/trans.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 8c7204738aa3..00e0332e2544 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -731,8 +731,8 @@ static int iwl_pcie_rsa_race_bug_wa(struct iwl_trans *trans) */ val = iwl_read_prph(trans, PREG_AUX_BUS_WPROT_0); if (val & (BIT(1) | BIT(17))) { - IWL_INFO(trans, - "can't access the RSA semaphore it is write protected\n"); + IWL_DEBUG_INFO(trans, + "can't access the RSA semaphore it is write protected\n"); return 0; } From 72b847aa95584d9e0718c9e3ee38a627bbb24c17 Mon Sep 17 00:00:00 2001 From: Matti Gottlieb Date: Tue, 15 Mar 2016 13:46:47 +0200 Subject: [PATCH 492/797] iwlwifi: mvm: fix memory leak in paging commit 7fdf9663261cc77a516396fec82cee8a8ea07e76 upstream. Currently paging download buffer is freed during the the unloading of the opmode which happens when the driver is unloaded. This causes a memory leak since the paging download buffer is allocated every time we enable the interface, so the download buffer can be allocated many times, but only be freed once. Free paging download buffer during disabling of the interface. Signed-off-by: Matti Gottlieb Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 2 ++ drivers/net/wireless/iwlwifi/mvm/ops.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index e88afac51c5d..f96ab2f4b90e 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -1557,6 +1557,8 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm) /* the fw is stopped, the aux sta is dead: clean up driver state */ iwl_mvm_del_aux_sta(mvm); + iwl_free_fw_paging(mvm); + /* * Clear IN_HW_RESTART flag when stopping the hw (as restart_complete() * won't be called in this case). diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c index c3adf2bcdc85..13c97f665ba8 100644 --- a/drivers/net/wireless/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/iwlwifi/mvm/ops.c @@ -645,8 +645,6 @@ static void iwl_op_mode_mvm_stop(struct iwl_op_mode *op_mode) for (i = 0; i < NVM_MAX_NUM_SECTIONS; i++) kfree(mvm->nvm_sections[i].data); - iwl_free_fw_paging(mvm); - iwl_mvm_tof_clean(mvm); ieee80211_free_hw(mvm->hw); From 1575fcd167e3452cadbcf7d04ad6277c875a482f Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 13 Apr 2016 10:52:25 -0500 Subject: [PATCH 493/797] crypto: ccp - Prevent information leakage on export commit f709b45ec461b548c41a00044dba1f1b572783bf upstream. Prevent information from leaking to userspace by doing a memset to 0 of the export state structure before setting the structure values and copying it. This prevents un-initialized padding areas from being copied into the export area. Reported-by: Ben Hutchings Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/ccp-crypto-aes-cmac.c | 3 +++ drivers/crypto/ccp/ccp-crypto-sha.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c index 3d9acc53d247..60fc0fa26fd3 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c @@ -225,6 +225,9 @@ static int ccp_aes_cmac_export(struct ahash_request *req, void *out) struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); struct ccp_aes_cmac_exp_ctx state; + /* Don't let anything leak to 'out' */ + memset(&state, 0, sizeof(state)); + state.null_msg = rctx->null_msg; memcpy(state.iv, rctx->iv, sizeof(state.iv)); state.buf_count = rctx->buf_count; diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index 8ef06fad8b14..ab9945f2cb7a 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -212,6 +212,9 @@ static int ccp_sha_export(struct ahash_request *req, void *out) struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); struct ccp_sha_exp_ctx state; + /* Don't let anything leak to 'out' */ + memset(&state, 0, sizeof(state)); + state.type = rctx->type; state.msg_bits = rctx->msg_bits; state.first = rctx->first; From 9a10dfc8bf95270073c6c2e2be3de26a652d730a Mon Sep 17 00:00:00 2001 From: Xiaodong Liu Date: Tue, 12 Apr 2016 09:45:51 +0000 Subject: [PATCH 494/797] crypto: sha1-mb - use corrcet pointer while completing jobs commit 0851561d9c965df086ef8a53f981f5f95a57c2c8 upstream. In sha_complete_job, incorrect mcryptd_hash_request_ctx pointer is used when check and complete other jobs. If the memory of first completed req is freed, while still completing other jobs in the func, kernel will crash since NULL pointer is assigned to RIP. Signed-off-by: Xiaodong Liu Acked-by: Tim Chen Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/sha-mb/sha1_mb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index a841e9765bd6..8381c09d2870 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -453,10 +453,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, req = cast_mcryptd_ctx_to_req(req_ctx); if (irqs_disabled()) - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); else { local_bh_disable(); - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); local_bh_enable(); } } From 0dedb763d08e6fa0da9c1ee14fb5d8522a85e846 Mon Sep 17 00:00:00 2001 From: Jonas Eymann Date: Tue, 19 Apr 2016 20:33:47 +0300 Subject: [PATCH 495/797] crypto: talitos - fix crash in talitos_cra_init() commit 89d124cb61b39900959e2839ac06b6339b6a54cb upstream. Conversion of talitos driver to the new AEAD interface hasn't been properly tested. AEAD algorithms crash in talitos_cra_init as follows: [...] [ 1.141095] talitos ffe30000.crypto: hwrng [ 1.145381] Unable to handle kernel paging request for data at address 0x00000058 [ 1.152913] Faulting instruction address: 0xc02accc0 [ 1.157910] Oops: Kernel access of bad area, sig: 11 [#1] [ 1.163315] SMP NR_CPUS=2 P1020 RDB [ 1.166810] Modules linked in: [ 1.169875] CPU: 0 PID: 1007 Comm: cryptomgr_test Not tainted 4.4.6 #1 [ 1.176415] task: db5ec200 ti: db4d6000 task.ti: db4d6000 [ 1.181821] NIP: c02accc0 LR: c02acd18 CTR: c02acd04 [ 1.186793] REGS: db4d7d30 TRAP: 0300 Not tainted (4.4.6) [ 1.192457] MSR: 00029000 CR: 95009359 XER: e0000000 [ 1.198585] DEAR: 00000058 ESR: 00000000 GPR00: c017bdc0 db4d7de0 db5ec200 df424b48 00000000 00000000 df424bfc db75a600 GPR08: df424b48 00000000 db75a628 db4d6000 00000149 00000000 c0044cac db5acda0 GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000400 df424940 GPR24: df424900 00003083 00000400 c0180000 db75a640 c03e9f84 df424b40 df424b48 [ 1.230978] NIP [c02accc0] talitos_cra_init+0x28/0x6c [ 1.236039] LR [c02acd18] talitos_cra_init_aead+0x14/0x28 [ 1.241443] Call Trace: [ 1.243894] [db4d7de0] [c03e9f84] 0xc03e9f84 (unreliable) [ 1.249322] [db4d7df0] [c017bdc0] crypto_create_tfm+0x5c/0xf0 [ 1.255083] [db4d7e10] [c017beec] crypto_alloc_tfm+0x98/0xf8 [ 1.260769] [db4d7e40] [c0186a20] alg_test_aead+0x28/0xc8 [ 1.266181] [db4d7e60] [c0186718] alg_test+0x260/0x2e0 [ 1.271333] [db4d7ee0] [c0183860] cryptomgr_test+0x30/0x54 [ 1.276843] [db4d7ef0] [c0044d80] kthread+0xd4/0xd8 [ 1.281741] [db4d7f40] [c000e4a4] ret_from_kernel_thread+0x5c/0x64 [ 1.287930] Instruction dump: [ 1.290902] 38600000 4e800020 81230028 7c681b78 81490010 38e9ffc0 3929ffe8 554a073e [ 1.298691] 2b8a000a 7d474f9e 812a0008 91230030 <80e90058> 39270060 7c0004ac 7cc04828 Fixes: aeb4c132f33d ("crypto: talitos - Convert to new AEAD interface") Signed-off-by: Jonas Eymann Signed-off-by: Greg Kroah-Hartman Fix typo - replaced parameter of __crypto_ahash_alg(): s/tfm/alg Remove checkpatch warnings. Add commit message. Signed-off-by: Horia Geant? Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 41 ++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index b6f9f42e2985..79dff3b2dfb7 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -2519,21 +2519,11 @@ struct talitos_crypto_alg { struct talitos_alg_template algt; }; -static int talitos_cra_init(struct crypto_tfm *tfm) +static int talitos_init_common(struct talitos_ctx *ctx, + struct talitos_crypto_alg *talitos_alg) { - struct crypto_alg *alg = tfm->__crt_alg; - struct talitos_crypto_alg *talitos_alg; - struct talitos_ctx *ctx = crypto_tfm_ctx(tfm); struct talitos_private *priv; - if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_AHASH) - talitos_alg = container_of(__crypto_ahash_alg(alg), - struct talitos_crypto_alg, - algt.alg.hash); - else - talitos_alg = container_of(alg, struct talitos_crypto_alg, - algt.alg.crypto); - /* update context with ptr to dev */ ctx->dev = talitos_alg->dev; @@ -2551,10 +2541,33 @@ static int talitos_cra_init(struct crypto_tfm *tfm) return 0; } +static int talitos_cra_init(struct crypto_tfm *tfm) +{ + struct crypto_alg *alg = tfm->__crt_alg; + struct talitos_crypto_alg *talitos_alg; + struct talitos_ctx *ctx = crypto_tfm_ctx(tfm); + + if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_AHASH) + talitos_alg = container_of(__crypto_ahash_alg(alg), + struct talitos_crypto_alg, + algt.alg.hash); + else + talitos_alg = container_of(alg, struct talitos_crypto_alg, + algt.alg.crypto); + + return talitos_init_common(ctx, talitos_alg); +} + static int talitos_cra_init_aead(struct crypto_aead *tfm) { - talitos_cra_init(crypto_aead_tfm(tfm)); - return 0; + struct aead_alg *alg = crypto_aead_alg(tfm); + struct talitos_crypto_alg *talitos_alg; + struct talitos_ctx *ctx = crypto_aead_ctx(tfm); + + talitos_alg = container_of(alg, struct talitos_crypto_alg, + algt.alg.aead); + + return talitos_init_common(ctx, talitos_alg); } static int talitos_cra_init_ahash(struct crypto_tfm *tfm) From cd7803563938ce36988e6bc494b8f6610c1537af Mon Sep 17 00:00:00 2001 From: Horia Geant? Date: Tue, 19 Apr 2016 20:33:48 +0300 Subject: [PATCH 496/797] crypto: talitos - fix AEAD tcrypt tests commit 340ff60ae93a5db2b2be6f38868df9a1293b6007 upstream. After conversion to new AEAD interface, tcrypt tests fail as follows: [...] [ 1.145414] alg: aead: Test 1 failed on encryption for authenc-hmac-sha1-cbc-aes-talitos [ 1.153564] 00000000: 53 69 6e 67 6c 65 20 62 6c 6f 63 6b 20 6d 73 67 [ 1.160041] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 1.166509] 00000020: 00 00 00 00 [...] Fix them by providing the correct cipher in & cipher out pointers, i.e. must skip over associated data in src and dst S/G. While here, fix a problem with the HW S/G table index usage: tbl_off must be updated after the pointer to the table entries is set. Fixes: aeb4c132f33d ("crypto: talitos - Convert to new AEAD interface") Reported-by: Jonas Eymann Signed-off-by: Horia Geant? Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/talitos.c | 46 ++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 79dff3b2dfb7..a04fea4d0063 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -63,6 +63,14 @@ static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr, ptr->eptr = upper_32_bits(dma_addr); } +static void copy_talitos_ptr(struct talitos_ptr *dst_ptr, + struct talitos_ptr *src_ptr, bool is_sec1) +{ + dst_ptr->ptr = src_ptr->ptr; + if (!is_sec1) + dst_ptr->eptr = src_ptr->eptr; +} + static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned int len, bool is_sec1) { @@ -1083,21 +1091,20 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ?: 1, (areq->src == areq->dst) ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); - /* hmac data */ desc->ptr[1].len = cpu_to_be16(areq->assoclen); if (sg_count > 1 && (ret = sg_to_link_tbl_offset(areq->src, sg_count, 0, areq->assoclen, &edesc->link_tbl[tbl_off])) > 1) { - tbl_off += ret; - to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off * sizeof(struct talitos_ptr), 0); desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP; dma_sync_single_for_device(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); + + tbl_off += ret; } else { to_talitos_ptr(&desc->ptr[1], sg_dma_address(areq->src), 0); desc->ptr[1].j_extent = 0; @@ -1126,11 +1133,13 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) sg_link_tbl_len += authsize; - if (sg_count > 1 && - (ret = sg_to_link_tbl_offset(areq->src, sg_count, areq->assoclen, - sg_link_tbl_len, - &edesc->link_tbl[tbl_off])) > 1) { - tbl_off += ret; + if (sg_count == 1) { + to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src) + + areq->assoclen, 0); + } else if ((ret = sg_to_link_tbl_offset(areq->src, sg_count, + areq->assoclen, sg_link_tbl_len, + &edesc->link_tbl[tbl_off])) > + 1) { desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl + tbl_off * @@ -1138,8 +1147,10 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, dma_sync_single_for_device(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); - } else - to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src), 0); + tbl_off += ret; + } else { + copy_talitos_ptr(&desc->ptr[4], &edesc->link_tbl[tbl_off], 0); + } /* cipher out */ desc->ptr[5].len = cpu_to_be16(cryptlen); @@ -1151,11 +1162,13 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, edesc->icv_ool = false; - if (sg_count > 1 && - (sg_count = sg_to_link_tbl_offset(areq->dst, sg_count, + if (sg_count == 1) { + to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst) + + areq->assoclen, 0); + } else if ((sg_count = + sg_to_link_tbl_offset(areq->dst, sg_count, areq->assoclen, cryptlen, - &edesc->link_tbl[tbl_off])) > - 1) { + &edesc->link_tbl[tbl_off])) > 1) { struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; to_talitos_ptr(&desc->ptr[5], edesc->dma_link_tbl + @@ -1178,8 +1191,9 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, edesc->dma_len, DMA_BIDIRECTIONAL); edesc->icv_ool = true; - } else - to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst), 0); + } else { + copy_talitos_ptr(&desc->ptr[5], &edesc->link_tbl[tbl_off], 0); + } /* iv out */ map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, From 53f3e26b3d09ae40318877b74e0d6c1af767a07f Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 15 Apr 2016 12:06:13 +1000 Subject: [PATCH 497/797] powerpc: scan_features() updates incorrect bits for REAL_LE commit 6997e57d693b07289694239e52a10d2f02c3a46f upstream. The REAL_LE feature entry in the ibm_pa_feature struct is missing an MMU feature value, meaning all the remaining elements initialise the wrong values. This means instead of checking for byte 5, bit 0, we check for byte 0, bit 0, and then we incorrectly set the CPU feature bit as well as MMU feature bit 1 and CPU user feature bits 0 and 2 (5). Checking byte 0 bit 0 (IBM numbering), means we're looking at the "Memory Management Unit (MMU)" feature - ie. does the CPU have an MMU. In practice that bit is set on all platforms which have the property. This means we set CPU_FTR_REAL_LE always. In practice that seems not to matter because all the modern cpus which have this property also implement REAL_LE, and we've never needed to disable it. We're also incorrectly setting MMU feature bit 1, which is: #define MMU_FTR_TYPE_8xx 0x00000002 Luckily the only place that looks for MMU_FTR_TYPE_8xx is in Book3E code, which can't run on the same cpus as scan_features(). So this also doesn't matter in practice. Finally in the CPU user feature mask, we're setting bits 0 and 2. Bit 2 is not currently used, and bit 0 is: #define PPC_FEATURE_PPC_LE 0x00000001 Which says the CPU supports the old style "PPC Little Endian" mode. Again this should be harmless in practice as no 64-bit CPUs implement that mode. Fix the code by adding the missing initialisation of the MMU feature. Also add a comment marking CPU user feature bit 2 (0x4) as reserved. It would be unsafe to start using it as old kernels incorrectly set it. Fixes: 44ae3ab3358e ("powerpc: Free up some CPU feature bits by moving out MMU-related features") Signed-off-by: Anton Blanchard [mpe: Flesh out changelog, add comment reserving 0x4] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/uapi/asm/cputable.h | 1 + arch/powerpc/kernel/prom.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h index 43686043e297..2734c005da21 100644 --- a/arch/powerpc/include/uapi/asm/cputable.h +++ b/arch/powerpc/include/uapi/asm/cputable.h @@ -31,6 +31,7 @@ #define PPC_FEATURE_PSERIES_PERFMON_COMPAT \ 0x00000040 +/* Reserved - do not use 0x00000004 */ #define PPC_FEATURE_TRUE_LE 0x00000002 #define PPC_FEATURE_PPC_LE 0x00000001 diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 7030b035905d..080c96b44a7f 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -158,7 +158,7 @@ static struct ibm_pa_feature { {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0}, {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1}, {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, - {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0}, + {CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 5, 0, 0}, /* * If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n), * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP From 08c9b94505bbe09ed42f658de2a4dbe274fa7468 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 15 Apr 2016 12:07:24 +1000 Subject: [PATCH 498/797] powerpc: Update cpu_user_features2 in scan_features() commit beff82374b259d726e2625ec6c518a5f2613f0ae upstream. scan_features() updates cpu_user_features but not cpu_user_features2. Amongst other things, cpu_user_features2 contains the user TM feature bits which we must keep in sync with the kernel TM feature bit. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/prom.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 080c96b44a7f..03fce77e441d 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -148,23 +148,24 @@ static struct ibm_pa_feature { unsigned long cpu_features; /* CPU_FTR_xxx bit */ unsigned long mmu_features; /* MMU_FTR_xxx bit */ unsigned int cpu_user_ftrs; /* PPC_FEATURE_xxx bit */ + unsigned int cpu_user_ftrs2; /* PPC_FEATURE2_xxx bit */ unsigned char pabyte; /* byte number in ibm,pa-features */ unsigned char pabit; /* bit number (big-endian) */ unsigned char invert; /* if 1, pa bit set => clear feature */ } ibm_pa_features[] __initdata = { - {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0}, - {0, 0, PPC_FEATURE_HAS_FPU, 0, 1, 0}, - {CPU_FTR_CTRL, 0, 0, 0, 3, 0}, - {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0}, - {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1}, - {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, - {CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 5, 0, 0}, + {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0, 0}, + {0, 0, PPC_FEATURE_HAS_FPU, 0, 0, 1, 0}, + {CPU_FTR_CTRL, 0, 0, 0, 0, 3, 0}, + {CPU_FTR_NOEXECUTE, 0, 0, 0, 0, 6, 0}, + {CPU_FTR_NODSISRALIGN, 0, 0, 0, 1, 1, 1}, + {0, MMU_FTR_CI_LARGE_PAGE, 0, 0, 1, 2, 0}, + {CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0}, /* * If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n), * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP * which is 0 if the kernel doesn't support TM. */ - {CPU_FTR_TM_COMP, 0, 0, 22, 0, 0}, + {CPU_FTR_TM_COMP, 0, 0, 0, 22, 0, 0}, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, @@ -195,10 +196,12 @@ static void __init scan_features(unsigned long node, const unsigned char *ftrs, if (bit ^ fp->invert) { cur_cpu_spec->cpu_features |= fp->cpu_features; cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs; + cur_cpu_spec->cpu_user_features2 |= fp->cpu_user_ftrs2; cur_cpu_spec->mmu_features |= fp->mmu_features; } else { cur_cpu_spec->cpu_features &= ~fp->cpu_features; cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs; + cur_cpu_spec->cpu_user_features2 &= ~fp->cpu_user_ftrs2; cur_cpu_spec->mmu_features &= ~fp->mmu_features; } } From c89c3225062d64c63532c127c374ea962f336e6b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 15 Apr 2016 12:08:19 +1000 Subject: [PATCH 499/797] powerpc: Update TM user feature bits in scan_features() commit 4705e02498d6d5a7ab98dfee9595cd5e91db2017 upstream. We need to update the user TM feature bits (PPC_FEATURE2_HTM and PPC_FEATURE2_HTM) to mirror what we do with the kernel TM feature bit. At the moment, if firmware reports TM is not available we turn off the kernel TM feature bit but leave the userspace ones on. Userspace thinks it can execute TM instructions and it dies trying. This (together with a QEMU patch) fixes PR KVM, which doesn't currently support TM. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/prom.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 03fce77e441d..a15fe1d4e84a 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -161,11 +161,12 @@ static struct ibm_pa_feature { {0, MMU_FTR_CI_LARGE_PAGE, 0, 0, 1, 2, 0}, {CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0}, /* - * If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n), - * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP - * which is 0 if the kernel doesn't support TM. + * If the kernel doesn't support TM (ie CONFIG_PPC_TRANSACTIONAL_MEM=n), + * we don't want to turn on TM here, so we use the *_COMP versions + * which are 0 if the kernel doesn't support TM. */ - {CPU_FTR_TM_COMP, 0, 0, 0, 22, 0, 0}, + {CPU_FTR_TM_COMP, 0, 0, + PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0}, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, From 56b8eaa38b04f147a6b825a73a31b826b6051604 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 6 Apr 2016 17:23:18 +0300 Subject: [PATCH 500/797] nl80211: check netlink protocol in socket release notification commit 8f815cdde3e550e10c2736990d791f60c2ce43eb upstream. A non-privileged user can create a netlink socket with the same port_id as used by an existing open nl80211 netlink socket (e.g. as used by a hostapd process) with a different protocol number. Closing this socket will then lead to the notification going to nl80211's socket release notification handler, and possibly cause an action such as removing a virtual interface. Fix this issue by checking that the netlink protocol is NETLINK_GENERIC. Since generic netlink has no notifier chain of its own, we can't fix the problem more generically. Fixes: 026331c4d9b5 ("cfg80211/mac80211: allow registering for and sending action frames") Signed-off-by: Dmitry Ivanov [rewrite commit message] Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 75b0d23ee882..5d89f13a98db 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13161,7 +13161,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb, struct wireless_dev *wdev; struct cfg80211_beacon_registration *reg, *tmp; - if (state != NETLINK_URELEASE) + if (state != NETLINK_URELEASE || notify->protocol != NETLINK_GENERIC) return NOTIFY_DONE; rcu_read_lock(); From 95415ac5786f483c7c69145ae644bc64c2240776 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 7 Apr 2016 09:31:38 +0200 Subject: [PATCH 501/797] netlink: don't send NETLINK_URELEASE for unbound sockets commit e27260203912b40751fa353d009eaa5a642c739f upstream. All existing users of NETLINK_URELEASE use it to clean up resources that were previously allocated to a socket via some command. As a result, no users require getting this notification for unbound sockets. Sending it for unbound sockets, however, is a problem because any user (including unprivileged users) can create a socket that uses the same ID as an existing socket. Binding this new socket will fail, but if the NETLINK_URELEASE notification is generated for such sockets, the users thereof will be tricked into thinking the socket that they allocated the resources for is closed. In the nl80211 case, this will cause destruction of virtual interfaces that still belong to an existing hostapd process; this is the case that Dmitry noticed. In the NFC case, it will cause a poll abort. In the case of netlink log/queue it will cause them to stop reporting events, as if NFULNL_CFG_CMD_UNBIND/NFQNL_CFG_CMD_UNBIND had been called. Fix this problem by checking that the socket is bound before generating the NETLINK_URELEASE notification. Signed-off-by: Dmitry Ivanov Signed-off-by: Johannes Berg Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 59651af8cc27..992b35fb8615 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1305,7 +1305,7 @@ static int netlink_release(struct socket *sock) skb_queue_purge(&sk->sk_write_queue); - if (nlk->portid) { + if (nlk->portid && nlk->bound) { struct netlink_notify n = { .net = sock_net(sk), .protocol = sk->sk_protocol, From 197b6c5f0d976420c3eeacc7589ebc5869d2d70f Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Thu, 31 Mar 2016 10:53:42 -0700 Subject: [PATCH 502/797] Input: gtco - fix crash on detecting device without endpoints commit 162f98dea487206d9ab79fc12ed64700667a894d upstream. The gtco driver expects at least one valid endpoint. If given malicious descriptors that specify 0 for the number of endpoints, it will crash in the probe function. Ensure there is at least one endpoint on the interface before using it. Also let's fix a minor coding style issue. The full correct report of this issue can be found in the public Red Hat Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1283385 Reported-by: Ralf Spenneberg Signed-off-by: Vladis Dronov Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/tablet/gtco.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c index 3a7f3a4a4396..7c18249d6c8e 100644 --- a/drivers/input/tablet/gtco.c +++ b/drivers/input/tablet/gtco.c @@ -858,6 +858,14 @@ static int gtco_probe(struct usb_interface *usbinterface, goto err_free_buf; } + /* Sanity check that a device has an endpoint */ + if (usbinterface->altsetting[0].desc.bNumEndpoints < 1) { + dev_err(&usbinterface->dev, + "Invalid number of endpoints\n"); + error = -EINVAL; + goto err_free_urb; + } + /* * The endpoint is always altsetting 0, we know this since we know * this device only has one interrupt endpoint @@ -879,7 +887,7 @@ static int gtco_probe(struct usb_interface *usbinterface, * HID report descriptor */ if (usb_get_extra_descriptor(usbinterface->cur_altsetting, - HID_DEVICE_TYPE, &hid_desc) != 0){ + HID_DEVICE_TYPE, &hid_desc) != 0) { dev_err(&usbinterface->dev, "Can't retrieve exta USB descriptor to get hid report descriptor length\n"); error = -EIO; From 506788dafb7d27c31703991f0b5f7b87bd9a942c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sun, 17 Apr 2016 05:21:42 -0700 Subject: [PATCH 503/797] Input: pmic8xxx-pwrkey - fix algorithm for converting trigger delay commit eda5ecc0a6b865561997e177c393f0b0136fe3b7 upstream. The trigger delay algorithm that converts from microseconds to the register value looks incorrect. According to most of the PMIC documentation, the equation is delay (Seconds) = (1 / 1024) * 2 ^ (x + 4) except for one case where the documentation looks to have a formatting issue and the equation looks like delay (Seconds) = (1 / 1024) * 2 x + 4 Most likely this driver was written with the improper documentation to begin with. According to the downstream sources the valid delays are from 2 seconds to 1/64 second, and the latter equation just doesn't make sense for that. Let's fix the algorithm and the range check to match the documentation and the downstream sources. Reported-by: Bjorn Andersson Fixes: 92d57a73e410 ("input: Add support for Qualcomm PMIC8XXX power key") Signed-off-by: Stephen Boyd Tested-by: John Stultz Acked-by: Bjorn Andersson Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/pmic8xxx-pwrkey.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/input/misc/pmic8xxx-pwrkey.c b/drivers/input/misc/pmic8xxx-pwrkey.c index 3f02e0e03d12..67aab86048ad 100644 --- a/drivers/input/misc/pmic8xxx-pwrkey.c +++ b/drivers/input/misc/pmic8xxx-pwrkey.c @@ -353,7 +353,8 @@ static int pmic8xxx_pwrkey_probe(struct platform_device *pdev) if (of_property_read_u32(pdev->dev.of_node, "debounce", &kpd_delay)) kpd_delay = 15625; - if (kpd_delay > 62500 || kpd_delay == 0) { + /* Valid range of pwr key trigger delay is 1/64 sec to 2 seconds. */ + if (kpd_delay > USEC_PER_SEC * 2 || kpd_delay < USEC_PER_SEC / 64) { dev_err(&pdev->dev, "invalid power key trigger delay\n"); return -EINVAL; } @@ -385,8 +386,8 @@ static int pmic8xxx_pwrkey_probe(struct platform_device *pdev) pwr->name = "pmic8xxx_pwrkey"; pwr->phys = "pmic8xxx_pwrkey/input0"; - delay = (kpd_delay << 10) / USEC_PER_SEC; - delay = 1 + ilog2(delay); + delay = (kpd_delay << 6) / USEC_PER_SEC; + delay = ilog2(delay); err = regmap_read(regmap, PON_CNTL_1, &pon_cntl); if (err < 0) { From 4e8d89e8bb8828faf3c955fe9a50e1ae54918326 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 Feb 2016 16:03:23 +0100 Subject: [PATCH 504/797] xen kconfig: don't "select INPUT_XEN_KBDDEV_FRONTEND" commit 13aa38e291bdd4e4018f40dd2f75e464814dcbf3 upstream. The Xen framebuffer driver selects the xen keyboard driver, so the latter will be built-in if XEN_FBDEV_FRONTEND=y. However, when CONFIG_INPUT is a loadable module, this configuration cannot work. On mainline kernels, the symbol will be enabled but not used, while in combination with a patch I have to detect such useless configurations, we get the expected link failure: drivers/input/built-in.o: In function `xenkbd_remove': xen-kbdfront.c:(.text+0x2f0): undefined reference to `input_unregister_device' xen-kbdfront.c:(.text+0x30e): undefined reference to `input_unregister_device' This removes the extra "select", as it just causes more trouble than it helps. In theory, some defconfig file might break if it has XEN_FBDEV_FRONTEND in it but not INPUT_XEN_KBDDEV_FRONTEND. The Kconfig fragment we ship in the kernel (kernel/configs/xen.config) however already enables both, and anyone using an old .config file would keep having both enabled. Signed-off-by: Arnd Bergmann Suggested-by: David Vrabel Fixes: 36c1132e34bd ("xen kconfig: fix select INPUT_XEN_KBDDEV_FRONTEND") Acked-by: Stefano Stabellini Signed-off-by: Tomi Valkeinen Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index e6d16d65e4e6..f07a0974fda2 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -2249,7 +2249,6 @@ config XEN_FBDEV_FRONTEND select FB_SYS_IMAGEBLIT select FB_SYS_FOPS select FB_DEFERRED_IO - select INPUT_XEN_KBDDEV_FRONTEND if INPUT_MISC select XEN_XENBUS_FRONTEND default y help From c7ce82609fda7214292998e3a38901d3944b6c16 Mon Sep 17 00:00:00 2001 From: Yingjoe Chen Date: Sat, 2 Apr 2016 14:57:49 +0800 Subject: [PATCH 505/797] pinctrl: mediatek: correct debounce time unit in mtk_gpio_set_debounce commit 5fedbb923936174ab4d1d5cc92bca1cf6b2e0ca2 upstream. The debounce time unit for gpio_chip.set_debounce is us but mtk_gpio_set_debounce regard it as ms. Fix this by correct debounce time array dbnc_arr so it can find correct debounce setting. Debounce time for first debounce setting is 500us, correct this as well. While I'm at it, also change the debounce time array name to "debounce_time" for readability. Signed-off-by: Yingjoe Chen Reviewed-by: Daniel Kurtz Acked-by: Hongzhou Yang Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 5c717275a7fa..3d8019eb3d84 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -939,7 +939,8 @@ static int mtk_gpio_set_debounce(struct gpio_chip *chip, unsigned offset, struct mtk_pinctrl *pctl = dev_get_drvdata(chip->dev); int eint_num, virq, eint_offset; unsigned int set_offset, bit, clr_bit, clr_offset, rst, i, unmask, dbnc; - static const unsigned int dbnc_arr[] = {0 , 1, 16, 32, 64, 128, 256}; + static const unsigned int debounce_time[] = {500, 1000, 16000, 32000, 64000, + 128000, 256000}; const struct mtk_desc_pin *pin; struct irq_data *d; @@ -957,9 +958,9 @@ static int mtk_gpio_set_debounce(struct gpio_chip *chip, unsigned offset, if (!mtk_eint_can_en_debounce(pctl, eint_num)) return -ENOSYS; - dbnc = ARRAY_SIZE(dbnc_arr); - for (i = 0; i < ARRAY_SIZE(dbnc_arr); i++) { - if (debounce <= dbnc_arr[i]) { + dbnc = ARRAY_SIZE(debounce_time); + for (i = 0; i < ARRAY_SIZE(debounce_time); i++) { + if (debounce <= debounce_time[i]) { dbnc = i; break; } From ee6a1e9eefed56308fcbd5619cb02b926b7ec630 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Thu, 14 Apr 2016 10:29:16 +0530 Subject: [PATCH 506/797] pinctrl: single: Fix pcs_parse_bits_in_pinctrl_entry to use __ffs than ffs commit 56b367c0cd67d4c3006738e7dc9dda9273fd2bfe upstream. pcs_parse_bits_in_pinctrl_entry uses ffs which gives bit indices ranging from 1 to MAX. This leads to a corner case where we try to request the pin number = MAX and fails. bit_pos value is being calculted using ffs. pin_num_from_lsb uses bit_pos value. pins array is populated with: pin + pin_num_from_lsb. The above is 1 more than usual bit indices as bit_pos uses ffs to compute first set bit. Hence the last of the pins array is populated with the MAX value and not MAX - 1 which causes error when we call pin_request. mask_pos is rightly calculated as ((pcs->fmask) << (bit_pos - 1)) Consequently val_pos and submask are correct. Hence use __ffs which gives (ffs(x) - 1) as the first bit set. fixes: 4e7e8017a8 ("pinctrl: pinctrl-single: enhance to configure multiple pins of different modules") Signed-off-by: Keerthy Acked-by: Tony Lindgren Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-single.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index ef04b962c3d5..23b6b8c29a99 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -1273,9 +1273,9 @@ static int pcs_parse_bits_in_pinctrl_entry(struct pcs_device *pcs, /* Parse pins in each row from LSB */ while (mask) { - bit_pos = ffs(mask); + bit_pos = __ffs(mask); pin_num_from_lsb = bit_pos / pcs->bits_per_pin; - mask_pos = ((pcs->fmask) << (bit_pos - 1)); + mask_pos = ((pcs->fmask) << bit_pos); val_pos = val & mask_pos; submask = mask & mask_pos; @@ -1847,7 +1847,7 @@ static int pcs_probe(struct platform_device *pdev) ret = of_property_read_u32(np, "pinctrl-single,function-mask", &pcs->fmask); if (!ret) { - pcs->fshift = ffs(pcs->fmask) - 1; + pcs->fshift = __ffs(pcs->fmask); pcs->fmax = pcs->fmask >> pcs->fshift; } else { /* If mask property doesn't exist, function mux is invalid. */ From 99067b8e854211316200b3d6375a664448c2fabd Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 8 Apr 2016 15:12:24 +0200 Subject: [PATCH 507/797] iommu/amd: Fix checking of pci dma aliases commit e3156048346c28c695f5cf9db67a8cf88c90f947 upstream. Commit 61289cb ('iommu/amd: Remove old alias handling code') removed the old alias handling code from the AMD IOMMU driver because this is now handled by the IOMMU core code. But this also removed the handling of PCI aliases, which is not handled by the core code. This caused issues with PCI devices that have hidden PCIe-to-PCI bridges that rewrite the request-id. Fix this bug by re-introducing some of the removed functions from commit 61289cbaf6c8 and add a alias field 'struct iommu_dev_data'. This field carrys the return value of the get_alias() function and uses that instead of the amd_iommu_alias_table[] array in the code. Fixes: 61289cbaf6c8 ('iommu/amd: Remove old alias handling code') Tested-by: Tomasz Golinski Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu.c | 87 ++++++++++++++++++++++++++++++++++----- 1 file changed, 76 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index fc836f523afa..b9319b76a8a1 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -91,6 +91,7 @@ struct iommu_dev_data { struct list_head dev_data_list; /* For global dev_data_list */ struct protection_domain *domain; /* Domain the device is bound to */ u16 devid; /* PCI Device ID */ + u16 alias; /* Alias Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ bool passthrough; /* Device is identity mapped */ struct { @@ -125,6 +126,13 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) return container_of(dom, struct protection_domain, domain); } +static inline u16 get_device_id(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return PCI_DEVID(pdev->bus->number, pdev->devfn); +} + static struct iommu_dev_data *alloc_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -162,6 +170,68 @@ static struct iommu_dev_data *search_dev_data(u16 devid) return dev_data; } +static int __last_alias(struct pci_dev *pdev, u16 alias, void *data) +{ + *(u16 *)data = alias; + return 0; +} + +static u16 get_alias(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + u16 devid, ivrs_alias, pci_alias; + + devid = get_device_id(dev); + ivrs_alias = amd_iommu_alias_table[devid]; + pci_for_each_dma_alias(pdev, __last_alias, &pci_alias); + + if (ivrs_alias == pci_alias) + return ivrs_alias; + + /* + * DMA alias showdown + * + * The IVRS is fairly reliable in telling us about aliases, but it + * can't know about every screwy device. If we don't have an IVRS + * reported alias, use the PCI reported alias. In that case we may + * still need to initialize the rlookup and dev_table entries if the + * alias is to a non-existent device. + */ + if (ivrs_alias == devid) { + if (!amd_iommu_rlookup_table[pci_alias]) { + amd_iommu_rlookup_table[pci_alias] = + amd_iommu_rlookup_table[devid]; + memcpy(amd_iommu_dev_table[pci_alias].data, + amd_iommu_dev_table[devid].data, + sizeof(amd_iommu_dev_table[pci_alias].data)); + } + + return pci_alias; + } + + pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d " + "for device %s[%04x:%04x], kernel reported alias " + "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias), + PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device, + PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias), + PCI_FUNC(pci_alias)); + + /* + * If we don't have a PCI DMA alias and the IVRS alias is on the same + * bus, then the IVRS table may know about a quirk that we don't. + */ + if (pci_alias == devid && + PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { + pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN; + pdev->dma_alias_devfn = ivrs_alias & 0xff; + pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n", + PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), + dev_name(dev)); + } + + return ivrs_alias; +} + static struct iommu_dev_data *find_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -174,13 +244,6 @@ static struct iommu_dev_data *find_dev_data(u16 devid) return dev_data; } -static inline u16 get_device_id(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - - return PCI_DEVID(pdev->bus->number, pdev->devfn); -} - static struct iommu_dev_data *get_dev_data(struct device *dev) { return dev->archdata.iommu; @@ -308,6 +371,8 @@ static int iommu_init_device(struct device *dev) if (!dev_data) return -ENOMEM; + dev_data->alias = get_alias(dev); + if (pci_iommuv2_capable(pdev)) { struct amd_iommu *iommu; @@ -328,7 +393,7 @@ static void iommu_ignore_device(struct device *dev) u16 devid, alias; devid = get_device_id(dev); - alias = amd_iommu_alias_table[devid]; + alias = get_alias(dev); memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry)); memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry)); @@ -1017,7 +1082,7 @@ static int device_flush_dte(struct iommu_dev_data *dev_data) int ret; iommu = amd_iommu_rlookup_table[dev_data->devid]; - alias = amd_iommu_alias_table[dev_data->devid]; + alias = dev_data->alias; ret = iommu_flush_dte(iommu, dev_data->devid); if (!ret && alias != dev_data->devid) @@ -1891,7 +1956,7 @@ static void do_attach(struct iommu_dev_data *dev_data, bool ats; iommu = amd_iommu_rlookup_table[dev_data->devid]; - alias = amd_iommu_alias_table[dev_data->devid]; + alias = dev_data->alias; ats = dev_data->ats.enabled; /* Update data structures */ @@ -1925,7 +1990,7 @@ static void do_detach(struct iommu_dev_data *dev_data) return; iommu = amd_iommu_rlookup_table[dev_data->devid]; - alias = amd_iommu_alias_table[dev_data->devid]; + alias = dev_data->alias; /* decrease reference counters */ dev_data->domain->dev_iommu[iommu->index] -= 1; From e05cff2aa31766746f02c932e11b6b2ae357464c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 10 Mar 2016 19:28:12 +0000 Subject: [PATCH 508/797] iommu/dma: Restore scatterlist offsets correctly commit 07b48ac4bbe527e68cfc555f2b2b206908437141 upstream. With the change to stashing just the IOVA-page-aligned remainder of the CPU-page offset rather than the whole thing, the failure path in __invalidate_sg() also needs tweaking to account for that in the case of differing page sizes where the two offsets may not be equivalent. Similarly in __finalise_sg(), lest the architecture-specific wrappers later get the wrong address for cache maintenance on sync or unmap. Fixes: 164afb1d85b8 ("iommu/dma: Use correct offset in map_sg") Reported-by: Magnus Damm Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/dma-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 72d6182666cb..58f2fe687a24 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -403,7 +403,7 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, unsigned int s_length = sg_dma_len(s); unsigned int s_dma_len = s->length; - s->offset = s_offset; + s->offset += s_offset; s->length = s_length; sg_dma_address(s) = dma_addr + s_offset; dma_addr += s_dma_len; @@ -422,7 +422,7 @@ static void __invalidate_sg(struct scatterlist *sg, int nents) for_each_sg(sg, s, nents, i) { if (sg_dma_address(s) != DMA_ERROR_CODE) - s->offset = sg_dma_address(s); + s->offset += sg_dma_address(s); if (sg_dma_len(s)) s->length = sg_dma_len(s); sg_dma_address(s) = DMA_ERROR_CODE; From b7cf6750c05ac80df28d1d66ecd949011f7e0d4b Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 12 Apr 2016 19:25:52 +0800 Subject: [PATCH 509/797] drm/amdgpu: when suspending, if uvd/vce was running. need to cancel delay work. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 85cc88f02eb0ecf44493c1b2ebb6f206cd5fc321 upstream. fix the issue that when resume back, uvd/vce dpm was disabled and uvd/vce's performace dropped. Signed-off-by: Rex Zhu Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 53f987aeeacf..0d016ce541c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -273,6 +273,8 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset, (adev->uvd.fw->size) - offset); + cancel_delayed_work_sync(&adev->uvd.idle_work); + size = amdgpu_bo_size(adev->uvd.vcpu_bo); size -= le32_to_cpu(hdr->ucode_size_bytes); ptr = adev->uvd.cpu_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index a745eeeb5d82..bb0da76051a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -220,6 +220,7 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev) if (i == AMDGPU_MAX_VCE_HANDLES) return 0; + cancel_delayed_work_sync(&adev->vce.idle_work); /* TODO: suspending running encoding sessions isn't supported */ return -EINVAL; } From 57c17683f013be3aca2bf937516da9169e3b6727 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 18 Apr 2016 18:09:57 -0400 Subject: [PATCH 510/797] drm/amdgpu: use defines for CRTCs and AMFT blocks commit 3ea25f858fd5aeee888059952bbb8e910541eebb upstream. Prerequiste for the next patch which ups the limits. Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 064ebb347074..92f8b7bf7c64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -308,8 +308,8 @@ struct amdgpu_mode_info { struct atom_context *atom_context; struct card_info *atom_card_info; bool mode_config_initialized; - struct amdgpu_crtc *crtcs[6]; - struct amdgpu_afmt *afmt[7]; + struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS]; + struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS]; /* DVI-I properties */ struct drm_property *coherent_mode_property; /* DAC enable load detect */ From 25d1be8d9fbc1a1a479483c29345f578687c478a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 18 Apr 2016 18:25:34 -0400 Subject: [PATCH 511/797] drm/amdgpu: bump the afmt limit for CZ, ST, Polaris commit 83c5cda2ccf40a7a7e4bb674321509b346e23d5a upstream. Fixes array overflow on these chips. Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 92f8b7bf7c64..89df7871653d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -52,7 +52,7 @@ struct amdgpu_hpd; #define AMDGPU_MAX_HPD_PINS 6 #define AMDGPU_MAX_CRTCS 6 -#define AMDGPU_MAX_AFMT_BLOCKS 7 +#define AMDGPU_MAX_AFMT_BLOCKS 9 enum amdgpu_rmx_type { RMX_OFF, From fe98d45db9b84d436284ed156e4f5c2f78bb7999 Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Mon, 18 Apr 2016 16:05:04 -0400 Subject: [PATCH 512/797] amdgpu/uvd: add uvd fw version for amdgpu commit 562e2689baebaa2ac25b7ec934385480ed1cb7d6 upstream. Was previously always hardcoded to 0. Signed-off-by: Sonny Jiang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index bb1099c549df..053fc2f465df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1673,6 +1673,7 @@ struct amdgpu_uvd { struct amdgpu_bo *vcpu_bo; void *cpu_addr; uint64_t gpu_addr; + unsigned fw_version; atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES]; struct delayed_work idle_work; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index e23843f4d877..4488e82f87b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -303,7 +303,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file fw_info.feature = adev->vce.fb_version; break; case AMDGPU_INFO_FW_UVD: - fw_info.ver = 0; + fw_info.ver = adev->uvd.fw_version; fw_info.feature = 0; break; case AMDGPU_INFO_FW_GMC: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 0d016ce541c2..3b35ad83867c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -156,6 +156,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n", version_major, version_minor, family_id); + adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) | + (family_id << 8)); + bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) + AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE; r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true, From 038cf9c1977ed3bd71b107bbabe72cff8e5bb12f Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Tue, 22 Mar 2016 15:48:18 -0400 Subject: [PATCH 513/797] drm/amdgpu: fix regression on CIK (v2) This fix was written against drm-next, but when it was backported to 4.5 as a stable fix, the driver internal structure change was missed. Fix that up here to avoid a hang due to waiting for the wrong sequence number. v2: agd: fix up commit message Signed-off-by: Grigori Goronzy Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index aa491540ba85..946300764609 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -3628,7 +3628,7 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); - uint32_t seq = ring->fence_drv.sync_seq; + uint32_t seq = ring->fence_drv.sync_seq[ring->idx]; uint64_t addr = ring->fence_drv.gpu_addr; amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); From a37564fa5a495a9e3f59fb648315ad33085476cd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 14 Apr 2016 14:15:16 -0400 Subject: [PATCH 514/797] drm/radeon: add a quirk for a XFX R9 270X commit bcb31eba4a4ea356fd61cbd5dec5511c3883f57e upstream. bug: https://bugs.freedesktop.org/show_bug.cgi?id=76490 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/si_dpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 7285adb27099..caa73de584a5 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2931,6 +2931,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = { { PCI_VENDOR_ID_ATI, 0x6811, 0x1462, 0x2015, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x1043, 0x2015, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x148c, 0x2015, 0, 120000 }, + { PCI_VENDOR_ID_ATI, 0x6810, 0x1682, 0x9275, 0, 120000 }, { 0, 0, 0, 0 }, }; From 2ae4d4093977f2f29af5c92d6e0627eca3a97e20 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 13 Apr 2016 12:08:27 -0400 Subject: [PATCH 515/797] drm/radeon: fix initial connector audio value commit 7403c515c49c033fec33df0814fffdc977e6acdc upstream. This got lost somewhere along the way. This fixes audio not working until set_property was called. Noticed-by: Hyungwon Hwang Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_connectors.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 340f3f549f29..9cfc1c3e1965 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1996,10 +1996,12 @@ radeon_add_atom_connector(struct drm_device *dev, rdev->mode_info.dither_property, RADEON_FMT_DITHER_DISABLE); - if (radeon_audio != 0) + if (radeon_audio != 0) { drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.audio_property, RADEON_AUDIO_AUTO); + radeon_connector->audio = RADEON_AUDIO_AUTO; + } if (ASIC_IS_DCE5(rdev)) drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.output_csc_property, @@ -2124,6 +2126,7 @@ radeon_add_atom_connector(struct drm_device *dev, drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.audio_property, RADEON_AUDIO_AUTO); + radeon_connector->audio = RADEON_AUDIO_AUTO; } if (connector_type == DRM_MODE_CONNECTOR_DVII) { radeon_connector->dac_load_detect = true; @@ -2179,6 +2182,7 @@ radeon_add_atom_connector(struct drm_device *dev, drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.audio_property, RADEON_AUDIO_AUTO); + radeon_connector->audio = RADEON_AUDIO_AUTO; } if (ASIC_IS_DCE5(rdev)) drm_object_attach_property(&radeon_connector->base.base, @@ -2231,6 +2235,7 @@ radeon_add_atom_connector(struct drm_device *dev, drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.audio_property, RADEON_AUDIO_AUTO); + radeon_connector->audio = RADEON_AUDIO_AUTO; } if (ASIC_IS_DCE5(rdev)) drm_object_attach_property(&radeon_connector->base.base, From 7946284184695f2ba338b5c8c45a40f0b732fb2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Tue, 19 Apr 2016 09:07:50 -0400 Subject: [PATCH 516/797] drm/radeon: forbid mapping of userptr bo through radeon device file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b5dcec693f87cb8475f2291c0075b2422addd3d6 upstream. Allowing userptr bo which are basicly a list of page from some vma (so either anonymous page or file backed page) would lead to serious corruption of kernel structures and counters (because we overwrite the page->mapping field when mapping buffer). This will already block if the buffer was populated before anyone does try to mmap it because then TTM_PAGE_FLAG_SG would be set in in the ttm_tt flags. But that flag is check before ttm_tt_populate in the ttm vm fault handler. So to be safe just add a check to verify_access() callback. Reviewed-by: Christian König Signed-off-by: Jérôme Glisse Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_ttm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index e06ac546a90f..f342aad79cc6 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -235,6 +235,8 @@ static int radeon_verify_access(struct ttm_buffer_object *bo, struct file *filp) { struct radeon_bo *rbo = container_of(bo, struct radeon_bo, tbo); + if (radeon_ttm_tt_has_userptr(bo->ttm)) + return -EPERM; return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp); } From 4b6b0008f9dd3f18ac1d42c28e25dc09715ebc66 Mon Sep 17 00:00:00 2001 From: Vitaly Prosyak Date: Thu, 14 Apr 2016 13:34:03 -0400 Subject: [PATCH 517/797] drm/radeon: fix vertical bars appear on monitor (v2) commit 5d5b7803c49bbb01bdf4c6e95e8314d0515b9484 upstream. When crtc/timing is disabled on boot the dig block should be stopped in order ignore timing from crtc, reset the steering fifo otherwise we get display corruption or hung in dp sst mode. v2: agd: fix coding style Signed-off-by: Vitaly Prosyak Signed-off-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/evergreen.c | 154 ++++++++++++++++++++++++- drivers/gpu/drm/radeon/evergreen_reg.h | 46 ++++++++ 2 files changed, 199 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 2ad462896896..32491355a1d4 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2608,10 +2608,152 @@ static void evergreen_agp_enable(struct radeon_device *rdev) WREG32(VM_CONTEXT1_CNTL, 0); } +static const unsigned ni_dig_offsets[] = +{ + NI_DIG0_REGISTER_OFFSET, + NI_DIG1_REGISTER_OFFSET, + NI_DIG2_REGISTER_OFFSET, + NI_DIG3_REGISTER_OFFSET, + NI_DIG4_REGISTER_OFFSET, + NI_DIG5_REGISTER_OFFSET +}; + +static const unsigned ni_tx_offsets[] = +{ + NI_DCIO_UNIPHY0_UNIPHY_TX_CONTROL1, + NI_DCIO_UNIPHY1_UNIPHY_TX_CONTROL1, + NI_DCIO_UNIPHY2_UNIPHY_TX_CONTROL1, + NI_DCIO_UNIPHY3_UNIPHY_TX_CONTROL1, + NI_DCIO_UNIPHY4_UNIPHY_TX_CONTROL1, + NI_DCIO_UNIPHY5_UNIPHY_TX_CONTROL1 +}; + +static const unsigned evergreen_dp_offsets[] = +{ + EVERGREEN_DP0_REGISTER_OFFSET, + EVERGREEN_DP1_REGISTER_OFFSET, + EVERGREEN_DP2_REGISTER_OFFSET, + EVERGREEN_DP3_REGISTER_OFFSET, + EVERGREEN_DP4_REGISTER_OFFSET, + EVERGREEN_DP5_REGISTER_OFFSET +}; + + +/* + * Assumption is that EVERGREEN_CRTC_MASTER_EN enable for requested crtc + * We go from crtc to connector and it is not relible since it + * should be an opposite direction .If crtc is enable then + * find the dig_fe which selects this crtc and insure that it enable. + * if such dig_fe is found then find dig_be which selects found dig_be and + * insure that it enable and in DP_SST mode. + * if UNIPHY_PLL_CONTROL1.enable then we should disconnect timing + * from dp symbols clocks . + */ +static bool evergreen_is_dp_sst_stream_enabled(struct radeon_device *rdev, + unsigned crtc_id, unsigned *ret_dig_fe) +{ + unsigned i; + unsigned dig_fe; + unsigned dig_be; + unsigned dig_en_be; + unsigned uniphy_pll; + unsigned digs_fe_selected; + unsigned dig_be_mode; + unsigned dig_fe_mask; + bool is_enabled = false; + bool found_crtc = false; + + /* loop through all running dig_fe to find selected crtc */ + for (i = 0; i < ARRAY_SIZE(ni_dig_offsets); i++) { + dig_fe = RREG32(NI_DIG_FE_CNTL + ni_dig_offsets[i]); + if (dig_fe & NI_DIG_FE_CNTL_SYMCLK_FE_ON && + crtc_id == NI_DIG_FE_CNTL_SOURCE_SELECT(dig_fe)) { + /* found running pipe */ + found_crtc = true; + dig_fe_mask = 1 << i; + dig_fe = i; + break; + } + } + + if (found_crtc) { + /* loop through all running dig_be to find selected dig_fe */ + for (i = 0; i < ARRAY_SIZE(ni_dig_offsets); i++) { + dig_be = RREG32(NI_DIG_BE_CNTL + ni_dig_offsets[i]); + /* if dig_fe_selected by dig_be? */ + digs_fe_selected = NI_DIG_BE_CNTL_FE_SOURCE_SELECT(dig_be); + dig_be_mode = NI_DIG_FE_CNTL_MODE(dig_be); + if (dig_fe_mask & digs_fe_selected && + /* if dig_be in sst mode? */ + dig_be_mode == NI_DIG_BE_DPSST) { + dig_en_be = RREG32(NI_DIG_BE_EN_CNTL + + ni_dig_offsets[i]); + uniphy_pll = RREG32(NI_DCIO_UNIPHY0_PLL_CONTROL1 + + ni_tx_offsets[i]); + /* dig_be enable and tx is running */ + if (dig_en_be & NI_DIG_BE_EN_CNTL_ENABLE && + dig_en_be & NI_DIG_BE_EN_CNTL_SYMBCLK_ON && + uniphy_pll & NI_DCIO_UNIPHY0_PLL_CONTROL1_ENABLE) { + is_enabled = true; + *ret_dig_fe = dig_fe; + break; + } + } + } + } + + return is_enabled; +} + +/* + * Blank dig when in dp sst mode + * Dig ignores crtc timing + */ +static void evergreen_blank_dp_output(struct radeon_device *rdev, + unsigned dig_fe) +{ + unsigned stream_ctrl; + unsigned fifo_ctrl; + unsigned counter = 0; + + if (dig_fe >= ARRAY_SIZE(evergreen_dp_offsets)) { + DRM_ERROR("invalid dig_fe %d\n", dig_fe); + return; + } + + stream_ctrl = RREG32(EVERGREEN_DP_VID_STREAM_CNTL + + evergreen_dp_offsets[dig_fe]); + if (!(stream_ctrl & EVERGREEN_DP_VID_STREAM_CNTL_ENABLE)) { + DRM_ERROR("dig %d , should be enable\n", dig_fe); + return; + } + + stream_ctrl &=~EVERGREEN_DP_VID_STREAM_CNTL_ENABLE; + WREG32(EVERGREEN_DP_VID_STREAM_CNTL + + evergreen_dp_offsets[dig_fe], stream_ctrl); + + stream_ctrl = RREG32(EVERGREEN_DP_VID_STREAM_CNTL + + evergreen_dp_offsets[dig_fe]); + while (counter < 32 && stream_ctrl & EVERGREEN_DP_VID_STREAM_STATUS) { + msleep(1); + counter++; + stream_ctrl = RREG32(EVERGREEN_DP_VID_STREAM_CNTL + + evergreen_dp_offsets[dig_fe]); + } + if (counter >= 32 ) + DRM_ERROR("counter exceeds %d\n", counter); + + fifo_ctrl = RREG32(EVERGREEN_DP_STEER_FIFO + evergreen_dp_offsets[dig_fe]); + fifo_ctrl |= EVERGREEN_DP_STEER_FIFO_RESET; + WREG32(EVERGREEN_DP_STEER_FIFO + evergreen_dp_offsets[dig_fe], fifo_ctrl); + +} + void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save) { u32 crtc_enabled, tmp, frame_count, blackout; int i, j; + unsigned dig_fe; if (!ASIC_IS_NODCE(rdev)) { save->vga_render_control = RREG32(VGA_RENDER_CONTROL); @@ -2651,7 +2793,17 @@ void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *sav break; udelay(1); } - + /*we should disable dig if it drives dp sst*/ + /*but we are in radeon_device_init and the topology is unknown*/ + /*and it is available after radeon_modeset_init*/ + /*the following method radeon_atom_encoder_dpms_dig*/ + /*does the job if we initialize it properly*/ + /*for now we do it this manually*/ + /**/ + if (ASIC_IS_DCE5(rdev) && + evergreen_is_dp_sst_stream_enabled(rdev, i ,&dig_fe)) + evergreen_blank_dp_output(rdev, dig_fe); + /*we could remove 6 lines below*/ /* XXX this is a hack to avoid strange behavior with EFI on certain systems */ WREG32(EVERGREEN_CRTC_UPDATE_LOCK + crtc_offsets[i], 1); tmp = RREG32(EVERGREEN_CRTC_CONTROL + crtc_offsets[i]); diff --git a/drivers/gpu/drm/radeon/evergreen_reg.h b/drivers/gpu/drm/radeon/evergreen_reg.h index aa939dfed3a3..b436badf9efa 100644 --- a/drivers/gpu/drm/radeon/evergreen_reg.h +++ b/drivers/gpu/drm/radeon/evergreen_reg.h @@ -250,8 +250,43 @@ /* HDMI blocks at 0x7030, 0x7c30, 0x10830, 0x11430, 0x12030, 0x12c30 */ #define EVERGREEN_HDMI_BASE 0x7030 +/*DIG block*/ +#define NI_DIG0_REGISTER_OFFSET (0x7000 - 0x7000) +#define NI_DIG1_REGISTER_OFFSET (0x7C00 - 0x7000) +#define NI_DIG2_REGISTER_OFFSET (0x10800 - 0x7000) +#define NI_DIG3_REGISTER_OFFSET (0x11400 - 0x7000) +#define NI_DIG4_REGISTER_OFFSET (0x12000 - 0x7000) +#define NI_DIG5_REGISTER_OFFSET (0x12C00 - 0x7000) + + +#define NI_DIG_FE_CNTL 0x7000 +# define NI_DIG_FE_CNTL_SOURCE_SELECT(x) ((x) & 0x3) +# define NI_DIG_FE_CNTL_SYMCLK_FE_ON (1<<24) + + +#define NI_DIG_BE_CNTL 0x7140 +# define NI_DIG_BE_CNTL_FE_SOURCE_SELECT(x) (((x) >> 8 ) & 0x3F) +# define NI_DIG_FE_CNTL_MODE(x) (((x) >> 16) & 0x7 ) + +#define NI_DIG_BE_EN_CNTL 0x7144 +# define NI_DIG_BE_EN_CNTL_ENABLE (1 << 0) +# define NI_DIG_BE_EN_CNTL_SYMBCLK_ON (1 << 8) +# define NI_DIG_BE_DPSST 0 /* Display Port block */ +#define EVERGREEN_DP0_REGISTER_OFFSET (0x730C - 0x730C) +#define EVERGREEN_DP1_REGISTER_OFFSET (0x7F0C - 0x730C) +#define EVERGREEN_DP2_REGISTER_OFFSET (0x10B0C - 0x730C) +#define EVERGREEN_DP3_REGISTER_OFFSET (0x1170C - 0x730C) +#define EVERGREEN_DP4_REGISTER_OFFSET (0x1230C - 0x730C) +#define EVERGREEN_DP5_REGISTER_OFFSET (0x12F0C - 0x730C) + + +#define EVERGREEN_DP_VID_STREAM_CNTL 0x730C +# define EVERGREEN_DP_VID_STREAM_CNTL_ENABLE (1 << 0) +# define EVERGREEN_DP_VID_STREAM_STATUS (1 <<16) +#define EVERGREEN_DP_STEER_FIFO 0x7310 +# define EVERGREEN_DP_STEER_FIFO_RESET (1 << 0) #define EVERGREEN_DP_SEC_CNTL 0x7280 # define EVERGREEN_DP_SEC_STREAM_ENABLE (1 << 0) # define EVERGREEN_DP_SEC_ASP_ENABLE (1 << 4) @@ -266,4 +301,15 @@ # define EVERGREEN_DP_SEC_N_BASE_MULTIPLE(x) (((x) & 0xf) << 24) # define EVERGREEN_DP_SEC_SS_EN (1 << 28) +/*DCIO_UNIPHY block*/ +#define NI_DCIO_UNIPHY0_UNIPHY_TX_CONTROL1 (0x6600 -0x6600) +#define NI_DCIO_UNIPHY1_UNIPHY_TX_CONTROL1 (0x6640 -0x6600) +#define NI_DCIO_UNIPHY2_UNIPHY_TX_CONTROL1 (0x6680 - 0x6600) +#define NI_DCIO_UNIPHY3_UNIPHY_TX_CONTROL1 (0x66C0 - 0x6600) +#define NI_DCIO_UNIPHY4_UNIPHY_TX_CONTROL1 (0x6700 - 0x6600) +#define NI_DCIO_UNIPHY5_UNIPHY_TX_CONTROL1 (0x6740 - 0x6600) + +#define NI_DCIO_UNIPHY0_PLL_CONTROL1 0x6618 +# define NI_DCIO_UNIPHY0_PLL_CONTROL1_ENABLE (1 << 0) + #endif From 7973c7c36e96d9c2afda1df9e4f4c2518cbe6588 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 19 Apr 2016 19:19:11 +0800 Subject: [PATCH 518/797] drm: Loongson-3 doesn't fully support wc memory commit 221004c66a58949a0f25c937a6789c0839feb530 upstream. Signed-off-by: Huacai Chen Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- include/drm/drm_cache.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h index 461a0558bca4..cebecff536a3 100644 --- a/include/drm/drm_cache.h +++ b/include/drm/drm_cache.h @@ -39,6 +39,8 @@ static inline bool drm_arch_can_wc_memory(void) { #if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE) return false; +#elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON3) + return false; #else return true; #endif From 20a32ec7ae6c46768d91c61d99228f13c2a7912b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 22 Apr 2016 10:05:21 +1000 Subject: [PATCH 519/797] drm/nouveau/gr/gf100: select a stream master to fixup tfb offset queries commit 28dca90533750c7e31e8641c3df426bad9c12941 upstream. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 9f5dfc85147a..36655a74c538 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -1717,6 +1717,8 @@ gf100_gr_init(struct gf100_gr *gr) gf100_gr_mmio(gr, gr->func->mmio); + nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001); + memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); for (i = 0, gpc = -1; i < gr->tpc_total; i++) { do { From e51d7655d3dcac3ea2185fec178872b11b9f03be Mon Sep 17 00:00:00 2001 From: "cpaul@redhat.com" Date: Mon, 4 Apr 2016 19:58:47 -0400 Subject: [PATCH 520/797] drm/dp/mst: Validate port in drm_dp_payload_send_msg() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit deba0a2af9592b2022a0bce7b085a318b53ce1db upstream. With the joys of things running concurrently, there's always a chance that the port we get passed in drm_dp_payload_send_msg() isn't actually valid anymore. Because of this, we need to make sure we validate the reference to the port before we use it otherwise we risk running into various race conditions. For instance, on the Dell MST monitor I have here for testing, hotplugging it enough times causes us to kernel panic: [drm:intel_mst_enable_dp] 1 [drm:drm_dp_update_payload_part2] payload 0 1 [drm:intel_get_hpd_pins] hotplug event received, stat 0x00200000, dig 0x10101011, pins 0x00000020 [drm:intel_hpd_irq_handler] digital hpd port B - short [drm:intel_dp_hpd_pulse] got hpd irq on port B - short [drm:intel_dp_check_mst_status] got esi 00 10 00 [drm:drm_dp_update_payload_part2] payload 1 1 general protection fault: 0000 [#1] SMP … Call Trace: [] drm_dp_update_payload_part2+0xc2/0x130 [drm_kms_helper] [] intel_mst_enable_dp+0xf8/0x180 [i915] [] haswell_crtc_enable+0x3ed/0x8c0 [i915] [] intel_atomic_commit+0x5ad/0x1590 [i915] [] ? drm_atomic_set_crtc_for_connector+0x57/0xe0 [drm] [] drm_atomic_commit+0x37/0x60 [drm] [] drm_atomic_helper_set_config+0x7a/0xb0 [drm_kms_helper] [] drm_mode_set_config_internal+0x62/0x100 [drm] [] drm_mode_setcrtc+0x3cd/0x4e0 [drm] [] drm_ioctl+0x143/0x510 [drm] [] ? drm_mode_setplane+0x1b0/0x1b0 [drm] [] ? hrtimer_start_range_ns+0x1b7/0x3a0 [] do_vfs_ioctl+0x92/0x570 [] ? __sys_recvmsg+0x42/0x80 [] SyS_ioctl+0x79/0x90 [] entry_SYSCALL_64_fastpath+0x1a/0xa4 RIP [] drm_dp_payload_send_msg+0x146/0x1f0 [drm_kms_helper] Which occurs because of the hotplug event shown in the log, which ends up causing DRM's dp helpers to drop the port we're updating the payload on and panic. Signed-off-by: Lyude Reviewed-by: David Airlie Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 39d7e2e15c11..a4a3de372b69 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1665,13 +1665,19 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_branch *mstb; int len, ret, port_num; + port = drm_dp_get_validated_port_ref(mgr, port); + if (!port) + return -EINVAL; + port_num = port->port_num; mstb = drm_dp_get_validated_mstb_ref(mgr, port->parent); if (!mstb) { mstb = drm_dp_get_last_connected_port_and_mstb(mgr, port->parent, &port_num); - if (!mstb) + if (!mstb) { + drm_dp_put_port(port); return -EINVAL; + } } txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL); @@ -1697,6 +1703,7 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr, kfree(txmsg); fail_put: drm_dp_put_mst_branch_device(mstb); + drm_dp_put_port(port); return ret; } From 3ae01ae65df95a372451e476725ce278bec8787c Mon Sep 17 00:00:00 2001 From: Lyude Date: Wed, 13 Apr 2016 16:50:18 -0400 Subject: [PATCH 521/797] drm/dp/mst: Restore primary hub guid on resume commit 9dc0487d96a0396367a1451b31873482080b527f upstream. Some hubs are forgetful, and end up forgetting whatever GUID we set previously after we do a suspend/resume cycle. This can lead to hotplugging breaking (along with probably other things) since the hub will start sending connection notifications with the wrong GUID. As such, we need to check on resume whether or not the GUID the hub is giving us is valid. Signed-off-by: Lyude Reviewed-by: Harry Wentland Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1460580618-7421-1-git-send-email-cpaul@redhat.com Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index a4a3de372b69..04350dfa4959 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2116,6 +2116,8 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr) if (mgr->mst_primary) { int sret; + u8 guid[16]; + sret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, mgr->dpcd, DP_RECEIVER_CAP_SIZE); if (sret != DP_RECEIVER_CAP_SIZE) { DRM_DEBUG_KMS("dpcd read failed - undocked during suspend?\n"); @@ -2130,6 +2132,16 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr) ret = -1; goto out_unlock; } + + /* Some hubs forget their guids after they resume */ + sret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16); + if (sret != 16) { + DRM_DEBUG_KMS("dpcd read failed - undocked during suspend?\n"); + ret = -1; + goto out_unlock; + } + drm_dp_check_mstb_guid(mgr->mst_primary, guid); + ret = 0; } else ret = -1; From 385af1d58254412e42d06b19e3cbe60b55cf34a6 Mon Sep 17 00:00:00 2001 From: "cpaul@redhat.com" Date: Fri, 22 Apr 2016 16:08:46 -0400 Subject: [PATCH 522/797] drm/dp/mst: Get validated port ref in drm_dp_update_payload_part1() commit 263efde31f97c498e1ebad30e4d2906609d7ad6b upstream. We can thank KASAN for finding this, otherwise I probably would have spent hours on it. This fixes a somewhat harder to trigger kernel panic, occuring while enabling MST where the port we were currently updating the payload on would have all of it's refs dropped before we finished what we were doing: ================================================================== BUG: KASAN: use-after-free in drm_dp_update_payload_part1+0xb3f/0xdb0 [drm_kms_helper] at addr ffff8800d29de018 Read of size 4 by task Xorg/973 ============================================================================= BUG kmalloc-2048 (Tainted: G B W ): kasan: bad access detected ----------------------------------------------------------------------------- INFO: Allocated in drm_dp_add_port+0x1aa/0x1ed0 [drm_kms_helper] age=16477 cpu=0 pid=2175 ___slab_alloc+0x472/0x490 __slab_alloc+0x20/0x40 kmem_cache_alloc_trace+0x151/0x190 drm_dp_add_port+0x1aa/0x1ed0 [drm_kms_helper] drm_dp_send_link_address+0x526/0x960 [drm_kms_helper] drm_dp_check_and_send_link_address+0x1ac/0x210 [drm_kms_helper] drm_dp_mst_link_probe_work+0x77/0xd0 [drm_kms_helper] process_one_work+0x562/0x1350 worker_thread+0xd9/0x1390 kthread+0x1c5/0x260 ret_from_fork+0x22/0x40 INFO: Freed in drm_dp_free_mst_port+0x50/0x60 [drm_kms_helper] age=7521 cpu=0 pid=2175 __slab_free+0x17f/0x2d0 kfree+0x169/0x180 drm_dp_free_mst_port+0x50/0x60 [drm_kms_helper] drm_dp_destroy_connector_work+0x2b8/0x490 [drm_kms_helper] process_one_work+0x562/0x1350 worker_thread+0xd9/0x1390 kthread+0x1c5/0x260 ret_from_fork+0x22/0x40 which on this T460s, would eventually lead to kernel panics in somewhat random places later in intel_mst_enable_dp() if we got lucky enough. Signed-off-by: Lyude Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 04350dfa4959..d268bf18a662 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1786,6 +1786,11 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr) req_payload.start_slot = cur_slots; if (mgr->proposed_vcpis[i]) { port = container_of(mgr->proposed_vcpis[i], struct drm_dp_mst_port, vcpi); + port = drm_dp_get_validated_port_ref(mgr, port); + if (!port) { + mutex_unlock(&mgr->payload_lock); + return -EINVAL; + } req_payload.num_slots = mgr->proposed_vcpis[i]->num_slots; } else { port = NULL; @@ -1811,6 +1816,9 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr) mgr->payloads[i].payload_state = req_payload.payload_state; } cur_slots += req_payload.num_slots; + + if (port) + drm_dp_put_port(port); } for (i = 0; i < mgr->max_payloads; i++) { From 194de738b69315721adc4e6dbafe81c790b318c8 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Sun, 6 Mar 2016 03:21:46 +0200 Subject: [PATCH 523/797] pwm: brcmstb: Fix check of devm_ioremap_resource() return code commit c5857e3f94ab2719dfac649a146cb5dd6f21fcf3 upstream. The change fixes potential oops while accessing iomem on invalid address if devm_ioremap_resource() fails due to some reason. The devm_ioremap_resource() function returns ERR_PTR() and never returns NULL, which makes useless a following check for NULL. Signed-off-by: Vladimir Zapolskiy Fixes: 3a9f5957020f ("pwm: Add Broadcom BCM7038 PWM controller support") Acked-by: Florian Fainelli Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/pwm/pwm-brcmstb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/pwm-brcmstb.c b/drivers/pwm/pwm-brcmstb.c index 423ce087cd9c..5d5adee16886 100644 --- a/drivers/pwm/pwm-brcmstb.c +++ b/drivers/pwm/pwm-brcmstb.c @@ -274,8 +274,8 @@ static int brcmstb_pwm_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); p->base = devm_ioremap_resource(&pdev->dev, res); - if (!p->base) { - ret = -ENOMEM; + if (IS_ERR(p->base)) { + ret = PTR_ERR(p->base); goto out_clk; } From 39fa719753bcef274084502c1ec8cfefc556209f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 11 Jan 2016 20:48:32 +0200 Subject: [PATCH 524/797] drm/i915: Cleanup phys status page too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7d3fdfff23852fe458a0d0979a3555fe60f1e563 upstream. Restore the lost phys status page cleanup. Fixes the following splat with DMA_API_DEBUG=y: WARNING: CPU: 0 PID: 21615 at ../lib/dma-debug.c:974 dma_debug_device_change+0x190/0x1f0() pci 0000:00:02.0: DMA-API: device driver has pending DMA allocations while released from device [count=1] One of leaked entries details: [device address=0x0000000023163000] [size=4096 bytes] [mapped with DMA_BIDIRECTIONAL] [mapped as coherent] Modules linked in: i915(-) i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm sha256_generic hmac drbg ctr ccm sch_fq_codel binfmt_misc joydev mousedev arc4 ath5k iTCO_wdt mac80211 smsc_ircc2 ath snd_intel8x0m snd_intel8x0 snd_ac97_codec ac97_bus psmouse snd_pcm input_leds i2c_i801 pcspkr snd_timer cfg80211 snd soundcore i2c_core ehci_pci firewire_ohci ehci_hcd firewire_core lpc_ich 8139too rfkill crc_itu_t mfd_core mii usbcore rng_core intel_agp intel_gtt usb_common agpgart irda crc_ccitt fujitsu_laptop led_class parport_pc video parport evdev backlight CPU: 0 PID: 21615 Comm: rmmod Tainted: G U 4.4.0-rc4-mgm-ovl+ #4 Hardware name: FUJITSU SIEMENS LIFEBOOK S6120/FJNB16C, BIOS Version 1.26 05/10/2004 e31a3de0 e31a3de0 e31a3d9c c128d4bd e31a3dd0 c1045a0c c15e00c4 e31a3dfc 0000546f c15dfad2 000003ce c12b3740 000003ce c12b3740 00000000 00000001 f61fb8a0 e31a3de8 c1045a83 00000009 e31a3de0 c15e00c4 e31a3dfc e31a3e4c Call Trace: [] dump_stack+0x16/0x19 [] warn_slowpath_common+0x8c/0xd0 [] ? dma_debug_device_change+0x190/0x1f0 [] ? dma_debug_device_change+0x190/0x1f0 [] warn_slowpath_fmt+0x33/0x40 [] dma_debug_device_change+0x190/0x1f0 [] notifier_call_chain+0x59/0x70 [] __blocking_notifier_call_chain+0x3f/0x80 [] blocking_notifier_call_chain+0x1f/0x30 [] __device_release_driver+0xc3/0xf0 [] driver_detach+0x97/0xa0 [] bus_remove_driver+0x40/0x90 [] driver_unregister+0x28/0x60 [] ? trace_hardirqs_on_caller+0x12c/0x1d0 [] pci_unregister_driver+0x18/0x80 [] drm_pci_exit+0x87/0xb0 [drm] [] i915_exit+0x1b/0x1ee [i915] [] SyS_delete_module+0x14c/0x210 [] ? trace_hardirqs_on_caller+0x12c/0x1d0 [] ? ____fput+0xd/0x10 [] do_fast_syscall_32+0xa4/0x450 [] sysenter_past_esp+0x3b/0x5d ---[ end trace c2ecbc77760f10a0 ]--- Mapped at: [] debug_dma_alloc_coherent+0x33/0x90 [] drm_pci_alloc+0x18c/0x1e0 [drm] [] intel_init_ring_buffer+0x2af/0x490 [i915] [] intel_init_render_ring_buffer+0x130/0x750 [i915] [] i915_gem_init_rings+0x1e/0x110 [i915] v2: s/BUG_ON/WARN_ON/ since dim doens't like the former anymore Cc: Chris Wilson Fixes: 5c6c600 ("drm/i915: Remove DRI1 ring accessors and API") Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson (v1) Link: http://patchwork.freedesktop.org/patch/msgid/1452538112-5331-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_ringbuffer.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index f6b2a814e629..0b1015de8536 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1922,6 +1922,17 @@ i915_dispatch_execbuffer(struct drm_i915_gem_request *req, return 0; } +static void cleanup_phys_status_page(struct intel_engine_cs *ring) +{ + struct drm_i915_private *dev_priv = to_i915(ring->dev); + + if (!dev_priv->status_page_dmah) + return; + + drm_pci_free(ring->dev, dev_priv->status_page_dmah); + ring->status_page.page_addr = NULL; +} + static void cleanup_status_page(struct intel_engine_cs *ring) { struct drm_i915_gem_object *obj; @@ -1938,9 +1949,9 @@ static void cleanup_status_page(struct intel_engine_cs *ring) static int init_status_page(struct intel_engine_cs *ring) { - struct drm_i915_gem_object *obj; + struct drm_i915_gem_object *obj = ring->status_page.obj; - if ((obj = ring->status_page.obj) == NULL) { + if (obj == NULL) { unsigned flags; int ret; @@ -2134,7 +2145,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, if (ret) goto error; } else { - BUG_ON(ring->id != RCS); + WARN_ON(ring->id != RCS); ret = init_phys_status_page(ring); if (ret) goto error; @@ -2179,7 +2190,12 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring) if (ring->cleanup) ring->cleanup(ring); - cleanup_status_page(ring); + if (I915_NEED_GFX_HWS(ring->dev)) { + cleanup_status_page(ring); + } else { + WARN_ON(ring->id != RCS); + cleanup_phys_status_page(ring); + } i915_cmd_parser_fini_ring(ring); i915_gem_batch_pool_fini(&ring->batch_pool); From 80220c4827ddde2b1c49ababa6c1ab0ad0691112 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 15 Oct 2015 17:01:58 +0300 Subject: [PATCH 525/797] drm/i915: skl_update_scaler() wants a rotation bitmask instead of bit number MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fa5a7970d372c9c9beb3a0ce79ee1d0c23387d0a upstream. Pass BIT(DRM_ROTATE_0) instead of DRM_ROTATE_0 to skl_update_scaler(). The former is a mask, the latter just the bit number. Fortunately the only thing skl_update_scaler() does with the rotation is check if it's 90/270 degrees or not, and so in this case it would still do the right thing. Cc: Chandra Konduru Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1444917718-28495-1-git-send-email-ville.syrjala@linux.intel.com Fixes: 6156a45602f9 ("drm/i915: skylake primary plane scaling using shared scalers") Reviewed-by: Matt Roper Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f859a5b87ed4..afa81691163d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4447,7 +4447,7 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state) intel_crtc->base.base.id, intel_crtc->pipe, SKL_CRTC_INDEX); return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX, - &state->scaler_state.scaler_id, DRM_ROTATE_0, + &state->scaler_state.scaler_id, BIT(DRM_ROTATE_0), state->pipe_src_w, state->pipe_src_h, adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay); } From 20d948d8b63538e5a1af20b275c4562a5a6bc470 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 11 Mar 2016 10:51:51 +0300 Subject: [PATCH 526/797] drm/amdkfd: uninitialized variable in dbgdev_wave_control_set_registers() commit 93fce954427effee89e44a976299b15dd75b4bbc upstream. At the end of the function we expect "status" to be zero, but it's either -EINVAL or uninitialized. Fixes: 788bf83db301 ('drm/amdkfd: Add wave control operation to debugger') Signed-off-by: Dan Carpenter Signed-off-by: Oded Gabbay Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index c34c393e9aea..d5e19b5fbbfb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -513,7 +513,7 @@ static int dbgdev_wave_control_set_registers( union SQ_CMD_BITS *in_reg_sq_cmd, union GRBM_GFX_INDEX_BITS *in_reg_gfx_index) { - int status; + int status = 0; union SQ_CMD_BITS reg_sq_cmd; union GRBM_GFX_INDEX_BITS reg_gfx_index; struct HsaDbgWaveMsgAMDGen2 *pMsg; From e0a2d244dcb6068887ef7d3c3af302fe9152298f Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Fri, 11 Mar 2016 14:56:42 +0530 Subject: [PATCH 527/797] drm/i915: Fixup the free space logic in ring_prepare commit d43f3ebf12f59c57782ec652da65ef61c2662b40 upstream. Currently for the case where there is enough space at the end of Ring buffer for accommodating only the base request, the wrapround is done immediately and as a result the base request gets added at the start of Ring buffer. But there may not be enough free space at the beginning to accommodate the base request, as before the wraparound, the wait was effectively done for the reserved_size free space from the start of Ring buffer. In such a case there is a potential of Ring buffer overflow, the instructions at the head of Ring (ACTHD) can get overwritten. Since the base request can fit in the remaining space, there is no need to wraparound immediately. The wraparound will anyway happen later when the reserved part starts getting used. Cc: Chris Wilson Signed-off-by: Akash Goel Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1457688402-10411-1-git-send-email-akash.goel@intel.com Reviewed-by: Chris Wilson (cherry picked from commit 782f6bc0aba037436d6a04d19b23f8b61020a576) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lrc.c | 6 +++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d69547a65dbb..7058f75c7b42 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -776,11 +776,11 @@ static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes) if (unlikely(total_bytes > remain_usable)) { /* * The base request will fit but the reserved space - * falls off the end. So only need to to wait for the - * reserved size after flushing out the remainder. + * falls off the end. So don't need an immediate wrap + * and only need to effectively wait for the reserved + * size space from the start of ringbuffer. */ wait_bytes = remain_actual + ringbuf->reserved_size; - need_wrap = true; } else if (total_bytes > ringbuf->space) { /* No wrapping required, just waiting. */ wait_bytes = total_bytes; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0b1015de8536..9d48443bca2e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2357,11 +2357,11 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes) if (unlikely(total_bytes > remain_usable)) { /* * The base request will fit but the reserved space - * falls off the end. So only need to to wait for the - * reserved size after flushing out the remainder. + * falls off the end. So don't need an immediate wrap + * and only need to effectively wait for the reserved + * size space from the start of ringbuffer. */ wait_bytes = remain_actual + ringbuf->reserved_size; - need_wrap = true; } else if (total_bytes > ringbuf->space) { /* No wrapping required, just waiting. */ wait_bytes = total_bytes; From f4276d5753538996fa93a34646554bfd92f6e071 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 14 Apr 2016 14:39:02 +0300 Subject: [PATCH 528/797] drm/i915: Use fw_domains_put_with_fifo() on HSW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 31318a922395ec9e78d6e2ddf70779355afc7594 upstream. HSW still has the wake FIFO, so let's check it. Cc: Mika Kuoppala Cc: Deepak S Fixes: 05a2fb157e44 ("drm/i915: Consolidate forcewake code") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1460633942-24013-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Mika Kuoppala (cherry picked from commit 3d7d0c85e41afb5a05e98b3a8a72c38357f02594) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_uncore.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 43cba129a0c0..cc91ae832ffb 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1132,7 +1132,11 @@ static void intel_uncore_fw_domains_init(struct drm_device *dev) } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get_with_thread_status; - dev_priv->uncore.funcs.force_wake_put = fw_domains_put; + if (IS_HASWELL(dev)) + dev_priv->uncore.funcs.force_wake_put = + fw_domains_put_with_fifo; + else + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_MT, FORCEWAKE_ACK_HSW); } else if (IS_IVYBRIDGE(dev)) { From 0ea82073cb5e7039299350aba5bc135994c8cbda Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 18 Apr 2016 13:57:48 +0300 Subject: [PATCH 529/797] perf intel-pt: Fix segfault tracing transactions commit 1342e0b7a6c1a060c593037fbac9f4b717f1cb3b upstream. Tracing a workload that uses transactions gave a seg fault as follows: perf record -e intel_pt// workload perf report Program received signal SIGSEGV, Segmentation fault. 0x000000000054b58c in intel_pt_reset_last_branch_rb (ptq=0x1a36110) at util/intel-pt.c:929 929 ptq->last_branch_rb->nr = 0; (gdb) p ptq->last_branch_rb $1 = (struct branch_stack *) 0x0 (gdb) up 1148 intel_pt_reset_last_branch_rb(ptq); (gdb) l 1143 if (ret) 1144 pr_err("Intel Processor Trace: failed to deliver transaction event 1145 ret); 1146 1147 if (pt->synth_opts.callchain) 1148 intel_pt_reset_last_branch_rb(ptq); 1149 1150 return ret; 1151 } 1152 (gdb) p pt->synth_opts.callchain $2 = true (gdb) (gdb) bt #0 0x000000000054b58c in intel_pt_reset_last_branch_rb (ptq=0x1a36110) #1 0x000000000054c1e0 in intel_pt_synth_transaction_sample (ptq=0x1a36110) #2 0x000000000054c5b2 in intel_pt_sample (ptq=0x1a36110) Caused by checking the 'callchain' flag when it should have been the 'last_branch' flag. Fix that. Reported-by: Andi Kleen Signed-off-by: Adrian Hunter Cc: Jiri Olsa Fixes: f14445ee72c5 ("perf intel-pt: Support generating branch stack") Link: http://lkml.kernel.org/r/1460977068-11566-1-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/intel-pt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 97f963a3dcb9..9227c2f076c3 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1127,7 +1127,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", ret); - if (pt->synth_opts.callchain) + if (pt->synth_opts.last_branch) intel_pt_reset_last_branch_rb(ptq); return ret; From 46b9a1550e0ecf73b83c02c8435eedc01dde2055 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 13 Apr 2016 13:59:14 +1000 Subject: [PATCH 530/797] i2c: cpm: Fix build break due to incompatible pointer types commit 609d5a1b2b35bb62b4b3750396e55453160c2a17 upstream. Since commit ea8daa7b9784 ("kbuild: Add option to turn incompatible pointer check into error"), assignments from an incompatible pointer types have become a hard error, eg: drivers/i2c/busses/i2c-cpm.c:545:91: error: passing argument 3 of 'dma_alloc_coherent' from incompatible pointer type Fix the build break by converting txdma & rxdma to dma_addr_t. Signed-off-by: Michael Ellerman Signed-off-by: Wolfram Sang Fixes: ea8daa7b9784 Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-cpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c index 714bdc837769..b167ab25310a 100644 --- a/drivers/i2c/busses/i2c-cpm.c +++ b/drivers/i2c/busses/i2c-cpm.c @@ -116,8 +116,8 @@ struct cpm_i2c { cbd_t __iomem *rbase; u_char *txbuf[CPM_MAXBD]; u_char *rxbuf[CPM_MAXBD]; - u32 txdma[CPM_MAXBD]; - u32 rxdma[CPM_MAXBD]; + dma_addr_t txdma[CPM_MAXBD]; + dma_addr_t rxdma[CPM_MAXBD]; }; static irqreturn_t cpm_i2c_interrupt(int irq, void *dev_id) From 3b566a5c38b7311a545ac536a3b43944153918d2 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sat, 16 Apr 2016 21:14:52 -0400 Subject: [PATCH 531/797] i2c: exynos5: Fix possible ABBA deadlock by keeping I2C clock prepared commit 10ff4c5239a137abfc896ec73ef3d15a0f86a16a upstream. The exynos5 I2C controller driver always prepares and enables a clock before using it and then disables unprepares it when the clock is not used anymore. But this can cause a possible ABBA deadlock in some scenarios since a driver that uses regmap to access its I2C registers, will first grab the regmap lock and then the I2C xfer function will grab the prepare lock when preparing the I2C clock. But since the clock driver also uses regmap for I2C accesses, preparing a clock will first grab the prepare lock and then the regmap lock when using the regmap API. An example of this happens on the Exynos5422 Odroid XU4 board where a s2mps11 PMIC is used and both the s2mps11 regulators and clk drivers share the same I2C regmap. The possible deadlock is reported by the kernel lockdep: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(sec_core:428:(regmap)->lock); lock(prepare_lock); lock(sec_core:428:(regmap)->lock); lock(prepare_lock); *** DEADLOCK *** Fix it by leaving the code prepared on probe and use {en,dis}able in the I2C transfer function. This patch is similar to commit 34e81ad5f0b6 ("i2c: s3c2410: fix ABBA deadlock by keeping clock prepared") that fixes the same bug in other driver for an I2C controller found in Samsung SoCs. Reported-by: Anand Moon Signed-off-by: Javier Martinez Canillas Reviewed-by: Anand Moon Reviewed-by: Krzysztof Kozlowski Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-exynos5.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c index b29c7500461a..f54ece8fce78 100644 --- a/drivers/i2c/busses/i2c-exynos5.c +++ b/drivers/i2c/busses/i2c-exynos5.c @@ -671,7 +671,9 @@ static int exynos5_i2c_xfer(struct i2c_adapter *adap, return -EIO; } - clk_prepare_enable(i2c->clk); + ret = clk_enable(i2c->clk); + if (ret) + return ret; for (i = 0; i < num; i++, msgs++) { stop = (i == num - 1); @@ -695,7 +697,7 @@ static int exynos5_i2c_xfer(struct i2c_adapter *adap, } out: - clk_disable_unprepare(i2c->clk); + clk_disable(i2c->clk); return ret; } @@ -747,7 +749,9 @@ static int exynos5_i2c_probe(struct platform_device *pdev) return -ENOENT; } - clk_prepare_enable(i2c->clk); + ret = clk_prepare_enable(i2c->clk); + if (ret) + return ret; mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); i2c->regs = devm_ioremap_resource(&pdev->dev, mem); @@ -799,6 +803,10 @@ static int exynos5_i2c_probe(struct platform_device *pdev) platform_set_drvdata(pdev, i2c); + clk_disable(i2c->clk); + + return 0; + err_clk: clk_disable_unprepare(i2c->clk); return ret; @@ -810,6 +818,8 @@ static int exynos5_i2c_remove(struct platform_device *pdev) i2c_del_adapter(&i2c->adap); + clk_unprepare(i2c->clk); + return 0; } @@ -821,6 +831,8 @@ static int exynos5_i2c_suspend_noirq(struct device *dev) i2c->suspended = 1; + clk_unprepare(i2c->clk); + return 0; } @@ -830,7 +842,9 @@ static int exynos5_i2c_resume_noirq(struct device *dev) struct exynos5_i2c *i2c = platform_get_drvdata(pdev); int ret = 0; - clk_prepare_enable(i2c->clk); + ret = clk_prepare_enable(i2c->clk); + if (ret) + return ret; ret = exynos5_hsi2c_clock_setup(i2c); if (ret) { @@ -839,7 +853,7 @@ static int exynos5_i2c_resume_noirq(struct device *dev) } exynos5_i2c_init(i2c); - clk_disable_unprepare(i2c->clk); + clk_disable(i2c->clk); i2c->suspended = 0; return 0; From 7f8150d728eef82de079ce4fc9e8b4c47aca101e Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Fri, 22 Apr 2016 09:29:36 -0600 Subject: [PATCH 532/797] toshiba_acpi: Fix regression caused by hotkey enabling value commit a30b8f81d9d6fe24eab8a023794548b048f08e3c upstream. Commit 52cbae0127ad ("toshiba_acpi: Change default Hotkey enabling value") changed the hotkeys enabling value, as it was the same value Windows uses, however, it turns out that the value tells the EC that the driver will now take care of the hardware events like the physical RFKill switch or the pointing device toggle button. This patch reverts such commit by changing the default hotkey enabling value to 0x09, which enables hotkey events only, making the hardware buttons working again. Fixes bugs 113331 and 114941. Signed-off-by: Azael Avalos Signed-off-by: Darren Hart Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/toshiba_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index b0f62141ea4d..f774cb576ffa 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -131,7 +131,7 @@ MODULE_LICENSE("GPL"); /* Field definitions */ #define HCI_ACCEL_MASK 0x7fff #define HCI_HOTKEY_DISABLE 0x0b -#define HCI_HOTKEY_ENABLE 0x01 +#define HCI_HOTKEY_ENABLE 0x09 #define HCI_HOTKEY_SPECIAL_FUNCTIONS 0x10 #define HCI_LCD_BRIGHTNESS_BITS 3 #define HCI_LCD_BRIGHTNESS_SHIFT (16-HCI_LCD_BRIGHTNESS_BITS) From 4d32650fcd8c9097fa0f69d39f0aae80a4b7fd79 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 29 Apr 2016 15:42:25 +0200 Subject: [PATCH 533/797] EDAC: i7core, sb_edac: Don't return NOTIFY_BAD from mce_decoder callback commit c4fc1956fa31003bfbe4f597e359d751568e2954 upstream. Both of these drivers can return NOTIFY_BAD, but this terminates processing other callbacks that were registered later on the chain. Since the driver did nothing to log the error it seems wrong to prevent other interested parties from seeing it. E.g. neither of them had even bothered to check the type of the error to see if it was a memory error before the return NOTIFY_BAD. Signed-off-by: Tony Luck Acked-by: Aristeu Rozanski Acked-by: Mauro Carvalho Chehab Cc: linux-edac Link: http://lkml.kernel.org/r/72937355dd92318d2630979666063f8a2853495b.1461864507.git.tony.luck@intel.com Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- drivers/edac/i7core_edac.c | 2 +- drivers/edac/sb_edac.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 01087a38da22..792bdae2b91d 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1866,7 +1866,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, i7_dev = get_i7core_dev(mce->socketid); if (!i7_dev) - return NOTIFY_BAD; + return NOTIFY_DONE; mci = i7_dev->mci; pvt = mci->pvt_info; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 90c3fe99c786..37649221f81c 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -2254,7 +2254,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, mci = get_mci_for_node_id(mce->socketid); if (!mci) - return NOTIFY_BAD; + return NOTIFY_DONE; pvt = mci->pvt_info; /* From 9d3e910464dbeaae0746ef29c0192caa3e0418c3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Jan 2016 18:07:33 +0100 Subject: [PATCH 534/797] ASoC: s3c24xx: use const snd_soc_component_driver pointer commit ba4bc32eaa39ba7687f0958ae90eec94da613b46 upstream. An older patch to convert the API in the s3c i2s driver ended up passing a const pointer into a function that takes a non-const pointer, so we now get a warning: sound/soc/samsung/s3c2412-i2s.c: In function 's3c2412_iis_dev_probe': sound/soc/samsung/s3c2412-i2s.c:172:9: error: passing argument 3 of 's3c_i2sv2_register_component' discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers] However, the s3c_i2sv2_register_component() function again passes the pointer into another function taking a const, so we just need to change its prototype. Fixes: eca3b01d0885 ("ASoC: switch over to use snd_soc_register_component() on s3c i2s") Signed-off-by: Arnd Bergmann Reviewed-by: Krzysztof Kozlowski Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/samsung/s3c-i2s-v2.c | 2 +- sound/soc/samsung/s3c-i2s-v2.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/samsung/s3c-i2s-v2.c b/sound/soc/samsung/s3c-i2s-v2.c index df65c5b494b1..b6ab3fc5789e 100644 --- a/sound/soc/samsung/s3c-i2s-v2.c +++ b/sound/soc/samsung/s3c-i2s-v2.c @@ -709,7 +709,7 @@ static int s3c2412_i2s_resume(struct snd_soc_dai *dai) #endif int s3c_i2sv2_register_component(struct device *dev, int id, - struct snd_soc_component_driver *cmp_drv, + const struct snd_soc_component_driver *cmp_drv, struct snd_soc_dai_driver *dai_drv) { struct snd_soc_dai_ops *ops = (struct snd_soc_dai_ops *)dai_drv->ops; diff --git a/sound/soc/samsung/s3c-i2s-v2.h b/sound/soc/samsung/s3c-i2s-v2.h index 90abab364b49..d0684145ed1f 100644 --- a/sound/soc/samsung/s3c-i2s-v2.h +++ b/sound/soc/samsung/s3c-i2s-v2.h @@ -101,7 +101,7 @@ extern int s3c_i2sv2_probe(struct snd_soc_dai *dai, * soc core. */ extern int s3c_i2sv2_register_component(struct device *dev, int id, - struct snd_soc_component_driver *cmp_drv, + const struct snd_soc_component_driver *cmp_drv, struct snd_soc_dai_driver *dai_drv); #endif /* __SND_SOC_S3C24XX_S3C_I2SV2_I2S_H */ From c276b2c81f2a10f6d74e5cb1cb7d6b6c7ff85e74 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 27 Jan 2016 14:26:18 +0100 Subject: [PATCH 535/797] ASoC: ssm4567: Reset device before regcache_sync() commit 712a8038cc24dba668afe82f0413714ca87184e0 upstream. When the ssm4567 is powered up the driver calles regcache_sync() to restore the register map content. regcache_sync() assumes that the device is in its power-on reset state. Make sure that this is the case by explicitly resetting the ssm4567 register map before calling regcache_sync() otherwise we might end up with a incorrect register map which leads to undefined behaviour. One such undefined behaviour was observed when returning from system suspend while a playback stream is active, in that case the ssm4567 was kept muted after resume. Fixes: 1ee44ce03011 ("ASoC: ssm4567: Add driver for Analog Devices SSM4567 amplifier") Reported-by: Harsha Priya Tested-by: Fang, Yang A Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/ssm4567.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/codecs/ssm4567.c b/sound/soc/codecs/ssm4567.c index e619d5651b09..080c78e88e10 100644 --- a/sound/soc/codecs/ssm4567.c +++ b/sound/soc/codecs/ssm4567.c @@ -352,6 +352,11 @@ static int ssm4567_set_power(struct ssm4567 *ssm4567, bool enable) regcache_cache_only(ssm4567->regmap, !enable); if (enable) { + ret = regmap_write(ssm4567->regmap, SSM4567_REG_SOFT_RESET, + 0x00); + if (ret) + return ret; + ret = regmap_update_bits(ssm4567->regmap, SSM4567_REG_POWER_CTRL, SSM4567_POWER_SPWDN, 0x00); From 99070b6b5154f69e1f85a6547e8113b03986de7f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 18 Mar 2016 12:04:23 +0000 Subject: [PATCH 536/797] ASoC: dapm: Make sure we have a card when displaying component widgets commit 47325078f2a3e543150e7df967e45756b2fff7ec upstream. The dummy component is reused for all cards so we special case and don't bind it to any of them. This means that code like that displaying the component widgets that tries to look at the card will crash. In the future we will fix this by ensuring that the dummy component looks like other components but that is invasive and so not suitable for a fix. Instead add a special case check here. Reported-by: Harry Pan Suggested-by: Lars-Peter Clausen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-dapm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 416514fe9e63..afb70a5d4fd3 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -2188,6 +2188,13 @@ static ssize_t dapm_widget_show_component(struct snd_soc_component *cmpnt, int count = 0; char *state = "not set"; + /* card won't be set for the dummy component, as a spot fix + * we're checking for that case specifically here but in future + * we will ensure that the dummy component looks like others. + */ + if (!cmpnt->card) + return 0; + list_for_each_entry(w, &cmpnt->card->widgets, list) { if (w->dapm != dapm) continue; From d74252fd2010e660b0f4b2b7bca0feccaf0214c9 Mon Sep 17 00:00:00 2001 From: Sugar Zhang Date: Fri, 18 Mar 2016 14:54:22 +0800 Subject: [PATCH 537/797] ASoC: rt5640: Correct the digital interface data select commit 653aa4645244042826f105aab1be3d01b3d493ca upstream. this patch corrects the interface adc/dac control register definition according to datasheet. Signed-off-by: Sugar Zhang Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/rt5640.c | 2 +- sound/soc/codecs/rt5640.h | 36 ++++++++++++++++++------------------ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index f2beb1aa5763..b1c8bb39cdf1 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -359,7 +359,7 @@ static const DECLARE_TLV_DB_RANGE(bst_tlv, /* Interface data select */ static const char * const rt5640_data_select[] = { - "Normal", "left copy to right", "right copy to left", "Swap"}; + "Normal", "Swap", "left copy to right", "right copy to left"}; static SOC_ENUM_SINGLE_DECL(rt5640_if1_dac_enum, RT5640_DIG_INF_DATA, RT5640_IF1_DAC_SEL_SFT, rt5640_data_select); diff --git a/sound/soc/codecs/rt5640.h b/sound/soc/codecs/rt5640.h index 3deb8babeabb..243f42633989 100644 --- a/sound/soc/codecs/rt5640.h +++ b/sound/soc/codecs/rt5640.h @@ -442,39 +442,39 @@ #define RT5640_IF1_DAC_SEL_MASK (0x3 << 14) #define RT5640_IF1_DAC_SEL_SFT 14 #define RT5640_IF1_DAC_SEL_NOR (0x0 << 14) -#define RT5640_IF1_DAC_SEL_L2R (0x1 << 14) -#define RT5640_IF1_DAC_SEL_R2L (0x2 << 14) -#define RT5640_IF1_DAC_SEL_SWAP (0x3 << 14) +#define RT5640_IF1_DAC_SEL_SWAP (0x1 << 14) +#define RT5640_IF1_DAC_SEL_L2R (0x2 << 14) +#define RT5640_IF1_DAC_SEL_R2L (0x3 << 14) #define RT5640_IF1_ADC_SEL_MASK (0x3 << 12) #define RT5640_IF1_ADC_SEL_SFT 12 #define RT5640_IF1_ADC_SEL_NOR (0x0 << 12) -#define RT5640_IF1_ADC_SEL_L2R (0x1 << 12) -#define RT5640_IF1_ADC_SEL_R2L (0x2 << 12) -#define RT5640_IF1_ADC_SEL_SWAP (0x3 << 12) +#define RT5640_IF1_ADC_SEL_SWAP (0x1 << 12) +#define RT5640_IF1_ADC_SEL_L2R (0x2 << 12) +#define RT5640_IF1_ADC_SEL_R2L (0x3 << 12) #define RT5640_IF2_DAC_SEL_MASK (0x3 << 10) #define RT5640_IF2_DAC_SEL_SFT 10 #define RT5640_IF2_DAC_SEL_NOR (0x0 << 10) -#define RT5640_IF2_DAC_SEL_L2R (0x1 << 10) -#define RT5640_IF2_DAC_SEL_R2L (0x2 << 10) -#define RT5640_IF2_DAC_SEL_SWAP (0x3 << 10) +#define RT5640_IF2_DAC_SEL_SWAP (0x1 << 10) +#define RT5640_IF2_DAC_SEL_L2R (0x2 << 10) +#define RT5640_IF2_DAC_SEL_R2L (0x3 << 10) #define RT5640_IF2_ADC_SEL_MASK (0x3 << 8) #define RT5640_IF2_ADC_SEL_SFT 8 #define RT5640_IF2_ADC_SEL_NOR (0x0 << 8) -#define RT5640_IF2_ADC_SEL_L2R (0x1 << 8) -#define RT5640_IF2_ADC_SEL_R2L (0x2 << 8) -#define RT5640_IF2_ADC_SEL_SWAP (0x3 << 8) +#define RT5640_IF2_ADC_SEL_SWAP (0x1 << 8) +#define RT5640_IF2_ADC_SEL_L2R (0x2 << 8) +#define RT5640_IF2_ADC_SEL_R2L (0x3 << 8) #define RT5640_IF3_DAC_SEL_MASK (0x3 << 6) #define RT5640_IF3_DAC_SEL_SFT 6 #define RT5640_IF3_DAC_SEL_NOR (0x0 << 6) -#define RT5640_IF3_DAC_SEL_L2R (0x1 << 6) -#define RT5640_IF3_DAC_SEL_R2L (0x2 << 6) -#define RT5640_IF3_DAC_SEL_SWAP (0x3 << 6) +#define RT5640_IF3_DAC_SEL_SWAP (0x1 << 6) +#define RT5640_IF3_DAC_SEL_L2R (0x2 << 6) +#define RT5640_IF3_DAC_SEL_R2L (0x3 << 6) #define RT5640_IF3_ADC_SEL_MASK (0x3 << 4) #define RT5640_IF3_ADC_SEL_SFT 4 #define RT5640_IF3_ADC_SEL_NOR (0x0 << 4) -#define RT5640_IF3_ADC_SEL_L2R (0x1 << 4) -#define RT5640_IF3_ADC_SEL_R2L (0x2 << 4) -#define RT5640_IF3_ADC_SEL_SWAP (0x3 << 4) +#define RT5640_IF3_ADC_SEL_SWAP (0x1 << 4) +#define RT5640_IF3_ADC_SEL_L2R (0x2 << 4) +#define RT5640_IF3_ADC_SEL_R2L (0x3 << 4) /* REC Left Mixer Control 1 (0x3b) */ #define RT5640_G_HP_L_RM_L_MASK (0x7 << 13) From b4ea6cf4883569a7c9c0297305033e9e678a03e4 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Thu, 3 Mar 2016 16:12:48 -0300 Subject: [PATCH 538/797] vb2-memops: Fix over allocation of frame vectors commit 89a095668304e8a02502ffd35edacffdbf49aa8c upstream. On page unaligned frames, create_framevec forces get_vaddr_frames to allocate an extra page at the end of the buffer. Under some circumstances, this leads to -EINVAL on VIDIOC_QBUF. E.g: We have vm_a that vm_area that goes from 0x1000 to 0x3000. And a frame that goes from 0x1800 to 0x2800, i.e. 2 pages. frame_vector_create will be called with the following params: get_vaddr_frames(0x1800, 2, write, 1, vec); get_vaddr will allocate the first page after checking that the memory 0x1800-0x27ff is valid, but it will not allocate the second page because the range 0x2800-0x37ff is out of the vm_a range. This results in create_framevec returning -EFAULT Error Trace: [ 9083.793015] video0: VIDIOC_QBUF: 00:00:00.00000000 index=1, type=vid-cap, flags=0x00002002, field=any, sequence=0, memory=userptr, bytesused=0, offset/userptr=0x7ff2b023ca80, length=5765760 [ 9083.793028] timecode=00:00:00 type=0, flags=0x00000000, frames=0, userbits=0x00000000 [ 9083.793117] video0: VIDIOC_QBUF: error -22: 00:00:00.00000000 index=2, type=vid-cap, flags=0x00000000, field=any, sequence=0, memory=userptr, bytesused=0, offset/userptr=0x7ff2b07bc500, length=5765760 Also use true instead of 1 since that argument is a bool in the get_vaddr_frames() prototype. Fixes: 21fb0cb7ec65 ("[media] vb2: Provide helpers for mapping virtual addresses") Reported-by: Albert Antony Signed-off-by: Ricardo Ribalda Delgado [hans.verkuil@cisco.com: merged the 'bool' change into this patch] Acked-by: Marek Szyprowski Reviewed-by: Jan Kara Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/videobuf2-memops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/v4l2-core/videobuf2-memops.c b/drivers/media/v4l2-core/videobuf2-memops.c index dbec5923fcf0..3c3b517f1d1c 100644 --- a/drivers/media/v4l2-core/videobuf2-memops.c +++ b/drivers/media/v4l2-core/videobuf2-memops.c @@ -49,7 +49,7 @@ struct frame_vector *vb2_create_framevec(unsigned long start, vec = frame_vector_create(nr); if (!vec) return ERR_PTR(-ENOMEM); - ret = get_vaddr_frames(start, nr, write, 1, vec); + ret = get_vaddr_frames(start & PAGE_MASK, nr, write, true, vec); if (ret < 0) goto out_destroy; /* We accept only complete set of PFNs */ From a9da0b3dc72e074a2f84fad5f176750968a76bdb Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 22 Apr 2016 04:00:50 -0300 Subject: [PATCH 539/797] v4l2-dv-timings.h: fix polarity for 4k formats commit 3020ca711871fdaf0c15c8bab677a6bc302e28fe upstream. The VSync polarity was negative instead of positive for the 4k CEA formats. I probably copy-and-pasted these from the DMT 4k format, which does have a negative VSync polarity. Signed-off-by: Hans Verkuil Reported-by: Martin Bugge Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/v4l2-dv-timings.h | 30 ++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h index c039f1d68a09..086168e18ca8 100644 --- a/include/uapi/linux/v4l2-dv-timings.h +++ b/include/uapi/linux/v4l2-dv-timings.h @@ -183,7 +183,8 @@ #define V4L2_DV_BT_CEA_3840X2160P24 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 1276, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ @@ -191,14 +192,16 @@ #define V4L2_DV_BT_CEA_3840X2160P25 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_3840X2160P30 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ @@ -206,14 +209,16 @@ #define V4L2_DV_BT_CEA_3840X2160P50 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 594000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_3840X2160P60 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 594000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ @@ -221,7 +226,8 @@ #define V4L2_DV_BT_CEA_4096X2160P24 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 1020, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ @@ -229,14 +235,16 @@ #define V4L2_DV_BT_CEA_4096X2160P25 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_4096X2160P30 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ @@ -244,14 +252,16 @@ #define V4L2_DV_BT_CEA_4096X2160P50 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 594000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_4096X2160P60 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 594000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ From b184522f688a31765a24081ed231e480e76edae6 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 22 Apr 2016 14:57:48 +1000 Subject: [PATCH 540/797] cxl: Keep IRQ mappings on context teardown commit d6776bba44d9752f6cdf640046070e71ee4bba7b upstream. Keep IRQ mappings on context teardown. This won't leak IRQs as if we allocate the mapping again, the generic code will give the same mapping used last time. Doing this works around a race in the generic code. Masking the interrupt introduces a race which can crash the kernel or result in IRQ that is never EOIed. The lost of EOI results in all subsequent mappings to the same HW IRQ never receiving an interrupt. We've seen this race with cxl test cases which are doing heavy context startup and teardown at the same time as heavy interrupt load. A fix to the generic code is being investigated also. Signed-off-by: Michael Neuling Tested-by: Andrew Donnellan Acked-by: Ian Munsie Tested-by: Vaibhav Jain Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/irq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 09a406058c46..efbb6945eb18 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -288,7 +288,6 @@ unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, void cxl_unmap_irq(unsigned int virq, void *cookie) { free_irq(virq, cookie); - irq_dispose_mapping(virq); } static int cxl_register_one_irq(struct cxl *adapter, From 29ebbba744cf8951202b5f4ea62b4a297f4662c1 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 31 Mar 2016 19:03:25 +0300 Subject: [PATCH 541/797] IB/mlx5: Expose correct max_sge_rd limit commit 986ef95ecdd3eb6fa29433e68faa94c7624083be upstream. mlx5 devices (Connect-IB, ConnectX-4, ConnectX-4-LX) has a limitation where rdma read work queue entries cannot exceed 512 bytes. A rdma_read wqe needs to fit in 512 bytes: - wqe control segment (16 bytes) - rdma segment (16 bytes) - scatter elements (16 bytes each) So max_sge_rd should be: (512 - 16 - 16) / 16 = 30. Reported-by: Christoph Hellwig Tested-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/main.c | 2 +- include/linux/mlx5/device.h | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c4e091528390..721d63f5b461 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -273,7 +273,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, sizeof(struct mlx5_wqe_ctrl_seg)) / sizeof(struct mlx5_wqe_data_seg); props->max_sge = min(max_rq_sg, max_sq_sg); - props->max_sge_rd = props->max_sge; + props->max_sge_rd = MLX5_MAX_SGE_RD; props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0b473cbfa7ef..a91b67b18a73 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -334,6 +334,17 @@ enum { MLX5_CAP_OFF_CMDIF_CSUM = 46, }; +enum { + /* + * Max wqe size for rdma read is 512 bytes, so this + * limits our max_sge_rd as the wqe needs to fit: + * - ctrl segment (16 bytes) + * - rdma segment (16 bytes) + * - scatter elements (16 bytes each) + */ + MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16 +}; + struct mlx5_inbox_hdr { __be16 opcode; u8 rsvd[4]; From c92003c18feb8159cbf64bc0afa7b048869fe3c6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 10 Apr 2016 19:13:13 -0600 Subject: [PATCH 542/797] IB/security: Restrict use of the write() interface commit e6bd18f57aad1a2d1ef40e646d03ed0f2515c9e3 upstream. The drivers/infiniband stack uses write() as a replacement for bi-directional ioctl(). This is not safe. There are ways to trigger write calls that result in the return structure that is normally written to user space being shunted off to user specified kernel memory instead. For the immediate repair, detect and deny suspicious accesses to the write API. For long term, update the user space libraries and the kernel API to something that doesn't present the same security vulnerabilities (likely a structured ioctl() interface). The impacted uAPI interfaces are generally only available if hardware from drivers/infiniband is installed in the system. Reported-by: Jann Horn Signed-off-by: Linus Torvalds Signed-off-by: Jason Gunthorpe [ Expanded check to all known write() entry points ] Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/ucm.c | 4 ++++ drivers/infiniband/core/ucma.c | 3 +++ drivers/infiniband/core/uverbs_main.c | 5 +++++ drivers/infiniband/hw/qib/qib_file_ops.c | 5 +++++ drivers/staging/rdma/hfi1/TODO | 2 +- drivers/staging/rdma/hfi1/file_ops.c | 6 ++++++ include/rdma/ib.h | 16 ++++++++++++++++ 7 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 6b4e8a008bc0..564adf3116e8 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -48,6 +48,7 @@ #include +#include #include #include #include @@ -1103,6 +1104,9 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, struct ib_ucm_cmd_hdr hdr; ssize_t result; + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + return -EACCES; + if (len < sizeof(hdr)) return -EINVAL; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 8b5a934e1133..886f61ea6cc7 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1574,6 +1574,9 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, struct rdma_ucm_cmd_hdr hdr; ssize_t ret; + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + return -EACCES; + if (len < sizeof(hdr)) return -EINVAL; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index e3ef28861be6..24f3ca2c4ad7 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -48,6 +48,8 @@ #include +#include + #include "uverbs.h" MODULE_AUTHOR("Roland Dreier"); @@ -682,6 +684,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, int srcu_key; ssize_t ret; + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + return -EACCES; + if (count < sizeof hdr) return -EINVAL; diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index e449e394963f..24f4a782e0f4 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -45,6 +45,8 @@ #include #include +#include + #include "qib.h" #include "qib_common.h" #include "qib_user_sdma.h" @@ -2067,6 +2069,9 @@ static ssize_t qib_write(struct file *fp, const char __user *data, ssize_t ret = 0; void *dest; + if (WARN_ON_ONCE(!ib_safe_file_access(fp))) + return -EACCES; + if (count < sizeof(cmd.type)) { ret = -EINVAL; goto bail; diff --git a/drivers/staging/rdma/hfi1/TODO b/drivers/staging/rdma/hfi1/TODO index 05de0dad8762..4c6f1d7d2eaf 100644 --- a/drivers/staging/rdma/hfi1/TODO +++ b/drivers/staging/rdma/hfi1/TODO @@ -3,4 +3,4 @@ July, 2015 - Remove unneeded file entries in sysfs - Remove software processing of IB protocol and place in library for use by qib, ipath (if still present), hfi1, and eventually soft-roce - +- Replace incorrect uAPI diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index aae9826ec62b..c851e51b1dc3 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -62,6 +62,8 @@ #include #include +#include + #include "hfi.h" #include "pio.h" #include "device.h" @@ -214,6 +216,10 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, int uctxt_required = 1; int must_be_root = 0; + /* FIXME: This interface cannot continue out of staging */ + if (WARN_ON_ONCE(!ib_safe_file_access(fp))) + return -EACCES; + if (count < sizeof(cmd)) { ret = -EINVAL; goto bail; diff --git a/include/rdma/ib.h b/include/rdma/ib.h index cf8f9e700e48..a6b93706b0fc 100644 --- a/include/rdma/ib.h +++ b/include/rdma/ib.h @@ -34,6 +34,7 @@ #define _RDMA_IB_H #include +#include struct ib_addr { union { @@ -86,4 +87,19 @@ struct sockaddr_ib { __u64 sib_scope_id; }; +/* + * The IB interfaces that use write() as bi-directional ioctl() are + * fundamentally unsafe, since there are lots of ways to trigger "write()" + * calls from various contexts with elevated privileges. That includes the + * traditional suid executable error message writes, but also various kernel + * interfaces that can write to file descriptors. + * + * This function provides protection for the legacy API by restricting the + * calling context. + */ +static inline bool ib_safe_file_access(struct file *filp) +{ + return filp->f_cred == current_cred() && segment_eq(get_fs(), USER_DS); +} + #endif /* _RDMA_IB_H */ From 513f5c33b5208dbd090f56c843aead053cb3d7a3 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Thu, 21 Apr 2016 18:21:11 +0200 Subject: [PATCH 543/797] efi: Fix out-of-bounds read in variable_matches() commit 630ba0cc7a6dbafbdee43795617c872b35cde1b4 upstream. The variable_matches() function can currently read "var_name[len]", for example when: - var_name[0] == 'a', - len == 1 - match_name points to the NUL-terminated string "ab". This function is supposed to accept "var_name" inputs that are not NUL-terminated (hence the "len" parameter"). Document the function, and access "var_name[*match]" only if "*match" is smaller than "len". Reported-by: Chris Wilson Signed-off-by: Laszlo Ersek Cc: Peter Jones Cc: Matthew Garrett Cc: Jason Andryuk Cc: Jani Nikula Link: http://thread.gmane.org/gmane.comp.freedesktop.xorg.drivers.intel/86906 Signed-off-by: Matt Fleming Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/vars.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 7f2ea21c730d..6f182fd91a6d 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -202,29 +202,44 @@ static const struct variable_validate variable_validate[] = { { NULL_GUID, "", NULL }, }; +/* + * Check if @var_name matches the pattern given in @match_name. + * + * @var_name: an array of @len non-NUL characters. + * @match_name: a NUL-terminated pattern string, optionally ending in "*". A + * final "*" character matches any trailing characters @var_name, + * including the case when there are none left in @var_name. + * @match: on output, the number of non-wildcard characters in @match_name + * that @var_name matches, regardless of the return value. + * @return: whether @var_name fully matches @match_name. + */ static bool variable_matches(const char *var_name, size_t len, const char *match_name, int *match) { for (*match = 0; ; (*match)++) { char c = match_name[*match]; - char u = var_name[*match]; - /* Wildcard in the matching name means we've matched */ - if (c == '*') + switch (c) { + case '*': + /* Wildcard in @match_name means we've matched. */ return true; - /* Case sensitive match */ - if (!c && *match == len) - return true; + case '\0': + /* @match_name has ended. Has @var_name too? */ + return (*match == len); - if (c != u) + default: + /* + * We've reached a non-wildcard char in @match_name. + * Continue only if there's an identical character in + * @var_name. + */ + if (*match < len && c == var_name[*match]) + continue; return false; - - if (!c) - return true; + } } - return true; } bool From b8f80ba7e09ca1945946d4a6d7391c0795ff99f7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2016 22:06:55 +0000 Subject: [PATCH 544/797] efi: Expose non-blocking set_variable() wrapper to efivars commit 9c6672ac9c91f7eb1ec436be1442b8c26d098e55 upstream. Commit 6d80dba1c9fe ("efi: Provide a non-blocking SetVariable() operation") implemented a non-blocking alternative for the UEFI SetVariable() invocation performed by efivars, since it may occur in atomic context. However, this version of the function was never exposed via the efivars struct, so the non-blocking versions was not actually callable. Fix that. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 6d80dba1c9fe ("efi: Provide a non-blocking SetVariable() operation") Link: http://lkml.kernel.org/r/1454364428-494-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/efi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 027ca212179f..3b52677f459a 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -180,6 +180,7 @@ static int generic_ops_register(void) { generic_ops.get_variable = efi.get_variable; generic_ops.set_variable = efi.set_variable; + generic_ops.set_variable_nonblocking = efi.set_variable_nonblocking; generic_ops.get_next_variable = efi.get_next_variable; generic_ops.query_variable_store = efi_query_variable_store; From 01d5ccd341290e771ac6b94b08c220df6f81a630 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 27 Apr 2016 14:22:32 -0600 Subject: [PATCH 545/797] x86/apic: Handle zero vector gracefully in clear_vector_irq() commit 1bdb8970392a68489b469c3a330a1adb5ef61beb upstream. If x86_vector_alloc_irq() fails x86_vector_free_irqs() is invoked to cleanup the already allocated vectors. This subsequently calls clear_vector_irq(). The failed irq has no vector assigned, which triggers the BUG_ON(!vector) in clear_vector_irq(). We cannot suppress the call to x86_vector_free_irqs() for the failed interrupt, because the other data related to this irq must be cleaned up as well. So calling clear_vector_irq() with vector == 0 is legitimate. Remove the BUG_ON and return if vector is zero, [ tglx: Massaged changelog ] Fixes: b5dc8e6c21e7 "x86/irq: Use hierarchical irqdomain to manage CPU interrupt vectors" Signed-off-by: Keith Busch Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 7af2505f20c2..df6b4eeac0bd 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -254,7 +254,8 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data) struct irq_desc *desc; int cpu, vector; - BUG_ON(!data->cfg.vector); + if (!data->cfg.vector) + return; vector = data->cfg.vector; for_each_cpu_and(cpu, data->domain, cpu_online_mask) From 2da9606aea5a8fd1b710f8c8dd5295da4825e9cd Mon Sep 17 00:00:00 2001 From: Roman Pen Date: Tue, 26 Apr 2016 13:15:35 +0200 Subject: [PATCH 546/797] workqueue: fix ghost PENDING flag while doing MQ IO commit 346c09f80459a3ad97df1816d6d606169a51001a upstream. The bug in a workqueue leads to a stalled IO request in MQ ctx->rq_list with the following backtrace: [ 601.347452] INFO: task kworker/u129:5:1636 blocked for more than 120 seconds. [ 601.347574] Tainted: G O 4.4.5-1-storage+ #6 [ 601.347651] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 601.348142] kworker/u129:5 D ffff880803077988 0 1636 2 0x00000000 [ 601.348519] Workqueue: ibnbd_server_fileio_wq ibnbd_dev_file_submit_io_worker [ibnbd_server] [ 601.348999] ffff880803077988 ffff88080466b900 ffff8808033f9c80 ffff880803078000 [ 601.349662] ffff880807c95000 7fffffffffffffff ffffffff815b0920 ffff880803077ad0 [ 601.350333] ffff8808030779a0 ffffffff815b01d5 0000000000000000 ffff880803077a38 [ 601.350965] Call Trace: [ 601.351203] [] ? bit_wait+0x60/0x60 [ 601.351444] [] schedule+0x35/0x80 [ 601.351709] [] schedule_timeout+0x192/0x230 [ 601.351958] [] ? blk_flush_plug_list+0xc7/0x220 [ 601.352208] [] ? ktime_get+0x37/0xa0 [ 601.352446] [] ? bit_wait+0x60/0x60 [ 601.352688] [] io_schedule_timeout+0xa4/0x110 [ 601.352951] [] ? _raw_spin_unlock_irqrestore+0xe/0x10 [ 601.353196] [] bit_wait_io+0x1b/0x70 [ 601.353440] [] __wait_on_bit+0x5d/0x90 [ 601.353689] [] wait_on_page_bit+0xc0/0xd0 [ 601.353958] [] ? autoremove_wake_function+0x40/0x40 [ 601.354200] [] __filemap_fdatawait_range+0xe4/0x140 [ 601.354441] [] filemap_fdatawait_range+0x14/0x30 [ 601.354688] [] filemap_write_and_wait_range+0x3f/0x70 [ 601.354932] [] blkdev_fsync+0x1b/0x50 [ 601.355193] [] vfs_fsync_range+0x49/0xa0 [ 601.355432] [] blkdev_write_iter+0xca/0x100 [ 601.355679] [] __vfs_write+0xaa/0xe0 [ 601.355925] [] vfs_write+0xa9/0x1a0 [ 601.356164] [] kernel_write+0x38/0x50 The underlying device is a null_blk, with default parameters: queue_mode = MQ submit_queues = 1 Verification that nullb0 has something inflight: root@pserver8:~# cat /sys/block/nullb0/inflight 0 1 root@pserver8:~# find /sys/block/nullb0/mq/0/cpu* -name rq_list -print -exec cat {} \; ... /sys/block/nullb0/mq/0/cpu2/rq_list CTX pending: ffff8838038e2400 ... During debug it became clear that stalled request is always inserted in the rq_list from the following path: save_stack_trace_tsk + 34 blk_mq_insert_requests + 231 blk_mq_flush_plug_list + 281 blk_flush_plug_list + 199 wait_on_page_bit + 192 __filemap_fdatawait_range + 228 filemap_fdatawait_range + 20 filemap_write_and_wait_range + 63 blkdev_fsync + 27 vfs_fsync_range + 73 blkdev_write_iter + 202 __vfs_write + 170 vfs_write + 169 kernel_write + 56 So blk_flush_plug_list() was called with from_schedule == true. If from_schedule is true, that means that finally blk_mq_insert_requests() offloads execution of __blk_mq_run_hw_queue() and uses kblockd workqueue, i.e. it calls kblockd_schedule_delayed_work_on(). That means, that we race with another CPU, which is about to execute __blk_mq_run_hw_queue() work. Further debugging shows the following traces from different CPUs: CPU#0 CPU#1 ---------------------------------- ------------------------------- reqeust A inserted STORE hctx->ctx_map[0] bit marked kblockd_schedule...() returns 1 request B inserted STORE hctx->ctx_map[1] bit marked kblockd_schedule...() returns 0 *** WORK PENDING bit is cleared *** flush_busy_ctxs() is executed, but bit 1, set by CPU#1, is not observed As a result request B pended forever. This behaviour can be explained by speculative LOAD of hctx->ctx_map on CPU#0, which is reordered with clear of PENDING bit and executed _before_ actual STORE of bit 1 on CPU#1. The proper fix is an explicit full barrier , which guarantees that clear of PENDING bit is to be executed before all possible speculative LOADS or STORES inside actual work function. Signed-off-by: Roman Pen Cc: Gioh Kim Cc: Michael Wang Cc: Tejun Heo Cc: Jens Axboe Cc: linux-block@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 450c21fd0e6e..0ec05948a97b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -649,6 +649,35 @@ static void set_work_pool_and_clear_pending(struct work_struct *work, */ smp_wmb(); set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0); + /* + * The following mb guarantees that previous clear of a PENDING bit + * will not be reordered with any speculative LOADS or STORES from + * work->current_func, which is executed afterwards. This possible + * reordering can lead to a missed execution on attempt to qeueue + * the same @work. E.g. consider this case: + * + * CPU#0 CPU#1 + * ---------------------------- -------------------------------- + * + * 1 STORE event_indicated + * 2 queue_work_on() { + * 3 test_and_set_bit(PENDING) + * 4 } set_..._and_clear_pending() { + * 5 set_work_data() # clear bit + * 6 smp_mb() + * 7 work->current_func() { + * 8 LOAD event_indicated + * } + * + * Without an explicit full barrier speculative LOAD on line 8 can + * be executed before CPU#0 does STORE on line 1. If that happens, + * CPU#0 observes the PENDING bit is still set and new execution of + * a @work is not queued in a hope, that CPU#1 will eventually + * finish the queued @work. Meanwhile CPU#1 does not see + * event_indicated is set, because speculative LOAD was executed + * before actual STORE. + */ + smp_mb(); } static void clear_work_data(struct work_struct *work) From a4e25ff31103e7c9084904418cb95596e3e9d9cf Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 15 Mar 2016 14:53:32 -0700 Subject: [PATCH 547/797] slub: clean up code for kmem cgroup support to kmem_cache_free_bulk commit 376bf125ac781d32e202760ed7deb1ae4ed35d31 upstream. This change is primarily an attempt to make it easier to realize the optimizations the compiler performs in-case CONFIG_MEMCG_KMEM is not enabled. Performance wise, even when CONFIG_MEMCG_KMEM is compiled in, the overhead is zero. This is because, as long as no process have enabled kmem cgroups accounting, the assignment is replaced by asm-NOP operations. This is possible because memcg_kmem_enabled() uses a static_key_false() construct. It also helps readability as it avoid accessing the p[] array like: p[size - 1] which "expose" that the array is processed backwards inside helper function build_detached_freelist(). Lastly this also makes the code more robust, in error case like passing NULL pointers in the array. Which were previously handled before commit 033745189b1b ("slub: add missing kmem cgroup support to kmem_cache_free_bulk"). Fixes: 033745189b1b ("slub: add missing kmem cgroup support to kmem_cache_free_bulk") Signed-off-by: Jesper Dangaard Brouer Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 46997517406e..65d5f92d51d2 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2819,6 +2819,7 @@ struct detached_freelist { void *tail; void *freelist; int cnt; + struct kmem_cache *s; }; /* @@ -2833,8 +2834,9 @@ struct detached_freelist { * synchronization primitive. Look ahead in the array is limited due * to performance reasons. */ -static int build_detached_freelist(struct kmem_cache *s, size_t size, - void **p, struct detached_freelist *df) +static inline +int build_detached_freelist(struct kmem_cache *s, size_t size, + void **p, struct detached_freelist *df) { size_t first_skipped_index = 0; int lookahead = 3; @@ -2850,8 +2852,11 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size, if (!object) return 0; + /* Support for memcg, compiler can optimize this out */ + df->s = cache_from_obj(s, object); + /* Start new detached freelist */ - set_freepointer(s, object, NULL); + set_freepointer(df->s, object, NULL); df->page = virt_to_head_page(object); df->tail = object; df->freelist = object; @@ -2866,7 +2871,7 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size, /* df->page is always set at this point */ if (df->page == virt_to_head_page(object)) { /* Opportunity build freelist */ - set_freepointer(s, object, df->freelist); + set_freepointer(df->s, object, df->freelist); df->freelist = object; df->cnt++; p[size] = NULL; /* mark object processed */ @@ -2885,25 +2890,20 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size, return first_skipped_index; } - /* Note that interrupts must be enabled when calling this function. */ -void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) +void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) { if (WARN_ON(!size)) return; do { struct detached_freelist df; - struct kmem_cache *s; - - /* Support for memcg */ - s = cache_from_obj(orig_s, p[size - 1]); size = build_detached_freelist(s, size, p, &df); if (unlikely(!df.page)) continue; - slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); + slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_); } while (likely(size)); } EXPORT_SYMBOL(kmem_cache_free_bulk); From d52097476caeb14f4d7e3417dda08220d2813cc4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 21 Apr 2016 19:06:48 -0400 Subject: [PATCH 548/797] cgroup, cpuset: replace cpuset_post_attach_flush() with cgroup_subsys->post_attach callback commit 5cf1cacb49aee39c3e02ae87068fc3c6430659b0 upstream. Since e93ad19d0564 ("cpuset: make mm migration asynchronous"), cpuset kicks off asynchronous NUMA node migration if necessary during task migration and flushes it from cpuset_post_attach_flush() which is called at the end of __cgroup_procs_write(). This is to avoid performing migration with cgroup_threadgroup_rwsem write-locked which can lead to deadlock through dependency on kworker creation. memcg has a similar issue with charge moving, so let's convert it to an official callback rather than the current one-off cpuset specific function. This patch adds cgroup_subsys->post_attach callback and makes cpuset register cpuset_post_attach_flush() as its ->post_attach. The conversion is mostly one-to-one except that the new callback is called under cgroup_mutex. This is to guarantee that no other migration operations are started before ->post_attach callbacks are finished. cgroup_mutex is one of the outermost mutex in the system and has never been and shouldn't be a problem. We can add specialized synchronization around __cgroup_procs_write() but I don't think there's any noticeable benefit. Signed-off-by: Tejun Heo Cc: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Greg Kroah-Hartman --- include/linux/cgroup-defs.h | 1 + include/linux/cpuset.h | 6 ------ kernel/cgroup.c | 7 +++++-- kernel/cpuset.c | 4 ++-- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index a7c7f74808a4..8da263299754 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -434,6 +434,7 @@ struct cgroup_subsys { int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); void (*attach)(struct cgroup_taskset *tset); + void (*post_attach)(void); int (*can_fork)(struct task_struct *task, void **priv_p); void (*cancel_fork)(struct task_struct *task, void *priv); void (*fork)(struct task_struct *task, void *priv); diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index fea160ee5803..85a868ccb493 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -137,8 +137,6 @@ static inline void set_mems_allowed(nodemask_t nodemask) task_unlock(current); } -extern void cpuset_post_attach_flush(void); - #else /* !CONFIG_CPUSETS */ static inline bool cpusets_enabled(void) { return false; } @@ -245,10 +243,6 @@ static inline bool read_mems_allowed_retry(unsigned int seq) return false; } -static inline void cpuset_post_attach_flush(void) -{ -} - #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index dc94f8beb097..b0ea3aebc05a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2721,9 +2721,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off, bool threadgroup) { struct task_struct *tsk; + struct cgroup_subsys *ss; struct cgroup *cgrp; pid_t pid; - int ret; + int ssid, ret; if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) return -EINVAL; @@ -2771,8 +2772,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, rcu_read_unlock(); out_unlock_threadgroup: percpu_up_write(&cgroup_threadgroup_rwsem); + for_each_subsys(ss, ssid) + if (ss->post_attach) + ss->post_attach(); cgroup_kn_unlock(of->kn); - cpuset_post_attach_flush(); return ret ?: nbytes; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 2ade632197d5..11eaf14b52c2 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -57,7 +57,6 @@ #include #include #include -#include #include #include @@ -1015,7 +1014,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, } } -void cpuset_post_attach_flush(void) +static void cpuset_post_attach(void) { flush_workqueue(cpuset_migrate_mm_wq); } @@ -2083,6 +2082,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = { .can_attach = cpuset_can_attach, .cancel_attach = cpuset_cancel_attach, .attach = cpuset_attach, + .post_attach = cpuset_post_attach, .bind = cpuset_bind, .legacy_cftypes = files, .early_init = 1, From 52526076a5a686906a0acc22d27530ecb9364d84 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 21 Apr 2016 19:09:02 -0400 Subject: [PATCH 549/797] memcg: relocate charge moving from ->attach to ->post_attach commit 264a0ae164bc0e9144bebcd25ff030d067b1a878 upstream. Hello, So, this ended up a lot simpler than I originally expected. I tested it lightly and it seems to work fine. Petr, can you please test these two patches w/o the lru drain drop patch and see whether the problem is gone? Thanks. ------ 8< ------ If charge moving is used, memcg performs relabeling of the affected pages from its ->attach callback which is called under both cgroup_threadgroup_rwsem and thus can't create new kthreads. This is fragile as various operations may depend on workqueues making forward progress which relies on the ability to create new kthreads. There's no reason to perform charge moving from ->attach which is deep in the task migration path. Move it to ->post_attach which is called after the actual migration is finished and cgroup_threadgroup_rwsem is dropped. * move_charge_struct->mm is added and ->can_attach is now responsible for pinning and recording the target mm. mem_cgroup_clear_mc() is updated accordingly. This also simplifies mem_cgroup_move_task(). * mem_cgroup_move_task() is now called from ->post_attach instead of ->attach. Signed-off-by: Tejun Heo Cc: Johannes Weiner Acked-by: Michal Hocko Debugged-and-tested-by: Petr Mladek Reported-by: Cyril Hrubis Reported-by: Johannes Weiner Fixes: 1ed1328792ff ("sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem") Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fc0bcc41d57f..6ba4dd988e2e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -196,6 +196,7 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); /* "mc" and its members are protected by cgroup_mutex */ static struct move_charge_struct { spinlock_t lock; /* for from, to */ + struct mm_struct *mm; struct mem_cgroup *from; struct mem_cgroup *to; unsigned long flags; @@ -4800,6 +4801,8 @@ static void __mem_cgroup_clear_mc(void) static void mem_cgroup_clear_mc(void) { + struct mm_struct *mm = mc.mm; + /* * we must clear moving_task before waking up waiters at the end of * task migration. @@ -4809,7 +4812,10 @@ static void mem_cgroup_clear_mc(void) spin_lock(&mc.lock); mc.from = NULL; mc.to = NULL; + mc.mm = NULL; spin_unlock(&mc.lock); + + mmput(mm); } static int mem_cgroup_can_attach(struct cgroup_taskset *tset) @@ -4866,6 +4872,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset) VM_BUG_ON(mc.moved_swap); spin_lock(&mc.lock); + mc.mm = mm; mc.from = from; mc.to = memcg; mc.flags = move_flags; @@ -4875,8 +4882,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset) ret = mem_cgroup_precharge_mc(mm); if (ret) mem_cgroup_clear_mc(); + } else { + mmput(mm); } - mmput(mm); return ret; } @@ -4985,11 +4993,11 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, return ret; } -static void mem_cgroup_move_charge(struct mm_struct *mm) +static void mem_cgroup_move_charge(void) { struct mm_walk mem_cgroup_move_charge_walk = { .pmd_entry = mem_cgroup_move_charge_pte_range, - .mm = mm, + .mm = mc.mm, }; lru_add_drain_all(); @@ -5001,7 +5009,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) atomic_inc(&mc.from->moving_account); synchronize_rcu(); retry: - if (unlikely(!down_read_trylock(&mm->mmap_sem))) { + if (unlikely(!down_read_trylock(&mc.mm->mmap_sem))) { /* * Someone who are holding the mmap_sem might be waiting in * waitq. So we cancel all extra charges, wake up all waiters, @@ -5018,23 +5026,16 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) * additional charge, the page walk just aborts. */ walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk); - up_read(&mm->mmap_sem); + up_read(&mc.mm->mmap_sem); atomic_dec(&mc.from->moving_account); } -static void mem_cgroup_move_task(struct cgroup_taskset *tset) +static void mem_cgroup_move_task(void) { - struct cgroup_subsys_state *css; - struct task_struct *p = cgroup_taskset_first(tset, &css); - struct mm_struct *mm = get_task_mm(p); - - if (mm) { - if (mc.to) - mem_cgroup_move_charge(mm); - mmput(mm); - } - if (mc.to) + if (mc.to) { + mem_cgroup_move_charge(); mem_cgroup_clear_mc(); + } } #else /* !CONFIG_MMU */ static int mem_cgroup_can_attach(struct cgroup_taskset *tset) @@ -5044,7 +5045,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset) static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset) { } -static void mem_cgroup_move_task(struct cgroup_taskset *tset) +static void mem_cgroup_move_task(void) { } #endif @@ -5258,7 +5259,7 @@ struct cgroup_subsys memory_cgrp_subsys = { .css_reset = mem_cgroup_css_reset, .can_attach = mem_cgroup_can_attach, .cancel_attach = mem_cgroup_cancel_attach, - .attach = mem_cgroup_move_task, + .post_attach = mem_cgroup_move_task, .bind = mem_cgroup_bind, .dfl_cftypes = memory_files, .legacy_cftypes = mem_cgroup_legacy_files, From be591a683e3b4cc58466e08cd6b5e4a71c02b19a Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 28 Apr 2016 16:18:32 -0700 Subject: [PATCH 550/797] mm/huge_memory: replace VM_NO_THP VM_BUG_ON with actual VMA check commit 3486b85a29c1741db99d0c522211c82d2b7a56d0 upstream. Khugepaged detects own VMAs by checking vm_file and vm_ops but this way it cannot distinguish private /dev/zero mappings from other special mappings like /dev/hpet which has no vm_ops and popultes PTEs in mmap. This fixes false-positive VM_BUG_ON and prevents installing THP where they are not expected. Link: http://lkml.kernel.org/r/CACT4Y+ZmuZMV5CjSFOeXviwQdABAgT7T+StKfTqan9YDtgEi5g@mail.gmail.com Fixes: 78f11a255749 ("mm: thp: fix /dev/zero MAP_PRIVATE and vm_flags cleanups") Signed-off-by: Konstantin Khlebnikov Reported-by: Dmitry Vyukov Acked-by: Vlastimil Babka Acked-by: Kirill A. Shutemov Cc: Dmitry Vyukov Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 62fe06bb7d04..530e6427f823 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2134,10 +2134,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma, * page fault if needed. */ return 0; - if (vma->vm_ops) + if (vma->vm_ops || (vm_flags & VM_NO_THP)) /* khugepaged not yet working on file or special mappings */ return 0; - VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma); hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; hend = vma->vm_end & HPAGE_PMD_MASK; if (hstart < hend) @@ -2498,8 +2497,7 @@ static bool hugepage_vma_check(struct vm_area_struct *vma) return false; if (is_vma_temporary_stack(vma)) return false; - VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma); - return true; + return !(vma->vm_flags & VM_NO_THP); } static void collapse_huge_page(struct mm_struct *mm, From e513b90a9aef91e6399decb8e9592f2d75f7ebad Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Thu, 28 Apr 2016 16:18:35 -0700 Subject: [PATCH 551/797] numa: fix /proc//numa_maps for THP commit 28093f9f34cedeaea0f481c58446d9dac6dd620f upstream. In gather_pte_stats() a THP pmd is cast into a pte, which is wrong because the layouts may differ depending on the architecture. On s390 this will lead to inaccurate numa_maps accounting in /proc because of misguided pte_present() and pte_dirty() checks on the fake pte. On other architectures pte_present() and pte_dirty() may work by chance, but there may be an issue with direct-access (dax) mappings w/o underlying struct pages when HAVE_PTE_SPECIAL is set and THP is available. In vm_normal_page() the fake pte will be checked with pte_special() and because there is no "special" bit in a pmd, this will always return false and the VM_PFNMAP | VM_MIXEDMAP checking will be skipped. On dax mappings w/o struct pages, an invalid struct page pointer would then be returned that can crash the kernel. This patch fixes the numa_maps THP handling by introducing new "_pmd" variants of the can_gather_numa_stats() and vm_normal_page() functions. Signed-off-by: Gerald Schaefer Cc: Naoya Horiguchi Cc: "Kirill A . Shutemov" Cc: Konstantin Khlebnikov Cc: Michal Hocko Cc: Vlastimil Babka Cc: Jerome Marchand Cc: Johannes Weiner Cc: Dave Hansen Cc: Mel Gorman Cc: Dan Williams Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Michael Holzheu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/task_mmu.c | 33 ++++++++++++++++++++++++++++++--- include/linux/mm.h | 2 ++ mm/memory.c | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 3 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 09cd3edde08a..f6478301db00 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1435,6 +1435,32 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma, return page; } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static struct page *can_gather_numa_stats_pmd(pmd_t pmd, + struct vm_area_struct *vma, + unsigned long addr) +{ + struct page *page; + int nid; + + if (!pmd_present(pmd)) + return NULL; + + page = vm_normal_page_pmd(vma, addr, pmd); + if (!page) + return NULL; + + if (PageReserved(page)) + return NULL; + + nid = page_to_nid(page); + if (!node_isset(nid, node_states[N_MEMORY])) + return NULL; + + return page; +} +#endif + static int gather_pte_stats(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -1444,13 +1470,13 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, pte_t *orig_pte; pte_t *pte; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { - pte_t huge_pte = *(pte_t *)pmd; struct page *page; - page = can_gather_numa_stats(huge_pte, vma, addr); + page = can_gather_numa_stats_pmd(*pmd, vma, addr); if (page) - gather_stats(page, md, pte_dirty(huge_pte), + gather_stats(page, md, pmd_dirty(*pmd), HPAGE_PMD_SIZE/PAGE_SIZE); spin_unlock(ptl); return 0; @@ -1458,6 +1484,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, if (pmd_trans_unstable(pmd)) return 0; +#endif orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); do { struct page *page = can_gather_numa_stats(*pte, vma, addr); diff --git a/include/linux/mm.h b/include/linux/mm.h index 00bad7793788..fb8b20e5d021 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1084,6 +1084,8 @@ struct zap_details { struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); +struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t pmd); int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); diff --git a/mm/memory.c b/mm/memory.c index b80bf4746b67..76dcee317714 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -797,6 +797,46 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, return pfn_to_page(pfn); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t pmd) +{ + unsigned long pfn = pmd_pfn(pmd); + + /* + * There is no pmd_special() but there may be special pmds, e.g. + * in a direct-access (dax) mapping, so let's just replicate the + * !HAVE_PTE_SPECIAL case from vm_normal_page() here. + */ + if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { + if (vma->vm_flags & VM_MIXEDMAP) { + if (!pfn_valid(pfn)) + return NULL; + goto out; + } else { + unsigned long off; + off = (addr - vma->vm_start) >> PAGE_SHIFT; + if (pfn == vma->vm_pgoff + off) + return NULL; + if (!is_cow_mapping(vma->vm_flags)) + return NULL; + } + } + + if (is_zero_pfn(pfn)) + return NULL; + if (unlikely(pfn > highest_memmap_pfn)) + return NULL; + + /* + * NOTE! We still have PageReserved() pages in the page tables. + * eg. VDSO mappings can cause them to exist. + */ +out: + return pfn_to_page(pfn); +} +#endif + /* * copy one vm_area from one task to the other. Assumes the page tables * already present in the new task to be cleared in the whole range From 87c855f150be9317b9b6ad82c1611ed8d577d986 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 28 Apr 2016 16:18:38 -0700 Subject: [PATCH 552/797] mm: vmscan: reclaim highmem zone if buffer_heads is over limit commit 7bf52fb891b64b8d61caf0b82060adb9db761aec upstream. We have been reclaimed highmem zone if buffer_heads is over limit but commit 6b4f7799c6a5 ("mm: vmscan: invoke slab shrinkers from shrink_zone()") changed the behavior so it doesn't reclaim highmem zone although buffer_heads is over the limit. This patch restores the logic. Fixes: 6b4f7799c6a5 ("mm: vmscan: invoke slab shrinkers from shrink_zone()") Signed-off-by: Minchan Kim Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 2aec4241b42a..0c114e2b01d3 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2534,7 +2534,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) sc->gfp_mask |= __GFP_HIGHMEM; for_each_zone_zonelist_nodemask(zone, z, zonelist, - requested_highidx, sc->nodemask) { + gfp_zone(sc->gfp_mask), sc->nodemask) { enum zone_type classzone_idx; if (!populated_zone(zone)) From 36abe7272a248a7e47a4cec8d8ec9c76ef387bac Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 28 Apr 2016 16:18:44 -0700 Subject: [PATCH 553/797] mm/hwpoison: fix wrong num_poisoned_pages accounting commit d7e69488bd04de165667f6bc741c1c0ec6042ab9 upstream. Currently, migration code increses num_poisoned_pages on *failed* migration page as well as successfully migrated one at the trial of memory-failure. It will make the stat wrong. As well, it marks the page as PG_HWPoison even if the migration trial failed. It would mean we cannot recover the corrupted page using memory-failure facility. This patches fixes it. Signed-off-by: Minchan Kim Reported-by: Vlastimil Babka Acked-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/migrate.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 6d17e0ab42d4..bbeb0b71fcf4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -963,7 +963,13 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page, dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); /* Soft-offlined page shouldn't go through lru cache list */ - if (reason == MR_MEMORY_FAILURE) { + if (reason == MR_MEMORY_FAILURE && rc == MIGRATEPAGE_SUCCESS) { + /* + * With this release, we free successfully migrated + * page and set PG_HWPoison on just freed page + * intentionally. Although it's rather weird, it's how + * HWPoison flag works at the moment. + */ put_page(page); if (!test_set_page_hwpoison(page)) num_poisoned_pages_inc(); From 3c6266d57c4c4fa02588070347acf21b610bbd96 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 21 Jan 2016 15:32:15 -0500 Subject: [PATCH 554/797] cgroup: make sure a parent css isn't freed before its children commit 8bb5ef79bc0f4016ecf79e8dce6096a3c63603e4 upstream. There are three subsystem callbacks in css shutdown path - css_offline(), css_released() and css_free(). Except for css_released(), cgroup core didn't guarantee the order of invocation. css_offline() or css_free() could be called on a parent css before its children. This behavior is unexpected and led to bugs in cpu and memory controller. The previous patch updated ordering for css_offline() which fixes the cpu controller issue. While there currently isn't a known bug caused by misordering of css_free() invocations, let's fix it too for consistency. css_free() ordering can be trivially fixed by moving putting of the parent css below css_free() invocation. Signed-off-by: Tejun Heo Cc: Peter Zijlstra Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b0ea3aebc05a..1c9d701f7a72 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4692,14 +4692,15 @@ static void css_free_work_fn(struct work_struct *work) if (ss) { /* css free path */ + struct cgroup_subsys_state *parent = css->parent; int id = css->id; - if (css->parent) - css_put(css->parent); - ss->css_free(css); cgroup_idr_remove(&ss->css_idr, id); cgroup_put(cgrp); + + if (parent) + css_put(parent); } else { /* cgroup free path */ atomic_dec(&cgrp->root->nr_cgrps); From 4a1bb501e4b65908b102f0b371b0621ff18ad5c3 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Thu, 17 Mar 2016 18:00:29 +0000 Subject: [PATCH 555/797] USB: usbip: fix potential out-of-bounds write commit b348d7dddb6c4fbfc810b7a0626e8ec9e29f7cbb upstream. Fix potential out-of-bounds write to urb->transfer_buffer usbip handles network communication directly in the kernel. When receiving a packet from its peer, usbip code parses headers according to protocol. As part of this parsing urb->actual_length is filled. Since the input for urb->actual_length comes from the network, it should be treated as untrusted. Any entity controlling the network may put any value in the input and the preallocated urb->transfer_buffer may not be large enough to hold the data. Thus, the malicious entity is able to write arbitrary data to kernel memory. Signed-off-by: Ignat Korchagin Cc: Sasha Levin Signed-off-by: Paul Gortmaker Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/usbip_common.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index facaaf003f19..e40da7759a0e 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -741,6 +741,17 @@ int usbip_recv_xbuff(struct usbip_device *ud, struct urb *urb) if (!(size > 0)) return 0; + if (size > urb->transfer_buffer_length) { + /* should not happen, probably malicious packet */ + if (ud->side == USBIP_STUB) { + usbip_event_add(ud, SDEV_EVENT_ERROR_TCP); + return 0; + } else { + usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); + return -EPIPE; + } + } + ret = usbip_recv(ud->tcp_socket, urb->transfer_buffer, size); if (ret != size) { dev_err(&urb->dev->dev, "recv xbuf, %d\n", ret); From 3a4b3d187dba0255cbbb749f64c3b71f8105f44f Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Sun, 3 Apr 2016 16:15:00 -0300 Subject: [PATCH 556/797] videobuf2-core: Check user space planes array in dqbuf commit e7e0c3e26587749b62d17b9dd0532874186c77f7 upstream. The number of planes in videobuf2 is specific to a buffer. In order to verify that the planes array provided by the user is long enough, a new vb2_buf_op is required. Call __verify_planes_array() when the dequeued buffer is known. Return an error to the caller if there was one, otherwise remove the buffer from the done list. Signed-off-by: Sakari Ailus Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/videobuf2-core.c | 10 +++++----- include/media/videobuf2-core.h | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index 33bdd81065e8..11f39791ec33 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -1502,7 +1502,7 @@ static int __vb2_wait_for_done_vb(struct vb2_queue *q, int nonblocking) * Will sleep if required for nonblocking == false. */ static int __vb2_get_done_vb(struct vb2_queue *q, struct vb2_buffer **vb, - int nonblocking) + void *pb, int nonblocking) { unsigned long flags; int ret; @@ -1523,10 +1523,10 @@ static int __vb2_get_done_vb(struct vb2_queue *q, struct vb2_buffer **vb, /* * Only remove the buffer from done_list if v4l2_buffer can handle all * the planes. - * Verifying planes is NOT necessary since it already has been checked - * before the buffer is queued/prepared. So it can never fail. */ - list_del(&(*vb)->done_entry); + ret = call_bufop(q, verify_planes_array, *vb, pb); + if (!ret) + list_del(&(*vb)->done_entry); spin_unlock_irqrestore(&q->done_lock, flags); return ret; @@ -1604,7 +1604,7 @@ int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking) struct vb2_buffer *vb = NULL; int ret; - ret = __vb2_get_done_vb(q, &vb, nonblocking); + ret = __vb2_get_done_vb(q, &vb, pb, nonblocking); if (ret < 0) return ret; diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index 647ebfe5174f..d4227a8a2a23 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -363,6 +363,7 @@ struct vb2_ops { }; struct vb2_buf_ops { + int (*verify_planes_array)(struct vb2_buffer *vb, const void *pb); int (*fill_user_buffer)(struct vb2_buffer *vb, void *pb); int (*fill_vb2_buffer)(struct vb2_buffer *vb, const void *pb, struct vb2_plane *planes); From 19a4e46b4513bab7d6b368175be2e24ad4665e5a Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Sun, 3 Apr 2016 16:31:03 -0300 Subject: [PATCH 557/797] videobuf2-v4l2: Verify planes array in buffer dequeueing commit 2c1f6951a8a82e6de0d82b1158b5e493fc6c54ab upstream. When a buffer is being dequeued using VIDIOC_DQBUF IOCTL, the exact buffer which will be dequeued is not known until the buffer has been removed from the queue. The number of planes is specific to a buffer, not to the queue. This does lead to the situation where multi-plane buffers may be requested and queued with n planes, but VIDIOC_DQBUF IOCTL may be passed an argument struct with fewer planes. __fill_v4l2_buffer() however uses the number of planes from the dequeued videobuf2 buffer, overwriting kernel memory (the m.planes array allocated in video_usercopy() in v4l2-ioctl.c) if the user provided fewer planes than the dequeued buffer had. Oops! Fixes: b0e0e1f83de3 ("[media] media: videobuf2: Prepare to divide videobuf2") Signed-off-by: Sakari Ailus Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/videobuf2-v4l2.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/media/v4l2-core/videobuf2-v4l2.c b/drivers/media/v4l2-core/videobuf2-v4l2.c index 502984c724ff..6c441be8f893 100644 --- a/drivers/media/v4l2-core/videobuf2-v4l2.c +++ b/drivers/media/v4l2-core/videobuf2-v4l2.c @@ -67,6 +67,11 @@ static int __verify_planes_array(struct vb2_buffer *vb, const struct v4l2_buffer return 0; } +static int __verify_planes_array_core(struct vb2_buffer *vb, const void *pb) +{ + return __verify_planes_array(vb, pb); +} + /** * __verify_length() - Verify that the bytesused value for each plane fits in * the plane length and that the data offset doesn't exceed the bytesused value. @@ -432,6 +437,7 @@ static int __fill_vb2_buffer(struct vb2_buffer *vb, } static const struct vb2_buf_ops v4l2_buf_ops = { + .verify_planes_array = __verify_planes_array_core, .fill_user_buffer = __fill_v4l2_buffer, .fill_vb2_buffer = __fill_vb2_buffer, .set_timestamp = __set_timestamp, From 34af67eb941ae5371110c9adbd5392c7a3aa841e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 2 May 2016 11:14:34 -0700 Subject: [PATCH 558/797] Revert "regulator: core: Fix nested locking of supplies" This reverts commit b1999fa6e8145305a6c8bda30ea20783717708e6 which was commit 70a7fb80e85ae7f78f8e90cec3fbd862ea6a4d4b upstream. It causes run-time breakage in the 4.4-stable tree and more patches are needed to be applied first before this one in order to resolve the issue. Reported-by: Guenter Roeck Cc: Mark Brown Cc: Arnd Bergmann Cc: Thierry Reding Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 7b94b8ee087c..c70017d5f74b 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -132,14 +132,6 @@ static bool have_full_constraints(void) return has_full_constraints || of_have_populated_dt(); } -static inline struct regulator_dev *rdev_get_supply(struct regulator_dev *rdev) -{ - if (rdev && rdev->supply) - return rdev->supply->rdev; - - return NULL; -} - /** * regulator_lock_supply - lock a regulator and its supplies * @rdev: regulator source @@ -148,7 +140,7 @@ static void regulator_lock_supply(struct regulator_dev *rdev) { int i; - for (i = 0; rdev->supply; rdev = rdev_get_supply(rdev), i++) + for (i = 0; rdev->supply; rdev = rdev->supply->rdev, i++) mutex_lock_nested(&rdev->mutex, i); } From f500da32a1663c1bb2587ff04be08d5220b3afca Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 27 Nov 2015 14:46:41 +0100 Subject: [PATCH 559/797] regulator: core: fix regulator_lock_supply regression commit bb41897e38c53458a88b271f2fbcd905ee1f9584 upstream. As noticed by Geert Uytterhoeven, my patch to avoid a harmless build warning in regulator_lock_supply() was total crap and introduced a real bug: > [ BUG: bad unlock balance detected! ] > kworker/u4:0/6 is trying to release lock (&rdev->mutex) at: > [] regulator_set_voltage+0x38/0x50 we still lock the regulator supplies, but not the actual regulators, so we are missing a lock, and the unlock is unbalanced. This rectifies it by first locking the regulator device itself before using the same loop as before to lock its supplies. Reported-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann Fixes: 716fec9d1965 ("[SUBMITTED] regulator: core: avoid unused variable warning") Signed-off-by: Mark Brown Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index c70017d5f74b..daffff83ced2 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -140,7 +140,8 @@ static void regulator_lock_supply(struct regulator_dev *rdev) { int i; - for (i = 0; rdev->supply; rdev = rdev->supply->rdev, i++) + mutex_lock(&rdev->mutex); + for (i = 1; rdev->supply; rdev = rdev->supply->rdev, i++) mutex_lock_nested(&rdev->mutex, i); } From 29c9f634cb132107df3e4f07c8e48b35d04b527b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Dec 2015 15:51:52 +0000 Subject: [PATCH 560/797] regulator: core: Ensure we lock all regulators commit 49a6bb7a1c0963f260e4b0dcc2c0e56ec65a28b2 upstream. The latest workaround for the lockdep interface's not using the second argument of mutex_lock_nested() changed the loop missed locking the last regulator due to a thinko with the loop termination condition exiting one regulator too soon. Reported-by: Tyler Baker Signed-off-by: Mark Brown Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index daffff83ced2..f71db02fcb71 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -141,7 +141,7 @@ static void regulator_lock_supply(struct regulator_dev *rdev) int i; mutex_lock(&rdev->mutex); - for (i = 1; rdev->supply; rdev = rdev->supply->rdev, i++) + for (i = 1; rdev; rdev = rdev->supply->rdev, i++) mutex_lock_nested(&rdev->mutex, i); } From 5a58f809d731c23c0b898d2021903db8dee4466f Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 2 Dec 2015 16:54:50 +0100 Subject: [PATCH 561/797] regulator: core: Fix nested locking of supplies commit 70a7fb80e85ae7f78f8e90cec3fbd862ea6a4d4b upstream. Commit fa731ac7ea04 ("regulator: core: avoid unused variable warning") introduced a subtle change in how supplies are locked. Where previously code was always locking the regulator of the current iteration, the new implementation only locks the regulator if it has a supply. For any given power tree that means that the root will never get locked. On the other hand the regulator_unlock_supply() will still release all the locks, which in turn causes the lock debugging code to warn about a mutex being unlocked which wasn't locked. Cc: Mark Brown Cc: Arnd Bergmann Fixes: fa731ac7ea04 ("regulator: core: avoid unused variable warning") Signed-off-by: Thierry Reding Signed-off-by: Mark Brown Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index f71db02fcb71..732ac71b82cd 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -132,6 +132,14 @@ static bool have_full_constraints(void) return has_full_constraints || of_have_populated_dt(); } +static inline struct regulator_dev *rdev_get_supply(struct regulator_dev *rdev) +{ + if (rdev && rdev->supply) + return rdev->supply->rdev; + + return NULL; +} + /** * regulator_lock_supply - lock a regulator and its supplies * @rdev: regulator source @@ -140,8 +148,7 @@ static void regulator_lock_supply(struct regulator_dev *rdev) { int i; - mutex_lock(&rdev->mutex); - for (i = 1; rdev; rdev = rdev->supply->rdev, i++) + for (i = 0; rdev; rdev = rdev_get_supply(rdev), i++) mutex_lock_nested(&rdev->mutex, i); } From 23a67ddd4636584816e2dc2c6393511d55944974 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 1 Feb 2016 15:11:28 +0100 Subject: [PATCH 562/797] locking/mcs: Fix mcs_spin_lock() ordering commit 920c720aa5aa3900a7f1689228fdfc2580a91e7e upstream. Similar to commit b4b29f94856a ("locking/osq: Fix ordering of node initialisation in osq_lock") the use of xchg_acquire() is fundamentally broken with MCS like constructs. Furthermore, it turns out we rely on the global transitivity of this operation because the unlock path observes the pointer with a READ_ONCE(), not an smp_load_acquire(). This is non-critical because the MCS code isn't actually used and mostly serves as documentation, a stepping stone to the more complex things we've build on top of the idea. Reported-by: Andrea Parri Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Fixes: 3552a07a9c4a ("locking/mcs: Use acquire/release semantics") Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/locking/mcs_spinlock.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index 5b9102a47ea5..c835270f0c2f 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -67,7 +67,13 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) node->locked = 0; node->next = NULL; - prev = xchg_acquire(lock, node); + /* + * We rely on the full barrier with global transitivity implied by the + * below xchg() to order the initialization stores above against any + * observation of @node. And to provide the ACQUIRE ordering associated + * with a LOCK primitive. + */ + prev = xchg(lock, node); if (likely(prev == NULL)) { /* * Lock acquired, don't need to set node->locked to 1. Threads From 791e8462e48c6259375f59acf905b05884d648c3 Mon Sep 17 00:00:00 2001 From: Huibin Hong Date: Wed, 24 Feb 2016 18:00:04 +0800 Subject: [PATCH 563/797] spi/rockchip: Make sure spi clk is on in rockchip_spi_set_cs commit b920cc3191d7612f26f36ee494e05b5ffd9044c0 upstream. Rockchip_spi_set_cs could be called by spi_setup, but spi_setup may be called by device driver after runtime suspend. Then the spi clock is closed, rockchip_spi_set_cs may access the spi registers, which causes cpu block in some socs. Fixes: 64e36824b32 ("spi/rockchip: add driver for Rockchip RK3xxx") Signed-off-by: Huibin Hong Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-rockchip.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 79a8bc4f6cec..035767c02072 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -265,7 +265,10 @@ static inline u32 rx_max(struct rockchip_spi *rs) static void rockchip_spi_set_cs(struct spi_device *spi, bool enable) { u32 ser; - struct rockchip_spi *rs = spi_master_get_devdata(spi->master); + struct spi_master *master = spi->master; + struct rockchip_spi *rs = spi_master_get_devdata(master); + + pm_runtime_get_sync(rs->dev); ser = readl_relaxed(rs->regs + ROCKCHIP_SPI_SER) & SER_MASK; @@ -290,6 +293,8 @@ static void rockchip_spi_set_cs(struct spi_device *spi, bool enable) ser &= ~(1 << spi->chip_select); writel_relaxed(ser, rs->regs + ROCKCHIP_SPI_SER); + + pm_runtime_put_sync(rs->dev); } static int rockchip_spi_prepare_message(struct spi_master *master, From fd66dc5d5123672069acba5cb5856e7b0aa9e6d4 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Wed, 9 Mar 2016 03:21:29 +0200 Subject: [PATCH 564/797] irqchip/sunxi-nmi: Fix error check of of_io_request_and_map() commit cfe199afefe6201e998ddc07102fc1fdb55f196c upstream. The of_io_request_and_map() returns a valid pointer in iomem region or ERR_PTR(), check for NULL always fails and may cause a NULL pointer dereference on error path. Fixes: 0e841b04c829 ("irqchip/sunxi-nmi: Switch to of_io_request_and_map() from of_iomap()") Signed-off-by: Vladimir Zapolskiy Cc: Jason Cooper Cc: Marc Zyngier Cc: Chen-Yu Tsai Cc: Maxime Ripard Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1457486489-10189-1-git-send-email-vz@mleia.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-sunxi-nmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c index 4ef178078e5b..1254e98f6b57 100644 --- a/drivers/irqchip/irq-sunxi-nmi.c +++ b/drivers/irqchip/irq-sunxi-nmi.c @@ -154,9 +154,9 @@ static int __init sunxi_sc_nmi_irq_init(struct device_node *node, gc = irq_get_domain_generic_chip(domain, 0); gc->reg_base = of_io_request_and_map(node, 0, of_node_full_name(node)); - if (!gc->reg_base) { + if (IS_ERR(gc->reg_base)) { pr_err("unable to map resource\n"); - ret = -ENOMEM; + ret = PTR_ERR(gc->reg_base); goto fail_irqd_remove; } From e60711a18bccf26d0159b263dfb05b374532caf5 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Wed, 9 Mar 2016 03:21:40 +0200 Subject: [PATCH 565/797] irqchip/mxs: Fix error check of of_io_request_and_map() commit edf8fcdc6b254236be005851af35ea5e826e7e09 upstream. The of_io_request_and_map() returns a valid pointer in iomem region or ERR_PTR(), check for NULL always fails and may cause a NULL pointer dereference on error path. Fixes: 25e34b44313b ("irqchip/mxs: Prepare driver for hardware with different offsets") Signed-off-by: Vladimir Zapolskiy Cc: Jason Cooper Cc: Marc Zyngier Cc: Oleksij Rempel Cc: Sascha Hauer Cc: Shawn Guo Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1457486500-10237-1-git-send-email-vz@mleia.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-mxs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c index efe50845939d..17304705f2cf 100644 --- a/drivers/irqchip/irq-mxs.c +++ b/drivers/irqchip/irq-mxs.c @@ -183,7 +183,7 @@ static void __iomem * __init icoll_init_iobase(struct device_node *np) void __iomem *icoll_base; icoll_base = of_io_request_and_map(np, 0, np->name); - if (!icoll_base) + if (IS_ERR(icoll_base)) panic("%s: unable to map resource", np->full_name); return icoll_base; } From 72291d619e2928556db1d446f0b4afff330276a7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 Feb 2016 15:53:11 +0100 Subject: [PATCH 566/797] regulator: s5m8767: fix get_register() error handling commit e07ff9434167981c993a26d2edbbcb8e13801dbb upstream. The s5m8767_pmic_probe() function calls s5m8767_get_register() to read data without checking the return code, which produces a compile-time warning when that data is accessed: drivers/regulator/s5m8767.c: In function 's5m8767_pmic_probe': drivers/regulator/s5m8767.c:924:7: error: 'enable_reg' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/regulator/s5m8767.c:944:30: error: 'enable_val' may be used uninitialized in this function [-Werror=maybe-uninitialized] This changes the s5m8767_get_register() function to return a -EINVAL not just for an invalid register number but also for an invalid regulator number, as both would result in returning uninitialized data. The s5m8767_pmic_probe() function is then changed accordingly to fail on a read error, as all the other callers of s5m8767_get_register() already do. In practice this probably cannot happen, as we don't call s5m8767_get_register() with invalid arguments, but the gcc warning seems valid in principle, in terms writing safe error checking. Signed-off-by: Arnd Bergmann Fixes: 9c4c60554acf ("regulator: s5m8767: Convert to use regulator_[enable|disable|is_enabled]_regmap") Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/s5m8767.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c index 58f5d3b8e981..27343e1c43ef 100644 --- a/drivers/regulator/s5m8767.c +++ b/drivers/regulator/s5m8767.c @@ -202,9 +202,10 @@ static int s5m8767_get_register(struct s5m8767_info *s5m8767, int reg_id, } } - if (i < s5m8767->num_regulators) - *enable_ctrl = - s5m8767_opmode_reg[reg_id][mode] << S5M8767_ENCTRL_SHIFT; + if (i >= s5m8767->num_regulators) + return -EINVAL; + + *enable_ctrl = s5m8767_opmode_reg[reg_id][mode] << S5M8767_ENCTRL_SHIFT; return 0; } @@ -937,8 +938,12 @@ static int s5m8767_pmic_probe(struct platform_device *pdev) else regulators[id].vsel_mask = 0xff; - s5m8767_get_register(s5m8767, id, &enable_reg, + ret = s5m8767_get_register(s5m8767, id, &enable_reg, &enable_val); + if (ret) { + dev_err(s5m8767->dev, "error reading registers\n"); + return ret; + } regulators[id].enable_reg = enable_reg; regulators[id].enable_mask = S5M8767_ENCTRL_MASK; regulators[id].enable_val = enable_val; From aea6995abbe4e13299d8606679cfe1b92fa45932 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 15 Mar 2016 14:53:29 -0700 Subject: [PATCH 567/797] paride: make 'verbose' parameter an 'int' again commit dec63a4dec2d6d01346fd5d96062e67c0636852b upstream. gcc-6.0 found an ancient bug in the paride driver, which had a "module_param(verbose, bool, 0);" since before 2.6.12, but actually uses it to accept '0', '1' or '2' as arguments: drivers/block/paride/pd.c: In function 'pd_init_dev_parms': drivers/block/paride/pd.c:298:29: warning: comparison of constant '1' with boolean expression is always false [-Wbool-compare] #define DBMSG(msg) ((verbose>1)?(msg):NULL) In 2012, Rusty did a cleanup patch that also changed the type of the variable to 'bool', which introduced what is now a gcc warning. This changes the type back to 'int' and adapts the module_param() line instead, so it should work as documented in case anyone ever cares about running the ancient driver with debugging. Fixes: 90ab5ee94171 ("module_param: make bool parameters really bool (drivers & misc)") Signed-off-by: Arnd Bergmann Rusty Russell Cc: Tim Waugh Cc: Sudip Mukherjee Cc: Jens Axboe Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/block/paride/pd.c | 4 ++-- drivers/block/paride/pt.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 562b5a4ca7b7..78a39f736c64 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -126,7 +126,7 @@ */ #include -static bool verbose = 0; +static int verbose = 0; static int major = PD_MAJOR; static char *name = PD_NAME; static int cluster = 64; @@ -161,7 +161,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, D_SBY, D_DLY, D_SLV}; static DEFINE_MUTEX(pd_mutex); static DEFINE_SPINLOCK(pd_lock); -module_param(verbose, bool, 0); +module_param(verbose, int, 0); module_param(major, int, 0); module_param(name, charp, 0); module_param(cluster, int, 0); diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index 1740d75e8a32..216a94fed5b4 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c @@ -117,7 +117,7 @@ */ -static bool verbose = 0; +static int verbose = 0; static int major = PT_MAJOR; static char *name = PT_NAME; static int disable = 0; @@ -152,7 +152,7 @@ static int (*drives[4])[6] = {&drive0, &drive1, &drive2, &drive3}; #include -module_param(verbose, bool, 0); +module_param(verbose, int, 0); module_param(major, int, 0); module_param(name, charp, 0); module_param_array(drive0, int, NULL, 0); From 9d9fefc8283a2bda6c7daeaab3b310965cb35f85 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 Jan 2016 16:57:23 +0100 Subject: [PATCH 568/797] scsi_dh: force modular build if SCSI is a module commit 0c994c03c926d26ce48e6bbabbbe60366044fcae upstream. When the scsi_dh core was moved into the scsi core module, CONFIG_SCSI_DH became a 'bool' option, and now anything depending on it can be built-in even when CONFIG_SCSI=m. This of course cannot link successfully: drivers/scsi/built-in.o: In function `rdac_init': scsi_dh_alua.c:(.init.text+0x14): undefined reference to `scsi_register_device_handler' scsi_dh_alua.c:(.init.text+0x64): undefined reference to `scsi_unregister_device_handler' drivers/scsi/built-in.o: In function `alua_init': scsi_dh_alua.c:(.init.text+0xb0): undefined reference to `scsi_register_device_handler' As a workaround, this adds an extra dependency on CONFIG_SCSI, so Kconfig can figure out whether built-in is allowed or not. Signed-off-by: Arnd Bergmann Fixes: 086b91d052eb ("scsi_dh: integrate into the core SCSI code") Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/device_handler/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/device_handler/Kconfig b/drivers/scsi/device_handler/Kconfig index e5647d59224f..0b331c9c0a8f 100644 --- a/drivers/scsi/device_handler/Kconfig +++ b/drivers/scsi/device_handler/Kconfig @@ -13,13 +13,13 @@ menuconfig SCSI_DH config SCSI_DH_RDAC tristate "LSI RDAC Device Handler" - depends on SCSI_DH + depends on SCSI_DH && SCSI help If you have a LSI RDAC select y. Otherwise, say N. config SCSI_DH_HP_SW tristate "HP/COMPAQ MSA Device Handler" - depends on SCSI_DH + depends on SCSI_DH && SCSI help If you have a HP/COMPAQ MSA device that requires START_STOP to be sent to start it and cannot upgrade the firmware then select y. @@ -27,13 +27,13 @@ config SCSI_DH_HP_SW config SCSI_DH_EMC tristate "EMC CLARiiON Device Handler" - depends on SCSI_DH + depends on SCSI_DH && SCSI help If you have a EMC CLARiiON select y. Otherwise, say N. config SCSI_DH_ALUA tristate "SPC-3 ALUA Device Handler" - depends on SCSI_DH + depends on SCSI_DH && SCSI help SCSI Device handler for generic SPC-3 Asymmetric Logical Unit Access (ALUA). From 0658e8c5e8cc02f12f9ae3df1f3b87ee7283bb24 Mon Sep 17 00:00:00 2001 From: Sushaanth Srirangapathi Date: Mon, 29 Feb 2016 18:42:19 +0530 Subject: [PATCH 569/797] fbdev: da8xx-fb: fix videomodes of lcd panels commit 713fced8d10fa1c759c8fb6bf9aaa681bae68cad upstream. Commit 028cd86b794f4a ("video: da8xx-fb: fix the polarities of the hsync/vsync pulse") fixes polarities of HSYNC/VSYNC pulse but forgot to update known_lcd_panels[] which had sync values according to old logic. This breaks LCD at least on DA850 EVM. This patch fixes this issue and I have tested this for panel "Sharp_LK043T1DG01" using DA850 EVM board. Fixes: 028cd86b794f4a ("video: da8xx-fb: fix the polarities of the hsync/vsync pulse") Signed-off-by: Sushaanth Srirangapathi Signed-off-by: Tomi Valkeinen Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/da8xx-fb.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/da8xx-fb.c b/drivers/video/fbdev/da8xx-fb.c index 0081725c6b5b..d00510029c93 100644 --- a/drivers/video/fbdev/da8xx-fb.c +++ b/drivers/video/fbdev/da8xx-fb.c @@ -209,8 +209,7 @@ static struct fb_videomode known_lcd_panels[] = { .lower_margin = 2, .hsync_len = 0, .vsync_len = 0, - .sync = FB_SYNC_CLK_INVERT | - FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, + .sync = FB_SYNC_CLK_INVERT, }, /* Sharp LK043T1DG01 */ [1] = { @@ -224,7 +223,7 @@ static struct fb_videomode known_lcd_panels[] = { .lower_margin = 2, .hsync_len = 41, .vsync_len = 10, - .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, + .sync = 0, .flag = 0, }, [2] = { @@ -239,7 +238,7 @@ static struct fb_videomode known_lcd_panels[] = { .lower_margin = 10, .hsync_len = 10, .vsync_len = 10, - .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, + .sync = 0, .flag = 0, }, [3] = { From 81b3a56ed84b0f2c1e2ff75ee2e05d5d4cd2462b Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Wed, 17 Feb 2016 14:46:59 +0100 Subject: [PATCH 570/797] lib/mpi: Endianness fix commit 3ee0cb5fb5eea2110db1b5cb7f67029b7be8a376 upstream. The limbs are integers in the host endianness, so we can't simply iterate over the individual bytes. The current code happens to work on little-endian, because the order of the limbs in the MPI array is the same as the order of the bytes in each limb, but it breaks on big-endian. Fixes: 0f74fbf77d45 ("MPI: Fix mpi_read_buffer") Signed-off-by: Michal Marek Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- lib/mpi/mpicoder.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 3db76b8c1115..e00ff00e861c 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -128,6 +128,23 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) } EXPORT_SYMBOL_GPL(mpi_read_from_buffer); +static int count_lzeros(MPI a) +{ + mpi_limb_t alimb; + int i, lzeros = 0; + + for (i = a->nlimbs - 1; i >= 0; i--) { + alimb = a->d[i]; + if (alimb == 0) { + lzeros += sizeof(mpi_limb_t); + } else { + lzeros += count_leading_zeros(alimb) / 8; + break; + } + } + return lzeros; +} + /** * mpi_read_buffer() - read MPI to a bufer provided by user (msb first) * @@ -146,7 +163,7 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, uint8_t *p; mpi_limb_t alimb; unsigned int n = mpi_get_size(a); - int i, lzeros = 0; + int i, lzeros; if (buf_len < n || !buf || !nbytes) return -EINVAL; @@ -154,14 +171,7 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, if (sign) *sign = a->sign; - p = (void *)&a->d[a->nlimbs] - 1; - - for (i = a->nlimbs * sizeof(alimb) - 1; i >= 0; i--, p--) { - if (!*p) - lzeros++; - else - break; - } + lzeros = count_lzeros(a); p = buf; *nbytes = n - lzeros; @@ -343,7 +353,7 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes, u8 *p, *p2; mpi_limb_t alimb, alimb2; unsigned int n = mpi_get_size(a); - int i, x, y = 0, lzeros = 0, buf_len; + int i, x, y = 0, lzeros, buf_len; if (!nbytes || *nbytes < n) return -EINVAL; @@ -351,14 +361,7 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes, if (sign) *sign = a->sign; - p = (void *)&a->d[a->nlimbs] - 1; - - for (i = a->nlimbs * sizeof(alimb) - 1; i >= 0; i--, p--) { - if (!*p) - lzeros++; - else - break; - } + lzeros = count_lzeros(a); *nbytes = n - lzeros; buf_len = sgl->length; From 01c8261c5ec46183e14dec7df335ee88bb037e30 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 14 Dec 2015 14:29:23 +0000 Subject: [PATCH 571/797] misc/bmp085: Enable building as a module commit 50e6315dba721cbc24ccd6d7b299f1782f210a98 upstream. Commit 985087dbcb02 'misc: add support for bmp18x chips to the bmp085 driver' changed the BMP085 config symbol to a boolean. I see no reason why the shared code cannot be built as a module, so change it back to tristate. Fixes: 985087dbcb02 ("misc: add support for bmp18x chips to the bmp085 driver") Cc: Eric Andersson Signed-off-by: Ben Hutchings Acked-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 22892c701c63..4bf7d50b1bc7 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -439,7 +439,7 @@ config ARM_CHARLCD still useful. config BMP085 - bool + tristate depends on SYSFS config BMP085_I2C From 4f8e29e7547be52fa24b0cdc7cf69baac9d82328 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 19 Oct 2015 14:19:01 +0300 Subject: [PATCH 572/797] misc: mic/scif: fix wrap around tests commit 7b64dbf849abdd7e769820e25120758f956a7f13 upstream. Signed integer overflow is undefined. Also I added a check for "(offset < 0)" in scif_unregister() because that makes it match the other conditions and because I didn't want to subtract a negative. Fixes: ba612aa8b487 ('misc: mic: SCIF memory registration and unregistration') Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mic/scif/scif_rma.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index 8310b4dbff06..6a451bd65bf3 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -1511,7 +1511,7 @@ off_t scif_register_pinned_pages(scif_epd_t epd, if ((map_flags & SCIF_MAP_FIXED) && ((ALIGN(offset, PAGE_SIZE) != offset) || (offset < 0) || - (offset + (off_t)len < offset))) + (len > LONG_MAX - offset))) return -EINVAL; might_sleep(); @@ -1614,7 +1614,7 @@ off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, if ((map_flags & SCIF_MAP_FIXED) && ((ALIGN(offset, PAGE_SIZE) != offset) || (offset < 0) || - (offset + (off_t)len < offset))) + (len > LONG_MAX - offset))) return -EINVAL; /* Unsupported protection requested */ @@ -1732,7 +1732,8 @@ scif_unregister(scif_epd_t epd, off_t offset, size_t len) /* Offset is not page aligned or offset+len wraps around */ if ((ALIGN(offset, PAGE_SIZE) != offset) || - (offset + (off_t)len < offset)) + (offset < 0) || + (len > LONG_MAX - offset)) return -EINVAL; err = scif_verify_epd(ep); From dabe14168a929839b4757f496ad6886489078997 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 15 Feb 2016 10:21:53 +0530 Subject: [PATCH 573/797] PM / OPP: Initialize u_volt_min/max to a valid value commit c88c395f4a6485f23f81e385c79945d68bcd5c5d upstream. We kept u_volt_min/max initialized to 0, when only the target voltage is present in DT, instead of the target/min/max triplet. This didn't go well with the regulator framework, as on few calls the min voltage was set to target and max was set to 0 and so resulted in a kernel crash like below: kernel BUG at ../drivers/regulator/core.c:216! [] (regulator_check_voltage) from [] (regulator_set_voltage_unlocked+0x58/0x230) [] (regulator_set_voltage_unlocked) from [] (regulator_set_voltage+0x28/0x54) [] (regulator_set_voltage) from [] (_set_opp_voltage+0x30/0x98) [] (_set_opp_voltage) from [] (dev_pm_opp_set_rate+0xf0/0x28c) [] (dev_pm_opp_set_rate) from [] (__cpufreq_driver_target+0x184/0x2b4) [] (__cpufreq_driver_target) from [] (dbs_check_cpu+0x1b0/0x1f4) [] (dbs_check_cpu) from [] (cpufreq_governor_dbs+0x324/0x5c4) [] (cpufreq_governor_dbs) from [] (__cpufreq_governor+0xe4/0x1ec) [] (__cpufreq_governor) from [] (cpufreq_init_policy+0x64/0x8c) [] (cpufreq_init_policy) from [] (cpufreq_online+0x2fc/0x708) [] (cpufreq_online) from [] (subsys_interface_register+0x94/0xd8) [] (subsys_interface_register) from [] (cpufreq_register_driver+0x14c/0x19c) [] (cpufreq_register_driver) from [] (dt_cpufreq_probe+0x70/0xec) [] (dt_cpufreq_probe) from [] (platform_drv_probe+0x4c/0xb0) [] (platform_drv_probe) from [] (driver_probe_device+0x214/0x2c0) [] (driver_probe_device) from [] (__driver_attach+0x8c/0x90) [] (__driver_attach) from [] (bus_for_each_dev+0x68/0x9c) [] (bus_for_each_dev) from [] (bus_add_driver+0x1a0/0x218) [] (bus_add_driver) from [] (driver_register+0x78/0xf8) [] (driver_register) from [] (do_one_initcall+0x90/0x1d8) [] (do_one_initcall) from [] (kernel_init_freeable+0x15c/0x1fc) [] (kernel_init_freeable) from [] (kernel_init+0x8/0xf0) [] (kernel_init) from [] (ret_from_fork+0x14/0x3c) Code: e1550004 baffffeb e3a00000 e8bd8070 (e7f001f2) Fix that by initializing u_volt_min/max to the target voltage in such cases. Reported-and-tested-by: Krzysztof Kozlowski Fixes: 274659029c9d (PM / OPP: Add support to parse "operating-points-v2" bindings) Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/opp/core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index b8e76f75073b..f8580900c273 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -809,8 +809,14 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev) } opp->u_volt = microvolt[0]; - opp->u_volt_min = microvolt[1]; - opp->u_volt_max = microvolt[2]; + + if (count == 1) { + opp->u_volt_min = opp->u_volt; + opp->u_volt_max = opp->u_volt; + } else { + opp->u_volt_min = microvolt[1]; + opp->u_volt_max = microvolt[2]; + } if (!of_property_read_u32(opp->np, "opp-microamp", &val)) opp->u_amp = val; From cab4c949ade11ac67f69bc05124c9d6ffef31917 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 4 Mar 2016 10:55:14 +0000 Subject: [PATCH 574/797] PM / Domains: Fix removal of a subdomain commit beda5fc1ff9b527059290a97b672d2ee0eb7b92f upstream. Commit 30e7a65b3fdb (PM / Domains: Ensure subdomain is not in use before removing) added a test to ensure that a subdomain is not a master to another subdomain or if any devices are using the subdomain before removing. This change incorrectly used the "slave_links" list to determine if the subdomain is a master to another subdomain, where it should have been using the "master_links" list instead. The "slave_links" list will never be empty for a subdomain and so a subdomain can never be removed. Fix this by testing if the "master_links" list is empty instead. Fixes: 30e7a65b3fdb (PM / Domains: Ensure subdomain is not in use before removing) Signed-off-by: Jon Hunter Reviewed-by: Thierry Reding Acked-by: Ulf Hansson Acked-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/domain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 65f50eccd49b..a48824deabc5 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1381,7 +1381,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, mutex_lock(&genpd->lock); - if (!list_empty(&subdomain->slave_links) || subdomain->device_count) { + if (!list_empty(&subdomain->master_links) || subdomain->device_count) { pr_warn("%s: unable to remove subdomain %s\n", genpd->name, subdomain->name); ret = -EBUSY; From 1392ec2a303a53512ad16dcf1ab31e77b08c52d9 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Sun, 6 Mar 2016 12:43:57 +0300 Subject: [PATCH 575/797] rtc: hym8563: fix invalid year calculation commit d5861262210067fc01b2fb4f7af2fd85a3453f15 upstream. Year field must be in BCD format, according to hym8563 datasheet. Due to the bug year 2016 became 2010. Fixes: dcaf03849352 ("rtc: add hym8563 rtc-driver") Signed-off-by: Alexander Kochetkov Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-hym8563.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c index 097325d96db5..b1b4746a0eab 100644 --- a/drivers/rtc/rtc-hym8563.c +++ b/drivers/rtc/rtc-hym8563.c @@ -144,7 +144,7 @@ static int hym8563_rtc_set_time(struct device *dev, struct rtc_time *tm) * it does not seem to carry it over a subsequent write/read. * So we'll limit ourself to 100 years, starting at 2000 for now. */ - buf[6] = tm->tm_year - 100; + buf[6] = bin2bcd(tm->tm_year - 100); /* * CTL1 only contains TEST-mode bits apart from stop, From 041f2ca3ff039ebe10a48a775b29bf3fa7c993fa Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 1 Mar 2016 09:50:01 +0100 Subject: [PATCH 576/797] rtc: vr41xx: Wire up alarm_irq_enable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a25f4a95ec3cded34c1250364eba704c5e4fdac4 upstream. drivers/rtc/rtc-vr41xx.c:229: warning: ‘vr41xx_rtc_alarm_irq_enable’ defined but not used Apparently the conversion to alarm_irq_enable forgot to wire up the callback. Fixes: 16380c153a69c378 ("RTC: Convert rtc drivers to use the alarm_irq_enable method") Signed-off-by: Geert Uytterhoeven Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-vr41xx.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c index f64c282275b3..e1b86bb01062 100644 --- a/drivers/rtc/rtc-vr41xx.c +++ b/drivers/rtc/rtc-vr41xx.c @@ -272,12 +272,13 @@ static irqreturn_t rtclong1_interrupt(int irq, void *dev_id) } static const struct rtc_class_ops vr41xx_rtc_ops = { - .release = vr41xx_rtc_release, - .ioctl = vr41xx_rtc_ioctl, - .read_time = vr41xx_rtc_read_time, - .set_time = vr41xx_rtc_set_time, - .read_alarm = vr41xx_rtc_read_alarm, - .set_alarm = vr41xx_rtc_set_alarm, + .release = vr41xx_rtc_release, + .ioctl = vr41xx_rtc_ioctl, + .read_time = vr41xx_rtc_read_time, + .set_time = vr41xx_rtc_set_time, + .read_alarm = vr41xx_rtc_read_alarm, + .set_alarm = vr41xx_rtc_set_alarm, + .alarm_irq_enable = vr41xx_rtc_alarm_irq_enable, }; static int rtc_probe(struct platform_device *pdev) From 83fe55baa881f1d7fed118b4f4ec3bf325d7285a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 2 Mar 2016 13:07:45 +0300 Subject: [PATCH 577/797] rtc: ds1685: passing bogus values to irq_restore commit 8c09b9fdecab1f4a289f07b46e2ad174b6641928 upstream. We call spin_lock_irqrestore with "flags" set to zero instead of to the value from spin_lock_irqsave(). Fixes: aaaf5fbf56f1 ('rtc: add driver for DS1685 family of real time clocks') Signed-off-by: Dan Carpenter Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-ds1685.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c index 05a51ef52703..d5c1b057a739 100644 --- a/drivers/rtc/rtc-ds1685.c +++ b/drivers/rtc/rtc-ds1685.c @@ -187,9 +187,9 @@ ds1685_rtc_end_data_access(struct ds1685_priv *rtc) * Only use this where you are certain another lock will not be held. */ static inline void -ds1685_rtc_begin_ctrl_access(struct ds1685_priv *rtc, unsigned long flags) +ds1685_rtc_begin_ctrl_access(struct ds1685_priv *rtc, unsigned long *flags) { - spin_lock_irqsave(&rtc->lock, flags); + spin_lock_irqsave(&rtc->lock, *flags); ds1685_rtc_switch_to_bank1(rtc); } @@ -1304,7 +1304,7 @@ ds1685_rtc_sysfs_ctrl_regs_store(struct device *dev, { struct ds1685_priv *rtc = dev_get_drvdata(dev); u8 reg = 0, bit = 0, tmp; - unsigned long flags = 0; + unsigned long flags; long int val = 0; const struct ds1685_rtc_ctrl_regs *reg_info = ds1685_rtc_sysfs_ctrl_regs_lookup(attr->attr.name); @@ -1325,7 +1325,7 @@ ds1685_rtc_sysfs_ctrl_regs_store(struct device *dev, bit = reg_info->bit; /* Safe to spinlock during a write. */ - ds1685_rtc_begin_ctrl_access(rtc, flags); + ds1685_rtc_begin_ctrl_access(rtc, &flags); tmp = rtc->read(rtc, reg); rtc->write(rtc, reg, (val ? (tmp | bit) : (tmp & ~(bit)))); ds1685_rtc_end_ctrl_access(rtc, flags); From 11dd7f9a1ed13794cc77bd144b3aa9dd17c4030f Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 21 Jan 2016 13:24:21 +0100 Subject: [PATCH 578/797] rtc: rx8025: remove rv8803 id commit aaa3cee5deffa28415a6e1852c5afae0f5d210e2 upstream. The rv8803 has its own driver that should be used. Remove its id from the rx8025 driver. Fixes: b1f9d790b59dc04f8813a49a92ddd8651770ffee Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-rx8025.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index bd911bafb809..17341feadad1 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -65,7 +65,6 @@ static const struct i2c_device_id rx8025_id[] = { { "rx8025", 0 }, - { "rv8803", 1 }, { } }; MODULE_DEVICE_TABLE(i2c, rx8025_id); From f55131145b8d16d942ddb363c3e1b72cf4775384 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 4 Feb 2016 09:26:35 +0900 Subject: [PATCH 579/797] rtc: max77686: Properly handle regmap_irq_get_virq() error code commit fb166ba1d7f0a662f7332f4ff660a0d6f4d76915 upstream. The regmap_irq_get_virq() can return 0 or -EINVAL in error conditions but driver checked only for value of 0. This could lead to a cast of -EINVAL to an unsigned int used as a interrupt number for devm_request_threaded_irq(). Although this is not yet fatal (devm_request_threaded_irq() will just fail with -EINVAL) but might be a misleading when diagnosing errors. Signed-off-by: Krzysztof Kozlowski Fixes: 6f1c1e71d933 ("mfd: max77686: Convert to use regmap_irq") Reviewed-by: Javier Martinez Canillas Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-max77686.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index 7184a0eda793..725dccae24e7 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -465,7 +465,7 @@ static int max77686_rtc_probe(struct platform_device *pdev) info->virq = regmap_irq_get_virq(max77686->rtc_irq_data, MAX77686_RTCIRQ_RTCA1); - if (!info->virq) { + if (info->virq <= 0) { ret = -ENXIO; goto err_rtc; } From ab2c82dcd6cdb4d4871b151123f1523f2285a27e Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Mon, 22 Feb 2016 10:20:24 +0100 Subject: [PATCH 580/797] drivers/misc/ad525x_dpot: AD5274 fix RDAC read back errors commit f3df53e4d70b5736368a8fe8aa1bb70c1cb1f577 upstream. Fix RDAC read back errors caused by a typo. Value must shift by 2. Fixes: a4bd394956f2 ("drivers/misc/ad525x_dpot.c: new features") Signed-off-by: Michael Hennerich Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ad525x_dpot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/ad525x_dpot.c b/drivers/misc/ad525x_dpot.c index 15e88078ba1e..f1a0b99f5a9a 100644 --- a/drivers/misc/ad525x_dpot.c +++ b/drivers/misc/ad525x_dpot.c @@ -216,7 +216,7 @@ static s32 dpot_read_i2c(struct dpot_data *dpot, u8 reg) */ value = swab16(value); - if (dpot->uid == DPOT_UID(AD5271_ID)) + if (dpot->uid == DPOT_UID(AD5274_ID)) value = value >> 2; return value; default: From 36828721fbbe4b53a53d62847b476f314123c819 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 17 Feb 2016 10:57:19 -0300 Subject: [PATCH 581/797] perf evlist: Reference count the cpu and thread maps at set_maps() commit a55e5663761366fb883f6f25375dd68bc958b9db upstream. We were dropping the reference we possibly held but not obtaining one for the new maps, which we will drop at perf_evlist__delete(), fix it. This was caught by Steven Noonan in some of the machines which would produce this output when caught by glibc debug mechanisms: $ sudo perf test 21 21: Test object code reading :*** Error in `perf': corrupted double-linked list: 0x00000000023ffcd0 *** ======= Backtrace: ========= /usr/lib/libc.so.6(+0x72055)[0x7f25be0f3055] /usr/lib/libc.so.6(+0x779b6)[0x7f25be0f89b6] /usr/lib/libc.so.6(+0x7a0ed)[0x7f25be0fb0ed] /usr/lib/libc.so.6(__libc_calloc+0xba)[0x7f25be0fceda] perf(parse_events_lex_init_extra+0x38)[0x4cfff8] perf(parse_events+0x55)[0x4a0615] perf(perf_evlist__config+0xcf)[0x4eeb2f] perf[0x479f82] perf(test__code_reading+0x1e)[0x47ad4e] perf(cmd_test+0x5dd)[0x46452d] perf[0x47f4e3] perf(main+0x603)[0x42c723] /usr/lib/libc.so.6(__libc_start_main+0xf0)[0x7f25be0a1610] perf(_start+0x29)[0x42c859] Further investigation using valgrind led to the reference count imbalance fixed in this patch. Reported-and-Tested-by: Steven Noonan Report-Link: http://lkml.kernel.org/r/CAKbGBLjC2Dx5vshxyGmQkcD+VwiAQLbHoXA9i7kvRB2-2opHZQ@mail.gmail.com Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: f30a79b012e5 ("perf tools: Add reference counting for cpu_map object") Link: http://lkml.kernel.org/n/tip-j0u1bdhr47sa511sgg76kb8h@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/evlist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d1392194a9a9..b4b96120fc3b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1211,12 +1211,12 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, */ if (cpus != evlist->cpus) { cpu_map__put(evlist->cpus); - evlist->cpus = cpus; + evlist->cpus = cpu_map__get(cpus); } if (threads != evlist->threads) { thread_map__put(evlist->threads); - evlist->threads = threads; + evlist->threads = thread_map__get(threads); } perf_evlist__propagate_maps(evlist); From 8481fdf6dc13e3a2b3f7e75e414b5eab3771329d Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Thu, 3 Mar 2016 02:03:11 +0100 Subject: [PATCH 582/797] x86/mm/kmmio: Fix mmiotrace for hugepages commit cfa52c0cfa4d727aa3e457bf29aeff296c528a08 upstream. Because Linux might use bigger pages than the 4K pages to handle those mmio ioremaps, the kmmio code shouldn't rely on the pade id as it currently does. Using the memory address instead of the page id lets us look up how big the page is and what its base address is, so that we won't get a page fault within the same page twice anymore. Tested-by: Pierre Moreau Signed-off-by: Karol Herbst Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-mm@kvack.org Cc: linux-x86_64@vger.kernel.org Cc: nouveau@lists.freedesktop.org Cc: pq@iki.fi Cc: rostedt@goodmis.org Link: http://lkml.kernel.org/r/1456966991-6861-1-git-send-email-nouveau@karolherbst.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kmmio.c | 88 ++++++++++++++++++++++++++++++--------------- 1 file changed, 59 insertions(+), 29 deletions(-) diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 637ab34ed632..ddb2244b06a1 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -33,7 +33,7 @@ struct kmmio_fault_page { struct list_head list; struct kmmio_fault_page *release_next; - unsigned long page; /* location of the fault page */ + unsigned long addr; /* the requested address */ pteval_t old_presence; /* page presence prior to arming */ bool armed; @@ -70,9 +70,16 @@ unsigned int kmmio_count; static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; static LIST_HEAD(kmmio_probes); -static struct list_head *kmmio_page_list(unsigned long page) +static struct list_head *kmmio_page_list(unsigned long addr) { - return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; + unsigned int l; + pte_t *pte = lookup_address(addr, &l); + + if (!pte) + return NULL; + addr &= page_level_mask(l); + + return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)]; } /* Accessed per-cpu */ @@ -98,15 +105,19 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr) } /* You must be holding RCU read lock. */ -static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) +static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr) { struct list_head *head; struct kmmio_fault_page *f; + unsigned int l; + pte_t *pte = lookup_address(addr, &l); - page &= PAGE_MASK; - head = kmmio_page_list(page); + if (!pte) + return NULL; + addr &= page_level_mask(l); + head = kmmio_page_list(addr); list_for_each_entry_rcu(f, head, list) { - if (f->page == page) + if (f->addr == addr) return f; } return NULL; @@ -137,10 +148,10 @@ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old) static int clear_page_presence(struct kmmio_fault_page *f, bool clear) { unsigned int level; - pte_t *pte = lookup_address(f->page, &level); + pte_t *pte = lookup_address(f->addr, &level); if (!pte) { - pr_err("no pte for page 0x%08lx\n", f->page); + pr_err("no pte for addr 0x%08lx\n", f->addr); return -1; } @@ -156,7 +167,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) return -1; } - __flush_tlb_one(f->page); + __flush_tlb_one(f->addr); return 0; } @@ -176,12 +187,12 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) int ret; WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); if (f->armed) { - pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n", - f->page, f->count, !!f->old_presence); + pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n", + f->addr, f->count, !!f->old_presence); } ret = clear_page_presence(f, true); - WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"), - f->page); + WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"), + f->addr); f->armed = true; return ret; } @@ -191,7 +202,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) { int ret = clear_page_presence(f, false); WARN_ONCE(ret < 0, - KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); + KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr); f->armed = false; } @@ -215,6 +226,12 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) struct kmmio_context *ctx; struct kmmio_fault_page *faultpage; int ret = 0; /* default to fault not handled */ + unsigned long page_base = addr; + unsigned int l; + pte_t *pte = lookup_address(addr, &l); + if (!pte) + return -EINVAL; + page_base &= page_level_mask(l); /* * Preemption is now disabled to prevent process switch during @@ -227,7 +244,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) preempt_disable(); rcu_read_lock(); - faultpage = get_kmmio_fault_page(addr); + faultpage = get_kmmio_fault_page(page_base); if (!faultpage) { /* * Either this page fault is not caused by kmmio, or @@ -239,7 +256,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { - if (addr == ctx->addr) { + if (page_base == ctx->addr) { /* * A second fault on the same page means some other * condition needs handling by do_page_fault(), the @@ -267,9 +284,9 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx->active++; ctx->fpage = faultpage; - ctx->probe = get_kmmio_probe(addr); + ctx->probe = get_kmmio_probe(page_base); ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); - ctx->addr = addr; + ctx->addr = page_base; if (ctx->probe && ctx->probe->pre_handler) ctx->probe->pre_handler(ctx->probe, regs, addr); @@ -354,12 +371,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) } /* You must be holding kmmio_lock. */ -static int add_kmmio_fault_page(unsigned long page) +static int add_kmmio_fault_page(unsigned long addr) { struct kmmio_fault_page *f; - page &= PAGE_MASK; - f = get_kmmio_fault_page(page); + f = get_kmmio_fault_page(addr); if (f) { if (!f->count) arm_kmmio_fault_page(f); @@ -372,26 +388,25 @@ static int add_kmmio_fault_page(unsigned long page) return -1; f->count = 1; - f->page = page; + f->addr = addr; if (arm_kmmio_fault_page(f)) { kfree(f); return -1; } - list_add_rcu(&f->list, kmmio_page_list(f->page)); + list_add_rcu(&f->list, kmmio_page_list(f->addr)); return 0; } /* You must be holding kmmio_lock. */ -static void release_kmmio_fault_page(unsigned long page, +static void release_kmmio_fault_page(unsigned long addr, struct kmmio_fault_page **release_list) { struct kmmio_fault_page *f; - page &= PAGE_MASK; - f = get_kmmio_fault_page(page); + f = get_kmmio_fault_page(addr); if (!f) return; @@ -420,18 +435,27 @@ int register_kmmio_probe(struct kmmio_probe *p) int ret = 0; unsigned long size = 0; const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); + unsigned int l; + pte_t *pte; spin_lock_irqsave(&kmmio_lock, flags); if (get_kmmio_probe(p->addr)) { ret = -EEXIST; goto out; } + + pte = lookup_address(p->addr, &l); + if (!pte) { + ret = -EINVAL; + goto out; + } + kmmio_count++; list_add_rcu(&p->list, &kmmio_probes); while (size < size_lim) { if (add_kmmio_fault_page(p->addr + size)) pr_err("Unable to set page fault.\n"); - size += PAGE_SIZE; + size += page_level_size(l); } out: spin_unlock_irqrestore(&kmmio_lock, flags); @@ -506,11 +530,17 @@ void unregister_kmmio_probe(struct kmmio_probe *p) const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); struct kmmio_fault_page *release_list = NULL; struct kmmio_delayed_release *drelease; + unsigned int l; + pte_t *pte; + + pte = lookup_address(p->addr, &l); + if (!pte) + return; spin_lock_irqsave(&kmmio_lock, flags); while (size < size_lim) { release_kmmio_fault_page(p->addr + size, &release_list); - size += PAGE_SIZE; + size += page_level_size(l); } list_del_rcu(&p->list); kmmio_count--; From c745297ba18668f8a760493d7d769563c818616e Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Sat, 12 Mar 2016 21:40:32 -0500 Subject: [PATCH 583/797] ext4: fix NULL pointer dereference in ext4_mark_inode_dirty() commit 5e1021f2b6dff1a86a468a1424d59faae2bc63c1 upstream. ext4_reserve_inode_write() in ext4_mark_inode_dirty() could fail on error (e.g. EIO) and iloc.bh can be NULL in this case. But the error is ignored in the following "if" condition and ext4_expand_extra_isize() might be called with NULL iloc.bh set, which triggers NULL pointer dereference. This is uncovered by commit 8b4953e13f4c ("ext4: reserve code points for the project quota feature"), which enlarges the ext4_inode size, and run the following script on new kernel but with old mke2fs: #/bin/bash mnt=/mnt/ext4 devname=ext4-error dev=/dev/mapper/$devname fsimg=/home/fs.img trap cleanup 0 1 2 3 9 15 cleanup() { umount $mnt >/dev/null 2>&1 dmsetup remove $devname losetup -d $backend_dev rm -f $fsimg exit 0 } rm -f $fsimg fallocate -l 1g $fsimg backend_dev=`losetup -f --show $fsimg` devsize=`blockdev --getsz $backend_dev` good_tab="0 $devsize linear $backend_dev 0" error_tab="0 $devsize error $backend_dev 0" dmsetup create $devname --table "$good_tab" mkfs -t ext4 $dev mount -t ext4 -o errors=continue,strictatime $dev $mnt dmsetup load $devname --table "$error_tab" && dmsetup resume $devname echo 3 > /proc/sys/vm/drop_caches ls -l $mnt exit 0 [ Patch changed to simplify the function a tiny bit. -- Ted ] Signed-off-by: Eryu Guan Signed-off-by: Theodore Ts'o Cc: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 06bda0361e7c..547600556bb9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5109,6 +5109,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) might_sleep(); trace_ext4_mark_inode_dirty(inode, _RET_IP_); err = ext4_reserve_inode_write(handle, inode, &iloc); + if (err) + return err; if (ext4_handle_valid(handle) && EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { @@ -5139,9 +5141,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) } } } - if (!err) - err = ext4_mark_iloc_dirty(handle, inode, &iloc); - return err; + return ext4_mark_iloc_dirty(handle, inode, &iloc); } /* From 447ea0a34b78213dd668bf7d0a2b7add1c5675e6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 5 Jan 2016 19:36:37 +0100 Subject: [PATCH 584/797] serial: sh-sci: Remove cpufreq notifier to fix crash/deadlock commit ff1cab374ad98f4b9f408525ca9c08992b4ed784 upstream. The BSP team noticed that there is spin/mutex lock issue on sh-sci when CPUFREQ is used. The issue is that the notifier function may call mutex_lock() while the spinlock is held, which can lead to a BUG(). This may happen if CPUFREQ is changed while another CPU calls clk_get_rate(). Taking the spinlock was added to the notifier function in commit e552de2413edad1a ("sh-sci: add platform device private data"), to protect the list of serial ports against modification during traversal. At that time the Common Clock Framework didn't exist yet, and clk_get_rate() just returned clk->rate without taking a mutex. Note that since commit d535a2305facf9b4 ("serial: sh-sci: Require a device per port mapping."), there's no longer a list of serial ports to traverse, and taking the spinlock became superfluous. To fix the issue, just remove the cpufreq notifier: 1. The notifier doesn't work correctly: all it does is update stored clock rates; it does not update the divider in the hardware. The divider will only be updated when calling sci_set_termios(). I believe this was broken back in 2004, when the old drivers/char/sh-sci.c driver (where the notifier did update the divider) was replaced by drivers/serial/sh-sci.c (where the notifier just updated port->uartclk). Cfr. full-history-linux commits 6f8deaef2e9675d9 ("[PATCH] sh: port sh-sci driver to the new API") and 3f73fe878dc9210a ("[PATCH] Remove old sh-sci driver"). 2. On modern SoCs, the sh-sci parent clock rate is no longer related to the CPU clock rate anyway, so using a cpufreq notifier is futile. Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 39 ------------------------------------- 1 file changed, 39 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 51c7507b0444..63a06ab6ba03 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -116,8 +115,6 @@ struct sci_port { struct timer_list rx_timer; unsigned int rx_timeout; #endif - - struct notifier_block freq_transition; }; #define SCI_NPORTS CONFIG_SERIAL_SH_SCI_NR_UARTS @@ -1606,29 +1603,6 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr) return ret; } -/* - * Here we define a transition notifier so that we can update all of our - * ports' baud rate when the peripheral clock changes. - */ -static int sci_notifier(struct notifier_block *self, - unsigned long phase, void *p) -{ - struct sci_port *sci_port; - unsigned long flags; - - sci_port = container_of(self, struct sci_port, freq_transition); - - if (phase == CPUFREQ_POSTCHANGE) { - struct uart_port *port = &sci_port->port; - - spin_lock_irqsave(&port->lock, flags); - port->uartclk = clk_get_rate(sci_port->iclk); - spin_unlock_irqrestore(&port->lock, flags); - } - - return NOTIFY_OK; -} - static const struct sci_irq_desc { const char *desc; irq_handler_t handler; @@ -2559,9 +2533,6 @@ static int sci_remove(struct platform_device *dev) { struct sci_port *port = platform_get_drvdata(dev); - cpufreq_unregister_notifier(&port->freq_transition, - CPUFREQ_TRANSITION_NOTIFIER); - uart_remove_one_port(&sci_uart_driver, &port->port); sci_cleanup_single(port); @@ -2714,16 +2685,6 @@ static int sci_probe(struct platform_device *dev) if (ret) return ret; - sp->freq_transition.notifier_call = sci_notifier; - - ret = cpufreq_register_notifier(&sp->freq_transition, - CPUFREQ_TRANSITION_NOTIFIER); - if (unlikely(ret < 0)) { - uart_remove_one_port(&sci_uart_driver, &sp->port); - sci_cleanup_single(sp); - return ret; - } - #ifdef CONFIG_SH_STANDARD_BIOS sh_bios_gdb_detach(); #endif From 87261de30fd8e5ebd441cd2f05df73ddf04c2af2 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Wed, 3 Feb 2016 14:26:46 +0100 Subject: [PATCH 585/797] mtd: spi-nor: remove micron_quad_enable() commit 3b5394a3ccffbfa1d1d448d48742853a862822c4 upstream. This patch remove the micron_quad_enable() function which force the Quad SPI mode. However, once this mode is enabled, the Micron memory expect ALL commands to use the SPI 4-4-4 protocol. Hence a failure does occur when calling spi_nor_wait_till_ready() right after the update of the Enhanced Volatile Configuration Register (EVCR) in the micron_quad_enable() as the SPI controller driver is not aware about the protocol change. Since there is almost no performance increase using Fast Read 4-4-4 commands instead of Fast Read 1-1-4 commands, we rather keep on using the Extended SPI mode than enabling the Quad SPI mode. Let's take the example of the pretty standard use of 8 dummy cycles during Fast Read operations on 64KB erase sectors: Fast Read 1-1-4 requires 8 cycles for the command, then 24 cycles for the 3byte address followed by 8 dummy clock cycles and finally 65536*2 cycles for the read data; so 131112 clock cycles. On the other hand the Fast Read 4-4-4 would require 2 cycles for the command, then 6 cycles for the 3byte address followed by 8 dummy clock cycles and finally 65536*2 cycles for the read data. So 131088 clock cycles. The theorical bandwidth increase is 0.0%. Now using Fast Read operations on 512byte pages: Fast Read 1-1-4 needs 8+24+8+(512*2) = 1064 clock cycles whereas Fast Read 4-4-4 would requires 2+6+8+(512*2) = 1040 clock cycles. Hence the theorical bandwidth increase is 2.3%. Consecutive reads for non sequential pages is not a relevant use case so The Quad SPI mode is not worth it. mtd_speedtest seems to confirm these figures. Signed-off-by: Cyrille Pitchen Fixes: 548cd3ab54da ("mtd: spi-nor: Add quad I/O support for Micron SPI NOR") Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/spi-nor.c | 46 +---------------------------------- 1 file changed, 1 insertion(+), 45 deletions(-) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 32477c4eb421..37e4135ab213 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1067,45 +1067,6 @@ static int spansion_quad_enable(struct spi_nor *nor) return 0; } -static int micron_quad_enable(struct spi_nor *nor) -{ - int ret; - u8 val; - - ret = nor->read_reg(nor, SPINOR_OP_RD_EVCR, &val, 1); - if (ret < 0) { - dev_err(nor->dev, "error %d reading EVCR\n", ret); - return ret; - } - - write_enable(nor); - - /* set EVCR, enable quad I/O */ - nor->cmd_buf[0] = val & ~EVCR_QUAD_EN_MICRON; - ret = nor->write_reg(nor, SPINOR_OP_WD_EVCR, nor->cmd_buf, 1); - if (ret < 0) { - dev_err(nor->dev, "error while writing EVCR register\n"); - return ret; - } - - ret = spi_nor_wait_till_ready(nor); - if (ret) - return ret; - - /* read EVCR and check it */ - ret = nor->read_reg(nor, SPINOR_OP_RD_EVCR, &val, 1); - if (ret < 0) { - dev_err(nor->dev, "error %d reading EVCR\n", ret); - return ret; - } - if (val & EVCR_QUAD_EN_MICRON) { - dev_err(nor->dev, "Micron EVCR Quad bit not clear\n"); - return -EINVAL; - } - - return 0; -} - static int set_quad_mode(struct spi_nor *nor, const struct flash_info *info) { int status; @@ -1119,12 +1080,7 @@ static int set_quad_mode(struct spi_nor *nor, const struct flash_info *info) } return status; case SNOR_MFR_MICRON: - status = micron_quad_enable(nor); - if (status) { - dev_err(nor->dev, "Micron quad-read not enabled\n"); - return -EINVAL; - } - return status; + return 0; default: status = spansion_quad_enable(nor); if (status) { From 67891850e58b0190005441cf4f54da957fed8e01 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 24 Feb 2016 16:07:23 -0800 Subject: [PATCH 586/797] mtd: brcmnand: Fix v7.1 register offsets commit d267aefc54a28efc5bda7f009598dc83b5f98734 upstream. The BRCMNAND controller revision 7.1 is almost 100% compatible with the previous v6.0 register offset layout, except for the Correctable Error Reporting Threshold registers. Fix this by adding another table with the correct offsets for CORR_THRESHOLD and CORR_THRESHOLD_EXT. Fixes: 27c5b17cd1b1 ("mtd: nand: add NAND driver "library" for Broadcom STB NAND controller") Signed-off-by: Florian Fainelli Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/brcmnand/brcmnand.c | 34 +++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index 12c6190c6e33..4a07ba1195b5 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -309,6 +309,36 @@ static const u16 brcmnand_regs_v60[] = { [BRCMNAND_FC_BASE] = 0x400, }; +/* BRCMNAND v7.1 */ +static const u16 brcmnand_regs_v71[] = { + [BRCMNAND_CMD_START] = 0x04, + [BRCMNAND_CMD_EXT_ADDRESS] = 0x08, + [BRCMNAND_CMD_ADDRESS] = 0x0c, + [BRCMNAND_INTFC_STATUS] = 0x14, + [BRCMNAND_CS_SELECT] = 0x18, + [BRCMNAND_CS_XOR] = 0x1c, + [BRCMNAND_LL_OP] = 0x20, + [BRCMNAND_CS0_BASE] = 0x50, + [BRCMNAND_CS1_BASE] = 0, + [BRCMNAND_CORR_THRESHOLD] = 0xdc, + [BRCMNAND_CORR_THRESHOLD_EXT] = 0xe0, + [BRCMNAND_UNCORR_COUNT] = 0xfc, + [BRCMNAND_CORR_COUNT] = 0x100, + [BRCMNAND_CORR_EXT_ADDR] = 0x10c, + [BRCMNAND_CORR_ADDR] = 0x110, + [BRCMNAND_UNCORR_EXT_ADDR] = 0x114, + [BRCMNAND_UNCORR_ADDR] = 0x118, + [BRCMNAND_SEMAPHORE] = 0x150, + [BRCMNAND_ID] = 0x194, + [BRCMNAND_ID_EXT] = 0x198, + [BRCMNAND_LL_RDATA] = 0x19c, + [BRCMNAND_OOB_READ_BASE] = 0x200, + [BRCMNAND_OOB_READ_10_BASE] = 0, + [BRCMNAND_OOB_WRITE_BASE] = 0x280, + [BRCMNAND_OOB_WRITE_10_BASE] = 0, + [BRCMNAND_FC_BASE] = 0x400, +}; + enum brcmnand_cs_reg { BRCMNAND_CS_CFG_EXT = 0, BRCMNAND_CS_CFG, @@ -404,7 +434,9 @@ static int brcmnand_revision_init(struct brcmnand_controller *ctrl) } /* Register offsets */ - if (ctrl->nand_version >= 0x0600) + if (ctrl->nand_version >= 0x0701) + ctrl->reg_offsets = brcmnand_regs_v71; + else if (ctrl->nand_version >= 0x0600) ctrl->reg_offsets = brcmnand_regs_v60; else if (ctrl->nand_version >= 0x0500) ctrl->reg_offsets = brcmnand_regs_v50; From 35bfb7949b7f23cdbfda12d83a8038f640b49141 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20Garc=C3=ADa?= Date: Fri, 1 Apr 2016 18:29:23 -0300 Subject: [PATCH 587/797] mtd: nand: Drop mtd.owner requirement in nand_scan commit 20c07a5bf094198ff2382aa5e7c930b3c9807792 upstream. Since commit 807f16d4db95 ("mtd: core: set some defaults when dev.parent is set"), it's now legal for drivers to call nand_scan and nand_scan_ident without setting mtd.owner. Drop the check and while at it remove the BUG() abuse. Fixes: 807f16d4db95 ("mtd: core: set some defaults when dev.parent is set") Signed-off-by: Ezequiel Garcia Acked-by: Boris Brezillon [Brian: editorial note - while commit 807f16d4db95 wasn't explicitly broken, some follow-up commits in the v4.4 release broke a few drivers, since they would hit this BUG() if they used nand_scan() and were built as modules] Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/nand_base.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 3ff583f165cd..ce7b2cab5762 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -3979,7 +3979,6 @@ static int nand_dt_init(struct mtd_info *mtd, struct nand_chip *chip, * This is the first phase of the normal nand_scan() function. It reads the * flash ID and sets up MTD fields accordingly. * - * The mtd->owner field must be set to the module of the caller. */ int nand_scan_ident(struct mtd_info *mtd, int maxchips, struct nand_flash_dev *table) @@ -4403,19 +4402,12 @@ EXPORT_SYMBOL(nand_scan_tail); * * This fills out all the uninitialized function pointers with the defaults. * The flash ID is read and the mtd/chip structures are filled with the - * appropriate values. The mtd->owner field must be set to the module of the - * caller. + * appropriate values. */ int nand_scan(struct mtd_info *mtd, int maxchips) { int ret; - /* Many callers got this wrong, so check for it for a while... */ - if (!mtd->owner && caller_is_module()) { - pr_crit("%s called with NULL mtd->owner!\n", __func__); - BUG(); - } - ret = nand_scan_ident(mtd, maxchips, NULL); if (!ret) ret = nand_scan_tail(mtd); From c3173539ec17901391863321e1eaf335b0029a09 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 21 Jan 2016 19:50:09 -0300 Subject: [PATCH 588/797] perf hists browser: Only offer symbol scripting when a symbol is under the cursor commit c221acb0f970d3b80d72c812cda19c121acf5d52 upstream. When this feature was introduced a check was made if there was a resolved symbol under the cursor, it got lost in commit ea7cd5923309 ("perf hists browser: Split popup menu actions - part 2"), reinstate it. Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa , Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Fixes: ea7cd5923309 ("perf hists browser: Split popup menu actions - part 2") Link: http://lkml.kernel.org/r/1452960197-5323-9-git-send-email-namhyung@kernel.org [ Carved out from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/ui/browsers/hists.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 81def6c3f24b..3900386a3629 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2059,10 +2059,12 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, * * See hist_browser__show_entry. */ - nr_options += add_script_opt(browser, - &actions[nr_options], - &options[nr_options], - NULL, browser->selection->sym); + if (sort__has_sym && browser->selection->sym) { + nr_options += add_script_opt(browser, + &actions[nr_options], + &options[nr_options], + NULL, browser->selection->sym); + } } nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], NULL, NULL); From dcfdb38c41385bc3cb7be295eb5b9d4b00ea1177 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20=C5=9Alusarz?= Date: Tue, 19 Jan 2016 20:03:03 +0100 Subject: [PATCH 589/797] perf tools: handle spaces in file names obtained from /proc/pid/maps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 89fee59b504f86925894fcc9ba79d5c933842f93 upstream. Steam frequently puts game binaries in folders with spaces. Note: "(deleted)" markers are now treated as part of the file name. Signed-off-by: Marcin Ślusarz Acked-by: Namhyung Kim Fixes: 6064803313ba ("perf tools: Use sscanf for parsing /proc/pid/maps") Link: http://lkml.kernel.org/r/20160119190303.GA17579@marcin-Inspiron-7720 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 8b10621b415c..956187bf1a85 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -274,7 +274,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, ""); /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n", + n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %[^\n]\n", &event->mmap2.start, &event->mmap2.len, prot, &event->mmap2.pgoff, &event->mmap2.maj, &event->mmap2.min, From d7b60bafb195dd349821e422b1dbc8b897eeb368 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 7 Mar 2016 16:44:44 -0300 Subject: [PATCH 590/797] perf stat: Document --detailed option commit f594bae08183fb6b57db55387794ece3e1edf6f6 upstream. I'm surprised this remained undocumented since at least 2011. And it is actually a very useful switch, as Steve and I came to realize recently. Add the text from 2cba3ffb9a9d ("perf stat: Add -d -d and -d -d -d options to show more CPU events") which added the incrementing aspect to -d. Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Borislav Petkov Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Davidlohr Bueso Cc: Jiri Olsa Cc: Mel Gorman Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Fixes: 2cba3ffb9a9d ("perf stat: Add -d -d and -d -d -d options to show more CPU events") Link: http://lkml.kernel.org/r/1457347294-32546-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/perf/Documentation/perf-stat.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 4e074a660826..90c3558c2c12 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -62,6 +62,14 @@ OPTIONS --scale:: scale/normalize counter values +-d:: +--detailed:: + print more detailed statistics, can be specified up to 3 times + + -d: detailed events, L1 and LLC data cache + -d -d: more detailed events, dTLB and iTLB events + -d -d -d: very detailed events, adding prefetch events + -r:: --repeat=:: repeat command and print average + stddev (max: 100). 0 means forever. From 0b680de452570274716c2c9990903acea525f0d0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 7 Dec 2015 14:28:03 -0500 Subject: [PATCH 591/797] ext4: fix races between page faults and hole punching commit ea3d7209ca01da209cda6f0dea8be9cc4b7a933b upstream. Currently, page faults and hole punching are completely unsynchronized. This can result in page fault faulting in a page into a range that we are punching after truncate_pagecache_range() has been called and thus we can end up with a page mapped to disk blocks that will be shortly freed. Filesystem corruption will shortly follow. Note that the same race is avoided for truncate by checking page fault offset against i_size but there isn't similar mechanism available for punching holes. Fix the problem by creating new rw semaphore i_mmap_sem in inode and grab it for writing over truncate, hole punching, and other functions removing blocks from extent tree and for read over page faults. We cannot easily use i_data_sem for this since that ranks below transaction start and we need something ranking above it so that it can be held over the whole truncate / hole punching operation. Also remove various workarounds we had in the code to reduce race window when page fault could have created pages with stale mapping information. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 10 +++++++ fs/ext4/extents.c | 54 ++++++++++++++++++++----------------- fs/ext4/file.c | 66 +++++++++++++++++++++++++++++++++++++++------- fs/ext4/inode.c | 36 ++++++++++++++++++------- fs/ext4/super.c | 1 + fs/ext4/truncate.h | 2 ++ 6 files changed, 127 insertions(+), 42 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d4156e1c128d..89df9f55595b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -933,6 +933,15 @@ struct ext4_inode_info { * by other means, so we have i_data_sem. */ struct rw_semaphore i_data_sem; + /* + * i_mmap_sem is for serializing page faults with truncate / punch hole + * operations. We have to make sure that new page cannot be faulted in + * a section of the inode that is being punched. We cannot easily use + * i_data_sem for this since we need protection for the whole punch + * operation and i_data_sem ranks below transaction start so we have + * to occasionally drop it. + */ + struct rw_semaphore i_mmap_sem; struct inode vfs_inode; struct jbd2_inode *jinode; @@ -2507,6 +2516,7 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, loff_t lstart, loff_t lend); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); +extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern void ext4_da_update_reserve_space(struct inode *inode, int used, int quota_claim); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 551353b1b17a..5be9ca5a8a7a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4770,7 +4770,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, int partial_begin, partial_end; loff_t start, end; ext4_lblk_t lblk; - struct address_space *mapping = inode->i_mapping; unsigned int blkbits = inode->i_blkbits; trace_ext4_zero_range(inode, offset, len, mode); @@ -4785,17 +4784,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, return ret; } - /* - * Write out all dirty pages to avoid race conditions - * Then release them. - */ - if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { - ret = filemap_write_and_wait_range(mapping, offset, - offset + len - 1); - if (ret) - return ret; - } - /* * Round up offset. This is not fallocate, we neet to zero out * blocks, so convert interior block aligned part of the range to @@ -4856,16 +4844,22 @@ static long ext4_zero_range(struct file *file, loff_t offset, flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | EXT4_EX_NOCACHE); - /* Now release the pages and zero block aligned part of pages*/ - truncate_pagecache_range(inode, start, end - 1); - inode->i_mtime = inode->i_ctime = ext4_current_time(inode); - /* Wait all existing dio workers, newcomers will block on i_mutex */ ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); + /* + * Prevent page faults from reinstantiating pages we have + * released from page cache. + */ + down_write(&EXT4_I(inode)->i_mmap_sem); + /* Now release the pages and zero block aligned part of pages */ + truncate_pagecache_range(inode, start, end - 1); + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags, mode); + up_write(&EXT4_I(inode)->i_mmap_sem); if (ret) goto out_dio; } @@ -5524,17 +5518,22 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) goto out_mutex; } - truncate_pagecache(inode, ioffset); - /* Wait for existing dio to complete */ ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); + /* + * Prevent page faults from reinstantiating pages we have released from + * page cache. + */ + down_write(&EXT4_I(inode)->i_mmap_sem); + truncate_pagecache(inode, ioffset); + credits = ext4_writepage_trans_blocks(inode); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); - goto out_dio; + goto out_mmap; } down_write(&EXT4_I(inode)->i_data_sem); @@ -5573,7 +5572,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) out_stop: ext4_journal_stop(handle); -out_dio: +out_mmap: + up_write(&EXT4_I(inode)->i_mmap_sem); ext4_inode_resume_unlocked_dio(inode); out_mutex: mutex_unlock(&inode->i_mutex); @@ -5660,17 +5660,22 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) goto out_mutex; } - truncate_pagecache(inode, ioffset); - /* Wait for existing dio to complete */ ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); + /* + * Prevent page faults from reinstantiating pages we have released from + * page cache. + */ + down_write(&EXT4_I(inode)->i_mmap_sem); + truncate_pagecache(inode, ioffset); + credits = ext4_writepage_trans_blocks(inode); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); - goto out_dio; + goto out_mmap; } /* Expand file to avoid data loss if there is error while shifting */ @@ -5741,7 +5746,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) out_stop: ext4_journal_stop(handle); -out_dio: +out_mmap: + up_write(&EXT4_I(inode)->i_mmap_sem); ext4_inode_resume_unlocked_dio(inode); out_mutex: mutex_unlock(&inode->i_mutex); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 113837e7ba98..0d24ebcd7c9e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -209,15 +209,18 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { int result; handle_t *handle = NULL; - struct super_block *sb = file_inode(vma->vm_file)->i_sb; + struct inode *inode = file_inode(vma->vm_file); + struct super_block *sb = inode->i_sb; bool write = vmf->flags & FAULT_FLAG_WRITE; if (write) { sb_start_pagefault(sb); file_update_time(vma->vm_file); + down_read(&EXT4_I(inode)->i_mmap_sem); handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, EXT4_DATA_TRANS_BLOCKS(sb)); - } + } else + down_read(&EXT4_I(inode)->i_mmap_sem); if (IS_ERR(handle)) result = VM_FAULT_SIGBUS; @@ -228,8 +231,10 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (write) { if (!IS_ERR(handle)) ext4_journal_stop(handle); + up_read(&EXT4_I(inode)->i_mmap_sem); sb_end_pagefault(sb); - } + } else + up_read(&EXT4_I(inode)->i_mmap_sem); return result; } @@ -246,10 +251,12 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, if (write) { sb_start_pagefault(sb); file_update_time(vma->vm_file); + down_read(&EXT4_I(inode)->i_mmap_sem); handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, ext4_chunk_trans_blocks(inode, PMD_SIZE / PAGE_SIZE)); - } + } else + down_read(&EXT4_I(inode)->i_mmap_sem); if (IS_ERR(handle)) result = VM_FAULT_SIGBUS; @@ -260,30 +267,71 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, if (write) { if (!IS_ERR(handle)) ext4_journal_stop(handle); + up_read(&EXT4_I(inode)->i_mmap_sem); sb_end_pagefault(sb); - } + } else + up_read(&EXT4_I(inode)->i_mmap_sem); return result; } static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { - return dax_mkwrite(vma, vmf, ext4_get_block_dax, - ext4_end_io_unwritten); + int err; + struct inode *inode = file_inode(vma->vm_file); + + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + down_read(&EXT4_I(inode)->i_mmap_sem); + err = __dax_mkwrite(vma, vmf, ext4_get_block_dax, + ext4_end_io_unwritten); + up_read(&EXT4_I(inode)->i_mmap_sem); + sb_end_pagefault(inode->i_sb); + + return err; +} + +/* + * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite() + * handler we check for races agaist truncate. Note that since we cycle through + * i_mmap_sem, we are sure that also any hole punching that began before we + * were called is finished by now and so if it included part of the file we + * are working on, our pte will get unmapped and the check for pte_same() in + * wp_pfn_shared() fails. Thus fault gets retried and things work out as + * desired. + */ +static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vma->vm_file); + struct super_block *sb = inode->i_sb; + int ret = VM_FAULT_NOPAGE; + loff_t size; + + sb_start_pagefault(sb); + file_update_time(vma->vm_file); + down_read(&EXT4_I(inode)->i_mmap_sem); + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (vmf->pgoff >= size) + ret = VM_FAULT_SIGBUS; + up_read(&EXT4_I(inode)->i_mmap_sem); + sb_end_pagefault(sb); + + return ret; } static const struct vm_operations_struct ext4_dax_vm_ops = { .fault = ext4_dax_fault, .pmd_fault = ext4_dax_pmd_fault, .page_mkwrite = ext4_dax_mkwrite, - .pfn_mkwrite = dax_pfn_mkwrite, + .pfn_mkwrite = ext4_dax_pfn_mkwrite, }; #else #define ext4_dax_vm_ops ext4_file_vm_ops #endif static const struct vm_operations_struct ext4_file_vm_ops = { - .fault = filemap_fault, + .fault = ext4_filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = ext4_page_mkwrite, }; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 547600556bb9..214e30a3ef9e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3651,6 +3651,15 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) } + /* Wait all existing dio workers, newcomers will block on i_mutex */ + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + + /* + * Prevent page faults from reinstantiating pages we have released from + * page cache. + */ + down_write(&EXT4_I(inode)->i_mmap_sem); first_block_offset = round_up(offset, sb->s_blocksize); last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; @@ -3659,10 +3668,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) truncate_pagecache_range(inode, first_block_offset, last_block_offset); - /* Wait all existing dio workers, newcomers will block on i_mutex */ - ext4_inode_block_unlocked_dio(inode); - inode_dio_wait(inode); - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) credits = ext4_writepage_trans_blocks(inode); else @@ -3708,16 +3713,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) if (IS_SYNC(inode)) ext4_handle_sync(handle); - /* Now release the pages again to reduce race window */ - if (last_block_offset > first_block_offset) - truncate_pagecache_range(inode, first_block_offset, - last_block_offset); - inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); out_stop: ext4_journal_stop(handle); out_dio: + up_write(&EXT4_I(inode)->i_mmap_sem); ext4_inode_resume_unlocked_dio(inode); out_mutex: mutex_unlock(&inode->i_mutex); @@ -4851,6 +4852,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) } else ext4_wait_for_tail_page_commit(inode); } + down_write(&EXT4_I(inode)->i_mmap_sem); /* * Truncate pagecache after we've waited for commit * in data=journal mode to make pages freeable. @@ -4858,6 +4860,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) truncate_pagecache(inode, inode->i_size); if (shrink) ext4_truncate(inode); + up_write(&EXT4_I(inode)->i_mmap_sem); } if (!rc) { @@ -5306,6 +5309,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) sb_start_pagefault(inode->i_sb); file_update_time(vma->vm_file); + + down_read(&EXT4_I(inode)->i_mmap_sem); /* Delalloc case is easy... */ if (test_opt(inode->i_sb, DELALLOC) && !ext4_should_journal_data(inode) && @@ -5375,6 +5380,19 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) out_ret: ret = block_page_mkwrite_return(ret); out: + up_read(&EXT4_I(inode)->i_mmap_sem); sb_end_pagefault(inode->i_sb); return ret; } + +int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vma->vm_file); + int err; + + down_read(&EXT4_I(inode)->i_mmap_sem); + err = filemap_fault(vma, vmf); + up_read(&EXT4_I(inode)->i_mmap_sem); + + return err; +} diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ba1cf0bf2f81..852c26806af2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -958,6 +958,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&ei->i_orphan); init_rwsem(&ei->xattr_sem); init_rwsem(&ei->i_data_sem); + init_rwsem(&ei->i_mmap_sem); inode_init_once(&ei->vfs_inode); } diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h index 011ba6670d99..c70d06a383e2 100644 --- a/fs/ext4/truncate.h +++ b/fs/ext4/truncate.h @@ -10,8 +10,10 @@ */ static inline void ext4_truncate_failed_write(struct inode *inode) { + down_write(&EXT4_I(inode)->i_mmap_sem); truncate_inode_pages(inode->i_mapping, inode->i_size); ext4_truncate(inode); + up_write(&EXT4_I(inode)->i_mmap_sem); } /* From e096ade68c13011ba6548a542c1fc00e14555f5c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 7 Dec 2015 14:29:17 -0500 Subject: [PATCH 592/797] ext4: move unlocked dio protection from ext4_alloc_file_blocks() commit 17048e8a083fec7ad841d88ef0812707fbc7e39f upstream. Currently ext4_alloc_file_blocks() was handling protection against unlocked DIO. However we now need to sometimes call it under i_mmap_sem and sometimes not and DIO protection ranks above it (although strictly speaking this cannot currently create any deadlocks). Also ext4_zero_range() was actually getting & releasing unlocked DIO protection twice in some cases. Luckily it didn't introduce any real bug but it was a land mine waiting to be stepped on. So move DIO protection out from ext4_alloc_file_blocks() into the two callsites. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 5be9ca5a8a7a..65b5ada2833f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4685,10 +4685,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, if (len <= EXT_UNWRITTEN_MAX_LEN) flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; - /* Wait all existing dio workers, newcomers will block on i_mutex */ - ext4_inode_block_unlocked_dio(inode); - inode_dio_wait(inode); - /* * credits to insert 1 extent into extent tree */ @@ -4752,8 +4748,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, goto retry; } - ext4_inode_resume_unlocked_dio(inode); - return ret > 0 ? ret2 : ret; } @@ -4827,6 +4821,10 @@ static long ext4_zero_range(struct file *file, loff_t offset, if (mode & FALLOC_FL_KEEP_SIZE) flags |= EXT4_GET_BLOCKS_KEEP_SIZE; + /* Wait all existing dio workers, newcomers will block on i_mutex */ + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + /* Preallocate the range including the unaligned edges */ if (partial_begin || partial_end) { ret = ext4_alloc_file_blocks(file, @@ -4835,7 +4833,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, round_down(offset, 1 << blkbits)) >> blkbits, new_size, flags, mode); if (ret) - goto out_mutex; + goto out_dio; } @@ -4844,10 +4842,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | EXT4_EX_NOCACHE); - /* Wait all existing dio workers, newcomers will block on i_mutex */ - ext4_inode_block_unlocked_dio(inode); - inode_dio_wait(inode); - /* * Prevent page faults from reinstantiating pages we have * released from page cache. @@ -4992,8 +4986,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) goto out; } + /* Wait all existing dio workers, newcomers will block on i_mutex */ + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags, mode); + ext4_inode_resume_unlocked_dio(inode); if (ret) goto out; From 1f7b7e9a4ba3d60af27c78a149743d269e6fb848 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 7 Dec 2015 14:31:11 -0500 Subject: [PATCH 593/797] ext4: fix races between buffered IO and collapse / insert range commit 32ebffd3bbb4162da5ff88f9a35dd32d0a28ea70 upstream. Current code implementing FALLOC_FL_COLLAPSE_RANGE and FALLOC_FL_INSERT_RANGE is prone to races with buffered writes and page faults. If buffered write or write via mmap manages to squeeze between filemap_write_and_wait_range() and truncate_pagecache() in the fallocate implementations, the written data is simply discarded by truncate_pagecache() although it should have been shifted. Fix the problem by moving filemap_write_and_wait_range() call inside i_mutex and i_mmap_sem. That way we are protected against races with both buffered writes and page faults. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 59 +++++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 65b5ada2833f..4b105c96df08 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5487,21 +5487,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) return ret; } - /* - * Need to round down offset to be aligned with page size boundary - * for page size > block size. - */ - ioffset = round_down(offset, PAGE_SIZE); - - /* Write out all dirty pages */ - ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, - LLONG_MAX); - if (ret) - return ret; - - /* Take mutex lock */ mutex_lock(&inode->i_mutex); - /* * There is no need to overlap collapse range with EOF, in which case * it is effectively a truncate operation @@ -5526,6 +5512,27 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) * page cache. */ down_write(&EXT4_I(inode)->i_mmap_sem); + /* + * Need to round down offset to be aligned with page size boundary + * for page size > block size. + */ + ioffset = round_down(offset, PAGE_SIZE); + /* + * Write tail of the last page before removed range since it will get + * removed from the page cache below. + */ + ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset); + if (ret) + goto out_mmap; + /* + * Write data that will be shifted to preserve them when discarding + * page cache below. We are also protected from pages becoming dirty + * by i_mmap_sem. + */ + ret = filemap_write_and_wait_range(inode->i_mapping, offset + len, + LLONG_MAX); + if (ret) + goto out_mmap; truncate_pagecache(inode, ioffset); credits = ext4_writepage_trans_blocks(inode); @@ -5626,21 +5633,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) return ret; } - /* - * Need to round down to align start offset to page size boundary - * for page size > block size. - */ - ioffset = round_down(offset, PAGE_SIZE); - - /* Write out all dirty pages */ - ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, - LLONG_MAX); - if (ret) - return ret; - - /* Take mutex lock */ mutex_lock(&inode->i_mutex); - /* Currently just for extent based files */ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { ret = -EOPNOTSUPP; @@ -5668,6 +5661,16 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) * page cache. */ down_write(&EXT4_I(inode)->i_mmap_sem); + /* + * Need to round down to align start offset to page size boundary + * for page size > block size. + */ + ioffset = round_down(offset, PAGE_SIZE); + /* Write out all dirty pages */ + ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, + LLONG_MAX); + if (ret) + goto out_mmap; truncate_pagecache(inode, ioffset); credits = ext4_writepage_trans_blocks(inode); From 21228341bf17496062b0e6a1b37265f6bcf5c8f3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 7 Dec 2015 14:34:49 -0500 Subject: [PATCH 594/797] ext4: fix races of writeback with punch hole and zero range commit 011278485ecc3cd2a3954b5d4c73101d919bf1fa upstream. When doing delayed allocation, update of on-disk inode size is postponed until IO submission time. However hole punch or zero range fallocate calls can end up discarding the tail page cache page and thus on-disk inode size would never be properly updated. Make sure the on-disk inode size is updated before truncating page cache. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 3 +++ fs/ext4/extents.c | 5 +++++ fs/ext4/inode.c | 35 ++++++++++++++++++++++++++++++++++- 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 89df9f55595b..b7e921d207fb 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2881,6 +2881,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize) return changed; } +int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, + loff_t len); + struct ext4_group_info { unsigned long bb_state; struct rb_root bb_free_root; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4b105c96df08..3578b25fccfd 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4847,6 +4847,11 @@ static long ext4_zero_range(struct file *file, loff_t offset, * released from page cache. */ down_write(&EXT4_I(inode)->i_mmap_sem); + ret = ext4_update_disksize_before_punch(inode, offset, len); + if (ret) { + up_write(&EXT4_I(inode)->i_mmap_sem); + goto out_dio; + } /* Now release the pages and zero block aligned part of pages */ truncate_pagecache_range(inode, start, end - 1); inode->i_mtime = inode->i_ctime = ext4_current_time(inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 214e30a3ef9e..e31d762eedce 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3586,6 +3586,35 @@ int ext4_can_truncate(struct inode *inode) return 0; } +/* + * We have to make sure i_disksize gets properly updated before we truncate + * page cache due to hole punching or zero range. Otherwise i_disksize update + * can get lost as it may have been postponed to submission of writeback but + * that will never happen after we truncate page cache. + */ +int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, + loff_t len) +{ + handle_t *handle; + loff_t size = i_size_read(inode); + + WARN_ON(!mutex_is_locked(&inode->i_mutex)); + if (offset > size || offset + len < size) + return 0; + + if (EXT4_I(inode)->i_disksize >= size) + return 0; + + handle = ext4_journal_start(inode, EXT4_HT_MISC, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ext4_update_i_disksize(inode, size); + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + + return 0; +} + /* * ext4_punch_hole: punches a hole in a file by releaseing the blocks * associated with the given offset and length @@ -3664,9 +3693,13 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; /* Now release the pages and zero block aligned part of pages*/ - if (last_block_offset > first_block_offset) + if (last_block_offset > first_block_offset) { + ret = ext4_update_disksize_before_punch(inode, offset, length); + if (ret) + goto out_dio; truncate_pagecache_range(inode, first_block_offset, last_block_offset); + } if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) credits = ext4_writepage_trans_blocks(inode); From 40cab474b47b2fc87911812687e83f8cd21aea1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Fri, 19 Feb 2016 10:35:39 -0800 Subject: [PATCH 595/797] ARM: OMAP3: Add cpuidle parameters table for omap3430 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 98f42221501353067251fbf11e732707dbb68ce3 upstream. Based on CPU type choose generic omap3 or omap3430 specific cpuidle parameters. Parameters for omap3430 were measured on Nokia N900 device and added by commit 5a1b1d3a9efa ("OMAP3: RX-51: Pass cpu idle parameters") which were later removed by commit 231900afba52 ("ARM: OMAP3: cpuidle - remove rx51 cpuidle parameters table") due to huge code complexity. This patch brings cpuidle parameters for omap3430 devices again, but uses simple condition based on CPU type. Fixes: 231900afba52 ("ARM: OMAP3: cpuidle - remove rx51 cpuidle parameters table") Signed-off-by: Pali Rohár Acked-by: Daniel Lezcano Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/cpuidle34xx.c | 69 ++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c index aa7b379e2661..2a3db0bd9e15 100644 --- a/arch/arm/mach-omap2/cpuidle34xx.c +++ b/arch/arm/mach-omap2/cpuidle34xx.c @@ -34,6 +34,7 @@ #include "pm.h" #include "control.h" #include "common.h" +#include "soc.h" /* Mach specific information to be recorded in the C-state driver_data */ struct omap3_idle_statedata { @@ -315,6 +316,69 @@ static struct cpuidle_driver omap3_idle_driver = { .safe_state_index = 0, }; +/* + * Numbers based on measurements made in October 2009 for PM optimized kernel + * with CPU freq enabled on device Nokia N900. Assumes OPP2 (main idle OPP, + * and worst case latencies). + */ +static struct cpuidle_driver omap3430_idle_driver = { + .name = "omap3430_idle", + .owner = THIS_MODULE, + .states = { + { + .enter = omap3_enter_idle_bm, + .exit_latency = 110 + 162, + .target_residency = 5, + .name = "C1", + .desc = "MPU ON + CORE ON", + }, + { + .enter = omap3_enter_idle_bm, + .exit_latency = 106 + 180, + .target_residency = 309, + .name = "C2", + .desc = "MPU ON + CORE ON", + }, + { + .enter = omap3_enter_idle_bm, + .exit_latency = 107 + 410, + .target_residency = 46057, + .name = "C3", + .desc = "MPU RET + CORE ON", + }, + { + .enter = omap3_enter_idle_bm, + .exit_latency = 121 + 3374, + .target_residency = 46057, + .name = "C4", + .desc = "MPU OFF + CORE ON", + }, + { + .enter = omap3_enter_idle_bm, + .exit_latency = 855 + 1146, + .target_residency = 46057, + .name = "C5", + .desc = "MPU RET + CORE RET", + }, + { + .enter = omap3_enter_idle_bm, + .exit_latency = 7580 + 4134, + .target_residency = 484329, + .name = "C6", + .desc = "MPU OFF + CORE RET", + }, + { + .enter = omap3_enter_idle_bm, + .exit_latency = 7505 + 15274, + .target_residency = 484329, + .name = "C7", + .desc = "MPU OFF + CORE OFF", + }, + }, + .state_count = ARRAY_SIZE(omap3_idle_data), + .safe_state_index = 0, +}; + /* Public functions */ /** @@ -333,5 +397,8 @@ int __init omap3_idle_init(void) if (!mpu_pd || !core_pd || !per_pd || !cam_pd) return -ENODEV; - return cpuidle_register(&omap3_idle_driver, NULL); + if (cpu_is_omap3430()) + return cpuidle_register(&omap3430_idle_driver, NULL); + else + return cpuidle_register(&omap3_idle_driver, NULL); } From 159c52e15f95712dd22aa5d64b17a79a7fd8f939 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 28 Nov 2015 23:56:47 +0100 Subject: [PATCH 596/797] ARM: prima2: always enable reset controller commit ef2b1d777d643af227a22309d8b79898b90b123c upstream. The atlas7 clock controller driver registers a reset controller for itself, which causes a link error when the subsystem is disabled: drivers/built-in.o: In function `atlas7_clk_init': drivers/clk/sirf/clk-atlas7.c:1681: undefined reference to `reset_controller_register' As the clk driver does not have a Kconfig symbol for itself but it always built-in when the platform is enabled, we have to ensure that the reset controller subsystem is also built-in in this case. Signed-off-by: Arnd Bergmann Acked-by: Philipp Zabel Fixes: 301c5d29402e ("clk: sirf: add CSR atlas7 clk and reset support") Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-prima2/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig index 9ab8932403e5..56e55fd37d13 100644 --- a/arch/arm/mach-prima2/Kconfig +++ b/arch/arm/mach-prima2/Kconfig @@ -1,6 +1,7 @@ menuconfig ARCH_SIRF bool "CSR SiRF" if ARCH_MULTI_V7 select ARCH_HAS_RESET_CONTROLLER + select RESET_CONTROLLER select ARCH_REQUIRE_GPIOLIB select GENERIC_IRQ_CHIP select NO_IOPORT_MAP From abc48d066b7b5063db56f4a81e367c84b9582882 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 29 Jan 2016 15:50:38 +0100 Subject: [PATCH 597/797] ARM: EXYNOS: select THERMAL_OF commit dc7eb9d589e595954792cc192bcbb92932e5c2ff upstream. We cannot select a symbol that has disabled dependencies, so we get a warning if we ever enable EXYNOS_THERMAL without also turning on THERMAL_OF: warning: (ARCH_EXYNOS) selects EXYNOS_THERMAL which has unmet direct dependencies (THERMAL && (ARCH_EXYNOS || COMPILE_TEST) && THERMAL_OF) This adds another 'select' in the platform code to avoid that case. Alternatively, we could decide to not select EXYNOS_THERMAL here and instead make it a user option. Signed-off-by: Arnd Bergmann Fixes: f87e6bd3f740 ("thermal: exynos: Add the dependency of CONFIG_THERMAL_OF instead of CONFIG_OF") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-exynos/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 3a10f1a8317a..bfd8bb371477 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -26,6 +26,7 @@ menuconfig ARCH_EXYNOS select S5P_DEV_MFC select SRAM select THERMAL + select THERMAL_OF select MFD_SYSCON help Support for SAMSUNG EXYNOS SoCs (EXYNOS4/5) From ea075ae7f00c6416b12d68abf29b6a57a15b3916 Mon Sep 17 00:00:00 2001 From: Lior Amsalem Date: Wed, 10 Feb 2016 17:29:15 +0100 Subject: [PATCH 598/797] ARM: dts: armada-375: use armada-370-sata for SATA commit b3a7f31eb7375633cd6a742f19488fc5a4208b36 upstream. The Armada 375 has the same SATA IP as Armada 370 and Armada XP, which requires the PHY speed to be set in the LP_PHY_CTL register for SATA hotplug to work. Therefore, this commit updates the compatible string used to describe the SATA IP in Armada 375 from marvell,orion-sata to marvell,armada-370-sata. Fixes: 4de59085091f753d08c8429d756b46756ab94665 ("ARM: mvebu: add Device Tree description of the Armada 375 SoC") Signed-off-by: Lior Amsalem Signed-off-by: Thomas Petazzoni Reviewed-by: Andrew Lunn Signed-off-by: Gregory CLEMENT Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/armada-375.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/armada-375.dtsi b/arch/arm/boot/dts/armada-375.dtsi index 7ccce7529b0c..cc952cf8ec30 100644 --- a/arch/arm/boot/dts/armada-375.dtsi +++ b/arch/arm/boot/dts/armada-375.dtsi @@ -529,7 +529,7 @@ crypto@90000 { }; sata@a0000 { - compatible = "marvell,orion-sata"; + compatible = "marvell,armada-370-sata"; reg = <0xa0000 0x5000>; interrupts = ; clocks = <&gateclk 14>, <&gateclk 20>; From eb7f1c5fb5c8e888ca8b728e17e71426ea809590 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sat, 13 Feb 2016 00:49:20 +0100 Subject: [PATCH 599/797] ARM: dts: pxa: fix dma engine node to pxa3xx-nand commit 07c6b2d01d351f0512ed7145625265e435ab3240 upstream. Since the switch from mmp_pdma to pxa_dma driver for pxa architectures, the pxa_dma requires 2 arguments, namely the requestor line and the requested priority. Fix the only left device node which was still passing only one argument, making the pxa3xx-nand driver misbehave in a device-tree configuration, ie. failing all data transfers. Fixes: c943646d1f49 ("ARM: dts: pxa: add dma engine node to pxa3xx-nand") Signed-off-by: Robert Jarzmik Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/pxa3xx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/pxa3xx.dtsi b/arch/arm/boot/dts/pxa3xx.dtsi index cf6998a0804d..564341af7e97 100644 --- a/arch/arm/boot/dts/pxa3xx.dtsi +++ b/arch/arm/boot/dts/pxa3xx.dtsi @@ -30,7 +30,7 @@ nand0: nand@43100000 { reg = <0x43100000 90>; interrupts = <45>; clocks = <&clks CLK_NAND>; - dmas = <&pdma 97>; + dmas = <&pdma 97 3>; dma-names = "data"; #address-cells = <1>; #size-cells = <1>; From c565897ffe54ec0e36854db7fcfe88014e05ce41 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 22 Feb 2016 09:01:53 -0300 Subject: [PATCH 600/797] bus: imx-weim: Take the 'status' property value into account commit 33b96d2c9579213cf3f36d7b29841b1e464750c4 upstream. Currently we have an incorrect behaviour when multiple devices are present under the weim node. For example: &weim { ... status = "okay"; sram@0,0 { ... status = "okay"; }; mram@0,0 { ... status = "disabled"; }; }; In this case only the 'sram' device should be probed and not 'mram'. However what happens currently is that the status variable is ignored, causing the 'sram' device to be disabled and 'mram' to be enabled. Change the weim_parse_dt() function to use for_each_available_child_of_node()so that the devices marked with 'status = disabled' are not probed. Suggested-by: Wolfgang Netbal Signed-off-by: Fabio Estevam Reviewed-by: Sascha Hauer Acked-by: Shawn Guo Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- drivers/bus/imx-weim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c index e98d15eaa799..1827fc4d15c1 100644 --- a/drivers/bus/imx-weim.c +++ b/drivers/bus/imx-weim.c @@ -150,7 +150,7 @@ static int __init weim_parse_dt(struct platform_device *pdev, return ret; } - for_each_child_of_node(pdev->dev.of_node, child) { + for_each_available_child_of_node(pdev->dev.of_node, child) { if (!child->name) continue; From 1b06e9942d51804170631351ada984947e87f042 Mon Sep 17 00:00:00 2001 From: Guo-Fu Tseng Date: Sat, 5 Mar 2016 08:11:55 +0800 Subject: [PATCH 601/797] jme: Do not enable NIC WoL functions on S0 commit 0772a99b818079e628a1da122ac7ee023faed83e upstream. Otherwise it might be back on resume right after going to suspend in some hardware. Reported-by: Diego Viola Signed-off-by: Guo-Fu Tseng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/jme.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 973dade2d07f..39da60007ade 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -270,11 +270,17 @@ jme_reset_mac_processor(struct jme_adapter *jme) } static inline void -jme_clear_pm(struct jme_adapter *jme) +jme_clear_pm_enable_wol(struct jme_adapter *jme) { jwrite32(jme, JME_PMCS, PMCS_STMASK | jme->reg_pmcs); } +static inline void +jme_clear_pm_disable_wol(struct jme_adapter *jme) +{ + jwrite32(jme, JME_PMCS, PMCS_STMASK); +} + static int jme_reload_eeprom(struct jme_adapter *jme) { @@ -1853,7 +1859,7 @@ jme_open(struct net_device *netdev) struct jme_adapter *jme = netdev_priv(netdev); int rc; - jme_clear_pm(jme); + jme_clear_pm_disable_wol(jme); JME_NAPI_ENABLE(jme); tasklet_init(&jme->linkch_task, jme_link_change_tasklet, @@ -1929,7 +1935,7 @@ jme_powersave_phy(struct jme_adapter *jme) jme_set_100m_half(jme); if (jme->reg_pmcs & (PMCS_LFEN | PMCS_LREN)) jme_wait_link(jme); - jme_clear_pm(jme); + jme_clear_pm_enable_wol(jme); } else { jme_phy_off(jme); } @@ -2646,7 +2652,6 @@ jme_set_wol(struct net_device *netdev, if (wol->wolopts & WAKE_MAGIC) jme->reg_pmcs |= PMCS_MFEN; - jwrite32(jme, JME_PMCS, jme->reg_pmcs); device_set_wakeup_enable(&jme->pdev->dev, !!(jme->reg_pmcs)); return 0; @@ -3172,7 +3177,7 @@ jme_init_one(struct pci_dev *pdev, jme->mii_if.mdio_read = jme_mdio_read; jme->mii_if.mdio_write = jme_mdio_write; - jme_clear_pm(jme); + jme_clear_pm_disable_wol(jme); device_set_wakeup_enable(&pdev->dev, true); jme_set_phyfifo_5level(jme); @@ -3304,7 +3309,7 @@ jme_resume(struct device *dev) if (!netif_running(netdev)) return 0; - jme_clear_pm(jme); + jme_clear_pm_disable_wol(jme); jme_phy_on(jme); if (test_bit(JME_FLAG_SSET, &jme->flags)) jme_set_settings(netdev, &jme->old_ecmd); From e91b1dbdc1f064872a6a2bb2375ae9202dd5e6e0 Mon Sep 17 00:00:00 2001 From: Guo-Fu Tseng Date: Sat, 5 Mar 2016 08:11:56 +0800 Subject: [PATCH 602/797] jme: Fix device PM wakeup API usage commit 81422e672f8181d7ad1ee6c60c723aac649f538f upstream. According to Documentation/power/devices.txt The driver should not use device_set_wakeup_enable() which is the policy for user to decide. Using device_init_wakeup() to initialize dev->power.should_wakeup and dev->power.can_wakeup on driver initialization. And use device_may_wakeup() on suspend to decide if WoL function should be enabled on NIC. Reported-by: Diego Viola Signed-off-by: Guo-Fu Tseng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/jme.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 39da60007ade..1257b18e6b90 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -1931,7 +1931,7 @@ jme_wait_link(struct jme_adapter *jme) static void jme_powersave_phy(struct jme_adapter *jme) { - if (jme->reg_pmcs) { + if (jme->reg_pmcs && device_may_wakeup(&jme->pdev->dev)) { jme_set_100m_half(jme); if (jme->reg_pmcs & (PMCS_LFEN | PMCS_LREN)) jme_wait_link(jme); @@ -2652,8 +2652,6 @@ jme_set_wol(struct net_device *netdev, if (wol->wolopts & WAKE_MAGIC) jme->reg_pmcs |= PMCS_MFEN; - device_set_wakeup_enable(&jme->pdev->dev, !!(jme->reg_pmcs)); - return 0; } @@ -3178,7 +3176,7 @@ jme_init_one(struct pci_dev *pdev, jme->mii_if.mdio_write = jme_mdio_write; jme_clear_pm_disable_wol(jme); - device_set_wakeup_enable(&pdev->dev, true); + device_init_wakeup(&pdev->dev, true); jme_set_phyfifo_5level(jme); jme->pcirev = pdev->revision; From 22327f609cef2a3f9bf0781fb2e9dda07ec64c98 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 14 Jan 2016 18:13:49 +0000 Subject: [PATCH 603/797] unbreak allmodconfig KCONFIG_ALLCONFIG=... commit 6b87b70c5339f30e3c5b32085e69625906513dc2 upstream. Prior to 3.13 make allmodconfig KCONFIG_ALLCONFIG=/dev/null used to be equivalent to make allmodconfig; these days it hardwires MODULES to n. In fact, any KCONFIG_ALLCONFIG that doesn't set MODULES explicitly is treated as if it set it to n. Regression had been introduced by commit cfa98f ("kconfig: do not override symbols already set"); what happens is that conf_read_simple() does sym_calc_value(modules_sym) on exit, which leaves SYMBOL_VALID set and has conf_set_all_new_symbols() skip modules_sym. It's pretty easy to fix - simply move that call of sym_calc_value() into the callers, except for the ones in KCONFIG_ALLCONFIG handling. Objections? Signed-off-by: Al Viro Fixes: cfa98f2e0ae9 ("kconfig: do not override symbols already set") Signed-off-by: Michal Marek Signed-off-by: Greg Kroah-Hartman --- scripts/kconfig/confdata.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 0b7dc2fd7bac..dd243d2abd87 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -267,10 +267,8 @@ int conf_read_simple(const char *name, int def) if (in) goto load; sym_add_change_count(1); - if (!sym_defconfig_list) { - sym_calc_value(modules_sym); + if (!sym_defconfig_list) return 1; - } for_all_defaults(sym_defconfig_list, prop) { if (expr_calc_value(prop->visible.expr) == no || @@ -403,7 +401,6 @@ int conf_read_simple(const char *name, int def) } free(line); fclose(in); - sym_calc_value(modules_sym); return 0; } @@ -414,8 +411,12 @@ int conf_read(const char *name) sym_set_change_count(0); - if (conf_read_simple(name, S_DEF_USER)) + if (conf_read_simple(name, S_DEF_USER)) { + sym_calc_value(modules_sym); return 1; + } + + sym_calc_value(modules_sym); for_all_symbols(i, sym) { sym_calc_value(sym); @@ -846,6 +847,7 @@ static int conf_split_config(void) name = conf_get_autoconfig_name(); conf_read_simple(name, S_DEF_AUTO); + sym_calc_value(modules_sym); if (chdir("include/config")) return 1; From 1f5c4e0cb83cde427f1b8b95aa9a2a42e249fd53 Mon Sep 17 00:00:00 2001 From: Caesar Wang Date: Mon, 15 Feb 2016 15:33:28 +0800 Subject: [PATCH 604/797] thermal: rockchip: fix a impossible condition caused by the warning commit 43b4eb9fe719b107c8e5d49d1edbff0c135a42cb upstream. As the Dan report the smatch check the thermal driver warning: drivers/thermal/rockchip_thermal.c:551 rockchip_configure_from_dt() warn: impossible condition '(thermal->tshut_temp > ((~0 >> 1))) => (s32min-s32max > s32max)' Although The shut_temp read from DT is u32,the temperature is currently represented as int not long in the thermal driver. Let's change to make shut_temp instead of the thermal->tshut_temp for the condition. Fixes: commit 437df2172e8d ("thermal: rockchip: consistently use int for temperatures") Reported-by: Dan Carpenter Signed-off-by: Caesar Wang Signed-off-by: Eduardo Valentin Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/rockchip_thermal.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c index e845841ab036..7106288efae3 100644 --- a/drivers/thermal/rockchip_thermal.c +++ b/drivers/thermal/rockchip_thermal.c @@ -545,15 +545,14 @@ static int rockchip_configure_from_dt(struct device *dev, thermal->chip->tshut_temp); thermal->tshut_temp = thermal->chip->tshut_temp; } else { + if (shut_temp > INT_MAX) { + dev_err(dev, "Invalid tshut temperature specified: %d\n", + shut_temp); + return -ERANGE; + } thermal->tshut_temp = shut_temp; } - if (thermal->tshut_temp > INT_MAX) { - dev_err(dev, "Invalid tshut temperature specified: %d\n", - thermal->tshut_temp); - return -ERANGE; - } - if (of_property_read_u32(np, "rockchip,hw-tshut-mode", &tshut_mode)) { dev_warn(dev, "Missing tshut mode property, using default (%s)\n", From 03d86237007729b006808e8eab90e96a565deee4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 4 Mar 2016 17:20:13 +1100 Subject: [PATCH 605/797] sunrpc/cache: drop reference when sunrpc_cache_pipe_upcall() detects a race commit a6ab1e8126d205238defbb55d23661a3a5c6a0d8 upstream. sunrpc_cache_pipe_upcall() can detect a race if CACHE_PENDING is no longer set. In this case it aborts the queuing of the upcall. However it has already taken a new counted reference on "h" and doesn't "put" it, even though it frees the data structure holding the reference. So let's delay the "cache_get" until we know we need it. Fixes: f9e1aedc6c79 ("sunrpc/cache: remove races with queuing an upcall.") Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/cache.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 21e20353178e..63fb5ee212cf 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1182,14 +1182,14 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) } crq->q.reader = 0; - crq->item = cache_get(h); crq->buf = buf; crq->len = 0; crq->readers = 0; spin_lock(&queue_lock); - if (test_bit(CACHE_PENDING, &h->flags)) + if (test_bit(CACHE_PENDING, &h->flags)) { + crq->item = cache_get(h); list_add_tail(&crq->q.list, &detail->queue); - else + } else /* Lost a race, no longer PENDING, so don't enqueue */ ret = -EAGAIN; spin_unlock(&queue_lock); From 5b6e810f352b00c7bf5e7e32557a39b6d550458a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 14 Mar 2016 15:29:45 +0100 Subject: [PATCH 606/797] megaraid_sas: add missing curly braces in ioctl handler commit 3deb9438d34a09f6796639b652a01d110aca9f75 upstream. gcc-6 found a dubious indentation in the megasas_mgmt_fw_ioctl function: drivers/scsi/megaraid/megaraid_sas_base.c: In function 'megasas_mgmt_fw_ioctl': drivers/scsi/megaraid/megaraid_sas_base.c:6658:4: warning: statement is indented as if it were guarded by... [-Wmisleading-indentation] kbuff_arr[i] = NULL; ^~~~~~~~~ drivers/scsi/megaraid/megaraid_sas_base.c:6653:3: note: ...this 'if' clause, but it is not if (kbuff_arr[i]) ^~ The code is actually correct, as there is no downside in clearing a NULL pointer again. This clarifies the code and avoids the warning by adding extra curly braces. Signed-off-by: Arnd Bergmann Fixes: 90dc9d98f01b ("megaraid_sas : MFI MPT linked list corruption fix") Reviewed-by: Hannes Reinecke Acked-by: Sumit Saxena Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 97a1c1c33b05..00ce3e269a43 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -6282,12 +6282,13 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance, } for (i = 0; i < ioc->sge_count; i++) { - if (kbuff_arr[i]) + if (kbuff_arr[i]) { dma_free_coherent(&instance->pdev->dev, le32_to_cpu(kern_sge32[i].length), kbuff_arr[i], le32_to_cpu(kern_sge32[i].phys_addr)); kbuff_arr[i] = NULL; + } } megasas_return_cmd(instance, cmd); From f4b1d0a9a3f4291ba4ab48dd27efd01d3775d7f6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 22 Dec 2015 17:25:17 +0200 Subject: [PATCH 607/797] stm class: Select CONFIG_SRCU commit 042d4460b5b4379a12f375045ff9065cf6758735 upstream. The newly added STM code uses SRCU, but does not ensure that this code is part of the kernel: drivers/built-in.o: In function `stm_source_link_show': include/linux/srcu.h:221: undefined reference to `__srcu_read_lock' include/linux/srcu.h:238: undefined reference to `__srcu_read_unlock' drivers/built-in.o: In function `stm_source_link_drop': include/linux/srcu.h:221: undefined reference to `__srcu_read_lock' include/linux/srcu.h:238: undefined reference to `__srcu_read_unlock' This adds a Kconfig 'select' statement like all the other SRCU using drivers have. Signed-off-by: Arnd Bergmann Fixes: 7bd1d4093c2f ("stm class: Introduce an abstraction for System Trace Module devices") Signed-off-by: Alexander Shishkin Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/stm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwtracing/stm/Kconfig b/drivers/hwtracing/stm/Kconfig index 83e9f591a54b..e7a348807f0c 100644 --- a/drivers/hwtracing/stm/Kconfig +++ b/drivers/hwtracing/stm/Kconfig @@ -1,6 +1,7 @@ config STM tristate "System Trace Module devices" select CONFIGFS_FS + select SRCU help A System Trace Module (STM) is a device exporting data in System Trace Protocol (STP) format as defined by MIPI STP standards. From b393b9da446626170a39bcd79c52e8ebadb19c8c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 Feb 2016 14:36:09 +0300 Subject: [PATCH 608/797] extcon: max77843: Use correct size for reading the interrupt register commit c4924e92442d7218bd725e47fa3988c73aae84c9 upstream. The info->status[] array has 3 elements. We are using size MAX77843_MUIC_IRQ_NUM (16) instead of MAX77843_MUIC_STATUS_NUM (3) as intended. Fixes: 135d9f7d135a ('extcon: max77843: Clear IRQ bits state before request IRQ') Signed-off-by: Dan Carpenter Reviewed-by: Jaewon Kim Reviewed-by: Krzysztof Kozlowski [cw00.choi: Modify the patch title] Signed-off-by: Chanwoo Choi Signed-off-by: Greg Kroah-Hartman --- drivers/extcon/extcon-max77843.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/extcon/extcon-max77843.c b/drivers/extcon/extcon-max77843.c index 9f9ea334399c..b6cb30d207be 100644 --- a/drivers/extcon/extcon-max77843.c +++ b/drivers/extcon/extcon-max77843.c @@ -803,7 +803,7 @@ static int max77843_muic_probe(struct platform_device *pdev) /* Clear IRQ bits before request IRQs */ ret = regmap_bulk_read(max77843->regmap_muic, MAX77843_MUIC_REG_INT1, info->status, - MAX77843_MUIC_IRQ_NUM); + MAX77843_MUIC_STATUS_NUM); if (ret) { dev_err(&pdev->dev, "Failed to Clear IRQ bits\n"); goto err_muic_irq; From 1a1a512b983108015ced1e7a7c7775cfeec42d8c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 4 May 2016 14:50:15 -0700 Subject: [PATCH 609/797] Linux 4.4.9 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1928fcd539cc..0722cdf52152 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 8 +SUBLEVEL = 9 EXTRAVERSION = NAME = Blurry Fish Butt From cc798dcca00a8b71e5c09a38d3921461f931c85f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Nov 2015 13:26:19 +0000 Subject: [PATCH 610/797] arm64: mm: detect bad __create_mapping uses If a caller of __create_mapping provides a PA and VA which have different sub-page offsets, it is not clear which offset they expect to apply to the mapping, and is indicative of a bad caller. In some cases, the region we wish to map may validly have a sub-page offset in the physical and virtual addresses. For example, EFI runtime regions have 4K granularity, yet may be mapped by a 64K page kernel. So long as the physical and virtual offsets are the same, the region will be mapped at the expected VAs. Disallow calls with differing sub-page offsets, and WARN when they are encountered, so that we can detect and fix such cases. Cc: Laura Abbott Acked-by: Ard Biesheuvel Acked-by: Catalin Marinas Reviewed-by: Steve Capper Signed-off-by: Mark Rutland Signed-off-by: Will Deacon (cherry picked from commit cc5d2b3b95cdbb3fed4e38e667d17b9ac7250f7a) Signed-off-by: Alex Shi --- arch/arm64/mm/mmu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 116ad654dd59..61a82d68330d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -251,6 +251,13 @@ static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, { unsigned long addr, length, end, next; + /* + * If the virtual and physical address don't have the same offset + * within a page, we cannot map the region as the caller expects. + */ + if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) + return; + addr = virt & PAGE_MASK; length = PAGE_ALIGN(size + (virt & ~PAGE_MASK)); From 6329e5d3b74540578dbbf4550fdc53c52c706c94 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Nov 2015 13:26:20 +0000 Subject: [PATCH 611/797] arm64: mm: allow sections for unaligned bases Callees of __create_mapping may decide to create section mappings if sufficient low bits of the physical and virtual addresses they were passed are zero. While __create_mapping rounds the virtual base address down, it does not similarly round the physical base address down, and hence non-zero bits in the physical address can prevent use of a section mapping, even where a whole next-level table would be used instead. Round down the physical base address in __create_mapping to enable all callees to always create section mappings when such a mapping is possible. Cc: Laura Abbott Acked-by: Ard Biesheuvel Acked-by: Catalin Marinas Reviewed-by: Steve Capper Signed-off-by: Mark Rutland Signed-off-by: Will Deacon (cherry picked from commit 9c4e08a3022b6df90d31ef4007291faabfce5431) Signed-off-by: Alex Shi --- arch/arm64/mm/mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 61a82d68330d..d2a6194f4bec 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -258,6 +258,7 @@ static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) return; + phys &= PAGE_MASK; addr = virt & PAGE_MASK; length = PAGE_ALIGN(size + (virt & ~PAGE_MASK)); From 6e8ef09edf4a9e61a04fad753ffeebaecc60b568 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Oct 2015 18:56:19 +0000 Subject: [PATCH 612/797] arm64: pgtable: implement pte_accessible() This patch implements the pte_accessible() macro, which can be used to test whether or not a given pte is a candidate for allocation in the TLB. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon (cherry picked from commit 76c714be0e5e60c935a53b31be58939510ba1d0f) Signed-off-by: Alex Shi --- arch/arm64/include/asm/pgtable.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c63868ae9a4a..cd5dfc97268e 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -168,6 +168,16 @@ extern struct page *empty_zero_page; #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) #define pte_valid_not_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) +#define pte_valid_young(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) + +/* + * Could the pte be present in the TLB? We must check mm_tlb_flush_pending + * so that we don't erroneously return false for pages that have been + * remapped as PROT_NONE but are yet to be flushed from the TLB. + */ +#define pte_accessible(mm, pte) \ + (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte)) static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) { From 1a9cc42c0a812241f6cd679a19aefba2900437a7 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 20 Nov 2015 17:59:10 +0800 Subject: [PATCH 613/797] arm64: add __init/__initdata section marker to some functions/variables These functions/variables are not needed after booting, so mark them as __init or __initdata. Signed-off-by: Jisheng Zhang Signed-off-by: Will Deacon (cherry picked from commit a7c61a3452d39078919f0e1f493ff966fb64f0db) Signed-off-by: Alex Shi --- arch/arm64/kernel/armv8_deprecated.c | 6 +++--- arch/arm64/kernel/cpufeature.c | 9 +++++---- arch/arm64/kernel/fpsimd.c | 2 +- arch/arm64/mm/dma-mapping.c | 4 ++-- arch/arm64/mm/init.c | 6 +++--- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 937f5e58a4d3..3e01207917b1 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -62,7 +62,7 @@ struct insn_emulation { }; static LIST_HEAD(insn_emulation); -static int nr_insn_emulated; +static int nr_insn_emulated __initdata; static DEFINE_RAW_SPINLOCK(insn_emulation_lock); static void register_emulation_hooks(struct insn_emulation_ops *ops) @@ -173,7 +173,7 @@ static int update_insn_emulation_mode(struct insn_emulation *insn, return ret; } -static void register_insn_emulation(struct insn_emulation_ops *ops) +static void __init register_insn_emulation(struct insn_emulation_ops *ops) { unsigned long flags; struct insn_emulation *insn; @@ -237,7 +237,7 @@ static struct ctl_table ctl_abi[] = { { } }; -static void register_insn_emulation_sysctl(struct ctl_table *table) +static void __init register_insn_emulation_sysctl(struct ctl_table *table) { unsigned long flags; int i = 0; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0669c63281ea..5c90aa490a2b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -684,7 +684,7 @@ static const struct arm64_cpu_capabilities arm64_hwcaps[] = { {}, }; -static void cap_set_hwcap(const struct arm64_cpu_capabilities *cap) +static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap) { switch (cap->hwcap_type) { case CAP_HWCAP: @@ -729,7 +729,7 @@ static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities * return rc; } -static void setup_cpu_hwcaps(void) +static void __init setup_cpu_hwcaps(void) { int i; const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps; @@ -758,7 +758,8 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, * Run through the enabled capabilities and enable() it on all active * CPUs */ -static void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +static void __init +enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { int i; @@ -897,7 +898,7 @@ static inline void set_sys_caps_initialised(void) #endif /* CONFIG_HOTPLUG_CPU */ -static void setup_feature_capabilities(void) +static void __init setup_feature_capabilities(void) { update_cpu_capabilities(arm64_features, "detected feature:"); enable_cpu_capabilities(arm64_features); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 4c46c54a3ad7..acc1afd5c749 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -289,7 +289,7 @@ static struct notifier_block fpsimd_cpu_pm_notifier_block = { .notifier_call = fpsimd_cpu_pm_notifier, }; -static void fpsimd_pm_init(void) +static void __init fpsimd_pm_init(void) { cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block); } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 354144e33218..a6e757cbab77 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -40,7 +40,7 @@ static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, static struct gen_pool *atomic_pool; #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K -static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE; +static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; static int __init early_coherent_pool(char *p) { @@ -896,7 +896,7 @@ static int __iommu_attach_notifier(struct notifier_block *nb, return 0; } -static int register_iommu_dma_ops_notifier(struct bus_type *bus) +static int __init register_iommu_dma_ops_notifier(struct bus_type *bus) { struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL); int ret; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 4cb98aa8c27b..10fab52eed95 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -71,7 +71,7 @@ early_param("initrd", early_initrd); * currently assumes that for memory starting above 4G, 32-bit devices will * use a DMA offset. */ -static phys_addr_t max_zone_dma_phys(void) +static phys_addr_t __init max_zone_dma_phys(void) { phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); return min(offset + (1ULL << 32), memblock_end_of_DRAM()); @@ -126,11 +126,11 @@ EXPORT_SYMBOL(pfn_valid); #endif #ifndef CONFIG_SPARSEMEM -static void arm64_memory_present(void) +static void __init arm64_memory_present(void) { } #else -static void arm64_memory_present(void) +static void __init arm64_memory_present(void) { struct memblock_region *reg; From 2e310797997214e0cc606013ac308167b6b72dc0 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Wed, 2 Dec 2015 14:00:10 +0000 Subject: [PATCH 614/797] arm64: fix COMPAT_SHMLBA definition for large pages ARM glibc uses (4 * __getpagesize()) for SHMLBA, which is correct for 4KB pages and works fine for 64KB pages, but the kernel uses a hardcoded 16KB that is too small for 64KB page based kernels. This changes the definition to what user space sees when using 64KB pages. Acked-by: Arnd Bergmann Signed-off-by: Yury Norov Signed-off-by: Will Deacon (cherry picked from commit b9b7aebb42d1b1392f3111de61136bb6cf3aae3f) Signed-off-by: Alex Shi --- arch/arm64/include/asm/shmparam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/shmparam.h b/arch/arm64/include/asm/shmparam.h index 4df608a8459e..e368a55ebd22 100644 --- a/arch/arm64/include/asm/shmparam.h +++ b/arch/arm64/include/asm/shmparam.h @@ -21,7 +21,7 @@ * alignment value. Since we don't have aliasing D-caches, the rest of * the time we can safely use PAGE_SIZE. */ -#define COMPAT_SHMLBA 0x4000 +#define COMPAT_SHMLBA (4 * PAGE_SIZE) #include From 4ea9dd702768f3d1c3ab346f30fdce8b8d1a8ef9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 23 Nov 2015 15:12:59 +0000 Subject: [PATCH 615/797] arm64: enable HAVE_IRQ_TIME_ACCOUNTING arm64 relies on the arm_arch_timer for sched_clock, so we can select HAVE_IRQ_TIME_ACCOUNTING and have the core sched-clock code enable the feature at runtime based on the rate. Reported-by: Mario Smarduch Signed-off-by: Will Deacon (cherry picked from commit 24da208db32ee1e4757ceaba898c47add8e5361e) Signed-off-by: Alex Shi --- Documentation/features/time/irq-time-acct/arch-support.txt | 2 +- arch/arm64/Kconfig | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/features/time/irq-time-acct/arch-support.txt b/Documentation/features/time/irq-time-acct/arch-support.txt index e63316239938..4199ffecc0ff 100644 --- a/Documentation/features/time/irq-time-acct/arch-support.txt +++ b/Documentation/features/time/irq-time-acct/arch-support.txt @@ -9,7 +9,7 @@ | alpha: | .. | | arc: | TODO | | arm: | ok | - | arm64: | .. | + | arm64: | ok | | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 871f21783866..4876459c0838 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -70,6 +70,7 @@ config ARM64 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_GENERIC_DMA_COHERENT select HAVE_HW_BREAKPOINT if PERF_EVENTS + select HAVE_IRQ_TIME_ACCOUNTING select HAVE_MEMBLOCK select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS From 0348dff2c49b25c7b5702ef887f03177f4b0c0fd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 19 Nov 2015 17:48:31 +0000 Subject: [PATCH 616/797] arm64: spinlock: serialise spin_unlock_wait against concurrent lockers Boqun Feng reported a rather nasty ordering issue with spin_unlock_wait on architectures implementing spin_lock with LL/SC sequences and acquire semantics: | CPU 1 CPU 2 CPU 3 | ================== ==================== ============== | spin_unlock(&lock); | spin_lock(&lock): | r1 = *lock; // r1 == 0; | o = READ_ONCE(object); // reordered here | object = NULL; | smp_mb(); | spin_unlock_wait(&lock); | *lock = 1; | smp_mb(); | o->dead = true; | if (o) // true | BUG_ON(o->dead); // true!! The crux of the problem is that spin_unlock_wait(&lock) can return on CPU 1 whilst CPU 2 is in the process of taking the lock. This can be resolved by upgrading spin_unlock_wait to a LOCK operation, forcing it to serialise against a concurrent locker and giving it acquire semantics in the process (although it is not at all clear whether this is needed - different callers seem to assume different things about the barrier semantics and architectures are similarly disjoint in their implementations of the macro). This patch implements spin_unlock_wait using an LL/SC sequence with acquire semantics on arm64. For v8.1 systems with the LSE atomics, the exclusive writeback is omitted, since the spin_lock operation is indivisible and no intermediate state can be observed. Signed-off-by: Will Deacon (cherry picked from commit d86b8da04dfa4771a68bdbad6c424d40f22f0d14) Signed-off-by: Alex Shi --- arch/arm64/include/asm/spinlock.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index c85e96d174a5..fc9682bfe002 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -26,9 +26,28 @@ * The memory barriers are implicit with the load-acquire and store-release * instructions. */ +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + unsigned int tmp; + arch_spinlock_t lockval; -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + asm volatile( +" sevl\n" +"1: wfe\n" +"2: ldaxr %w0, %2\n" +" eor %w1, %w0, %w0, ror #16\n" +" cbnz %w1, 1b\n" + ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ +" stxr %w1, %w0, %2\n" +" cbnz %w1, 2b\n", /* Serialise against any concurrent lockers */ + /* LSE atomics */ +" nop\n" +" nop\n") + : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) + : + : "memory"); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) From 2ef8b1f56c1989157c8ef929f4f89bd6a3ac7950 Mon Sep 17 00:00:00 2001 From: Li Bin Date: Fri, 4 Dec 2015 11:38:39 +0800 Subject: [PATCH 617/797] arm64: ftrace: stop using kstop_machine to enable/disable tracing For ftrace on arm64, kstop_machine which is hugely disruptive to a running system is not needed to convert nops to ftrace calls or back, because that to be modified instrucions, that NOP, B or BL, are all safe instructions which called "concurrent modification and execution of instructions", that can be executed by one thread of execution as they are being modified by another thread of execution without requiring explicit synchronization. Signed-off-by: Li Bin Reviewed-by: Steven Rostedt Signed-off-by: Will Deacon (cherry picked from commit 81a6a146e88eca5d6726569779778d61489d85aa) Signed-off-by: Alex Shi --- arch/arm64/kernel/ftrace.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index c851be795080..9669b331a23b 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -93,6 +93,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return ftrace_modify_code(pc, old, new, true); } +void arch_ftrace_update_code(int command) +{ + ftrace_modify_all_code(command); +} + int __init ftrace_dyn_arch_init(void) { return 0; From 42c1d121864549d11fb0f002df8dc8ef35219107 Mon Sep 17 00:00:00 2001 From: Li Bin Date: Fri, 4 Dec 2015 11:38:40 +0800 Subject: [PATCH 618/797] arm64: ftrace: fix the comments for ftrace_modify_code There is no need to worry about module and __init text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text, the update to make functions be traced or not is done under the ftrace_lock mutex as well. And by now, __init section codes should not been modified by ftrace, because it is black listed in recordmcount.c and ignored by ftrace. Suggested-by: Steven Rostedt Signed-off-by: Li Bin Signed-off-by: Will Deacon (cherry picked from commit 004ab584e028093996cf5b8e220b8bc50c5111cf) Signed-off-by: Alex Shi --- arch/arm64/kernel/ftrace.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 9669b331a23b..8f7005bc35bd 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -29,12 +29,11 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, /* * Note: - * Due to modules and __init, code can disappear and change, - * we need to protect against faulting as well as code changing. - * We do this by aarch64_insn_*() which use the probe_kernel_*(). - * - * No lock is held here because all the modifications are run - * through stop_machine(). + * We are paranoid about modifying text, as if a bug were to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with aarch64_insn_*() which uses + * probe_kernel_*(), and make sure what we read is what we expected it + * to be before modifying it. */ if (validate) { if (aarch64_insn_read((void *)pc, &replaced)) From a5b499e62f4070c7bfe2d322516a937e6f48d04f Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 4 Dec 2015 12:42:29 +0000 Subject: [PATCH 619/797] arm64: Add trace_hardirqs_off annotation in ret_to_user When a kernel is built with CONFIG_TRACE_IRQFLAGS the following warning is produced when entering userspace for the first time: WARNING: at /work/Linux/linux-2.6-aarch64/kernel/locking/lockdep.c:3519 Modules linked in: CPU: 1 PID: 1 Comm: systemd Not tainted 4.4.0-rc3+ #639 Hardware name: Juno (DT) task: ffffffc9768a0000 ti: ffffffc9768a8000 task.ti: ffffffc9768a8000 PC is at check_flags.part.22+0x19c/0x1a8 LR is at check_flags.part.22+0x19c/0x1a8 pc : [] lr : [] pstate: 600001c5 sp : ffffffc9768abe10 x29: ffffffc9768abe10 x28: ffffffc9768a8000 x27: 0000000000000000 x26: 0000000000000001 x25: 00000000000000a6 x24: ffffffc00064be6c x23: ffffffc0009f249e x22: ffffffc9768a0000 x21: ffffffc97fea5480 x20: 00000000000001c0 x19: ffffffc00169a000 x18: 0000005558cc7b58 x17: 0000007fb78e3180 x16: 0000005558d2e238 x15: ffffffffffffffff x14: 0ffffffffffffffd x13: 0000000000000008 x12: 0101010101010101 x11: 7f7f7f7f7f7f7f7f x10: fefefefefefeff63 x9 : 7f7f7f7f7f7f7f7f x8 : 6e655f7371726964 x7 : 0000000000000001 x6 : ffffffc0001079c4 x5 : 0000000000000000 x4 : 0000000000000001 x3 : ffffffc001698438 x2 : 0000000000000000 x1 : ffffffc9768a0000 x0 : 000000000000002e Call trace: [] check_flags.part.22+0x19c/0x1a8 [] lock_is_held+0x80/0x98 [] __schedule+0x404/0x730 [] schedule+0x44/0xb8 [] ret_to_user+0x0/0x24 possible reason: unannotated irqs-off. irq event stamp: 502169 hardirqs last enabled at (502169): [] el0_irq_naked+0x1c/0x24 hardirqs last disabled at (502167): [] __do_softirq+0x17c/0x298 softirqs last enabled at (502168): [] __do_softirq+0x1fc/0x298 softirqs last disabled at (502143): [] irq_exit+0xa0/0xf0 This happens because we disable interrupts in ret_to_user before calling schedule() in work_resched. This patch adds the necessary trace_hardirqs_off annotation. Signed-off-by: Catalin Marinas Reported-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon (cherry picked from commit db3899a6477a4dccd26cbfb7f408b6be2cc068e0) Signed-off-by: Alex Shi --- arch/arm64/kernel/entry.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 7ed3d75f6304..e5b25389c48f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -634,6 +634,9 @@ work_pending: bl do_notify_resume b ret_to_user work_resched: +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off // the IRQs are off here, inform the tracing code +#endif bl schedule /* From c0f49bdceea7d2f0cfe1c6f857e4e1cfa919382a Mon Sep 17 00:00:00 2001 From: Jungseok Lee Date: Fri, 4 Dec 2015 11:02:25 +0000 Subject: [PATCH 620/797] arm64: Store struct thread_info in sp_el0 There is need for figuring out how to manage struct thread_info data when IRQ stack is introduced. struct thread_info information should be copied to IRQ stack under the current thread_info calculation logic whenever context switching is invoked. This is too expensive to keep supporting the approach. Instead, this patch pays attention to sp_el0 which is an unused scratch register in EL1 context. sp_el0 utilization not only simplifies the management, but also prevents text section size from being increased largely due to static allocated IRQ stack as removing masking operation using THREAD_SIZE in many places. Reviewed-by: Catalin Marinas Signed-off-by: Jungseok Lee Signed-off-by: James Morse Signed-off-by: Will Deacon (cherry picked from commit 6cdf9c7ca687e01840d0215437620a20263012fc) Signed-off-by: Alex Shi --- arch/arm64/include/asm/thread_info.h | 10 ++++++++-- arch/arm64/kernel/entry.S | 15 ++++++++++++--- arch/arm64/kernel/head.S | 5 +++++ arch/arm64/kernel/sleep.S | 3 +++ 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 90c7ff233735..abd64bd1f6d9 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -73,10 +73,16 @@ register unsigned long current_stack_pointer asm ("sp"); */ static inline struct thread_info *current_thread_info(void) __attribute_const__; +/* + * struct thread_info can be accessed directly via sp_el0. + */ static inline struct thread_info *current_thread_info(void) { - return (struct thread_info *) - (current_stack_pointer & ~(THREAD_SIZE - 1)); + unsigned long sp_el0; + + asm ("mrs %0, sp_el0" : "=r" (sp_el0)); + + return (struct thread_info *)sp_el0; } #define thread_saved_pc(tsk) \ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e5b25389c48f..245fa6837880 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -88,7 +88,8 @@ .if \el == 0 mrs x21, sp_el0 - get_thread_info tsk // Ensure MDSCR_EL1.SS is clear, + mov tsk, sp + and tsk, tsk, #~(THREAD_SIZE - 1) // Ensure MDSCR_EL1.SS is clear, ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. .else @@ -107,6 +108,13 @@ str x21, [sp, #S_SYSCALLNO] .endif + /* + * Set sp_el0 to current thread_info. + */ + .if \el == 0 + msr sp_el0, tsk + .endif + /* * Registers that may be useful after this macro is invoked: * @@ -164,8 +172,7 @@ alternative_endif .endm .macro get_thread_info, rd - mov \rd, sp - and \rd, \rd, #~(THREAD_SIZE - 1) // top of stack + mrs \rd, sp_el0 .endm /* @@ -599,6 +606,8 @@ ENTRY(cpu_switch_to) ldp x29, x9, [x8], #16 ldr lr, [x8] mov sp, x9 + and x9, x9, #~(THREAD_SIZE - 1) + msr sp_el0, x9 ret ENDPROC(cpu_switch_to) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index b685257926f0..17ce7285bb12 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -424,6 +424,9 @@ __mmap_switched: b 1b 2: adr_l sp, initial_sp, x4 + mov x4, sp + and x4, x4, #~(THREAD_SIZE - 1) + msr sp_el0, x4 // Save thread_info str_l x21, __fdt_pointer, x5 // Save FDT pointer str_l x24, memstart_addr, x6 // Save PHYS_OFFSET mov x29, #0 @@ -611,6 +614,8 @@ ENDPROC(secondary_startup) ENTRY(__secondary_switched) ldr x0, [x21] // get secondary_data.stack mov sp, x0 + and x0, x0, #~(THREAD_SIZE - 1) + msr sp_el0, x0 // save thread_info mov x29, #0 b secondary_start_kernel ENDPROC(__secondary_switched) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index f586f7c875e2..e33fe33876ab 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -173,6 +173,9 @@ ENTRY(cpu_resume) /* load physical address of identity map page table in x1 */ adrp x1, idmap_pg_dir mov sp, x2 + /* save thread_info */ + and x2, x2, #~(THREAD_SIZE - 1) + msr sp_el0, x2 /* * cpu_do_resume expects x0 to contain context physical address * pointer and x1 to contain physical address of 1:1 page tables From 2f478c1abcc07611671c5fee4bd10974dde476ab Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 4 Dec 2015 11:02:26 +0000 Subject: [PATCH 621/797] arm64: Modify stack trace and dump for use with irq_stack This patch allows unwind_frame() to traverse from interrupt stack to task stack correctly. It requires data from a dummy stack frame, created during irq_stack_entry(), added by a later patch. A similar approach is taken to modify dump_backtrace(), which expects to find struct pt_regs underneath any call to functions marked __exception. When on an irq_stack, the struct pt_regs is stored on the old task stack, the location of which is stored in the dummy stack frame. Reviewed-by: Catalin Marinas Signed-off-by: AKASHI Takahiro [james.morse: merged two patches, reworked for per_cpu irq_stacks, and no alignment guarantees, added irq_stack definitions] Signed-off-by: James Morse Signed-off-by: Will Deacon (cherry picked from commit 132cd887b5c54758d04bf25c52fa48f45e843a30) Signed-off-by: Alex Shi --- arch/arm64/include/asm/irq.h | 32 ++++++++++++++++++++++++++++++++ arch/arm64/kernel/irq.c | 3 +++ arch/arm64/kernel/stacktrace.c | 29 +++++++++++++++++++++++++++-- arch/arm64/kernel/traps.c | 14 +++++++++++++- 4 files changed, 75 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 8e8d30684392..e2f3f135a3bc 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -1,10 +1,32 @@ #ifndef __ASM_IRQ_H #define __ASM_IRQ_H +#define IRQ_STACK_SIZE THREAD_SIZE +#define IRQ_STACK_START_SP THREAD_START_SP + +#ifndef __ASSEMBLER__ + +#include + #include +#include struct pt_regs; +DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); + +/* + * The highest address on the stack, and the first to be used. Used to + * find the dummy-stack frame put down by el?_irq() in entry.S. + */ +#define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP) + +/* + * The offset from irq_stack_ptr where entry.S will store the original + * stack pointer. Used by unwind_frame() and dump_backtrace(). + */ +#define IRQ_STACK_TO_TASK_STACK(ptr) *((unsigned long *)(ptr - 0x10)); + extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); static inline int nr_legacy_irqs(void) @@ -12,4 +34,14 @@ static inline int nr_legacy_irqs(void) return 0; } +static inline bool on_irq_stack(unsigned long sp, int cpu) +{ + /* variable names the same as kernel/stacktrace.c */ + unsigned long low = (unsigned long)per_cpu(irq_stack, cpu); + unsigned long high = low + IRQ_STACK_START_SP; + + return (low <= sp && sp <= high); +} + +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 9f17ec071ee0..1e3cef578e21 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -30,6 +30,9 @@ unsigned long irq_err_count; +/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned */ +DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16); + int arch_show_interrupts(struct seq_file *p, int prec) { show_ipi_list(p, prec); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index ccb6078ed9f2..b947eeffa5b2 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -20,6 +20,7 @@ #include #include +#include #include /* @@ -39,17 +40,41 @@ int notrace unwind_frame(struct stackframe *frame) { unsigned long high, low; unsigned long fp = frame->fp; + unsigned long irq_stack_ptr; + + /* + * Use raw_smp_processor_id() to avoid false-positives from + * CONFIG_DEBUG_PREEMPT. get_wchan() calls unwind_frame() on sleeping + * task stacks, we can be pre-empted in this case, so + * {raw_,}smp_processor_id() may give us the wrong value. Sleeping + * tasks can't ever be on an interrupt stack, so regardless of cpu, + * the checks will always fail. + */ + irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id()); low = frame->sp; - high = ALIGN(low, THREAD_SIZE); + /* irq stacks are not THREAD_SIZE aligned */ + if (on_irq_stack(frame->sp, raw_smp_processor_id())) + high = irq_stack_ptr; + else + high = ALIGN(low, THREAD_SIZE) - 0x20; - if (fp < low || fp > high - 0x18 || fp & 0xf) + if (fp < low || fp > high || fp & 0xf) return -EINVAL; frame->sp = fp + 0x10; frame->fp = *(unsigned long *)(fp); frame->pc = *(unsigned long *)(fp + 8); + /* + * Check whether we are going to walk through from interrupt stack + * to task stack. + * If we reach the end of the stack - and its an interrupt stack, + * read the original task stack pointer from the dummy frame. + */ + if (frame->sp == irq_stack_ptr) + frame->sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + return 0; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index e9b9b5364393..8a0084541f84 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -146,6 +146,7 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; + unsigned long irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); @@ -180,9 +181,20 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) if (ret < 0) break; stack = frame.sp; - if (in_exception_text(where)) + if (in_exception_text(where)) { + /* + * If we switched to the irq_stack before calling this + * exception handler, then the pt_regs will be on the + * task stack. The easiest way to tell is if the large + * pt_regs would overlap with the end of the irq_stack. + */ + if (stack < irq_stack_ptr && + (stack + sizeof(struct pt_regs)) > irq_stack_ptr) + stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + dump_mem("", "Exception stack", stack, stack + sizeof(struct pt_regs), false); + } } } From ea288f7a80b63d6956d23f50dd04fa70f8e7368f Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 4 Dec 2015 11:02:27 +0000 Subject: [PATCH 622/797] arm64: Add do_softirq_own_stack() and enable irq_stacks entry.S is modified to switch to the per_cpu irq_stack during el{0,1}_irq. irq_count is used to detect recursive interrupts on the irq_stack, it is updated late by do_softirq_own_stack(), when called on the irq_stack, before __do_softirq() re-enables interrupts to process softirqs. do_softirq_own_stack() is added by this patch, but does not yet switch stack. This patch adds the dummy stack frame and data needed by the previous stack tracing patches. Reviewed-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Will Deacon (cherry picked from commit 8e23dacd12a48e58125b84c817da50850b73280a) Signed-off-by: Alex Shi --- arch/arm64/include/asm/irq.h | 2 ++ arch/arm64/kernel/entry.S | 42 ++++++++++++++++++++++++++++++++++-- arch/arm64/kernel/irq.c | 38 +++++++++++++++++++++++++++++++- 3 files changed, 79 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index e2f3f135a3bc..fa2a8d0e4792 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -11,6 +11,8 @@ #include #include +#define __ARCH_HAS_DO_SOFTIRQ + struct pt_regs; DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 245fa6837880..8f7e737949fe 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -175,6 +176,42 @@ alternative_endif mrs \rd, sp_el0 .endm + .macro irq_stack_entry, dummy_lr + mov x19, sp // preserve the original sp + + adr_l x25, irq_stack + mrs x26, tpidr_el1 + add x25, x25, x26 + + /* + * Check the lowest address on irq_stack for the irq_count value, + * incremented by do_softirq_own_stack if we have re-enabled irqs + * while on the irq_stack. + */ + ldr x26, [x25] + cbnz x26, 9998f // recursive use? + + /* switch to the irq stack */ + mov x26, #IRQ_STACK_START_SP + add x26, x25, x26 + mov sp, x26 + + /* Add a dummy stack frame */ + stp x29, \dummy_lr, [sp, #-16]! // dummy stack frame + mov x29, sp + stp xzr, x19, [sp, #-16]! + +9998: + .endm + + /* + * x19 should be preserved between irq_stack_entry and + * irq_stack_exit. + */ + .macro irq_stack_exit + mov sp, x19 + .endm + /* * These are the registers used in the syscall handler, and allow us to * have in theory up to 7 arguments to a function - x0 to x6. @@ -190,10 +227,11 @@ tsk .req x28 // current thread_info * Interrupt handling. */ .macro irq_handler - adrp x1, handle_arch_irq - ldr x1, [x1, #:lo12:handle_arch_irq] + ldr_l x1, handle_arch_irq mov x0, sp + irq_stack_entry x22 blr x1 + irq_stack_exit .endm .text diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 1e3cef578e21..ff7ebb710e51 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -25,14 +25,24 @@ #include #include #include +#include #include #include unsigned long irq_err_count; -/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned */ +/* + * irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. + * irq_stack[0] is used as irq_count, a non-zero value indicates the stack + * is in use, and el?_irq() shouldn't switch to it. This is used to detect + * recursive use of the irq_stack, it is lazily updated by + * do_softirq_own_stack(), which is called on the irq_stack, before + * re-enabling interrupts to process softirqs. + */ DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16); +#define IRQ_COUNT() (*per_cpu(irq_stack, smp_processor_id())) + int arch_show_interrupts(struct seq_file *p, int prec) { show_ipi_list(p, prec); @@ -56,3 +66,29 @@ void __init init_IRQ(void) if (!handle_arch_irq) panic("No interrupt controller found."); } + +/* + * do_softirq_own_stack() is called from irq_exit() before __do_softirq() + * re-enables interrupts, at which point we may re-enter el?_irq(). We + * increase irq_count here so that el1_irq() knows that it is already on the + * irq stack. + * + * Called with interrupts disabled, so we don't worry about moving cpu, or + * being interrupted while modifying irq_count. + * + * This function doesn't actually switch stack. + */ +void do_softirq_own_stack(void) +{ + int cpu = smp_processor_id(); + + WARN_ON_ONCE(!irqs_disabled()); + + if (on_irq_stack(current_stack_pointer, cpu)) { + IRQ_COUNT()++; + __do_softirq(); + IRQ_COUNT()--; + } else { + __do_softirq(); + } +} From 306fe6c320ec846544b25130a2fe61e3d394cb6f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Dec 2015 13:58:42 +0000 Subject: [PATCH 623/797] arm64: irq: fix walking from irq stack to task stack Running with CONFIG_DEBUG_SPINLOCK=y can trigger a BUG with the new IRQ stack code: BUG: spinlock lockup suspected on CPU#1 This is due to the IRQ_STACK_TO_TASK_STACK macro incorrectly retrieving the task stack pointer stashed at the top of the IRQ stack. Sayeth James: | Yup, this is what is happening. Its an off-by-one due to broken | thinking about how the stack works. My broken thinking was: | | > top ------------ | > | dummy_lr | <- irq_stack_ptr | > ------------ | > | x29 | | > ------------ | > | x19 | <- irq_stack_ptr - 0x10 | > ------------ | > | xzr | | > ------------ | | But the stack-pointer is decreased before use. So it actually looks | like this: | | > ------------ | > | | <- irq_stack_ptr | > top ------------ | > | dummy_lr | | > ------------ | > | x29 | <- irq_stack_ptr - 0x10 | > ------------ | > | x19 | | > ------------ | > | xzr | <- irq_stack_ptr - 0x20 | > ------------ | | The value being used as the original stack is x29, which in all the | tests is sp but without the current frames data, hence there are no | missing frames in the output. | | Jungseok Lee picked it up with a 32bit user space because aarch32 | can't use x29, so it remains 0 forever. The fix he posted is correct. This patch fixes the macro and adds some of this wisdom to a comment, so that the layout of the IRQ stack is well understood. Cc: James Morse Reported-by: Jungseok Lee Signed-off-by: Will Deacon (cherry picked from commit 7596abf2e5661d52c4f414f37addeed54e098880) Signed-off-by: Alex Shi --- arch/arm64/include/asm/irq.h | 20 ++++++++++++++++++-- arch/arm64/kernel/entry.S | 2 +- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index fa2a8d0e4792..877c7e358384 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -19,7 +19,23 @@ DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); /* * The highest address on the stack, and the first to be used. Used to - * find the dummy-stack frame put down by el?_irq() in entry.S. + * find the dummy-stack frame put down by el?_irq() in entry.S, which + * is structured as follows: + * + * ------------ + * | | <- irq_stack_ptr + * top ------------ + * | elr_el1 | + * ------------ + * | x29 | <- irq_stack_ptr - 0x10 + * ------------ + * | xzr | + * ------------ + * | x19 | <- irq_stack_ptr - 0x20 + * ------------ + * + * where x19 holds a copy of the task stack pointer. + * */ #define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP) @@ -27,7 +43,7 @@ DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); * The offset from irq_stack_ptr where entry.S will store the original * stack pointer. Used by unwind_frame() and dump_backtrace(). */ -#define IRQ_STACK_TO_TASK_STACK(ptr) *((unsigned long *)(ptr - 0x10)); +#define IRQ_STACK_TO_TASK_STACK(ptr) *((unsigned long *)(ptr - 0x20)); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 8f7e737949fe..be7ec544b540 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -199,7 +199,7 @@ alternative_endif /* Add a dummy stack frame */ stp x29, \dummy_lr, [sp, #-16]! // dummy stack frame mov x29, sp - stp xzr, x19, [sp, #-16]! + stp x19, xzr, [sp, #-16]! 9998: .endm From 95e1db8bd78d2b3f15f7d4e7896735a041c775f6 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 10 Dec 2015 10:22:39 +0000 Subject: [PATCH 624/797] arm64: Add this_cpu_ptr() assembler macro for use in entry.S irq_stack is a per_cpu variable, that needs to be access from entry.S. Use an assembler macro instead of the unreadable details. Signed-off-by: James Morse Signed-off-by: Will Deacon (cherry picked from commit aa4d5d3cbc258c355151a3903211b27359390ec5) Signed-off-by: Alex Shi --- arch/arm64/include/asm/assembler.h | 11 +++++++++++ arch/arm64/kernel/entry.S | 4 +--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 12eff928ef8b..bb7b72734c24 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -193,6 +193,17 @@ lr .req x30 // link register str \src, [\tmp, :lo12:\sym] .endm + /* + * @sym: The name of the per-cpu variable + * @reg: Result of per_cpu(sym, smp_processor_id()) + * @tmp: scratch register + */ + .macro this_cpu_ptr, sym, reg, tmp + adr_l \reg, \sym + mrs \tmp, tpidr_el1 + add \reg, \reg, \tmp + .endm + /* * Annotate a function as position independent, i.e., safe to be called before * the kernel virtual mapping is activated. diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index be7ec544b540..e394f8c9595a 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -179,9 +179,7 @@ alternative_endif .macro irq_stack_entry, dummy_lr mov x19, sp // preserve the original sp - adr_l x25, irq_stack - mrs x26, tpidr_el1 - add x25, x25, x26 + this_cpu_ptr irq_stack, x25, x26 /* * Check the lowest address on irq_stack for the irq_count value, From e330d15430acce6073bb2c8486fba7555be1e923 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 10 Dec 2015 10:22:40 +0000 Subject: [PATCH 625/797] arm64: when walking onto the task stack, check sp & fp are in current->stack When unwind_frame() reaches the bottom of the irq_stack, the last fp points to the original task stack. unwind_frame() uses IRQ_STACK_TO_TASK_STACK() to find the sp value. If either values is wrong, we may end up walking a corrupt stack. Check these values are sane by testing if they are both on the stack pointed to by current->stack. Signed-off-by: James Morse Signed-off-by: Will Deacon (cherry picked from commit 1ffe199b1c9b72a8e752a9ae2a7af10128ab2ca1) Signed-off-by: Alex Shi --- arch/arm64/kernel/stacktrace.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index b947eeffa5b2..d916d5b6aef6 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -71,9 +71,17 @@ int notrace unwind_frame(struct stackframe *frame) * to task stack. * If we reach the end of the stack - and its an interrupt stack, * read the original task stack pointer from the dummy frame. + * + * Check the frame->fp we read from the bottom of the irq_stack, + * and the original task stack pointer are both in current->stack. */ - if (frame->sp == irq_stack_ptr) - frame->sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + if (frame->sp == irq_stack_ptr) { + unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + + if(object_is_on_stack((void *)orig_sp) && + object_is_on_stack((void *)frame->fp)) + frame->sp = orig_sp; + } return 0; } From 70dfc6968ad22e057520da92b7b4da86041d3ea7 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 10 Dec 2015 10:22:41 +0000 Subject: [PATCH 626/797] arm64: don't call C code with el0's fp register On entry from el0, we save all the registers on the kernel stack, and restore them before returning. x29 remains unchanged when we call out to C code, which will store x29 as the frame-pointer on the stack. Instead, write 0 into x29 after entry from el0, to avoid any risk of tracing into user space. Signed-off-by: James Morse Signed-off-by: Will Deacon (cherry picked from commit 49003a8d6b35e128ef5e51433e60e783a46fbe5f) Signed-off-by: Alex Shi --- arch/arm64/kernel/entry.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e394f8c9595a..2284c296e3f7 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -93,6 +93,8 @@ and tsk, tsk, #~(THREAD_SIZE - 1) // Ensure MDSCR_EL1.SS is clear, ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. + + mov x29, xzr // fp pointed to user-space .else add x21, sp, #S_FRAME_SIZE .endif From 2949f7a8a1516b704750cd343aebb36de2428d38 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Dec 2015 12:44:36 +0000 Subject: [PATCH 627/797] arm64: mm: remove pointless PAGE_MASKing As pgd_offset{,_k} shift the input address by PGDIR_SHIFT, the sub-page bits will always be shifted out. There is no need to apply PAGE_MASK before this. Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Will Deacon (cherry picked from commit e2c30ee320eb96304896c7ab84499e5bc5e5fb6e) Signed-off-by: Alex Shi --- arch/arm64/mm/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index d2a6194f4bec..c5bd5bca8e3d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -288,7 +288,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, + __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt, size, prot, early_alloc); } @@ -309,7 +309,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, return; } - return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), + return __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt, size, prot, late_alloc); } From a2151a0e23afcaf1fbb8f2e63bf2335f61e12172 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Dec 2015 12:44:37 +0000 Subject: [PATCH 628/797] arm64: Remove redundant padding from linker script Currently we place an ALIGN_DEBUG_RO between text and data for the .text and .init sections, and depending on configuration each of these may result in up to SECTION_SIZE bytes worth of padding (for DEBUG_RODATA_ALIGN). We make no distinction between the text and data in each of these sections at any point when creating the initial page tables in head.S. We also make no distinction when modifying the tables; __map_memblock, fixup_executable, mark_rodata_ro, and fixup_init only work at section granularity. Thus this padding is unnecessary. For the spit between init text and data we impose a minimum alignment of 16 bytes, but this is also unnecessary. The init data is output immediately after the padding before any symbols are defined, so this is not required to keep a symbol for linker a section array correctly associated with the data. Any objects within the section will be given at least their usual alignment regardless. This patch removes the redundant padding. Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Will Deacon (cherry picked from commit 5b28cd9d084eca8ddc46270d2720305bfd40e348) Signed-off-by: Alex Shi --- arch/arm64/kernel/vmlinux.lds.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 71426a78db12..cc2572db32a6 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -113,7 +113,6 @@ SECTIONS *(.got) /* Global offset table */ } - ALIGN_DEBUG_RO RO_DATA(PAGE_SIZE) EXCEPTION_TABLE(8) NOTES @@ -128,7 +127,6 @@ SECTIONS ARM_EXIT_KEEP(EXIT_TEXT) } - ALIGN_DEBUG_RO_MIN(16) .init.data : { INIT_DATA INIT_SETUP(16) From 9250d09be9e3f467145584fd476a6132e04ddaef Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Dec 2015 12:44:38 +0000 Subject: [PATCH 629/797] arm64: mm: fold alternatives into .init Currently we treat the alternatives separately from other data that's only used during initialisation, using separate .altinstructions and .altinstr_replacement linker sections. These are freed for general allocation separately from .init*. This is problematic as: * We do not remove execute permissions, as we do for .init, leaving the memory executable. * We pad between them, making the kernel Image bianry up to PAGE_SIZE bytes larger than necessary. This patch moves the two sections into the contiguous region used for .init*. This saves some memory, ensures that we remove execute permissions, and allows us to remove some code made redundant by this reorganisation. Signed-off-by: Mark Rutland Cc: Andre Przywara Cc: Catalin Marinas Cc: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Will Deacon (cherry picked from commit 9aa4ec1571da62366cfddc20f3b923609604fe63) Signed-off-by: Alex Shi --- arch/arm64/include/asm/alternative.h | 1 - arch/arm64/kernel/alternative.c | 6 ------ arch/arm64/kernel/vmlinux.lds.S | 5 ++--- arch/arm64/mm/init.c | 1 - 4 files changed, 2 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index d56ec0715157..e4962f04201e 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -19,7 +19,6 @@ struct alt_instr { void __init apply_alternatives_all(void); void apply_alternatives(void *start, size_t length); -void free_alternatives_memory(void); #define ALTINSTR_ENTRY(feature) \ " .word 661b - .\n" /* label */ \ diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index ab9db0e9818c..d2ee1b21a10d 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -158,9 +158,3 @@ void apply_alternatives(void *start, size_t length) __apply_alternatives(®ion); } - -void free_alternatives_memory(void) -{ - free_reserved_area(__alt_instructions, __alt_instructions_end, - 0, "alternatives"); -} diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index cc2572db32a6..e3928f578891 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -141,9 +141,6 @@ SECTIONS PERCPU_SECTION(L1_CACHE_BYTES) - . = ALIGN(PAGE_SIZE); - __init_end = .; - . = ALIGN(4); .altinstructions : { __alt_instructions = .; @@ -155,6 +152,8 @@ SECTIONS } . = ALIGN(PAGE_SIZE); + __init_end = .; + _data = .; _sdata = .; RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 10fab52eed95..dba32ceff17a 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -360,7 +360,6 @@ void free_initmem(void) { fixup_init(); free_initmem_default(0); - free_alternatives_memory(); } #ifdef CONFIG_BLK_DEV_INITRD From c92251d1a82c7c071662ca90300ebf488ab3d6f1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 10 Dec 2015 16:54:32 +0000 Subject: [PATCH 630/797] arm64: cmpxchg: Don't incldue linux/mmdebug.h The arm64 asm/cmpxchg.h includes linux/mmdebug.h but doesn't so far as I can tell actually use anything from it. Removing the inclusion reduces spurious header dependency rebuilds and also avoids issues with recursive inclusions of headers causing build breaks due to attempts to use things before they are defined if linux/mmdebug.h starts pulling in more low level headers. Such errors have happened in -next recently, for example: In file included from include/linux/completion.h:11:0, from include/linux/rcupdate.h:43, from include/linux/tracepoint.h:19, from include/linux/mmdebug.h:6, from ./arch/arm64/include/asm/cmpxchg.h:22, from ./arch/arm64/include/asm/atomic.h:41, from include/linux/atomic.h:4, from include/linux/spinlock.h:406, from include/linux/seqlock.h:35, from include/linux/time.h:5, from include/uapi/linux/timex.h:56, from include/linux/timex.h:56, from include/linux/sched.h:19, from arch/arm64/kernel/asm-offsets.c:21: include/linux/wait.h: In function 'wait_on_atomic_t': include/linux/wait.h:1218:2: error: implicit declaration of function 'atomic_read' [-Werror=implicit-function-declaration] if (atomic_read(val) == 0) Signed-off-by: Mark Brown Signed-off-by: Will Deacon (cherry picked from commit 4a6ccf30263f4e265c0f171561bf4c40bed5f273) Signed-off-by: Alex Shi --- arch/arm64/include/asm/cmpxchg.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 9ea611ea69df..510c7b404454 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -19,7 +19,6 @@ #define __ASM_CMPXCHG_H #include -#include #include #include From 9429ab599551a430b8e6d9d8bfccfc9f31288211 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 11 Dec 2015 11:04:31 +0000 Subject: [PATCH 631/797] arm64: mm: place __cpu_setup in .text We drop __cpu_setup in .text.init, which ends up being part of .text. The .text.init section was a legacy section name which has been unused elsewhere for a long time. The ".text.init" name is misleading if read as a synonym for ".init.text". Any CPU may execute __cpu_setup before turning the MMU on, so it should simply live in .text. Remove the pointless section assignment. This will leave __cpu_setup in the .text section. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon (cherry picked from commit f00083cae331e5d3eecade6b4fdc35d0825e73ef) Signed-off-by: Alex Shi --- arch/arm64/mm/proc.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index b8f04b3f2786..c164d2cb35c0 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -140,8 +140,6 @@ ENTRY(cpu_do_switch_mm) ret ENDPROC(cpu_do_switch_mm) - .section ".text.init", #alloc, #execinstr - /* * __cpu_setup * From 7ae24aa87b5b0c58d378b6fc8fb0d8e72fbfd81d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 17 Nov 2015 14:45:47 +0000 Subject: [PATCH 632/797] arm64: Documentation: add list of software workarounds for errata It's not immediately obvious which hardware errata are worked around in the Linux kernel for an arbitrary kernel tree, so add a file to keep track of what we're working around. Acked-by: Catalin Marinas Signed-off-by: Will Deacon (cherry picked from commit 9cb9c9e5ba8453537e8e645318edf231fe54eaf9) Signed-off-by: Alex Shi --- Documentation/arm64/silicon-errata.txt | 58 ++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 Documentation/arm64/silicon-errata.txt diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt new file mode 100644 index 000000000000..58b71ddf9b60 --- /dev/null +++ b/Documentation/arm64/silicon-errata.txt @@ -0,0 +1,58 @@ + Silicon Errata and Software Workarounds + ======================================= + +Author: Will Deacon +Date : 27 November 2015 + +It is an unfortunate fact of life that hardware is often produced with +so-called "errata", which can cause it to deviate from the architecture +under specific circumstances. For hardware produced by ARM, these +errata are broadly classified into the following categories: + + Category A: A critical error without a viable workaround. + Category B: A significant or critical error with an acceptable + workaround. + Category C: A minor error that is not expected to occur under normal + operation. + +For more information, consult one of the "Software Developers Errata +Notice" documents available on infocenter.arm.com (registration +required). + +As far as Linux is concerned, Category B errata may require some special +treatment in the operating system. For example, avoiding a particular +sequence of code, or configuring the processor in a particular way. A +less common situation may require similar actions in order to declassify +a Category A erratum into a Category C erratum. These are collectively +known as "software workarounds" and are only required in the minority of +cases (e.g. those cases that both require a non-secure workaround *and* +can be triggered by Linux). + +For software workarounds that may adversely impact systems unaffected by +the erratum in question, a Kconfig entry is added under "Kernel +Features" -> "ARM errata workarounds via the alternatives framework". +These are enabled by default and patched in at runtime when an affected +CPU is detected. For less-intrusive workarounds, a Kconfig option is not +available and the code is structured (preferably with a comment) in such +a way that the erratum will not be hit. + +This approach can make it slightly onerous to determine exactly which +errata are worked around in an arbitrary kernel source tree, so this +file acts as a registry of software workarounds in the Linux Kernel and +will be updated when new workarounds are committed and backported to +stable kernels. + +| Implementor | Component | Erratum ID | Kconfig | ++----------------+-----------------+-----------------+-------------------------+ +| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | +| ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | +| ARM | Cortex-A53 | #824069 | ARM64_ERRATUM_824069 | +| ARM | Cortex-A53 | #819472 | ARM64_ERRATUM_819472 | +| ARM | Cortex-A53 | #845719 | ARM64_ERRATUM_845719 | +| ARM | Cortex-A53 | #843419 | ARM64_ERRATUM_843419 | +| ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 | +| ARM | Cortex-A57 | #852523 | N/A | +| ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 | +| | | | | +| Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | +| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | From a79c216b06b18f1545d55e8f238dd9b49896f347 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 15 Dec 2015 11:21:25 +0000 Subject: [PATCH 633/797] arm64: reduce stack use in irq_handler The code for switching to irq_stack stores three pieces of information on the stack, fp+lr, as a fake stack frame (that lets us walk back onto the interrupted tasks stack frame), and the address of the struct pt_regs that contains the register values from kernel entry. (which dump_backtrace() will print in any stack trace). To reduce this, we store fp, and the pointer to the struct pt_regs. unwind_frame() can recognise this as the irq_stack dummy frame, (as it only appears at the top of the irq_stack), and use the struct pt_regs values to find the missing interrupted link-register. Suggested-by: Will Deacon Signed-off-by: James Morse Signed-off-by: Will Deacon (cherry picked from commit 971c67ce37cfeeaf560e792a2c3bc21d8b67163a) Signed-off-by: Alex Shi --- arch/arm64/include/asm/irq.h | 11 ++++------- arch/arm64/kernel/entry.S | 12 +++++++----- arch/arm64/kernel/stacktrace.c | 19 ++++++++++++++++--- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 877c7e358384..3bece4379bd9 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -25,16 +25,13 @@ DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); * ------------ * | | <- irq_stack_ptr * top ------------ - * | elr_el1 | + * | x19 | <- irq_stack_ptr - 0x08 * ------------ * | x29 | <- irq_stack_ptr - 0x10 * ------------ - * | xzr | - * ------------ - * | x19 | <- irq_stack_ptr - 0x20 - * ------------ * - * where x19 holds a copy of the task stack pointer. + * where x19 holds a copy of the task stack pointer where the struct pt_regs + * from kernel_entry can be found. * */ #define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP) @@ -43,7 +40,7 @@ DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); * The offset from irq_stack_ptr where entry.S will store the original * stack pointer. Used by unwind_frame() and dump_backtrace(). */ -#define IRQ_STACK_TO_TASK_STACK(ptr) *((unsigned long *)(ptr - 0x20)); +#define IRQ_STACK_TO_TASK_STACK(ptr) (*((unsigned long *)((ptr) - 0x08))) extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2284c296e3f7..0667fb7d8bb1 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -178,7 +178,7 @@ alternative_endif mrs \rd, sp_el0 .endm - .macro irq_stack_entry, dummy_lr + .macro irq_stack_entry mov x19, sp // preserve the original sp this_cpu_ptr irq_stack, x25, x26 @@ -196,10 +196,12 @@ alternative_endif add x26, x25, x26 mov sp, x26 - /* Add a dummy stack frame */ - stp x29, \dummy_lr, [sp, #-16]! // dummy stack frame + /* + * Add a dummy stack frame, this non-standard format is fixed up + * by unwind_frame() + */ + stp x29, x19, [sp, #-16]! mov x29, sp - stp x19, xzr, [sp, #-16]! 9998: .endm @@ -229,7 +231,7 @@ tsk .req x28 // current thread_info .macro irq_handler ldr_l x1, handle_arch_irq mov x0, sp - irq_stack_entry x22 + irq_stack_entry blr x1 irq_stack_exit .endm diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index d916d5b6aef6..b9fd3a8abfc1 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -70,17 +70,30 @@ int notrace unwind_frame(struct stackframe *frame) * Check whether we are going to walk through from interrupt stack * to task stack. * If we reach the end of the stack - and its an interrupt stack, - * read the original task stack pointer from the dummy frame. + * unpack the dummy frame to find the original elr. * * Check the frame->fp we read from the bottom of the irq_stack, * and the original task stack pointer are both in current->stack. */ if (frame->sp == irq_stack_ptr) { + struct pt_regs *irq_args; unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); - if(object_is_on_stack((void *)orig_sp) && - object_is_on_stack((void *)frame->fp)) + if (object_is_on_stack((void *)orig_sp) && + object_is_on_stack((void *)frame->fp)) { frame->sp = orig_sp; + + /* orig_sp is the saved pt_regs, find the elr */ + irq_args = (struct pt_regs *)orig_sp; + frame->pc = irq_args->pc; + } else { + /* + * This frame has a non-standard format, and we + * didn't fix it, because the data looked wrong. + * Refuse to output this frame. + */ + return -EINVAL; + } } return 0; From ac7406c28c8bada863d36c46ca246bb7b76f3e9f Mon Sep 17 00:00:00 2001 From: Ashok Kumar Date: Thu, 17 Dec 2015 01:38:31 -0800 Subject: [PATCH 634/797] arm64: Defer dcache flush in __cpu_copy_user_page Defer dcache flushing to __sync_icache_dcache by calling flush_dcache_page which clears PG_dcache_clean flag. Acked-by: Catalin Marinas Signed-off-by: Ashok Kumar Signed-off-by: Will Deacon (cherry picked from commit e6b1185f77351aa154e63bd54b05d07ff99d4ffa) Signed-off-by: Alex Shi --- arch/arm64/mm/copypage.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 13bbc3be6f5a..22e4cb4d6f53 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -24,8 +24,9 @@ void __cpu_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) { + struct page *page = virt_to_page(kto); copy_page(kto, kfrom); - __flush_dcache_area(kto, PAGE_SIZE); + flush_dcache_page(page); } EXPORT_SYMBOL_GPL(__cpu_copy_user_page); From 358e3c80a223c4d79a786be2e71e51cab91c2e7e Mon Sep 17 00:00:00 2001 From: Ashok Kumar Date: Thu, 17 Dec 2015 01:38:32 -0800 Subject: [PATCH 635/797] arm64: Use PoU cache instr for I/D coherency In systems with three levels of cache(PoU at L1 and PoC at L3), PoC cache flush instructions flushes L2 and L3 caches which could affect performance. For cache flushes for I and D coherency, PoU should suffice. So changing all I and D coherency related cache flushes to PoU. Introduced a new __clean_dcache_area_pou API for dcache flush till PoU and provided a common macro for __flush_dcache_area and __clean_dcache_area_pou. Also, now in __sync_icache_dcache, icache invalidation for non-aliasing VIPT icache is done only for that particular page instead of the earlier __flush_icache_all. Reviewed-by: Catalin Marinas Reviewed-by: Mark Rutland Signed-off-by: Ashok Kumar Signed-off-by: Will Deacon (cherry picked from commit 0a28714c53fd4f7aea709be7577dfbe0095c8c3e) Signed-off-by: Alex Shi Conflicts: included reset_pmuserenr_el0 in arch/arm64/mm/proc-macros.S --- arch/arm64/include/asm/cacheflush.h | 1 + arch/arm64/mm/cache.S | 28 ++++++++++++++---------- arch/arm64/mm/flush.c | 33 ++++++++++++++++------------- arch/arm64/mm/proc-macros.S | 22 +++++++++++++++++++ 4 files changed, 58 insertions(+), 26 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 54efedaf331f..7fc294c3bc5b 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -68,6 +68,7 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); +extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); static inline void flush_cache_mm(struct mm_struct *mm) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index cfa44a6adc0a..6df07069a025 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -81,25 +81,31 @@ ENDPROC(__flush_cache_user_range) /* * __flush_dcache_area(kaddr, size) * - * Ensure that the data held in the page kaddr is written back to the - * page in question. + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned and invalidated to the PoC. * * - kaddr - kernel address * - size - size in question */ ENTRY(__flush_dcache_area) - dcache_line_size x2, x3 - add x1, x0, x1 - sub x3, x2, #1 - bic x0, x0, x3 -1: dc civac, x0 // clean & invalidate D line / unified line - add x0, x0, x2 - cmp x0, x1 - b.lo 1b - dsb sy + dcache_by_line_op civac, sy, x0, x1, x2, x3 ret ENDPIPROC(__flush_dcache_area) +/* + * __clean_dcache_area_pou(kaddr, size) + * + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned to the PoU. + * + * - kaddr - kernel address + * - size - size in question + */ +ENTRY(__clean_dcache_area_pou) + dcache_by_line_op cvau, ish, x0, x1, x2, x3 + ret +ENDPROC(__clean_dcache_area_pou) + /* * __inval_cache_range(start, end) * - start - start address of region diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index c26b804015e8..46649d6e6c5a 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -34,19 +34,24 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, __flush_icache_all(); } +static void sync_icache_aliases(void *kaddr, unsigned long len) +{ + unsigned long addr = (unsigned long)kaddr; + + if (icache_is_aliasing()) { + __clean_dcache_area_pou(kaddr, len); + __flush_icache_all(); + } else { + flush_icache_range(addr, addr + len); + } +} + static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *kaddr, unsigned long len) { - if (vma->vm_flags & VM_EXEC) { - unsigned long addr = (unsigned long)kaddr; - if (icache_is_aliasing()) { - __flush_dcache_area(kaddr, len); - __flush_icache_all(); - } else { - flush_icache_range(addr, addr + len); - } - } + if (vma->vm_flags & VM_EXEC) + sync_icache_aliases(kaddr, len); } /* @@ -74,13 +79,11 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) if (!page_mapping(page)) return; - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { - __flush_dcache_area(page_address(page), - PAGE_SIZE << compound_order(page)); + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + sync_icache_aliases(page_address(page), + PAGE_SIZE << compound_order(page)); + else if (icache_is_aivivt()) __flush_icache_all(); - } else if (icache_is_aivivt()) { - __flush_icache_all(); - } } /* diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S index d69dffffaa89..984edcda1850 100644 --- a/arch/arm64/mm/proc-macros.S +++ b/arch/arm64/mm/proc-macros.S @@ -74,3 +74,25 @@ msr pmuserenr_el0, xzr // Disable PMU access from EL0 9000: .endm + +/* + * Macro to perform a data cache maintenance for the interval + * [kaddr, kaddr + size) + * + * op: operation passed to dc instruction + * domain: domain used in dsb instruciton + * kaddr: starting virtual address of the region + * size: size of the region + * Corrupts: kaddr, size, tmp1, tmp2 + */ + .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 + dcache_line_size \tmp1, \tmp2 + add \size, \kaddr, \size + sub \tmp2, \tmp1, #1 + bic \kaddr, \kaddr, \tmp2 +9998: dc \op, \kaddr + add \kaddr, \kaddr, \tmp1 + cmp \kaddr, \size + b.lo 9998b + dsb \domain + .endm From 720089ef0ba8007c34dfa7d80d96e27d6d23088f Mon Sep 17 00:00:00 2001 From: David Woods Date: Thu, 17 Dec 2015 14:31:26 -0500 Subject: [PATCH 636/797] arm64: hugetlb: add support for PTE contiguous bit The arm64 MMU supports a Contiguous bit which is a hint that the TTE is one of a set of contiguous entries which can be cached in a single TLB entry. Supporting this bit adds new intermediate huge page sizes. The set of huge page sizes available depends on the base page size. Without using contiguous pages the huge page sizes are as follows. 4KB: 2MB 1GB 64KB: 512MB With a 4KB granule, the contiguous bit groups together sets of 16 pages and with a 64KB granule it groups sets of 32 pages. This enables two new huge page sizes in each case, so that the full set of available sizes is as follows. 4KB: 64KB 2MB 32MB 1GB 64KB: 2MB 512MB 16GB If a 16KB granule is used then the contiguous bit groups 128 pages at the PTE level and 32 pages at the PMD level. If the base page size is set to 64KB then 2MB pages are enabled by default. It is possible in the future to make 2MB the default huge page size for both 4KB and 64KB granules. Reviewed-by: Chris Metcalf Reviewed-by: Steve Capper Signed-off-by: David Woods Signed-off-by: Will Deacon (cherry picked from commit 66b3923a1a0f77a563b43f43f6ad091354abbfe9) Signed-off-by: Alex Shi --- arch/arm64/Kconfig | 3 - arch/arm64/include/asm/hugetlb.h | 44 ++-- arch/arm64/include/asm/pgtable-hwdef.h | 18 +- arch/arm64/include/asm/pgtable.h | 10 +- arch/arm64/mm/hugetlbpage.c | 274 ++++++++++++++++++++++++- include/linux/hugetlb.h | 2 - 6 files changed, 313 insertions(+), 38 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4876459c0838..ffa3c549a4ba 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -530,9 +530,6 @@ config HW_PERF_EVENTS config SYS_SUPPORTS_HUGETLBFS def_bool y -config ARCH_WANT_GENERAL_HUGETLB - def_bool y - config ARCH_WANT_HUGE_PMD_SHARE def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index bb4052e85dba..bbc1e35aa601 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -26,36 +26,7 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) -{ - set_pte_at(mm, addr, ptep, pte); -} -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - ptep_clear_flush(vma, addr, ptep); -} - -static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - ptep_set_wrprotect(mm, addr, ptep); -} - -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - return ptep_get_and_clear(mm, addr, ptep); -} - -static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep, - pte_t pte, int dirty) -{ - return ptep_set_access_flags(vma, addr, ptep, pte, dirty); -} static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, @@ -97,4 +68,19 @@ static inline void arch_clear_hugepage_flags(struct page *page) clear_bit(PG_dcache_clean, &page->flags); } +extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable); +#define arch_make_huge_pte arch_make_huge_pte +extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); +extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty); +extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); +extern void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); +extern void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + #endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index d6739e836f7b..5c25b831273d 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -90,7 +90,23 @@ /* * Contiguous page definitions. */ -#define CONT_PTES (_AC(1, UL) << CONT_SHIFT) +#ifdef CONFIG_ARM64_64K_PAGES +#define CONT_PTE_SHIFT 5 +#define CONT_PMD_SHIFT 5 +#elif defined(CONFIG_ARM64_16K_PAGES) +#define CONT_PTE_SHIFT 7 +#define CONT_PMD_SHIFT 5 +#else +#define CONT_PTE_SHIFT 4 +#define CONT_PMD_SHIFT 4 +#endif + +#define CONT_PTES (1 << CONT_PTE_SHIFT) +#define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE) +#define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1)) +#define CONT_PMDS (1 << CONT_PMD_SHIFT) +#define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) +#define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1)) /* the the numerical offset of the PTE within a range of CONT_PTES */ #define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1)) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index cd5dfc97268e..fd3d7c177c5f 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -228,7 +228,8 @@ static inline pte_t pte_mkspecial(pte_t pte) static inline pte_t pte_mkcont(pte_t pte) { - return set_pte_bit(pte, __pgprot(PTE_CONT)); + pte = set_pte_bit(pte, __pgprot(PTE_CONT)); + return set_pte_bit(pte, __pgprot(PTE_TYPE_PAGE)); } static inline pte_t pte_mknoncont(pte_t pte) @@ -236,6 +237,11 @@ static inline pte_t pte_mknoncont(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_CONT)); } +static inline pmd_t pmd_mkcont(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | PMD_SECT_CONT); +} + static inline void set_pte(pte_t *ptep, pte_t pte) { *ptep = pte; @@ -309,7 +315,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, /* * Hugetlb definitions. */ -#define HUGE_MAX_HSTATE 2 +#define HUGE_MAX_HSTATE 4 #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 383b03ff38f8..82d607c3614e 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -41,17 +41,289 @@ int pud_huge(pud_t pud) #endif } +static int find_num_contig(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, size_t *pgsize) +{ + pgd_t *pgd = pgd_offset(mm, addr); + pud_t *pud; + pmd_t *pmd; + + *pgsize = PAGE_SIZE; + if (!pte_cont(pte)) + return 1; + if (!pgd_present(*pgd)) { + VM_BUG_ON(!pgd_present(*pgd)); + return 1; + } + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) { + VM_BUG_ON(!pud_present(*pud)); + return 1; + } + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) { + VM_BUG_ON(!pmd_present(*pmd)); + return 1; + } + if ((pte_t *)pmd == ptep) { + *pgsize = PMD_SIZE; + return CONT_PMDS; + } + return CONT_PTES; +} + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + size_t pgsize; + int i; + int ncontig = find_num_contig(mm, addr, ptep, pte, &pgsize); + unsigned long pfn; + pgprot_t hugeprot; + + if (ncontig == 1) { + set_pte_at(mm, addr, ptep, pte); + return; + } + + pfn = pte_pfn(pte); + hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); + for (i = 0; i < ncontig; i++) { + pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep, + pte_val(pfn_pte(pfn, hugeprot))); + set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); + ptep++; + pfn += pgsize >> PAGE_SHIFT; + addr += pgsize; + } +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte = NULL; + + pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz); + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (!pud) + return NULL; + + if (sz == PUD_SIZE) { + pte = (pte_t *)pud; + } else if (sz == (PAGE_SIZE * CONT_PTES)) { + pmd_t *pmd = pmd_alloc(mm, pud, addr); + + WARN_ON(addr & (sz - 1)); + /* + * Note that if this code were ever ported to the + * 32-bit arm platform then it will cause trouble in + * the case where CONFIG_HIGHPTE is set, since there + * will be no pte_unmap() to correspond with this + * pte_alloc_map(). + */ + pte = pte_alloc_map(mm, NULL, pmd, addr); + } else if (sz == PMD_SIZE) { + if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && + pud_none(*pud)) + pte = huge_pmd_share(mm, addr, pud); + else + pte = (pte_t *)pmd_alloc(mm, pud, addr); + } else if (sz == (PMD_SIZE * CONT_PMDS)) { + pmd_t *pmd; + + pmd = pmd_alloc(mm, pud, addr); + WARN_ON(addr & (sz - 1)); + return (pte_t *)pmd; + } + + pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr, + sz, pte, pte_val(*pte)); + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); + if (!pgd_present(*pgd)) + return NULL; + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) + return NULL; + + if (pud_huge(*pud)) + return (pte_t *)pud; + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + return NULL; + + if (pte_cont(pmd_pte(*pmd))) { + pmd = pmd_offset( + pud, (addr & CONT_PMD_MASK)); + return (pte_t *)pmd; + } + if (pmd_huge(*pmd)) + return (pte_t *)pmd; + pte = pte_offset_kernel(pmd, addr); + if (pte_present(*pte) && pte_cont(*pte)) { + pte = pte_offset_kernel( + pmd, (addr & CONT_PTE_MASK)); + return pte; + } + return NULL; +} + +pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable) +{ + size_t pagesize = huge_page_size(hstate_vma(vma)); + + if (pagesize == CONT_PTE_SIZE) { + entry = pte_mkcont(entry); + } else if (pagesize == CONT_PMD_SIZE) { + entry = pmd_pte(pmd_mkcont(pte_pmd(entry))); + } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) { + pr_warn("%s: unrecognized huge page size 0x%lx\n", + __func__, pagesize); + } + return entry; +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t pte; + + if (pte_cont(*ptep)) { + int ncontig, i; + size_t pgsize; + pte_t *cpte; + bool is_dirty = false; + + cpte = huge_pte_offset(mm, addr); + ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize); + /* save the 1st pte to return */ + pte = ptep_get_and_clear(mm, addr, cpte); + for (i = 1; i < ncontig; ++i) { + /* + * If HW_AFDBM is enabled, then the HW could + * turn on the dirty bit for any of the page + * in the set, so check them all. + */ + ++cpte; + if (pte_dirty(ptep_get_and_clear(mm, addr, cpte))) + is_dirty = true; + } + if (is_dirty) + return pte_mkdirty(pte); + else + return pte; + } else { + return ptep_get_and_clear(mm, addr, ptep); + } +} + +int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + pte_t *cpte; + + if (pte_cont(pte)) { + int ncontig, i, changed = 0; + size_t pgsize = 0; + unsigned long pfn = pte_pfn(pte); + /* Select all bits except the pfn */ + pgprot_t hugeprot = + __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ + pte_val(pte)); + + cpte = huge_pte_offset(vma->vm_mm, addr); + pfn = pte_pfn(*cpte); + ncontig = find_num_contig(vma->vm_mm, addr, cpte, + *cpte, &pgsize); + for (i = 0; i < ncontig; ++i, ++cpte) { + changed = ptep_set_access_flags(vma, addr, cpte, + pfn_pte(pfn, + hugeprot), + dirty); + pfn += pgsize >> PAGE_SHIFT; + } + return changed; + } else { + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); + } +} + +void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + if (pte_cont(*ptep)) { + int ncontig, i; + pte_t *cpte; + size_t pgsize = 0; + + cpte = huge_pte_offset(mm, addr); + ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize); + for (i = 0; i < ncontig; ++i, ++cpte) + ptep_set_wrprotect(mm, addr, cpte); + } else { + ptep_set_wrprotect(mm, addr, ptep); + } +} + +void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + if (pte_cont(*ptep)) { + int ncontig, i; + pte_t *cpte; + size_t pgsize = 0; + + cpte = huge_pte_offset(vma->vm_mm, addr); + ncontig = find_num_contig(vma->vm_mm, addr, cpte, + *cpte, &pgsize); + for (i = 0; i < ncontig; ++i, ++cpte) + ptep_clear_flush(vma, addr, cpte); + } else { + ptep_clear_flush(vma, addr, ptep); + } +} + static __init int setup_hugepagesz(char *opt) { unsigned long ps = memparse(opt, &opt); + if (ps == PMD_SIZE) { hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); } else if (ps == PUD_SIZE) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else if (ps == (PAGE_SIZE * CONT_PTES)) { + hugetlb_add_hstate(CONT_PTE_SHIFT); + } else if (ps == (PMD_SIZE * CONT_PMDS)) { + hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT); } else { - pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20); + pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); return 0; } return 1; } __setup("hugepagesz=", setup_hugepagesz); + +#ifdef CONFIG_ARM64_64K_PAGES +static __init int add_default_hugepagesz(void) +{ + if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL) + hugetlb_add_hstate(CONT_PMD_SHIFT); + return 0; +} +arch_initcall(add_default_hugepagesz); +#endif diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 685c262e0be8..b0eb06423d5e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -96,9 +96,7 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm, struct address_space *mapping, pgoff_t idx, unsigned long address); -#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); -#endif extern int hugepages_treat_as_movable; extern int sysctl_hugetlb_shm_group; From da604e8646f553a4a2c7613491112b0c9b3ec400 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 18 Dec 2015 16:01:47 +0000 Subject: [PATCH 637/797] arm64: remove irq_count and do_softirq_own_stack() sysrq_handle_reboot() re-enables interrupts while on the irq stack. The irq_stack implementation wrongly assumed this would only ever happen via the softirq path, allowing it to update irq_count late, in do_softirq_own_stack(). This means if an irq occurs in sysrq_handle_reboot(), during emergency_restart() the stack will be corrupted, as irq_count wasn't updated. Lose the optimisation, and instead of moving the adding/subtracting of irq_count into irq_stack_entry/irq_stack_exit, remove it, and compare sp_el0 (struct thread_info) with sp & ~(THREAD_SIZE - 1). This tells us if we are on a task stack, if so, we can safely switch to the irq stack. Finally, remove do_softirq_own_stack(), we don't need it anymore. Reported-by: Will Deacon Signed-off-by: James Morse [will: use get_thread_info macro] Signed-off-by: Will Deacon (cherry picked from commit d224a69e3d80fe08f285d1f41d21b590bae4fa9f) Signed-off-by: Alex Shi --- arch/arm64/include/asm/irq.h | 2 -- arch/arm64/kernel/entry.S | 19 +++++++++--------- arch/arm64/kernel/irq.c | 38 +----------------------------------- 3 files changed, 11 insertions(+), 48 deletions(-) diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 3bece4379bd9..b77197d941fc 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -11,8 +11,6 @@ #include #include -#define __ARCH_HAS_DO_SOFTIRQ - struct pt_regs; DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 0667fb7d8bb1..c0db321db7e1 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -181,19 +181,20 @@ alternative_endif .macro irq_stack_entry mov x19, sp // preserve the original sp - this_cpu_ptr irq_stack, x25, x26 - /* - * Check the lowest address on irq_stack for the irq_count value, - * incremented by do_softirq_own_stack if we have re-enabled irqs - * while on the irq_stack. + * Compare sp with the current thread_info, if the top + * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and + * should switch to the irq stack. */ - ldr x26, [x25] - cbnz x26, 9998f // recursive use? + and x25, x19, #~(THREAD_SIZE - 1) + cmp x25, tsk + b.ne 9998f - /* switch to the irq stack */ + this_cpu_ptr irq_stack, x25, x26 mov x26, #IRQ_STACK_START_SP add x26, x25, x26 + + /* switch to the irq stack */ mov sp, x26 /* @@ -405,10 +406,10 @@ el1_irq: bl trace_hardirqs_off #endif + get_thread_info tsk irq_handler #ifdef CONFIG_PREEMPT - get_thread_info tsk ldr w24, [tsk, #TI_PREEMPT] // get preempt count cbnz w24, 1f // preempt count != 0 ldr x0, [tsk, #TI_FLAGS] // get flags diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index ff7ebb710e51..2386b26c0712 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -25,24 +25,14 @@ #include #include #include -#include #include #include unsigned long irq_err_count; -/* - * irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. - * irq_stack[0] is used as irq_count, a non-zero value indicates the stack - * is in use, and el?_irq() shouldn't switch to it. This is used to detect - * recursive use of the irq_stack, it is lazily updated by - * do_softirq_own_stack(), which is called on the irq_stack, before - * re-enabling interrupts to process softirqs. - */ +/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */ DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16); -#define IRQ_COUNT() (*per_cpu(irq_stack, smp_processor_id())) - int arch_show_interrupts(struct seq_file *p, int prec) { show_ipi_list(p, prec); @@ -66,29 +56,3 @@ void __init init_IRQ(void) if (!handle_arch_irq) panic("No interrupt controller found."); } - -/* - * do_softirq_own_stack() is called from irq_exit() before __do_softirq() - * re-enables interrupts, at which point we may re-enter el?_irq(). We - * increase irq_count here so that el1_irq() knows that it is already on the - * irq stack. - * - * Called with interrupts disabled, so we don't worry about moving cpu, or - * being interrupted while modifying irq_count. - * - * This function doesn't actually switch stack. - */ -void do_softirq_own_stack(void) -{ - int cpu = smp_processor_id(); - - WARN_ON_ONCE(!irqs_disabled()); - - if (on_irq_stack(current_stack_pointer, cpu)) { - IRQ_COUNT()++; - __do_softirq(); - IRQ_COUNT()--; - } else { - __do_softirq(); - } -} From 76f2d0af233200abc487122d0002f3c14d796676 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Tue, 15 Dec 2015 17:33:39 +0900 Subject: [PATCH 638/797] arm64: ftrace: modify a stack frame in a safe way Function graph tracer modifies a return address (LR) in a stack frame by calling ftrace_prepare_return() in a traced function's function prologue. The current code does this modification before preserving an original address at ftrace_push_return_trace() and there is always a small window of inconsistency when an interrupt occurs. This doesn't matter, as far as an interrupt stack is introduced, because stack tracer won't be invoked in an interrupt context. But it would be better to proactively minimize such a window by moving the LR modification after ftrace_push_return_trace(). Signed-off-by: AKASHI Takahiro Signed-off-by: Will Deacon (cherry picked from commit 79fdee9b6355c9720f14717e1ad66af51bb331b5) Signed-off-by: Alex Shi --- arch/arm64/kernel/ftrace.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 8f7005bc35bd..ebecf9aa33d1 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -129,23 +129,20 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, * on other archs. It's unlikely on AArch64. */ old = *parent; - *parent = return_hooker; trace.func = self_addr; trace.depth = current->curr_ret_stack + 1; /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - *parent = old; + if (!ftrace_graph_entry(&trace)) return; - } err = ftrace_push_return_trace(old, self_addr, &trace.depth, frame_pointer); - if (err == -EBUSY) { - *parent = old; + if (err == -EBUSY) return; - } + else + *parent = return_hooker; } #ifdef CONFIG_DYNAMIC_FTRACE From 30e9fa2678d1ab96894aa1ee670c5a804fe5a29b Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Tue, 15 Dec 2015 17:33:40 +0900 Subject: [PATCH 639/797] arm64: pass a task parameter to unwind_frame() Function graph tracer modifies a return address (LR) in a stack frame to hook a function's return. This will result in many useless entries (return_to_handler) showing up in a call stack list. We will fix this problem in a later patch ("arm64: ftrace: fix a stack tracer's output under function graph tracer"). But since real return addresses are saved in ret_stack[] array in struct task_struct, unwind functions need to be notified of, in addition to a stack pointer address, which task is being traced in order to find out real return addresses. This patch extends unwind functions' interfaces by adding an extra argument of a pointer to task_struct. Signed-off-by: AKASHI Takahiro Signed-off-by: Will Deacon (cherry picked from commit fe13f95b720075327a761fe6ddb45b0c90cab504) Signed-off-by: Alex Shi --- arch/arm64/include/asm/stacktrace.h | 6 ++++-- arch/arm64/kernel/perf_callchain.c | 2 +- arch/arm64/kernel/process.c | 2 +- arch/arm64/kernel/return_address.c | 2 +- arch/arm64/kernel/stacktrace.c | 8 ++++---- arch/arm64/kernel/time.c | 2 +- arch/arm64/kernel/traps.c | 2 +- 7 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 7318f6d54aa9..6fb61c5090b4 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -16,14 +16,16 @@ #ifndef __ASM_STACKTRACE_H #define __ASM_STACKTRACE_H +struct task_struct; + struct stackframe { unsigned long fp; unsigned long sp; unsigned long pc; }; -extern int unwind_frame(struct stackframe *frame); -extern void walk_stackframe(struct stackframe *frame, +extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); +extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index 3aa74830cc69..797220da912b 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -165,7 +165,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, frame.sp = regs->sp; frame.pc = regs->pc; - walk_stackframe(&frame, callchain_trace, entry); + walk_stackframe(current, &frame, callchain_trace, entry); } unsigned long perf_instruction_pointer(struct pt_regs *regs) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index f75b540bc3b4..98bf5461d4b6 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -348,7 +348,7 @@ unsigned long get_wchan(struct task_struct *p) do { if (frame.sp < stack_page || frame.sp >= stack_page + THREAD_SIZE || - unwind_frame(&frame)) + unwind_frame(p, &frame)) return 0; if (!in_sched_functions(frame.pc)) return frame.pc; diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index 6c4fd2810ecb..07b37ac05be4 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -44,7 +44,7 @@ void *return_address(unsigned int level) frame.sp = current_stack_pointer; frame.pc = (unsigned long)return_address; /* dummy */ - walk_stackframe(&frame, save_return_addr, &data); + walk_stackframe(current, &frame, save_return_addr, &data); if (!data.level) return data.addr; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index b9fd3a8abfc1..f7ee597ec883 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -36,7 +36,7 @@ * ldp x29, x30, [sp] * add sp, sp, #0x10 */ -int notrace unwind_frame(struct stackframe *frame) +int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) { unsigned long high, low; unsigned long fp = frame->fp; @@ -99,7 +99,7 @@ int notrace unwind_frame(struct stackframe *frame) return 0; } -void notrace walk_stackframe(struct stackframe *frame, +void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data) { while (1) { @@ -107,7 +107,7 @@ void notrace walk_stackframe(struct stackframe *frame, if (fn(frame, data)) break; - ret = unwind_frame(frame); + ret = unwind_frame(tsk, frame); if (ret < 0) break; } @@ -159,7 +159,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) frame.pc = (unsigned long)save_stack_trace_tsk; } - walk_stackframe(&frame, save_trace, &data); + walk_stackframe(tsk, &frame, save_trace, &data); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 13339b6ffc1a..6e5c521f123a 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -53,7 +53,7 @@ unsigned long profile_pc(struct pt_regs *regs) frame.sp = regs->sp; frame.pc = regs->pc; do { - int ret = unwind_frame(&frame); + int ret = unwind_frame(NULL, &frame); if (ret < 0) return 0; } while (in_lock_functions(frame.pc)); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8a0084541f84..937008523fa5 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -177,7 +177,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) int ret; dump_backtrace_entry(where); - ret = unwind_frame(&frame); + ret = unwind_frame(tsk, &frame); if (ret < 0) break; stack = frame.sp; From 1f18836b775f8f041dcad620c1b5d18c22c25c06 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Tue, 15 Dec 2015 17:33:41 +0900 Subject: [PATCH 640/797] arm64: ftrace: fix a stack tracer's output under function graph tracer Function graph tracer modifies a return address (LR) in a stack frame to hook a function return. This will result in many useless entries (return_to_handler) showing up in a) a stack tracer's output b) perf call graph (with perf record -g) c) dump_backtrace (at panic et al.) For example, in case of a), $ echo function_graph > /sys/kernel/debug/tracing/current_tracer $ echo 1 > /proc/sys/kernel/stack_trace_enabled $ cat /sys/kernel/debug/tracing/stack_trace Depth Size Location (54 entries) ----- ---- -------- 0) 4504 16 gic_raise_softirq+0x28/0x150 1) 4488 80 smp_cross_call+0x38/0xb8 2) 4408 48 return_to_handler+0x0/0x40 3) 4360 32 return_to_handler+0x0/0x40 ... In case of b), $ echo function_graph > /sys/kernel/debug/tracing/current_tracer $ perf record -e mem:XXX:x -ag -- sleep 10 $ perf report ... | | |--0.22%-- 0x550f8 | | | 0x10888 | | | el0_svc_naked | | | sys_openat | | | return_to_handler | | | return_to_handler ... In case of c), $ echo function_graph > /sys/kernel/debug/tracing/current_tracer $ echo c > /proc/sysrq-trigger ... Call trace: [] sysrq_handle_crash+0x24/0x30 [] return_to_handler+0x0/0x40 [] return_to_handler+0x0/0x40 ... This patch replaces such entries with real addresses preserved in current->ret_stack[] at unwind_frame(). This way, we can cover all the cases. Reviewed-by: Jungseok Lee Signed-off-by: AKASHI Takahiro [will: fixed minor context changes conflicting with irq stack bits] Signed-off-by: Will Deacon (cherry picked from commit 20380bb390a443b2c5c8800cec59743faf8151b4) Signed-off-by: Alex Shi --- arch/arm64/include/asm/ftrace.h | 2 ++ arch/arm64/include/asm/stacktrace.h | 3 +++ arch/arm64/kernel/perf_callchain.c | 3 +++ arch/arm64/kernel/process.c | 3 +++ arch/arm64/kernel/return_address.c | 3 +++ arch/arm64/kernel/stacktrace.c | 17 +++++++++++++++++ arch/arm64/kernel/time.c | 3 +++ arch/arm64/kernel/traps.c | 26 ++++++++++++++++++++------ 8 files changed, 54 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index c5534facf941..3c60f37e48ab 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -28,6 +28,8 @@ struct dyn_arch_ftrace { extern unsigned long ftrace_graph_call; +extern void return_to_handler(void); + static inline unsigned long ftrace_call_adjust(unsigned long addr) { /* diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 6fb61c5090b4..801a16dbbdf6 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -22,6 +22,9 @@ struct stackframe { unsigned long fp; unsigned long sp; unsigned long pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + unsigned int graph; +#endif }; extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index 797220da912b..ff4665462a02 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -164,6 +164,9 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, frame.fp = regs->regs[29]; frame.sp = regs->sp; frame.pc = regs->pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = current->curr_ret_stack; +#endif walk_stackframe(current, &frame, callchain_trace, entry); } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 98bf5461d4b6..88d742ba19d5 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -344,6 +344,9 @@ unsigned long get_wchan(struct task_struct *p) frame.fp = thread_saved_fp(p); frame.sp = thread_saved_sp(p); frame.pc = thread_saved_pc(p); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = p->curr_ret_stack; +#endif stack_page = (unsigned long)task_stack_page(p); do { if (frame.sp < stack_page || diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index 07b37ac05be4..1718706fde83 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -43,6 +43,9 @@ void *return_address(unsigned int level) frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; frame.pc = (unsigned long)return_address; /* dummy */ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = current->curr_ret_stack; +#endif walk_stackframe(current, &frame, save_return_addr, &data); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index f7ee597ec883..4fad9787ab46 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -17,6 +17,7 @@ */ #include #include +#include #include #include @@ -66,6 +67,19 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) frame->fp = *(unsigned long *)(fp); frame->pc = *(unsigned long *)(fp + 8); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + if (tsk && tsk->ret_stack && + (frame->pc == (unsigned long)return_to_handler)) { + /* + * This is a case where function graph tracer has + * modified a return address (LR) in a stack frame + * to hook a function return. + * So replace it to an original value. + */ + frame->pc = tsk->ret_stack[frame->graph--].ret; + } +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + /* * Check whether we are going to walk through from interrupt stack * to task stack. @@ -158,6 +172,9 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) frame.sp = current_stack_pointer; frame.pc = (unsigned long)save_stack_trace_tsk; } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = tsk->curr_ret_stack; +#endif walk_stackframe(tsk, &frame, save_trace, &data); if (trace->nr_entries < trace->max_entries) diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 6e5c521f123a..59779699a1a4 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -52,6 +52,9 @@ unsigned long profile_pc(struct pt_regs *regs) frame.fp = regs->regs[29]; frame.sp = regs->sp; frame.pc = regs->pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = -1; /* no task info */ +#endif do { int ret = unwind_frame(NULL, &frame); if (ret < 0) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 937008523fa5..bdc293f6adc4 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -147,17 +147,14 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; unsigned long irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + int skip; pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); if (!tsk) tsk = current; - if (regs) { - frame.fp = regs->regs[29]; - frame.sp = regs->sp; - frame.pc = regs->pc; - } else if (tsk == current) { + if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; frame.pc = (unsigned long)dump_backtrace; @@ -169,14 +166,31 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) frame.sp = thread_saved_sp(tsk); frame.pc = thread_saved_pc(tsk); } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = tsk->curr_ret_stack; +#endif + skip = !!regs; pr_emerg("Call trace:\n"); while (1) { unsigned long where = frame.pc; unsigned long stack; int ret; - dump_backtrace_entry(where); + /* skip until specified stack frame */ + if (!skip) { + dump_backtrace_entry(where); + } else if (frame.fp == regs->regs[29]) { + skip = 0; + /* + * Mostly, this is the case where this function is + * called in panic/abort. As exception handler's + * stack frame does not contain the corresponding pc + * at which an exception has taken place, use regs->pc + * instead. + */ + dump_backtrace_entry(regs->pc); + } ret = unwind_frame(tsk, &frame); if (ret < 0) break; From d2b08280e2c1f40fa209aaaaf86c9b730de28204 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 21 Dec 2015 16:44:27 +0000 Subject: [PATCH 641/797] arm64: traps: address fallout from printk -> pr_* conversion Commit ac7b406c1a9d ("arm64: Use pr_* instead of printk") was a fairly mindless s/printk/pr_*/ change driven by a complaint from checkpatch. As is usual with such changes, this has led to some odd behaviour on arm64: * syslog now picks up the "pr_emerg" line from dump_backtrace, but not the actual trace, which leads to a bunch of "kernel:Call trace:" lines in the log * __{pte,pmd,pgd}_error print at KERN_CRIT, as opposed to KERN_ERR which is used by other architectures. This patch restores the original printk behaviour for dump_backtrace and downgrade the pgtable error macros to KERN_ERR. Signed-off-by: Will Deacon (cherry picked from commit c9cd0ed925c0b927283d4739bfe689eb9d1e9dfd) Signed-off-by: Alex Shi --- arch/arm64/kernel/traps.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index bdc293f6adc4..cbedd724f48e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -171,7 +171,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) #endif skip = !!regs; - pr_emerg("Call trace:\n"); + printk("Call trace:\n"); while (1) { unsigned long where = frame.pc; unsigned long stack; @@ -482,22 +482,22 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) void __pte_error(const char *file, int line, unsigned long val) { - pr_crit("%s:%d: bad pte %016lx.\n", file, line, val); + pr_err("%s:%d: bad pte %016lx.\n", file, line, val); } void __pmd_error(const char *file, int line, unsigned long val) { - pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val); + pr_err("%s:%d: bad pmd %016lx.\n", file, line, val); } void __pud_error(const char *file, int line, unsigned long val) { - pr_crit("%s:%d: bad pud %016lx.\n", file, line, val); + pr_err("%s:%d: bad pud %016lx.\n", file, line, val); } void __pgd_error(const char *file, int line, unsigned long val) { - pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val); + pr_err("%s:%d: bad pgd %016lx.\n", file, line, val); } /* GENERIC_BUG traps */ From 12037cecc2cfc56e01851c0fbf605be9e05bbe95 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2016 10:18:51 +0100 Subject: [PATCH 642/797] arm64: module: fix relocation of movz instruction with negative immediate The test whether a movz instruction with a signed immediate should be turned into a movn instruction (i.e., when the immediate is negative) is flawed, since the value of imm is always positive. Also, the subsequent bounds check is incorrect since the limit update never executes, due to the fact that the imm_type comparison will always be false for negative signed immediates. Let's fix this by performing the sign test on sval directly, and replacing the bounds check with a simple comparison against U16_MAX. Signed-off-by: Ard Biesheuvel [will: tidied up use of sval, renamed MOVK enum value to MOVKZ] Signed-off-by: Will Deacon (cherry picked from commit b24a557527f97ad88619d5bd4c8017c635056d69) Signed-off-by: Alex Shi --- arch/arm64/kernel/module.c | 51 ++++++++++++++------------------------ 1 file changed, 18 insertions(+), 33 deletions(-) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index f4bc779e62e8..03464ab0fff2 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -30,9 +30,6 @@ #include #include -#define AARCH64_INSN_IMM_MOVNZ AARCH64_INSN_IMM_MAX -#define AARCH64_INSN_IMM_MOVK AARCH64_INSN_IMM_16 - void *module_alloc(unsigned long size) { void *p; @@ -110,16 +107,20 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) return 0; } +enum aarch64_insn_movw_imm_type { + AARCH64_INSN_IMM_MOVNZ, + AARCH64_INSN_IMM_MOVKZ, +}; + static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, - int lsb, enum aarch64_insn_imm_type imm_type) + int lsb, enum aarch64_insn_movw_imm_type imm_type) { - u64 imm, limit = 0; + u64 imm; s64 sval; u32 insn = le32_to_cpu(*(u32 *)place); sval = do_reloc(op, place, val); - sval >>= lsb; - imm = sval & 0xffff; + imm = sval >> lsb; if (imm_type == AARCH64_INSN_IMM_MOVNZ) { /* @@ -128,7 +129,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, * immediate is less than zero. */ insn &= ~(3 << 29); - if ((s64)imm >= 0) { + if (sval >= 0) { /* >=0: Set the instruction to MOVZ (opcode 10b). */ insn |= 2 << 29; } else { @@ -140,29 +141,13 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, */ imm = ~imm; } - imm_type = AARCH64_INSN_IMM_MOVK; } /* Update the instruction with the new encoding. */ - insn = aarch64_insn_encode_immediate(imm_type, insn, imm); + insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm); *(u32 *)place = cpu_to_le32(insn); - /* Shift out the immediate field. */ - sval >>= 16; - - /* - * For unsigned immediates, the overflow check is straightforward. - * For signed immediates, the sign bit is actually the bit past the - * most significant bit of the field. - * The AARCH64_INSN_IMM_16 immediate type is unsigned. - */ - if (imm_type != AARCH64_INSN_IMM_16) { - sval++; - limit++; - } - - /* Check the upper bits depending on the sign of the immediate. */ - if ((u64)sval > limit) + if (imm > U16_MAX) return -ERANGE; return 0; @@ -267,25 +252,25 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, overflow_check = false; case R_AARCH64_MOVW_UABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, - AARCH64_INSN_IMM_16); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G1_NC: overflow_check = false; case R_AARCH64_MOVW_UABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, - AARCH64_INSN_IMM_16); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G2_NC: overflow_check = false; case R_AARCH64_MOVW_UABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, - AARCH64_INSN_IMM_16); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G3: /* We're using the top bits so we can't overflow. */ overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48, - AARCH64_INSN_IMM_16); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_SABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, @@ -302,7 +287,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_MOVW_PREL_G0_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, - AARCH64_INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_PREL_G0: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, @@ -311,7 +296,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_MOVW_PREL_G1_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, - AARCH64_INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_PREL_G1: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, @@ -320,7 +305,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_MOVW_PREL_G2_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, - AARCH64_INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_PREL_G2: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, From 3fd9316702a82b498fcf7055f9781589ea6c1e1c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2016 10:18:52 +0100 Subject: [PATCH 643/797] arm64: module: avoid undefined shift behavior in reloc_data() Compilers may engage the improbability drive when encountering shifts by a distance that is a multiple of the size of the operand type. Since the required bounds check is very simple here, we can get rid of all the fuzzy masking, shifting and comparing, and use the documented bounds directly. Reported-by: David Binderman Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon (cherry picked from commit f930896967fa3f9ab16a6f87267b92798308d48f) Signed-off-by: Alex Shi --- arch/arm64/kernel/module.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 03464ab0fff2..93e970231ca9 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -72,15 +72,18 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, void *place, u64 val) static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) { - u64 imm_mask = (1 << len) - 1; s64 sval = do_reloc(op, place, val); switch (len) { case 16: *(s16 *)place = sval; + if (sval < S16_MIN || sval > U16_MAX) + return -ERANGE; break; case 32: *(s32 *)place = sval; + if (sval < S32_MIN || sval > U32_MAX) + return -ERANGE; break; case 64: *(s64 *)place = sval; @@ -89,21 +92,6 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) pr_err("Invalid length (%d) for data relocation\n", len); return 0; } - - /* - * Extract the upper value bits (including the sign bit) and - * shift them to bit 0. - */ - sval = (s64)(sval & ~(imm_mask >> 1)) >> (len - 1); - - /* - * Overflow has occurred if the value is not representable in - * len bits (i.e the bottom len bits are not sign-extended and - * the top bits are not all zero). - */ - if ((u64)(sval + 1) > 2) - return -ERANGE; - return 0; } From 87e4c1f363cfd0ed3a673d47f229725a6b0946d7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 5 Jan 2016 15:36:59 +0000 Subject: [PATCH 644/797] arm64: mm: move pgd_cache initialisation to pgtable_cache_init Initialising the suppport for EFI runtime services requires us to allocate a pgd off the back of an early_initcall. On systems where the PGD_SIZE is smaller than PAGE_SIZE (e.g. 64k pages and 48-bit VA), the pgd_cache isn't initialised at this stage, and we panic with a NULL dereference during boot: Unable to handle kernel NULL pointer dereference at virtual address 00000000 __create_mapping.isra.5+0x84/0x350 create_pgd_mapping+0x20/0x28 efi_create_mapping+0x5c/0x6c arm_enable_runtime_services+0x154/0x1e4 do_one_initcall+0x8c/0x190 kernel_init_freeable+0x84/0x1ec kernel_init+0x10/0xe0 ret_from_fork+0x10/0x50 This patch fixes the problem by initialising the pgd_cache earlier, in the pgtable_cache_init callback, which sounds suspiciously like what it was intended for. Reported-by: Dennis Chen Signed-off-by: Will Deacon (cherry picked from commit 39b5be9b4233a9f212b98242bddf008f379b5122) Signed-off-by: Alex Shi --- arch/arm64/include/asm/pgtable.h | 3 ++- arch/arm64/mm/pgd.c | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index fd3d7c177c5f..76ff5d93c6c3 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -682,7 +682,8 @@ extern int kern_addr_valid(unsigned long addr); #include -#define pgtable_cache_init() do { } while (0) +void pgd_cache_init(void); +#define pgtable_cache_init pgd_cache_init /* * On AArch64, the cache coherency is handled via the set_pte_at() function. diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index cb3ba1b812e7..ae11d4e03d0e 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -46,14 +46,14 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) kmem_cache_free(pgd_cache, pgd); } -static int __init pgd_cache_init(void) +void __init pgd_cache_init(void) { + if (PGD_SIZE == PAGE_SIZE) + return; + /* * Naturally aligned pgds required by the architecture. */ - if (PGD_SIZE != PAGE_SIZE) - pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE, - SLAB_PANIC, NULL); - return 0; + pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE, + SLAB_PANIC, NULL); } -core_initcall(pgd_cache_init); From ec567e8f53127f17d757c1ac30cfe09db6d1ea66 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 5 Jan 2016 17:33:34 +0000 Subject: [PATCH 645/797] arm64: entry: remove pointless SPSR mode check In work_pending, we may skip work if the stacked SPSR value represents anything other than an EL0 context. We then immediately invoke the kernel_exit 0 macro as part of ret_to_user, assuming a return to EL0. This is somewhat confusing. We use work_pending as part of the ret_to_user/ret_fast_syscall state machine. We only use ret_fast_syscall in the return from an SVC issued from EL0. We use ret_to_user for return from EL0 exception handlers and also for return from ret_from_fork in the case the task was not a kernel thread (i.e. it is a user task). Thus in all cases the stacked SPSR value must represent an EL0 context, and the check is redundant. This patch removes it, along with the now unused no_work_pending label. Cc: Chris Metcalf Acked-by: Catalin Marinas Signed-off-by: Mark Rutland Signed-off-by: Will Deacon (cherry picked from commit ee03353bc04f8e460cc4e3da80d9721d9ecb89f1) Signed-off-by: Alex Shi --- arch/arm64/kernel/entry.S | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index c0db321db7e1..1f7f5a2b61bf 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -676,10 +676,7 @@ ret_fast_syscall_trace: work_pending: tbnz x1, #TIF_NEED_RESCHED, work_resched /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ - ldr x2, [sp, #S_PSTATE] mov x0, sp // 'regs' - tst x2, #PSR_MODE_MASK // user mode regs? - b.ne no_work_pending // returning to kernel enable_irq // enable interrupts for do_notify_resume() bl do_notify_resume b ret_to_user @@ -698,7 +695,6 @@ ret_to_user: and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending enable_step_tsk x1, x2 -no_work_pending: kernel_exit 0 ENDPROC(ret_to_user) From 91a6481661c6e9d8e503540decf6382a604c7893 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 23 Dec 2015 10:29:28 +0100 Subject: [PATCH 646/797] efi: stub: define DISABLE_BRANCH_PROFILING for all architectures This moves the DISABLE_BRANCH_PROFILING define from the x86 specific to the general CFLAGS definition for the stub. This fixes build errors when building for arm64 with CONFIG_PROFILE_ALL_BRANCHES_ENABLED. Reviewed-by: Matt Fleming Reported-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon (cherry picked from commit b523e185bba36164ca48a190f5468c140d815414) Signed-off-by: Alex Shi --- drivers/firmware/efi/libstub/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 3c0467d3688c..c0ddd1b8dca3 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -8,7 +8,7 @@ cflags-$(CONFIG_X86_32) := -march=i386 cflags-$(CONFIG_X86_64) := -mcmodel=small cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 \ -fPIC -fno-strict-aliasing -mno-red-zone \ - -mno-mmx -mno-sse -DDISABLE_BRANCH_PROFILING + -mno-mmx -mno-sse cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ @@ -16,7 +16,7 @@ cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt -KBUILD_CFLAGS := $(cflags-y) \ +KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \ $(call cc-option,-ffreestanding) \ $(call cc-option,-fno-stack-protector) From 1ebc63c2d5ebd9578b625a049b6f83e9181caf8d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 6 Jan 2016 11:05:27 +0000 Subject: [PATCH 647/797] arm64: head.S: use memset to clear BSS Currently we use an open-coded memzero to clear the BSS. As it is a trivial implementation, it is sub-optimal. Our optimised memset doesn't use the stack, is position-independent, and for the memzero case can use of DC ZVA to clear large blocks efficiently. In __mmap_switched the MMU is on and there are no live caller-saved registers, so we can safely call an uninstrumented memset. This patch changes __mmap_switched to use memset when clearing the BSS. We use the __pi_memset alias so as to avoid any instrumentation in all kernel configurations. Cc: Catalin Marinas Cc: Marc Zyngier Reviewed-by: Ard Biesheuvel Signed-off-by: Mark Rutland Signed-off-by: Will Deacon (cherry picked from commit 2a803c4db615d85126c5c7afd5849a3cfde71422) Signed-off-by: Alex Shi --- arch/arm64/kernel/head.S | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 17ce7285bb12..917d98108b3f 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -415,14 +415,13 @@ ENDPROC(__create_page_tables) */ .set initial_sp, init_thread_union + THREAD_START_SP __mmap_switched: - adr_l x6, __bss_start - adr_l x7, __bss_stop + // Clear BSS + adr_l x0, __bss_start + mov x1, xzr + adr_l x2, __bss_stop + sub x2, x2, x0 + bl __pi_memset -1: cmp x6, x7 - b.hs 2f - str xzr, [x6], #8 // Clear BSS - b 1b -2: adr_l sp, initial_sp, x4 mov x4, sp and x4, x4, #~(THREAD_SIZE - 1) From a9bd748299179a8d8f8fcd937c74ab321981ab4d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 6 May 2016 12:03:29 -0400 Subject: [PATCH 648/797] Revert: "powerpc/tm: Check for already reclaimed tasks" This reverts commit e924c60db1b4891e45d15a33474ac5fab62cf029 which was commit 7f821fc9c77a9b01fe7b1d6e72717b33d8d64142 upstream. It shouldn't have been applied as the original was already in 4.4. Reported-by: Jiri Slaby Cc: Michael Neuling Cc: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/process.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ef2ad2d682da..646bf4d222c1 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -551,24 +551,6 @@ static void tm_reclaim_thread(struct thread_struct *thr, msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1; } - /* - * Use the current MSR TM suspended bit to track if we have - * checkpointed state outstanding. - * On signal delivery, we'd normally reclaim the checkpointed - * state to obtain stack pointer (see:get_tm_stackpointer()). - * This will then directly return to userspace without going - * through __switch_to(). However, if the stack frame is bad, - * we need to exit this thread which calls __switch_to() which - * will again attempt to reclaim the already saved tm state. - * Hence we need to check that we've not already reclaimed - * this state. - * We do this using the current MSR, rather tracking it in - * some specific thread_struct bit, as it has the additional - * benifit of checking for a potential TM bad thing exception. - */ - if (!MSR_TM_SUSPENDED(mfmsr())) - return; - /* * Use the current MSR TM suspended bit to track if we have * checkpointed state outstanding. From 2349384312b4192c5200d16f6089921fb4ffb7d8 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Tue, 5 Apr 2016 10:23:48 +0530 Subject: [PATCH 649/797] RDMA/iw_cxgb4: Fix bar2 virt addr calculation for T4 chips commit 32cc92c7b5e52357a0a24010bae9eb257fa75d3e upstream. For T4, kernel mode qps don't use the user doorbell. User mode qps during flow control db ringing are forced into kernel, where user doorbell is treated as kernel doorbell and proper bar2 offset in bar2 virtual space is calculated, which incase of T4 is a bogus address, causing a kernel panic due to illegal write during doorbell ringing. In case of T4, kernel mode qp bar2 virtual address should be 0. Added T4 check during bar2 virtual address calculation to return 0. Fixed Bar2 range checks based on bar2 physical address. The below oops will be fixed <1>BUG: unable to handle kernel paging request at 000000000002aa08 <1>IP: [] c4iw_uld_control+0x4e0/0x880 [iw_cxgb4] <4>PGD 1416a8067 PUD 15bf35067 PMD 0 <4>Oops: 0002 [#1] SMP <4>last sysfs file: /sys/devices/pci0000:00/0000:00:03.0/0000:02:00.4/infiniband/cxgb4_0/node_guid <4>CPU 5 <4>Modules linked in: rdma_ucm rdma_cm ib_cm ib_sa ib_mad ib_uverbs ip6table_filter ip6_tables ebtable_nat ebtables ipt_MASQUERADE iptable_nat nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack ipt_REJECT xt_CHECKSUM iptable_mangle iptable_filter ip_tables bridge autofs4 target_core_iblock target_core_file target_core_pscsi target_core_mod configfs bnx2fc cnic uio fcoe libfcoe libfc scsi_transport_fc scsi_tgt 8021q garp stp llc cpufreq_ondemand acpi_cpufreq freq_table mperf vhost_net macvtap macvlan tun kvm uinput microcode iTCO_wdt iTCO_vendor_support sg joydev serio_raw i2c_i801 i2c_core lpc_ich mfd_core e1000e ptp pps_core ioatdma dca i7core_edac edac_core shpchp ext3 jbd mbcache sd_mod crc_t10dif pata_acpi ata_generic ata_piix iw_cxgb4 iw_cm ib_core ib_addr cxgb4 ipv6 dm_mirror dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan] <4> Supermicro X8ST3/X8ST3 <4>RIP: 0010:[] [] c4iw_uld_control+0x4e0/0x880 [iw_cxgb4] <4>RSP: 0000:ffff880155a03db0 EFLAGS: 00010006 <4>RAX: 000000000000001d RBX: ffff88013ae5fc00 RCX: ffff880155adb180 <4>RDX: 000000000002aa00 RSI: 0000000000000001 RDI: ffff88013ae5fdf8 <4>RBP: ffff880155a03e10 R08: 0000000000000000 R09: 0000000000000001 <4>R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 <4>R13: 000000000000001d R14: ffff880156414ab0 R15: ffffe8ffffc05b88 <4>FS: 0000000000000000(0000) GS:ffff8800282a0000(0000) knlGS:0000000000000000 <4>CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b <4>CR2: 000000000002aa08 CR3: 000000015bd0e000 CR4: 00000000000007e0 <4>DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 <4>DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 <4>Process cxgb4 (pid: 394, threadinfo ffff880155a00000, task ffff880156414ab0) <4>Stack: <4> ffff880156415068 ffff880155adb180 ffff880155a03df0 ffffffffa00a344b <4> 00000000000003e8 ffff880155920000 0000000000000004 ffff880155920000 <4> ffff88015592d438 ffffffffa00a3860 ffff880155a03fd8 ffffe8ffffc05b88 <4>Call Trace: <4> [] ? enable_txq_db+0x2b/0x80 [cxgb4] <4> [] ? process_db_full+0x0/0xa0 [cxgb4] <4> [] process_db_full+0x46/0xa0 [cxgb4] <4> [] worker_thread+0x170/0x2a0 <4> [] ? autoremove_wake_function+0x0/0x40 <4> [] ? worker_thread+0x0/0x2a0 <4> [] kthread+0x9e/0xc0 <4> [] child_rip+0xa/0x20 <4> [] ? kthread+0x0/0xc0 <4> [] ? child_rip+0x0/0x20 <4>Code: e9 ba 00 00 00 66 0f 1f 44 00 00 44 8b 05 29 07 02 00 45 85 c0 0f 85 71 02 00 00 8b 83 70 01 00 00 45 0f b7 ed c1 e0 0f 44 09 e8 <89> 42 08 0f ae f8 66 c7 83 82 01 00 00 00 00 44 0f b7 ab dc 01 <1>RIP [] c4iw_uld_control+0x4e0/0x880 [iw_cxgb4] <4> RSP <4>CR2: 000000000002aa08` Based on original work by Bharat Potnuri Fixes: 74217d4c6a4fb0d8 ("iw_cxgb4: support for bar2 qid densities exceeding the page size") Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/cxgb4/cq.c | 2 +- drivers/infiniband/hw/cxgb4/qp.c | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index de9cd6901752..bc147582bed9 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -162,7 +162,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS, &cq->bar2_qid, user ? &cq->bar2_pa : NULL); - if (user && !cq->bar2_va) { + if (user && !cq->bar2_pa) { pr_warn(MOD "%s: cqid %u not in BAR2 range.\n", pci_name(rdev->lldi.pdev), cq->cqid); ret = -EINVAL; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index aa515afee724..53aa7515f542 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -185,6 +185,10 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, if (pbar2_pa) *pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK; + + if (is_t4(rdev->lldi.adapter_type)) + return NULL; + return rdev->bar2_kva + bar2_qoffset; } @@ -270,7 +274,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, /* * User mode must have bar2 access. */ - if (user && (!wq->sq.bar2_va || !wq->rq.bar2_va)) { + if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) { pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n", pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid); goto free_dma; From c6a012ba56536cac022045cab504d76d342d8c91 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 Jan 2016 14:52:02 +0100 Subject: [PATCH 650/797] ipvs: handle ip_vs_fill_iph_skb_off failure commit 3f20efba41916ee17ce82f0fdd02581ada2872b2 upstream. ip_vs_fill_iph_skb_off() may not find an IP header, and gcc has determined that ip_vs_sip_fill_param() then incorrectly accesses the protocol fields: net/netfilter/ipvs/ip_vs_pe_sip.c: In function 'ip_vs_sip_fill_param': net/netfilter/ipvs/ip_vs_pe_sip.c:76:5: error: 'iph.protocol' may be used uninitialized in this function [-Werror=maybe-uninitialized] if (iph.protocol != IPPROTO_UDP) ^ net/netfilter/ipvs/ip_vs_pe_sip.c:81:10: error: 'iph.len' may be used uninitialized in this function [-Werror=maybe-uninitialized] dataoff = iph.len + sizeof(struct udphdr); ^ This adds a check for the ip_vs_fill_iph_skb_off() return code before looking at the ip header data returned from it. Signed-off-by: Arnd Bergmann Fixes: b0e010c527de ("ipvs: replace ip_vs_fill_ip4hdr with ip_vs_fill_iph_skb_off") Acked-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_pe_sip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 1b8d594e493a..c4e9ca016a88 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -70,10 +70,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) const char *dptr; int retc; - ip_vs_fill_iph_skb(p->af, skb, false, &iph); + retc = ip_vs_fill_iph_skb(p->af, skb, false, &iph); /* Only useful with UDP */ - if (iph.protocol != IPPROTO_UDP) + if (!retc || iph.protocol != IPPROTO_UDP) return -EINVAL; /* todo: IPv6 fragments: * I think this only should be done for the first fragment. /HS From ba5e7e673624b6099640111e7366be850cf5dbe7 Mon Sep 17 00:00:00 2001 From: Marco Angaroni Date: Sat, 5 Mar 2016 12:10:02 +0100 Subject: [PATCH 651/797] ipvs: correct initial offset of Call-ID header search in SIP persistence engine commit 7617a24f83b5d67f4dab1844956be1cebc44aec8 upstream. The IPVS SIP persistence engine is not able to parse the SIP header "Call-ID" when such header is inserted in the first positions of the SIP message. When IPVS is configured with "--pe sip" option, like for example: ipvsadm -A -u 1.2.3.4:5060 -s rr --pe sip -p 120 -o some particular messages (see below for details) do not create entries in the connection template table, which can be listed with: ipvsadm -Lcn --persistent-conn Problematic SIP messages are SIP responses having "Call-ID" header positioned just after message first line: SIP/2.0 200 OK [Call-ID header here] [rest of the headers] When "Call-ID" header is positioned down (after a few other headers) it is correctly recognized. This is due to the data offset used in get_callid function call inside ip_vs_pe_sip.c file: since dptr already points to the start of the SIP message, the value of dataoff should be initially 0. Otherwise the header is searched starting from some bytes after the first character of the SIP message. Fixes: 758ff0338722 ("IPVS: sip persistence engine") Signed-off-by: Marco Angaroni Acked-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_pe_sip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index c4e9ca016a88..0a6eb5c0d9e9 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -88,7 +88,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) dptr = skb->data + dataoff; datalen = skb->len - dataoff; - if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen)) + if (get_callid(dptr, 0, datalen, &matchoff, &matchlen)) return -EINVAL; /* N.B: pe_data is only set on success, From f94ad404f8934e0cae299dd6520707dc02bbf2fc Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 5 Mar 2016 15:03:22 +0200 Subject: [PATCH 652/797] ipvs: drop first packet to redirect conntrack commit f719e3754ee2f7275437e61a6afd520181fdd43b upstream. Jiri Bohac is reporting for a problem where the attempt to reschedule existing connection to another real server needs proper redirect for the conntrack used by the IPVS connection. For example, when IPVS connection is created to NAT-ed real server we alter the reply direction of conntrack. If we later decide to select different real server we can not alter again the conntrack. And if we expire the old connection, the new connection is left without conntrack. So, the only way to redirect both the IPVS connection and the Netfilter's conntrack is to drop the SYN packet that hits existing connection, to wait for the next jiffie to expire the old connection and its conntrack and to rely on client's retransmission to create new connection as usually. Jiri Bohac provided a fix that drops all SYNs on rescheduling, I extended his patch to do such drops only for connections that use conntrack. Here is the original report from Jiri Bohac: Since commit dc7b3eb900aa ("ipvs: Fix reuse connection if real server is dead"), new connections to dead servers are redistributed immediately to new servers. The old connection is expired using ip_vs_conn_expire_now() which sets the connection timer to expire immediately. However, before the timer callback, ip_vs_conn_expire(), is run to clean the connection's conntrack entry, the new redistributed connection may already be established and its conntrack removed instead. Fix this by dropping the first packet of the new connection instead, like we do when the destination server is not available. The timer will have deleted the old conntrack entry long before the first packet of the new connection is retransmitted. Fixes: dc7b3eb900aa ("ipvs: Fix reuse connection if real server is dead") Signed-off-by: Jiri Bohac Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- include/net/ip_vs.h | 17 +++++++++++++++ net/netfilter/ipvs/ip_vs_core.c | 37 +++++++++++++++++++++++++-------- 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 0816c872b689..a6cc576fd467 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1588,6 +1588,23 @@ static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) } #endif /* CONFIG_IP_VS_NFCT */ +/* Really using conntrack? */ +static inline bool ip_vs_conn_uses_conntrack(struct ip_vs_conn *cp, + struct sk_buff *skb) +{ +#ifdef CONFIG_IP_VS_NFCT + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + if (!(cp->flags & IP_VS_CONN_F_NFCT)) + return false; + ct = nf_ct_get(skb, &ctinfo); + if (ct && !nf_ct_is_untracked(ct)) + return true; +#endif + return false; +} + static inline int ip_vs_dest_conn_overhead(struct ip_vs_dest *dest) { diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index f57b4dcdb233..4da560005b0e 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1757,15 +1757,34 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int cp = pp->conn_in_get(ipvs, af, skb, &iph); conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); - if (conn_reuse_mode && !iph.fragoffs && - is_new_conn(skb, &iph) && cp && - ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest && - unlikely(!atomic_read(&cp->dest->weight))) || - unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) { - if (!atomic_read(&cp->n_control)) - ip_vs_conn_expire_now(cp); - __ip_vs_conn_put(cp); - cp = NULL; + if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) { + bool uses_ct = false, resched = false; + + if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest && + unlikely(!atomic_read(&cp->dest->weight))) { + resched = true; + uses_ct = ip_vs_conn_uses_conntrack(cp, skb); + } else if (is_new_conn_expected(cp, conn_reuse_mode)) { + uses_ct = ip_vs_conn_uses_conntrack(cp, skb); + if (!atomic_read(&cp->n_control)) { + resched = true; + } else { + /* Do not reschedule controlling connection + * that uses conntrack while it is still + * referenced by controlled connection(s). + */ + resched = !uses_ct; + } + } + + if (resched) { + if (!atomic_read(&cp->n_control)) + ip_vs_conn_expire_now(cp); + __ip_vs_conn_put(cp); + if (uses_ct) + return NF_DROP; + cp = NULL; + } } if (unlikely(!cp)) { From 8cc8381397b44d8888c02ad1995496d709cd541a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 22 Jan 2016 16:48:46 +0200 Subject: [PATCH 653/797] mfd: intel-lpss: Remove clock tree on error path commit 84cb36cac581c915ef4e8b70abb73e084325df92 upstream. We forgot to remove the clock tree if something goes wrong in ->probe(). Add a call to intel_lpss_unregister_clock() on error path in ->probe() to fix the potential issue. Fixes: 4b45efe85263 (mfd: Add support for Intel Sunrisepoint LPSS devices) Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/intel-lpss.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/intel-lpss.c b/drivers/mfd/intel-lpss.c index 6255513f54c7..68aa31ae553a 100644 --- a/drivers/mfd/intel-lpss.c +++ b/drivers/mfd/intel-lpss.c @@ -445,6 +445,7 @@ int intel_lpss_probe(struct device *dev, err_remove_ltr: intel_lpss_debugfs_remove(lpss); intel_lpss_ltr_hide(lpss); + intel_lpss_unregister_clock(lpss); err_clk_register: ida_simple_remove(&intel_lpss_devid_ida, lpss->devid); From be0860081ab64f99bcdaeaaf106778b1a16a3198 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 14 Jan 2016 13:42:32 -0500 Subject: [PATCH 654/797] nbd: ratelimit error msgs after socket close commit da6ccaaa79caca4f38b540b651238f87215217a2 upstream. Make the "Attempted send on closed socket" error messages generated in nbd_request_handler() ratelimited. When the nbd socket is shutdown, the nbd_request_handler() function emits an error message for every request remaining in its queue. If the queue is large, this will spam a large amount of messages to the log. There's no need for a separate error message for each request, so this patch ratelimits it. In the specific case this was found, the system was virtual and the error messages were logged to the serial port, which overwhelmed it. Fixes: 4d48a542b427 ("nbd: fix I/O hang on disconnected nbds") Signed-off-by: Dan Streetman Signed-off-by: Markus Pargmann Signed-off-by: Greg Kroah-Hartman --- drivers/block/nbd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 93b3f99b6865..8f1ce6d57a08 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -618,8 +618,8 @@ static void nbd_request_handler(struct request_queue *q) req, req->cmd_type); if (unlikely(!nbd->sock)) { - dev_err(disk_to_dev(nbd->disk), - "Attempted send on closed socket\n"); + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Attempted send on closed socket\n"); req->errors++; nbd_end_request(nbd, req); spin_lock_irq(q->queue_lock); From 5dd660ee0ebedf9aea7bbf7360584668af3cecce Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 10 Mar 2016 10:45:32 +0300 Subject: [PATCH 655/797] ata: ahci_xgene: dereferencing uninitialized pointer in probe commit 8134233e8d346aaa1c929dc510e75482ae318bce upstream. If the call to acpi_get_object_info() fails then "info" hasn't been initialized. In that situation, we already know that "version" should be XGENE_AHCI_V1 so we don't actually need to dereference "info". Fixes: c9802a4be661 ('ata: ahci_xgene: Add AHCI Support for 2nd HW version of APM X-Gene SoC AHCI SATA Host controller.') Signed-off-by: Dan Carpenter Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci_xgene.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c index e2c6d9e0c5ac..e916bff6cee8 100644 --- a/drivers/ata/ahci_xgene.c +++ b/drivers/ata/ahci_xgene.c @@ -739,9 +739,9 @@ static int xgene_ahci_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "%s: Error reading device info. Assume version1\n", __func__); version = XGENE_AHCI_V1; - } - if (info->valid & ACPI_VALID_CID) + } else if (info->valid & ACPI_VALID_CID) { version = XGENE_AHCI_V2; + } } } #endif From c94897e820a85375c4cb7447ac429f6d1550b331 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 23 Feb 2016 05:16:17 -0800 Subject: [PATCH 656/797] mwifiex: fix corner case association failure commit a6139b6271f9f95377fe3486aed6120c9142779b upstream. This patch corrects the error case in association path by returning -1. Earlier "media_connected" used to remain on in this error case causing failure for further association attempts. Signed-off-by: Amitkumar Karwar Fixes: b887664d882ee4 ('mwifiex: channel switch handling for station') Signed-off-by: Cathy Luo Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/sta_ioctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mwifiex/sta_ioctl.c b/drivers/net/wireless/mwifiex/sta_ioctl.c index a6c8a4f7bfe9..d6c4f0f60839 100644 --- a/drivers/net/wireless/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/mwifiex/sta_ioctl.c @@ -313,6 +313,7 @@ int mwifiex_bss_start(struct mwifiex_private *priv, struct cfg80211_bss *bss, mwifiex_dbg(adapter, ERROR, "Attempt to reconnect on csa closed chan(%d)\n", bss_desc->channel); + ret = -1; goto done; } From d65bf4e2407824aecba454c1b0bc7908a5113047 Mon Sep 17 00:00:00 2001 From: Krzysztof Halasa Date: Fri, 11 Mar 2016 12:32:14 +0100 Subject: [PATCH 657/797] CNS3xxx: Fix PCI cns3xxx_write_config() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 88e9da9a2a70b6f1a171fbf30a681d6bc4031c4d upstream. The "where" offset was added twice, fix it. Signed-off-by: Krzysztof Hałasa Fixes: 498a92d42596 ("ARM: cns3xxx: pci: avoid potential stack overflow") Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-cns3xxx/pcie.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-cns3xxx/pcie.c b/arch/arm/mach-cns3xxx/pcie.c index 47905a50e075..318394ed5c7a 100644 --- a/arch/arm/mach-cns3xxx/pcie.c +++ b/arch/arm/mach-cns3xxx/pcie.c @@ -220,13 +220,13 @@ static void cns3xxx_write_config(struct cns3xxx_pcie *cnspci, u32 mask = (0x1ull << (size * 8)) - 1; int shift = (where % 4) * 8; - v = readl_relaxed(base + (where & 0xffc)); + v = readl_relaxed(base); v &= ~(mask << shift); v |= (val & mask) << shift; - writel_relaxed(v, base + (where & 0xffc)); - readl_relaxed(base + (where & 0xffc)); + writel_relaxed(v, base); + readl_relaxed(base); } static void __init cns3xxx_pcie_hw_init(struct cns3xxx_pcie *cnspci) From f0e92143b8e2e6fa1e854385667427011cfe1059 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 21 Jan 2016 21:53:09 +0100 Subject: [PATCH 658/797] clk-divider: make sure read-only dividers do not write to their register commit 50359819794b4a16ae35051cd80f2dab025f6019 upstream. Commit e6d5e7d90be9 ("clk-divider: Fix READ_ONLY when divider > 1") removed the special ops struct for read-only clocks and instead opted to handle them inside the regular ops. On the rk3368 this results in breakage as aclkm now gets set a value. While it is the same divider value, the A53 core still doesn't like it, which can result in the cpu ending up in a hang. The reason being that "ACLKENMasserts one clock cycle before the rising edge of ACLKM" and the clock should only be touched when STANDBYWFIL2 is asserted. To fix this, reintroduce the read-only ops but do include the round_rate callback. That way no writes that may be unsafe are done to the divider register in any case. The Rockchip use of the clk_divider_ops is adapted to this split again, as is the nxp, lpc18xx-ccu driver that was included since the original commit. On lpc18xx-ccu the divider seems to always be read-only so only uses the new ops now. Fixes: e6d5e7d90be9 ("clk-divider: Fix READ_ONLY when divider > 1") Reported-by: Zhang Qing Signed-off-by: Heiko Stuebner Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clk-divider.c | 11 ++++++++++- drivers/clk/nxp/clk-lpc18xx-ccu.c | 2 +- drivers/clk/rockchip/clk.c | 4 +++- include/linux/clk-provider.h | 1 + 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c index 3ace102a2a0a..bbf206e3da0d 100644 --- a/drivers/clk/clk-divider.c +++ b/drivers/clk/clk-divider.c @@ -422,6 +422,12 @@ const struct clk_ops clk_divider_ops = { }; EXPORT_SYMBOL_GPL(clk_divider_ops); +const struct clk_ops clk_divider_ro_ops = { + .recalc_rate = clk_divider_recalc_rate, + .round_rate = clk_divider_round_rate, +}; +EXPORT_SYMBOL_GPL(clk_divider_ro_ops); + static struct clk *_register_divider(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, @@ -445,7 +451,10 @@ static struct clk *_register_divider(struct device *dev, const char *name, return ERR_PTR(-ENOMEM); init.name = name; - init.ops = &clk_divider_ops; + if (clk_divider_flags & CLK_DIVIDER_READ_ONLY) + init.ops = &clk_divider_ro_ops; + else + init.ops = &clk_divider_ops; init.flags = flags | CLK_IS_BASIC; init.parent_names = (parent_name ? &parent_name: NULL); init.num_parents = (parent_name ? 1 : 0); diff --git a/drivers/clk/nxp/clk-lpc18xx-ccu.c b/drivers/clk/nxp/clk-lpc18xx-ccu.c index 13aabbb3acbe..558da89555af 100644 --- a/drivers/clk/nxp/clk-lpc18xx-ccu.c +++ b/drivers/clk/nxp/clk-lpc18xx-ccu.c @@ -222,7 +222,7 @@ static void lpc18xx_ccu_register_branch_gate_div(struct lpc18xx_clk_branch *bran div->width = 1; div_hw = &div->hw; - div_ops = &clk_divider_ops; + div_ops = &clk_divider_ro_ops; } branch->gate.reg = branch->offset + reg_base; diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c index be6c7fd8315d..e37eee819df9 100644 --- a/drivers/clk/rockchip/clk.c +++ b/drivers/clk/rockchip/clk.c @@ -90,7 +90,9 @@ static struct clk *rockchip_clk_register_branch(const char *name, div->width = div_width; div->lock = lock; div->table = div_table; - div_ops = &clk_divider_ops; + div_ops = (div_flags & CLK_DIVIDER_READ_ONLY) + ? &clk_divider_ro_ops + : &clk_divider_ops; } clk = clk_register_composite(NULL, name, parent_names, num_parents, diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index c56988ac63f7..7cd0171963ae 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -384,6 +384,7 @@ struct clk_divider { #define CLK_DIVIDER_MAX_AT_ZERO BIT(6) extern const struct clk_ops clk_divider_ops; +extern const struct clk_ops clk_divider_ro_ops; unsigned long divider_recalc_rate(struct clk_hw *hw, unsigned long parent_rate, unsigned int val, const struct clk_div_table *table, From e6ce6ce062650eda95bc2b44420a4a7151c42d8a Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Mon, 1 Feb 2016 16:18:40 +0800 Subject: [PATCH 659/797] soc: rockchip: power-domain: fix err handle while probing commit 1d961f11a108af9f7fbe89cc950a8d16ddbdbb28 upstream. If we fail to probe the driver, we should not directly break from the for_each_available_child_of_node since it calls of_node_get while iterating. This patch add of_node_put to fix the unbalanced call pair. Fixes: 7c696693a4f5 ("soc: rockchip: power-domain: Add power domain driver") Signed-off-by: Shawn Lin Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- drivers/soc/rockchip/pm_domains.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/rockchip/pm_domains.c b/drivers/soc/rockchip/pm_domains.c index 534c58937a56..4a65c5bda146 100644 --- a/drivers/soc/rockchip/pm_domains.c +++ b/drivers/soc/rockchip/pm_domains.c @@ -419,6 +419,7 @@ static int rockchip_pm_domain_probe(struct platform_device *pdev) if (error) { dev_err(dev, "failed to handle node %s: %d\n", node->name, error); + of_node_put(node); goto err_out; } } From c7ea1f7642d7d77503804086af9b2336621b31e8 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Tue, 2 Feb 2016 11:37:50 +0800 Subject: [PATCH 660/797] clk: rockchip: free memory in error cases when registering clock branches commit 2467b6745e0ae9c6cdccff24c4cceeb14b1cce3f upstream. Add free memeory if rockchip_clk_register_branch fails. Fixes: a245fecbb806 ("clk: rockchip: add basic infrastructure...") Signed-off-by: Shawn Lin Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- drivers/clk/rockchip/clk.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c index e37eee819df9..9b6c8188efac 100644 --- a/drivers/clk/rockchip/clk.c +++ b/drivers/clk/rockchip/clk.c @@ -70,7 +70,7 @@ static struct clk *rockchip_clk_register_branch(const char *name, if (gate_offset >= 0) { gate = kzalloc(sizeof(*gate), GFP_KERNEL); if (!gate) - return ERR_PTR(-ENOMEM); + goto err_gate; gate->flags = gate_flags; gate->reg = base + gate_offset; @@ -82,7 +82,7 @@ static struct clk *rockchip_clk_register_branch(const char *name, if (div_width > 0) { div = kzalloc(sizeof(*div), GFP_KERNEL); if (!div) - return ERR_PTR(-ENOMEM); + goto err_div; div->flags = div_flags; div->reg = base + muxdiv_offset; @@ -102,6 +102,11 @@ static struct clk *rockchip_clk_register_branch(const char *name, flags); return clk; +err_div: + kfree(gate); +err_gate: + kfree(mux); + return ERR_PTR(-ENOMEM); } static struct clk *rockchip_clk_register_frac_branch(const char *name, From cf5281ef53856c5c9e4e9b0728805416502e466a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20F=C3=A4rber?= Date: Sun, 7 Feb 2016 22:13:03 +0100 Subject: [PATCH 661/797] clk: meson: Fix meson_clk_register_clks() signature type mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bb473593c8099302bfd7befc23de67df907e3a99 upstream. As preparation for arm64 based mesongxbb, which pulls in this code once enabling ARCH_MESON, fix a size_t vs. unsigned int type mismatch. The loop uses a local unsigned int variable, so adopt that type, matching the header. Fixes: 7a29a869434e ("clk: meson: Add support for Meson clock controller") Signed-off-by: Andreas Färber Acked-by: Carlo Caione Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/meson/clkc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/meson/clkc.c b/drivers/clk/meson/clkc.c index c83ae1367abc..d920d410b51d 100644 --- a/drivers/clk/meson/clkc.c +++ b/drivers/clk/meson/clkc.c @@ -198,7 +198,7 @@ meson_clk_register_fixed_rate(const struct clk_conf *clk_conf, } void __init meson_clk_register_clks(const struct clk_conf *clk_confs, - size_t nr_confs, + unsigned int nr_confs, void __iomem *clk_base) { unsigned int i; From faaf496612c39c8ca6d46fec5a6af78b85689f65 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 22 Feb 2016 11:43:39 +0000 Subject: [PATCH 662/797] clk: qcom: msm8960: fix ce3_core clk enable register commit 732d6913691848db9fabaa6a25b4d6fad10ddccf upstream. This patch corrects the enable register offset which is actually 0x36cc instead of 0x36c4 Signed-off-by: Srinivas Kandagatla Fixes: 5f775498bdc4 ("clk: qcom: Fully support apq8064 global clock control") Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/qcom/gcc-msm8960.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/gcc-msm8960.c b/drivers/clk/qcom/gcc-msm8960.c index 66c18bc97857..2c83c03309cb 100644 --- a/drivers/clk/qcom/gcc-msm8960.c +++ b/drivers/clk/qcom/gcc-msm8960.c @@ -2769,7 +2769,7 @@ static struct clk_branch ce3_core_clk = { .halt_reg = 0x2fdc, .halt_bit = 5, .clkr = { - .enable_reg = 0x36c4, + .enable_reg = 0x36cc, .enable_mask = BIT(4), .hw.init = &(struct clk_init_data){ .name = "ce3_core_clk", From 5a9a5671011a3732daf1d300d5205aaae82ee558 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 24 Feb 2016 09:39:11 +0100 Subject: [PATCH 663/797] clk: versatile: sp810: support reentrance commit ec7957a6aa0aaf981fb8356dc47a2cdd01cde03c upstream. Despite care take to allocate clocks state containers the SP810 driver actually just supports creating one instance: all clocks registered for every instance will end up with the exact same name and __clk_init() will fail. Rename the timclken<0> .. timclken to sp810__ so every clock on every instance gets a unique name. This is necessary for the RealView PBA8 which has two SP810 blocks: the second block will not register its clocks unless every clock on every instance is unique and results in boot logs like this: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at ../drivers/clk/versatile/clk-sp810.c:137 clk_sp810_of_setup+0x110/0x154() Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.5.0-rc2-00030-g352718fc39f6-dirty #225 Hardware name: ARM RealView Machine (Device Tree Support) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x84/0x9c) [] (dump_stack) from [] (warn_slowpath_common+0x74/0xb0) [] (warn_slowpath_common) from [] (warn_slowpath_null+0x1c/0x24) [] (warn_slowpath_null) from [] (clk_sp810_of_setup+0x110/0x154) [] (clk_sp810_of_setup) from [] (of_clk_init+0x12c/0x1c8) [] (of_clk_init) from [] (time_init+0x20/0x2c) [] (time_init) from [] (start_kernel+0x244/0x3c4) [] (start_kernel) from [<7000807c>] (0x7000807c) ---[ end trace cb88537fdc8fa200 ]--- Cc: Michael Turquette Cc: Pawel Moll Fixes: 6e973d2c4385 "clk: vexpress: Add separate SP810 driver" Signed-off-by: Linus Walleij Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/versatile/clk-sp810.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/versatile/clk-sp810.c b/drivers/clk/versatile/clk-sp810.c index a1cdef6b0f90..897c36c1754a 100644 --- a/drivers/clk/versatile/clk-sp810.c +++ b/drivers/clk/versatile/clk-sp810.c @@ -92,6 +92,7 @@ static void __init clk_sp810_of_setup(struct device_node *node) int num = ARRAY_SIZE(parent_names); char name[12]; struct clk_init_data init; + static int instance; int i; bool deprecated; @@ -118,7 +119,7 @@ static void __init clk_sp810_of_setup(struct device_node *node) deprecated = !of_find_property(node, "assigned-clock-parents", NULL); for (i = 0; i < ARRAY_SIZE(sp810->timerclken); i++) { - snprintf(name, ARRAY_SIZE(name), "timerclken%d", i); + snprintf(name, sizeof(name), "sp810_%d_%d", instance, i); sp810->timerclken[i].sp810 = sp810; sp810->timerclken[i].channel = i; @@ -139,5 +140,6 @@ static void __init clk_sp810_of_setup(struct device_node *node) } of_clk_add_provider(node, clk_sp810_timerclken_of_get, sp810); + instance++; } CLK_OF_DECLARE(sp810, "arm,sp810", clk_sp810_of_setup); From 0d50da4683464e150961142341c69ea5a578974a Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 1 Mar 2016 17:26:48 -0800 Subject: [PATCH 664/797] clk: qcom: msm8960: Fix ce3_src register offset commit 0f75e1a370fd843c9e508fc1ccf0662833034827 upstream. The offset seems to have been copied from the sata clk. Fix it so that enabling the crypto engine source clk works. Tested-by: Srinivas Kandagatla Tested-by: Bjorn Andersson Fixes: 5f775498bdc4 ("clk: qcom: Fully support apq8064 global clock control") Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/qcom/gcc-msm8960.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/gcc-msm8960.c b/drivers/clk/qcom/gcc-msm8960.c index 2c83c03309cb..bdc4b2d07a23 100644 --- a/drivers/clk/qcom/gcc-msm8960.c +++ b/drivers/clk/qcom/gcc-msm8960.c @@ -2753,7 +2753,7 @@ static struct clk_rcg ce3_src = { }, .freq_tbl = clk_tbl_ce3, .clkr = { - .enable_reg = 0x2c08, + .enable_reg = 0x36c0, .enable_mask = BIT(7), .hw.init = &(struct clk_init_data){ .name = "ce3_src", From 8e9a156140f9fb568ffcbdaaf390862bbfb09d83 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 14 Mar 2016 15:29:44 +0100 Subject: [PATCH 665/797] lpfc: fix misleading indentation commit aeb6641f8ebdd61939f462a8255b316f9bfab707 upstream. gcc-6 complains about the indentation of the lpfc_destroy_vport_work_array() call in lpfc_online(), which clearly doesn't look right: drivers/scsi/lpfc/lpfc_init.c: In function 'lpfc_online': drivers/scsi/lpfc/lpfc_init.c:2880:3: warning: statement is indented as if it were guarded by... [-Wmisleading-indentation] lpfc_destroy_vport_work_array(phba, vports); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/scsi/lpfc/lpfc_init.c:2863:2: note: ...this 'if' clause, but it is not if (vports != NULL) ^~ Looking at the patch that introduced this code, it's clear that the behavior is correct and the indentation is wrong. This fixes the indentation and adds curly braces around the previous if() block for clarity, as that is most likely what caused the code to be misindented in the first place. Signed-off-by: Arnd Bergmann Fixes: 549e55cd2a1b ("[SCSI] lpfc 8.2.2 : Fix locking around HBA's port_list") Reviewed-by: Sebastian Herbszt Reviewed-by: Hannes Reinecke Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_init.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index db9446c612da..b0d92b84bcdc 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -2855,7 +2855,7 @@ lpfc_online(struct lpfc_hba *phba) } vports = lpfc_create_vport_work_array(phba); - if (vports != NULL) + if (vports != NULL) { for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) { struct Scsi_Host *shost; shost = lpfc_shost_from_vport(vports[i]); @@ -2872,7 +2872,8 @@ lpfc_online(struct lpfc_hba *phba) } spin_unlock_irq(shost->host_lock); } - lpfc_destroy_vport_work_array(phba, vports); + } + lpfc_destroy_vport_work_array(phba, vports); lpfc_unblock_mgmt_io(phba); return 0; From ee22885fc1fb49f98bb59e0825cc4ef3b1004f00 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 12 Apr 2016 19:37:44 +0200 Subject: [PATCH 666/797] ath9k: ar5008_hw_cmn_spur_mitigate: add missing mask_m & mask_p initialisation commit de478a61389cacafe94dc8b035081b681b878f9d upstream. by moving common code to ar5008_hw_cmn_spur_mitigate i forgot to move mask_m & mask_p initialisation. This coused a performance regression on ar9281. Fixes: f911085ffa88 ("ath9k: split ar5008_hw_spur_mitigate and reuse common code in ar9002_hw_spur_mitigate.") Reported-by: Gustav Frederiksen Tested-by: Gustav Frederiksen Signed-off-by: Oleksij Rempel Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/ar5008_phy.c | 8 +++----- drivers/net/wireless/ath/ath9k/ar9002_phy.c | 5 ----- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar5008_phy.c b/drivers/net/wireless/ath/ath9k/ar5008_phy.c index 8f8793004b9f..1b271b99c49e 100644 --- a/drivers/net/wireless/ath/ath9k/ar5008_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar5008_phy.c @@ -274,6 +274,9 @@ void ar5008_hw_cmn_spur_mitigate(struct ath_hw *ah, }; static const int inc[4] = { 0, 100, 0, 0 }; + memset(&mask_m, 0, sizeof(int8_t) * 123); + memset(&mask_p, 0, sizeof(int8_t) * 123); + cur_bin = -6000; upper = bin + 100; lower = bin - 100; @@ -424,14 +427,9 @@ static void ar5008_hw_spur_mitigate(struct ath_hw *ah, int tmp, new; int i; - int8_t mask_m[123]; - int8_t mask_p[123]; int cur_bb_spur; bool is2GHz = IS_CHAN_2GHZ(chan); - memset(&mask_m, 0, sizeof(int8_t) * 123); - memset(&mask_p, 0, sizeof(int8_t) * 123); - for (i = 0; i < AR_EEPROM_MODAL_SPURS; i++) { cur_bb_spur = ah->eep_ops->get_spur_channel(ah, i, is2GHz); if (AR_NO_SPUR == cur_bb_spur) diff --git a/drivers/net/wireless/ath/ath9k/ar9002_phy.c b/drivers/net/wireless/ath/ath9k/ar9002_phy.c index db6624527d99..53d7445a5d12 100644 --- a/drivers/net/wireless/ath/ath9k/ar9002_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9002_phy.c @@ -178,14 +178,9 @@ static void ar9002_hw_spur_mitigate(struct ath_hw *ah, int i; struct chan_centers centers; - int8_t mask_m[123]; - int8_t mask_p[123]; int cur_bb_spur; bool is2GHz = IS_CHAN_2GHZ(chan); - memset(&mask_m, 0, sizeof(int8_t) * 123); - memset(&mask_p, 0, sizeof(int8_t) * 123); - ath9k_hw_get_channel_centers(ah, chan, ¢ers); freq = centers.synth_center; From fe9295e05bf878652e8d0e5caef53516d8de1789 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Apr 2016 13:47:08 +0200 Subject: [PATCH 667/797] mac80211: fix statistics leak if dev_alloc_name() fails commit e6436be21e77e3659b4ff7e357ab5a8342d132d2 upstream. In the case that dev_alloc_name() fails, e.g. because the name was given by the user and already exists, we need to clean up properly and free the per-CPU statistics. Fix that. Fixes: 5a490510ba5f ("mac80211: use per-CPU TX/RX statistics") Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/iface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 7a2b7915093b..bcb0a1b64556 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1750,7 +1750,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = dev_alloc_name(ndev, ndev->name); if (ret < 0) { - free_netdev(ndev); + ieee80211_if_free(ndev); return ret; } @@ -1836,7 +1836,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = register_netdevice(ndev); if (ret) { - free_netdev(ndev); + ieee80211_if_free(ndev); return ret; } } From 8d2923930be15a5b295ace2029c76653dc4def13 Mon Sep 17 00:00:00 2001 From: Chunyu Hu Date: Tue, 3 May 2016 19:34:34 +0800 Subject: [PATCH 668/797] tracing: Don't display trigger file for events that can't be enabled commit 854145e0a8e9a05f7366d240e2f99d9c1ca6d6dd upstream. Currently register functions for events will be called through the 'reg' field of event class directly without any check when seting up triggers. Triggers for events that don't support register through debug fs (events under events/ftrace are for trace-cmd to read event format, and most of them don't have a register function except events/ftrace/functionx) can't be enabled at all, and an oops will be hit when setting up trigger for those events, so just not creating them is an easy way to avoid the oops. Link: http://lkml.kernel.org/r/1462275274-3911-1-git-send-email-chuhu@redhat.com Fixes: 85f2b08268c01 ("tracing: Add basic event trigger framework") Signed-off-by: Chunyu Hu Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d202d991edae..996f0fd34312 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2107,8 +2107,13 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file) trace_create_file("filter", 0644, file->dir, file, &ftrace_event_filter_fops); - trace_create_file("trigger", 0644, file->dir, file, - &event_trigger_fops); + /* + * Only event directories that can be enabled should have + * triggers. + */ + if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) + trace_create_file("trigger", 0644, file->dir, file, + &event_trigger_fops); trace_create_file("format", 0444, file->dir, call, &ftrace_event_format_fops); From f3b51a03bea6dc4cda740481d48b2ff49abdced5 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 25 Apr 2016 16:52:38 -0700 Subject: [PATCH 669/797] MD: make bio mergeable commit 9c573de3283af007ea11c17bde1e4568d9417328 upstream. blk_queue_split marks bio unmergeable, which makes sense for normal bio. But if dispatching the bio to underlayer disk, the blk_queue_split checks are invalid, hence it's possible the bio becomes mergeable. In the reported bug, this bug causes trim against raid0 performance slash https://bugzilla.kernel.org/show_bug.cgi?id=117051 Reported-and-tested-by: Park Ju Hyung Fixes: 6ac45aeb6bca(block: avoid to merge splitted bio) Cc: Ming Lei Cc: Neil Brown Reviewed-by: Jens Axboe Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index b1e1f6b95782..c57fdf847b47 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -293,6 +293,8 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) * go away inside make_request */ sectors = bio_sectors(bio); + /* bio could be mergeable after passing to underlayer */ + bio->bi_rw &= ~REQ_NOMERGE; mddev->pers->make_request(mddev, bio); cpu = part_stat_lock(); From fe21a25e8c0cc97a080cc73c135e92ddce61a660 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 2 May 2016 12:46:42 -0700 Subject: [PATCH 670/797] Minimal fix-up of bad hashing behavior of hash_64() commit 689de1d6ca95b3b5bd8ee446863bf81a4883ea25 upstream. This is a fairly minimal fixup to the horribly bad behavior of hash_64() with certain input patterns. In particular, because the multiplicative value used for the 64-bit hash was intentionally bit-sparse (so that the multiply could be done with shifts and adds on architectures without hardware multipliers), some bits did not get spread out very much. In particular, certain fairly common bit ranges in the input (roughly bits 12-20: commonly with the most information in them when you hash things like byte offsets in files or memory that have block factors that mean that the low bits are often zero) would not necessarily show up much in the result. There's a bigger patch-series brewing to fix up things more completely, but this is the fairly minimal fix for the 64-bit hashing problem. It simply picks a much better constant multiplier, spreading the bits out a lot better. NOTE! For 32-bit architectures, the bad old hash_64() remains the same for now, since 64-bit multiplies are expensive. The bigger hashing cleanup will replace the 32-bit case with something better. The new constants were picked by George Spelvin who wrote that bigger cleanup series. I just picked out the constants and part of the comment from that series. Cc: George Spelvin Cc: Thomas Gleixner Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/hash.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/include/linux/hash.h b/include/linux/hash.h index 1afde47e1528..79c52fa81cac 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -32,12 +32,28 @@ #error Wordsize not 32 or 64 #endif +/* + * The above primes are actively bad for hashing, since they are + * too sparse. The 32-bit one is mostly ok, the 64-bit one causes + * real problems. Besides, the "prime" part is pointless for the + * multiplicative hash. + * + * Although a random odd number will do, it turns out that the golden + * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice + * properties. + * + * These are the negative, (1 - phi) = (phi^2) = (3 - sqrt(5))/2. + * (See Knuth vol 3, section 6.4, exercise 9.) + */ +#define GOLDEN_RATIO_32 0x61C88647 +#define GOLDEN_RATIO_64 0x61C8864680B583EBull + static __always_inline u64 hash_64(u64 val, unsigned int bits) { u64 hash = val; -#if defined(CONFIG_ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64 - hash = hash * GOLDEN_RATIO_PRIME_64; +#if BITS_PER_LONG == 64 + hash = hash * GOLDEN_RATIO_64; #else /* Sigh, gcc can't optimise this alone like it does for 32 bits. */ u64 n = hash; From d27e2ddc40b632db8f84c4d3236e8191d0eabc69 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 5 May 2016 16:22:15 -0700 Subject: [PATCH 671/797] mm, cma: prevent nr_isolated_* counters from going negative commit 14af4a5e9b26ad251f81c174e8a43f3e179434a5 upstream. /proc/sys/vm/stat_refresh warns nr_isolated_anon and nr_isolated_file go increasingly negative under compaction: which would add delay when should be none, or no delay when should delay. The bug in compaction was due to a recent mmotm patch, but much older instance of the bug was also noticed in isolate_migratepages_range() which is used for CMA and gigantic hugepage allocations. The bug is caused by putback_movable_pages() in an error path decrementing the isolated counters without them being previously incremented by acct_isolated(). Fix isolate_migratepages_range() by removing the error-path putback, thus reaching acct_isolated() with migratepages still isolated, and leaving putback to caller like most other places do. Fixes: edc2ca612496 ("mm, compaction: move pageblock checks up from isolate_migratepages_range()") [vbabka@suse.cz: expanded the changelog] Signed-off-by: Hugh Dickins Signed-off-by: Vlastimil Babka Acked-by: Joonsoo Kim Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/compaction.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index de3e1e71cd9f..7881e072dc33 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -880,16 +880,8 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, pfn = isolate_migratepages_block(cc, pfn, block_end_pfn, ISOLATE_UNEVICTABLE); - /* - * In case of fatal failure, release everything that might - * have been isolated in the previous iteration, and signal - * the failure back to caller. - */ - if (!pfn) { - putback_movable_pages(&cc->migratepages); - cc->nr_migratepages = 0; + if (!pfn) break; - } if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) break; From 851375cc493de34a1443d85c46b026d8aeda715a Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 5 May 2016 16:22:23 -0700 Subject: [PATCH 672/797] mm/zswap: provide unique zpool name commit 32a4e169039927bfb6ee9f0ccbbe3a8aaf13a4bc upstream. Instead of using "zswap" as the name for all zpools created, add an atomic counter and use "zswap%x" with the counter number for each zpool created, to provide a unique name for each new zpool. As zsmalloc, one of the zpool implementations, requires/expects a unique name for each pool created, zswap should provide a unique name. The zsmalloc pool creation does not fail if a new pool with a conflicting name is created, unless CONFIG_ZSMALLOC_STAT is enabled; in that case, zsmalloc pool creation fails with -ENOMEM. Then zswap will be unable to change its compressor parameter if its zpool is zsmalloc; it also will be unable to change its zpool parameter back to zsmalloc, if it has any existing old zpool using zsmalloc with page(s) in it. Attempts to change the parameters will result in failure to create the zpool. This changes zswap to provide a unique name for each zpool creation. Fixes: f1c54846ee45 ("zswap: dynamic pool creation") Signed-off-by: Dan Streetman Reported-by: Sergey Senozhatsky Reviewed-by: Sergey Senozhatsky Cc: Dan Streetman Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/zswap.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/zswap.c b/mm/zswap.c index bf14508afd64..340261946fda 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -170,6 +170,8 @@ static struct zswap_tree *zswap_trees[MAX_SWAPFILES]; static LIST_HEAD(zswap_pools); /* protects zswap_pools list modification */ static DEFINE_SPINLOCK(zswap_pools_lock); +/* pool counter to provide unique names to zpool */ +static atomic_t zswap_pools_count = ATOMIC_INIT(0); /* used by param callback function */ static bool zswap_init_started; @@ -565,6 +567,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor) static struct zswap_pool *zswap_pool_create(char *type, char *compressor) { struct zswap_pool *pool; + char name[38]; /* 'zswap' + 32 char (max) num + \0 */ gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; pool = kzalloc(sizeof(*pool), GFP_KERNEL); @@ -573,7 +576,10 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) return NULL; } - pool->zpool = zpool_create_pool(type, "zswap", gfp, &zswap_zpool_ops); + /* unique name for each pool specifically required by zsmalloc */ + snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count)); + + pool->zpool = zpool_create_pool(type, name, gfp, &zswap_zpool_ops); if (!pool->zpool) { pr_err("%s zpool not available\n", type); goto error; From 63e9a60f4357e700a181980d424ffeaff32d5340 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 22 Apr 2016 09:26:52 +0200 Subject: [PATCH 673/797] ARM: EXYNOS: Properly skip unitialized parent clock in power domain on commit a0a966b83873f33778710a4fc59240244b0734a5 upstream. We want to skip reparenting a clock on turning on power domain, if we do not have the parent yet. The parent is obtained when turning the domain off. However due to a typo, the loop is continued on IS_ERR() of clock being reparented, not on the IS_ERR() of the parent. Theoretically this could lead to OOPS on first turn on of a power domain, if there was no turn off before. Practically that should never happen because all power domains are turned on by default (reset value, bootloader does not turn off them usually) so the first action will be always turn off. Fixes: 29e5eea06bc1 ("ARM: EXYNOS: Get current parent clock for power domain on/off") Reported-by: Vladimir Zapolskiy Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-exynos/pm_domains.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c index 7c21760f590f..875a2bab64f6 100644 --- a/arch/arm/mach-exynos/pm_domains.c +++ b/arch/arm/mach-exynos/pm_domains.c @@ -92,7 +92,7 @@ static int exynos_pd_power(struct generic_pm_domain *domain, bool power_on) if (IS_ERR(pd->clk[i])) break; - if (IS_ERR(pd->clk[i])) + if (IS_ERR(pd->pclk[i])) continue; /* Skip on first power up */ if (clk_set_parent(pd->clk[i], pd->pclk[i])) pr_err("%s: error setting parent to clock%d\n", From dd86efc570e528de7931f9eb039fa0c91bb3c1bd Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 20 Apr 2016 13:34:31 +0000 Subject: [PATCH 674/797] ARM: SoCFPGA: Fix secondary CPU startup in thumb2 kernel commit 5616f36713ea77f57ae908bf2fef641364403c9f upstream. The secondary CPU starts up in ARM mode. When the kernel is compiled in thumb2 mode we have to explicitly compile the secondary startup trampoline in ARM mode, otherwise the CPU will go to Nirvana. Signed-off-by: Sascha Hauer Reported-by: Steffen Trumtrar Suggested-by: Ard Biesheuvel Signed-off-by: Dinh Nguyen Signed-off-by: Kevin Hilman Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-socfpga/headsmp.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-socfpga/headsmp.S b/arch/arm/mach-socfpga/headsmp.S index 5d94b7a2fb10..c160fa3007e9 100644 --- a/arch/arm/mach-socfpga/headsmp.S +++ b/arch/arm/mach-socfpga/headsmp.S @@ -13,6 +13,7 @@ #include .arch armv7-a + .arm ENTRY(secondary_trampoline) /* CPU1 will always fetch from 0x0 when it is brought out of reset. From 0f7ea0699ac02fb7c5d67e8eac8f8581912f4988 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Thu, 17 Mar 2016 16:51:59 +0000 Subject: [PATCH 675/797] xen: Fix page <-> pfn conversion on 32 bit systems commit 60901df3aed230d4565dca003f11b6a95fbf30d9 upstream. Commit 1084b1988d22dc165c9dbbc2b0e057f9248ac4db (xen: Add Xen specific page definition) caused a regression in 4.4. The xen functions to convert between pages and pfns fail due to an overflow on systems where a physical address may not fit in an unsigned long (e.g. x86 32 bit PAE systems). Rework the conversion to avoid overflow. This should also result in simpler object code. This bug manifested itself as disk corruption with Linux 4.4 when using blkfront in a Xen HVM x86 32 bit guest with more than 4 GiB of memory. Signed-off-by: Ross Lagerwall Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- include/xen/page.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/xen/page.h b/include/xen/page.h index 96294ac93755..9dc46cb8a0fd 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -15,9 +15,9 @@ */ #define xen_pfn_to_page(xen_pfn) \ - ((pfn_to_page(((unsigned long)(xen_pfn) << XEN_PAGE_SHIFT) >> PAGE_SHIFT))) + (pfn_to_page((unsigned long)(xen_pfn) >> (PAGE_SHIFT - XEN_PAGE_SHIFT))) #define page_to_xen_pfn(page) \ - (((page_to_pfn(page)) << PAGE_SHIFT) >> XEN_PAGE_SHIFT) + ((page_to_pfn(page)) << (PAGE_SHIFT - XEN_PAGE_SHIFT)) #define XEN_PFN_PER_PAGE (PAGE_SIZE / XEN_PAGE_SIZE) From 58d378b24b7f615b779ec20e7b7eebed8a3d8011 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Thu, 17 Mar 2016 16:52:00 +0000 Subject: [PATCH 676/797] xen/balloon: Fix crash when ballooning on x86 32 bit PAE commit dfd74a1edfaba5864276a2859190a8d242d18952 upstream. Commit 55b3da98a40dbb3776f7454daf0d95dde25c33d2 (xen/balloon: find non-conflicting regions to place hotplugged memory) caused a regression in 4.4. When ballooning on an x86 32 bit PAE system with close to 64 GiB of memory, the address returned by allocate_resource may be above 64 GiB. When using CONFIG_SPARSEMEM, this setup is limited to using physical addresses < 64 GiB. When adding memory at this address, it runs off the end of the mem_section array and causes a crash. Instead, fail the ballooning request. Signed-off-by: Ross Lagerwall Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- drivers/xen/balloon.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 12eab503efd1..364bc44610c1 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -152,6 +152,8 @@ static DECLARE_WAIT_QUEUE_HEAD(balloon_wq); static void balloon_process(struct work_struct *work); static DECLARE_DELAYED_WORK(balloon_worker, balloon_process); +static void release_memory_resource(struct resource *resource); + /* When ballooning out (allocating memory to return to Xen) we don't really want the kernel to try too hard since that can trigger the oom killer. */ #define GFP_BALLOON \ @@ -268,6 +270,20 @@ static struct resource *additional_memory_resource(phys_addr_t size) return NULL; } +#ifdef CONFIG_SPARSEMEM + { + unsigned long limit = 1UL << (MAX_PHYSMEM_BITS - PAGE_SHIFT); + unsigned long pfn = res->start >> PAGE_SHIFT; + + if (pfn > limit) { + pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n", + pfn, limit); + release_memory_resource(res); + return NULL; + } + } +#endif + return res; } From 11dc8042c691244a085c16396fce4a3a3e9bc186 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 4 May 2016 07:02:36 -0600 Subject: [PATCH 677/797] xen/evtchn: fix ring resize when binding new events commit 27e0e6385377c4dc68a4ddaf1a35a2dfa951f3c5 upstream. The copying of ring data was wrong for two cases: For a full ring nothing got copied at all (as in that case the canonicalized producer and consumer indexes are identical). And in case one or both of the canonicalized (after the resize) indexes would point into the second half of the buffer, the copied data ended up in the wrong (free) part of the new buffer. In both cases uninitialized data would get passed back to the caller. Fix this by simply copying the old ring contents twice: Once to the low half of the new buffer, and a second time to the high half. This addresses the inability to boot a HVM guest with 64 or more vCPUs. This regression was caused by 8620015499101090 (xen/evtchn: dynamically grow pending event channel ring). Reported-by: Konrad Rzeszutek Wilk Signed-off-by: Jan Beulich Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- drivers/xen/evtchn.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 38272ad24551..f4edd6df3df2 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -316,7 +316,6 @@ static int evtchn_resize_ring(struct per_user_data *u) { unsigned int new_size; evtchn_port_t *new_ring, *old_ring; - unsigned int p, c; /* * Ensure the ring is large enough to capture all possible @@ -346,20 +345,17 @@ static int evtchn_resize_ring(struct per_user_data *u) /* * Copy the old ring contents to the new ring. * - * If the ring contents crosses the end of the current ring, - * it needs to be copied in two chunks. + * To take care of wrapping, a full ring, and the new index + * pointing into the second half, simply copy the old contents + * twice. * * +---------+ +------------------+ - * |34567 12| -> | 1234567 | - * +-----p-c-+ +------------------+ + * |34567 12| -> |34567 1234567 12| + * +-----p-c-+ +-------c------p---+ */ - p = evtchn_ring_offset(u, u->ring_prod); - c = evtchn_ring_offset(u, u->ring_cons); - if (p < c) { - memcpy(new_ring + c, u->ring + c, (u->ring_size - c) * sizeof(*u->ring)); - memcpy(new_ring + u->ring_size, u->ring, p * sizeof(*u->ring)); - } else - memcpy(new_ring + c, u->ring + c, (p - c) * sizeof(*u->ring)); + memcpy(new_ring, old_ring, u->ring_size * sizeof(*u->ring)); + memcpy(new_ring + u->ring_size, old_ring, + u->ring_size * sizeof(*u->ring)); u->ring = new_ring; u->ring_size = new_size; From bba1e81824e04c76c14fe614b3d440b1a4d65db9 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Tue, 12 Apr 2016 13:37:45 -0700 Subject: [PATCH 678/797] HID: wacom: Add support for DTK-1651 commit e1123fe975852cc0970b4e53ea65ca917e54c923 upstream. DTK-1651 is a display pen-only tablet Signed-off-by: Ping Cheng Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 3c0f47ac8e53..5c02d7bbc7f2 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -3449,6 +3449,10 @@ static const struct wacom_features wacom_features_0x33E = { "Wacom Intuos PT M 2", 21600, 13500, 2047, 63, INTUOSHT2, WACOM_INTUOS_RES, WACOM_INTUOS_RES, .touch_max = 16, .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE }; +static const struct wacom_features wacom_features_0x343 = + { "Wacom DTK1651", 34616, 19559, 1023, 0, + DTUS, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 4, + WACOM_DTU_OFFSET, WACOM_DTU_OFFSET }; static const struct wacom_features wacom_features_HID_ANY_ID = { "Wacom HID", .type = HID_GENERIC }; @@ -3614,6 +3618,7 @@ const struct hid_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x33C) }, { USB_DEVICE_WACOM(0x33D) }, { USB_DEVICE_WACOM(0x33E) }, + { USB_DEVICE_WACOM(0x343) }, { USB_DEVICE_WACOM(0x4001) }, { USB_DEVICE_WACOM(0x4004) }, { USB_DEVICE_WACOM(0x5000) }, From 5844e4cdacc5e002dfceb2872352af20cff40742 Mon Sep 17 00:00:00 2001 From: Nazar Mokrynskyi Date: Mon, 25 Apr 2016 17:01:56 +0300 Subject: [PATCH 679/797] HID: Fix boot delay for Creative SB Omni Surround 5.1 with quirk commit 567a44ecb44eb2584ddb93e962cfb133ce77e0bb upstream. Needed for v2 of the device firmware, otherwise kernel will stuck for few seconds and throw "usb_submit_urb(ctrl) failed: -1" early on system boot. Signed-off-by: Nazar Mokrynskyi Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ids.h | 1 + drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 8b78a7f1f779..909ab0176ef2 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -255,6 +255,7 @@ #define USB_DEVICE_ID_CORSAIR_K90 0x1b02 #define USB_VENDOR_ID_CREATIVELABS 0x041e +#define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51 0x322c #define USB_DEVICE_ID_PRODIKEYS_PCMIDI 0x2801 #define USB_VENDOR_ID_CVTOUCH 0x1ff7 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 7dd0953cd70f..dc8e6adf95a4 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -70,6 +70,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_3AXIS_5BUTTON_STICK, HID_QUIRK_NOGET }, { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_AXIS_295, HID_QUIRK_NOGET }, { USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET }, { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_ELAN, HID_ANY_ID, HID_QUIRK_ALWAYS_POLL }, From 5fd407ad088227ee030e93246cffe757541483f0 Mon Sep 17 00:00:00 2001 From: Knut Wohlrab Date: Mon, 25 Apr 2016 14:08:25 -0700 Subject: [PATCH 680/797] Input: zforce_ts - fix dual touch recognition commit 6984ab1ab35f422292b7781c65284038bcc0f6a6 upstream. A wrong decoding of the touch coordinate message causes a wrong touch ID. Touch ID for dual touch must be 0 or 1. According to the actual Neonode nine byte touch coordinate coding, the state is transported in the lower nibble and the touch ID in the higher nibble of payload byte five. Signed-off-by: Knut Wohlrab Signed-off-by: Oleksij Rempel Signed-off-by: Dirk Behme Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/zforce_ts.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/zforce_ts.c b/drivers/input/touchscreen/zforce_ts.c index 9bbadaaf6bc3..7b3845aa5983 100644 --- a/drivers/input/touchscreen/zforce_ts.c +++ b/drivers/input/touchscreen/zforce_ts.c @@ -370,8 +370,8 @@ static int zforce_touch_event(struct zforce_ts *ts, u8 *payload) point.coord_x = point.coord_y = 0; } - point.state = payload[9 * i + 5] & 0x03; - point.id = (payload[9 * i + 5] & 0xfc) >> 2; + point.state = payload[9 * i + 5] & 0x0f; + point.id = (payload[9 * i + 5] & 0xf0) >> 4; /* determine touch major, minor and orientation */ point.area_major = max(payload[9 * i + 6], From 898149d10b855a0d0a5a9f8f05e4359970919eb9 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 5 May 2016 16:22:26 -0700 Subject: [PATCH 681/797] proc: prevent accessing /proc//environ until it's ready commit 8148a73c9901a8794a50f950083c00ccf97d43b3 upstream. If /proc//environ gets read before the envp[] array is fully set up in create_{aout,elf,elf_fdpic,flat}_tables(), we might end up trying to read more bytes than are actually written, as env_start will already be set but env_end will still be zero, making the range calculation underflow, allowing to read beyond the end of what has been written. Fix this as it is done for /proc//cmdline by testing env_end for zero. It is, apparently, intentionally set last in create_*_tables(). This bug was found by the PaX size_overflow plugin that detected the arithmetic underflow of 'this_len = env_end - (env_start + src)' when env_end is still zero. The expected consequence is that userland trying to access /proc//environ of a not yet fully set up process may get inconsistent data as we're in the middle of copying in the environment variables. Fixes: https://forums.grsecurity.net/viewtopic.php?f=3&t=4363 Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=116461 Signed-off-by: Mathias Krause Cc: Emese Revfy Cc: Pax Team Cc: Al Viro Cc: Mateusz Guzik Cc: Alexey Dobriyan Cc: Cyrill Gorcunov Cc: Jarod Wilson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/base.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index b7de324bec11..e8bbf6cdb437 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -954,7 +954,8 @@ static ssize_t environ_read(struct file *file, char __user *buf, int ret = 0; struct mm_struct *mm = file->private_data; - if (!mm) + /* Ensure the process spawned far enough to have an environment. */ + if (!mm || !mm->env_end) return 0; page = (char *)__get_free_page(GFP_TEMPORARY); From 24b8a175a66946ccb4ca227df52f517e1d8f5ef6 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 5 May 2016 16:22:12 -0700 Subject: [PATCH 682/797] mm: update min_free_kbytes from khugepaged after core initialization commit bc22af74f271ef76b2e6f72f3941f91f0da3f5f8 upstream. Khugepaged attempts to raise min_free_kbytes if its set too low. However, on boot khugepaged sets min_free_kbytes first from subsys_initcall(), and then the mm 'core' over-rides min_free_kbytes after from init_per_zone_wmark_min(), via a module_init() call. Khugepaged used to use a late_initcall() to set min_free_kbytes (such that it occurred after the core initialization), however this was removed when the initialization of min_free_kbytes was integrated into the starting of the khugepaged thread. The fix here is simply to invoke the core initialization using a core_initcall() instead of module_init(), such that the previous initialization ordering is restored. I didn't restore the late_initcall() since start_stop_khugepaged() already sets min_free_kbytes via set_recommended_min_free_kbytes(). This was noticed when we had a number of page allocation failures when moving a workload to a kernel with this new initialization ordering. On an 8GB system this restores min_free_kbytes back to 67584 from 11365 when CONFIG_TRANSPARENT_HUGEPAGE=y is set and either CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y or CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y. Fixes: 79553da293d3 ("thp: cleanup khugepaged startup") Signed-off-by: Jason Baron Acked-by: Kirill A. Shutemov Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c69531afbd8f..6cf5cadeaef7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6193,7 +6193,7 @@ int __meminit init_per_zone_wmark_min(void) setup_per_zone_inactive_ratio(); return 0; } -module_init(init_per_zone_wmark_min) +core_initcall(init_per_zone_wmark_min) /* * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so From f9d46494887e1494c3a6b40434ab425f74b15148 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sat, 12 Mar 2016 11:12:59 +0100 Subject: [PATCH 683/797] batman-adv: fix DAT candidate selection (must use vid) commit 2871734e85e920503d49b3a8bc0afbe0773b6036 upstream. Now that DAT is VLAN aware, it must use the VID when computing the DHT address of the candidate nodes where an entry is going to be stored/retrieved. Fixes: be1db4f6615b ("batman-adv: make the Distributed ARP Table vlan aware") Signed-off-by: Antonio Quartulli [sven@narfation.org: fix conflicts with current version] Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/distributed-arp-table.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index a49c705fb86b..5f19133c5530 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -553,6 +553,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * be sent to * @bat_priv: the bat priv with all the soft interface information * @ip_dst: ipv4 to look up in the DHT + * @vid: VLAN identifier * * An originator O is selected if and only if its DHT_ID value is one of three * closest values (from the LEFT, with wrap around if needed) then the hash @@ -561,7 +562,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM. */ static struct batadv_dat_candidate * -batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) +batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst, + unsigned short vid) { int select; batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key; @@ -577,7 +579,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) return NULL; dat.ip = ip_dst; - dat.vid = 0; + dat.vid = vid; ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat, BATADV_DAT_ADDR_MAX); @@ -597,6 +599,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) * @bat_priv: the bat priv with all the soft interface information * @skb: payload to send * @ip: the DHT key + * @vid: VLAN identifier * @packet_subtype: unicast4addr packet subtype to use * * This function copies the skb with pskb_copy() and is sent as unicast packet @@ -607,7 +610,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) */ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *skb, __be32 ip, - int packet_subtype) + unsigned short vid, int packet_subtype) { int i; bool ret = false; @@ -616,7 +619,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *tmp_skb; struct batadv_dat_candidate *cand; - cand = batadv_dat_select_candidates(bat_priv, ip); + cand = batadv_dat_select_candidates(bat_priv, ip, vid); if (!cand) goto out; @@ -1004,7 +1007,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, ret = true; } else { /* Send the request to the DHT */ - ret = batadv_dat_send_data(bat_priv, skb, ip_dst, + ret = batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_GET); } out: @@ -1132,8 +1135,8 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, /* Send the ARP reply to the candidates for both the IP addresses that * the node obtained from the ARP reply */ - batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); - batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); + batadv_dat_send_data(bat_priv, skb, ip_src, vid, BATADV_P_DAT_DHT_PUT); + batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT); } /** From c6865db3a49a8f80052489fc6e1848df56f12ade Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 26 Feb 2016 17:56:13 +0100 Subject: [PATCH 684/797] batman-adv: Check skb size before using encapsulated ETH+VLAN header commit c78296665c3d81f040117432ab9e1cb125521b0c upstream. The encapsulated ethernet and VLAN header may be outside the received ethernet frame. Thus the skb buffer size has to be checked before it can be parsed to find out if it encapsulates another batman-adv packet. Fixes: 420193573f11 ("batman-adv: softif bridge loop avoidance") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/soft-interface.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index ac4d08de5df4..720f1a5b81ac 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -407,11 +407,17 @@ void batadv_interface_rx(struct net_device *soft_iface, */ nf_reset(skb); + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + goto dropped; + vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: + if (!pskb_may_pull(skb, VLAN_ETH_HLEN)) + goto dropped; + vhdr = (struct vlan_ethhdr *)skb->data; if (vhdr->h_vlan_encapsulated_proto != ethertype) @@ -423,8 +429,6 @@ void batadv_interface_rx(struct net_device *soft_iface, } /* skb->dev & skb->pkt_type are set here */ - if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) - goto dropped; skb->protocol = eth_type_trans(skb, soft_iface); /* should not be necessary anymore as we use skb_pull_rcsum() From e426a835c1da093b2804825bfb4bfd503492e543 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Fri, 11 Mar 2016 14:04:49 +0100 Subject: [PATCH 685/797] batman-adv: Fix broadcast/ogm queue limit on a removed interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c4fdb6cff2aa0ae740c5f19b6f745cbbe786d42f upstream. When removing a single interface while a broadcast or ogm packet is still pending then we will free the forward packet without releasing the queue slots again. This patch is supposed to fix this issue. Fixes: 6d5808d4ae1b ("batman-adv: Add missing hardif_free_ref in forw_packet_free") Signed-off-by: Linus Lüssing [sven@narfation.org: fix conflicts with current version] Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/send.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index f664324805eb..0e0c3b8ed927 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -630,6 +630,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); + if (!forw_packet->own) + atomic_inc(&bat_priv->bcast_queue_left); + batadv_forw_packet_free(forw_packet); } } @@ -657,6 +660,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); + if (!forw_packet->own) + atomic_inc(&bat_priv->batman_queue_left); + batadv_forw_packet_free(forw_packet); } } From 639ddeaee4f4111d80699452de5b70db29e96054 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 20 Mar 2016 12:27:53 +0100 Subject: [PATCH 686/797] batman-adv: Reduce refcnt of removed router when updating route commit d1a65f1741bfd9c69f9e4e2ad447a89b6810427d upstream. _batadv_update_route rcu_derefences orig_ifinfo->router outside of a spinlock protected region to print some information messages to the debug log. But this pointer is not checked again when the new pointer is assigned in the spinlock protected region. Thus is can happen that the value of orig_ifinfo->router changed in the meantime and thus the reference counter of the wrong router gets reduced after the spinlock protected region. Just rcu_dereferencing the value of orig_ifinfo->router inside the spinlock protected region (which also set the new pointer) is enough to get the correct old router object. Fixes: e1a5382f978b ("batman-adv: Make orig_node->router an rcu protected pointer") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/routing.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 3207667e69de..d8a2f33e60e5 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -104,6 +104,15 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, neigh_node = NULL; spin_lock_bh(&orig_node->neigh_list_lock); + /* curr_router used earlier may not be the current orig_ifinfo->router + * anymore because it was dereferenced outside of the neigh_list_lock + * protected region. After the new best neighbor has replace the current + * best neighbor the reference counter needs to decrease. Consequently, + * the code needs to ensure the curr_router variable contains a pointer + * to the replaced best neighbor. + */ + curr_router = rcu_dereference_protected(orig_ifinfo->router, true); + rcu_assign_pointer(orig_ifinfo->router, neigh_node); spin_unlock_bh(&orig_node->neigh_list_lock); batadv_orig_ifinfo_free_ref(orig_ifinfo); From 4bc9468f1680e799e3036a6e816ed9ecfc7d98a3 Mon Sep 17 00:00:00 2001 From: Howard Cochran Date: Thu, 10 Mar 2016 01:12:39 -0500 Subject: [PATCH 687/797] writeback: Fix performance regression in wb_over_bg_thresh() commit 74d369443325063a5f0260e63971decb950fd8fa upstream. Commit 947e9762a8dd ("writeback: update wb_over_bg_thresh() to use wb_domain aware operations") unintentionally changed this function's meaning from "are there more dirty pages than the background writeback threshold" to "are there more dirty pages than the writeback threshold". The background writeback threshold is typically half of the writeback threshold, so this had the effect of raising the number of dirty pages required to cause a writeback worker to perform background writeout. This can cause a very severe performance regression when a BDI uses BDI_CAP_STRICTLIMIT because balance_dirty_pages() and the writeback worker can now disagree on whether writeback should be initiated. For example, in a system having 1GB of RAM, a single spinning disk, and a "pass-through" FUSE filesystem mounted over the disk, application code mmapped a 128MB file on the disk and was randomly dirtying pages in that mapping. Because FUSE uses strictlimit and has a default max_ratio of only 1%, in balance_dirty_pages, thresh is ~200, bg_thresh is ~100, and the dirty_freerun_ceiling is the average of those, ~150. So, it pauses the dirtying processes when we have 151 dirty pages and wakes up a background writeback worker. But the worker tests the wrong threshold (200 instead of 100), so it does not initiate writeback and just returns. Thus, balance_dirty_pages keeps looping, sleeping and then waking up the worker who will do nothing. It remains stuck in this state until the few dirty pages that we have finally expire and we write them back for that reason. Then the whole process repeats, resulting in near-zero throughput through the FUSE BDI. The fix is to call the parameterized variant of wb_calc_thresh, so that the worker will do writeback if the bg_thresh is exceeded which was the behavior before the referenced commit. Fixes: 947e9762a8dd ("writeback: update wb_over_bg_thresh() to use wb_domain aware operations") Signed-off-by: Howard Cochran Acked-by: Tejun Heo Signed-off-by: Miklos Szeredi Tested-by Sedat Dilek Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- mm/page-writeback.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d15d88c8efa1..e40c9364582d 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1899,7 +1899,8 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb) if (gdtc->dirty > gdtc->bg_thresh) return true; - if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(gdtc)) + if (wb_stat(wb, WB_RECLAIMABLE) > + wb_calc_thresh(gdtc->wb, gdtc->bg_thresh)) return true; if (mdtc) { @@ -1913,7 +1914,8 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb) if (mdtc->dirty > mdtc->bg_thresh) return true; - if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(mdtc)) + if (wb_stat(wb, WB_RECLAIMABLE) > + wb_calc_thresh(mdtc->wb, mdtc->bg_thresh)) return true; } From a7ebd7f5d87b33f36041239f3c2087a0572db4fb Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 3 May 2016 20:29:39 +0100 Subject: [PATCH 688/797] MAINTAINERS: Remove asterisk from EFI directory names commit e8dfe6d8f6762d515fcd4f30577f7bfcf7659887 upstream. Mark reported that having asterisks on the end of directory names confuses get_maintainer.pl when it encounters subdirectories, and that my name does not appear when run on drivers/firmware/efi/libstub. Reported-by: Mark Rutland Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1462303781-8686-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 4c3e1d2ac31b..ab65bbecb159 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4097,8 +4097,8 @@ F: Documentation/efi-stub.txt F: arch/ia64/kernel/efi.c F: arch/x86/boot/compressed/eboot.[ch] F: arch/x86/include/asm/efi.h -F: arch/x86/platform/efi/* -F: drivers/firmware/efi/* +F: arch/x86/platform/efi/ +F: drivers/firmware/efi/ F: include/linux/efi*.h EFI VARIABLE FILESYSTEM From 73c1fd0aa105bdea4768f9a11c850574fb9091f9 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 6 May 2016 11:33:39 +0800 Subject: [PATCH 689/797] x86/tsc: Read all ratio bits from MSR_PLATFORM_INFO commit 886123fb3a8656699dff40afa0573df359abeb18 upstream. Currently we read the tsc radio: ratio = (MSR_PLATFORM_INFO >> 8) & 0x1f; Thus we get bit 8-12 of MSR_PLATFORM_INFO, however according to the SDM (35.5), the ratio bits are bit 8-15. Ignoring the upper bits can result in an incorrect tsc ratio, which causes the TSC calibration and the Local APIC timer frequency to be incorrect. Fix this problem by masking 0xff instead. [ tglx: Massaged changelog ] Fixes: 7da7c1561366 "x86, tsc: Add static (MSR) TSC calibration on Intel Atom SoCs" Signed-off-by: Chen Yu Cc: "Rafael J. Wysocki" Cc: Bin Gao Cc: Len Brown Link: http://lkml.kernel.org/r/1462505619-5516-1-git-send-email-yu.c.chen@intel.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/tsc_msr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 92ae6acac8a7..6aa0f4d9eea6 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -92,7 +92,7 @@ unsigned long try_msr_calibrate_tsc(void) if (freq_desc_tables[cpu_index].msr_plat) { rdmsr(MSR_PLATFORM_INFO, lo, hi); - ratio = (lo >> 8) & 0x1f; + ratio = (lo >> 8) & 0xff; } else { rdmsr(MSR_IA32_PERF_STATUS, lo, hi); ratio = (hi >> 8) & 0x1f; From ddd5c3139de87400a6c6601ad3f54621e9d238fb Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Tue, 16 Feb 2016 11:45:33 -0800 Subject: [PATCH 690/797] fs/pnode.c: treat zero mnt_group_id-s as unequal commit 7ae8fd0351f912b075149a1e03a017be8b903b9a upstream. propagate_one(m) calculates "type" argument for copy_tree() like this: > if (m->mnt_group_id == last_dest->mnt_group_id) { > type = CL_MAKE_SHARED; > } else { > type = CL_SLAVE; > if (IS_MNT_SHARED(m)) > type |= CL_MAKE_SHARED; > } The "type" argument then governs clone_mnt() behavior with respect to flags and mnt_master of new mount. When we iterate through a slave group, it is possible that both current "m" and "last_dest" are not shared (although, both are slaves, i.e. have non-NULL mnt_master-s). Then the comparison above erroneously makes new mount shared and sets its mnt_master to last_source->mnt_master. The patch fixes the problem by handling zero mnt_group_id-s as though they are unequal. The similar problem exists in the implementation of "else" clause above when we have to ascend upward in the master/slave tree by calling: > last_source = last_source->mnt_master; > last_dest = last_source->mnt_parent; proper number of times. The last step is governed by "n->mnt_group_id != last_dest->mnt_group_id" condition that may lie if both are zero. The patch fixes this case in the same way as the former one. [AV: don't open-code an obvious helper...] Signed-off-by: Maxim Patlasov Signed-off-by: Al Viro Cc: Seth Forshee Signed-off-by: Greg Kroah-Hartman --- fs/pnode.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/pnode.c b/fs/pnode.c index 6367e1e435c6..c524fdddc7fb 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -202,6 +202,11 @@ static struct mount *last_dest, *last_source, *dest_master; static struct mountpoint *mp; static struct hlist_head *list; +static inline bool peers(struct mount *m1, struct mount *m2) +{ + return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id; +} + static int propagate_one(struct mount *m) { struct mount *child; @@ -212,7 +217,7 @@ static int propagate_one(struct mount *m) /* skip if mountpoint isn't covered by it */ if (!is_subdir(mp->m_dentry, m->mnt.mnt_root)) return 0; - if (m->mnt_group_id == last_dest->mnt_group_id) { + if (peers(m, last_dest)) { type = CL_MAKE_SHARED; } else { struct mount *n, *p; @@ -223,7 +228,7 @@ static int propagate_one(struct mount *m) last_source = last_source->mnt_master; last_dest = last_source->mnt_parent; } - if (n->mnt_group_id != last_dest->mnt_group_id) { + if (!peers(n, last_dest)) { last_source = last_source->mnt_master; last_dest = last_source->mnt_parent; } From b17580a3cb901c56e9b9a3dea4d12153f5fc879e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 5 May 2016 09:29:29 -0500 Subject: [PATCH 691/797] propogate_mnt: Handle the first propogated copy being a slave commit 5ec0811d30378ae104f250bfc9b3640242d81e3f upstream. When the first propgated copy was a slave the following oops would result: > BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 > IP: [] propagate_one+0xbe/0x1c0 > PGD bacd4067 PUD bac66067 PMD 0 > Oops: 0000 [#1] SMP > Modules linked in: > CPU: 1 PID: 824 Comm: mount Not tainted 4.6.0-rc5userns+ #1523 > Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 > task: ffff8800bb0a8000 ti: ffff8800bac3c000 task.ti: ffff8800bac3c000 > RIP: 0010:[] [] propagate_one+0xbe/0x1c0 > RSP: 0018:ffff8800bac3fd38 EFLAGS: 00010283 > RAX: 0000000000000000 RBX: ffff8800bb77ec00 RCX: 0000000000000010 > RDX: 0000000000000000 RSI: ffff8800bb58c000 RDI: ffff8800bb58c480 > RBP: ffff8800bac3fd48 R08: 0000000000000001 R09: 0000000000000000 > R10: 0000000000001ca1 R11: 0000000000001c9d R12: 0000000000000000 > R13: ffff8800ba713800 R14: ffff8800bac3fda0 R15: ffff8800bb77ec00 > FS: 00007f3c0cd9b7e0(0000) GS:ffff8800bfb00000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 0000000000000010 CR3: 00000000bb79d000 CR4: 00000000000006e0 > Stack: > ffff8800bb77ec00 0000000000000000 ffff8800bac3fd88 ffffffff811fbf85 > ffff8800bac3fd98 ffff8800bb77f080 ffff8800ba713800 ffff8800bb262b40 > 0000000000000000 0000000000000000 ffff8800bac3fdd8 ffffffff811f1da0 > Call Trace: > [] propagate_mnt+0x105/0x140 > [] attach_recursive_mnt+0x120/0x1e0 > [] graft_tree+0x63/0x70 > [] do_add_mount+0x9b/0x100 > [] do_mount+0x2aa/0xdf0 > [] ? strndup_user+0x4e/0x70 > [] SyS_mount+0x75/0xc0 > [] do_syscall_64+0x4b/0xa0 > [] entry_SYSCALL64_slow_path+0x25/0x25 > Code: 00 00 75 ec 48 89 0d 02 22 22 01 8b 89 10 01 00 00 48 89 05 fd 21 22 01 39 8e 10 01 00 00 0f 84 e0 00 00 00 48 8b 80 d8 00 00 00 <48> 8b 50 10 48 89 05 df 21 22 01 48 89 15 d0 21 22 01 8b 53 30 > RIP [] propagate_one+0xbe/0x1c0 > RSP > CR2: 0000000000000010 > ---[ end trace 2725ecd95164f217 ]--- This oops happens with the namespace_sem held and can be triggered by non-root users. An all around not pleasant experience. To avoid this scenario when finding the appropriate source mount to copy stop the walk up the mnt_master chain when the first source mount is encountered. Further rewrite the walk up the last_source mnt_master chain so that it is clear what is going on. The reason why the first source mount is special is that it it's mnt_parent is not a mount in the dest_mnt propagation tree, and as such termination conditions based up on the dest_mnt mount propgation tree do not make sense. To avoid other kinds of confusion last_dest is not changed when computing last_source. last_dest is only used once in propagate_one and that is above the point of the code being modified, so changing the global variable is meaningless and confusing. fixes: f2ebb3a921c1ca1e2ddd9242e95a1989a50c4c68 ("smarter propagate_mnt()") Reported-by: Tycho Andersen Reviewed-by: Seth Forshee Tested-by: Seth Forshee Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/pnode.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/fs/pnode.c b/fs/pnode.c index c524fdddc7fb..99899705b105 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -198,7 +198,7 @@ static struct mount *next_group(struct mount *m, struct mount *origin) /* all accesses are serialized by namespace_sem */ static struct user_namespace *user_ns; -static struct mount *last_dest, *last_source, *dest_master; +static struct mount *last_dest, *first_source, *last_source, *dest_master; static struct mountpoint *mp; static struct hlist_head *list; @@ -221,20 +221,22 @@ static int propagate_one(struct mount *m) type = CL_MAKE_SHARED; } else { struct mount *n, *p; + bool done; for (n = m; ; n = p) { p = n->mnt_master; - if (p == dest_master || IS_MNT_MARKED(p)) { - while (last_dest->mnt_master != p) { - last_source = last_source->mnt_master; - last_dest = last_source->mnt_parent; - } - if (!peers(n, last_dest)) { - last_source = last_source->mnt_master; - last_dest = last_source->mnt_parent; - } + if (p == dest_master || IS_MNT_MARKED(p)) break; - } } + do { + struct mount *parent = last_source->mnt_parent; + if (last_source == first_source) + break; + done = parent->mnt_master == p; + if (done && peers(n, parent)) + break; + last_source = last_source->mnt_master; + } while (!done); + type = CL_SLAVE; /* beginning of peer group among the slaves? */ if (IS_MNT_SHARED(m)) @@ -286,6 +288,7 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, */ user_ns = current->nsproxy->mnt_ns->user_ns; last_dest = dest_mnt; + first_source = source_mnt; last_source = source_mnt; mp = dest_mp; list = tree_list; From 303fa967e0a3cf1f9116ceb009d10e196f899142 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 26 Apr 2016 12:15:01 +0100 Subject: [PATCH 692/797] ARM: cpuidle: Pass on arm_cpuidle_suspend()'s return value commit 625fe4f8ffc1b915248558481bb94249f6bd411c upstream. arm_cpuidle_suspend() may return -EOPNOTSUPP, or any value returned by the cpu_ops/cpuidle_ops suspend call. arm_enter_idle_state() doesn't update 'ret' with this value, meaning we always signal success to cpuidle_enter_state(), causing it to update the usage counters as if we succeeded. Fixes: 191de17aa3c1 ("ARM64: cpuidle: Replace cpu_suspend by the common ARM/ARM64 function") Signed-off-by: James Morse Acked-by: Lorenzo Pieralisi Acked-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpuidle/cpuidle-arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c index 545069d5fdfb..e342565e8715 100644 --- a/drivers/cpuidle/cpuidle-arm.c +++ b/drivers/cpuidle/cpuidle-arm.c @@ -50,7 +50,7 @@ static int arm_enter_idle_state(struct cpuidle_device *dev, * call the CPU ops suspend protocol with idle index as a * parameter. */ - arm_cpuidle_suspend(idx); + ret = arm_cpuidle_suspend(idx); cpu_pm_exit(); } From f0f21f80609c7e1da91e34face5b86547bd7401a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 5 May 2016 13:32:34 +0530 Subject: [PATCH 693/797] ARC: Add missing io barriers to io{read,write}{16,32}be() commit e5bc0478ab6cf565619224536d75ecb2aedca43b upstream. While reviewing a different change to asm-generic/io.h Arnd spotted that ARC ioread32 and ioread32be both of which come from asm-generic versions are not symmetrical in terms of calling the io barriers. generic ioread32 -> ARC readl() [ has barriers] generic ioread32be -> __be32_to_cpu(__raw_readl()) [ lacks barriers] While generic ioread32be is being remediated to call readl(), that involves a swab32(), causing double swaps on ioread32be() on Big Endian systems. So provide our versions of big endian IO accessors to ensure io barrier calls while also keeping them optimal Suggested-by: Arnd Bergmann Acked-by: Arnd Bergmann Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/io.h | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index 27b17adea50d..cb69299a492e 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -13,6 +13,15 @@ #include #include +#ifdef CONFIG_ISA_ARCV2 +#include +#define __iormb() rmb() +#define __iowmb() wmb() +#else +#define __iormb() do { } while (0) +#define __iowmb() do { } while (0) +#endif + extern void __iomem *ioremap(unsigned long physaddr, unsigned long size); extern void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, unsigned long flags); @@ -22,6 +31,15 @@ extern void iounmap(const void __iomem *addr); #define ioremap_wc(phy, sz) ioremap(phy, sz) #define ioremap_wt(phy, sz) ioremap(phy, sz) +/* + * io{read,write}{16,32}be() macros + */ +#define ioread16be(p) ({ u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(); __v; }) +#define ioread32be(p) ({ u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(); __v; }) + +#define iowrite16be(v,p) ({ __iowmb(); __raw_writew((__force u16)cpu_to_be16(v), p); }) +#define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force u32)cpu_to_be32(v), p); }) + /* Change struct page to physical address */ #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) @@ -99,15 +117,6 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr) } -#ifdef CONFIG_ISA_ARCV2 -#include -#define __iormb() rmb() -#define __iowmb() wmb() -#else -#define __iormb() do { } while (0) -#define __iowmb() do { } while (0) -#endif - /* * MMIO can also get buffered/optimized in micro-arch, so barriers needed * Based on ARM model for the typical use case From ac8fc72dec814226cfcb96cbe3023b89cc386428 Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Thu, 5 May 2016 14:14:21 +0100 Subject: [PATCH 694/797] x86/sysfb_efi: Fix valid BAR address range check commit c10fcb14c7afd6688c7b197a814358fecf244222 upstream. The code for checking whether a BAR address range is valid will break out of the loop when a start address of 0x0 is encountered. This behaviour is wrong since by breaking out of the loop we may miss the BAR that describes the EFI frame buffer in a later iteration. Because of this bug I can't use video=efifb: boot parameter to get efifb on my new ThinkPad E550 for my old linux system hard disk with 3.10 kernel. In 3.10, efifb is the only choice due to DRM/I915 not supporting the GPU. This patch also add a trivial optimization to break out after we find the frame buffer address range without testing later BARs. Signed-off-by: Wang YanQing [ Rewrote changelog. ] Signed-off-by: Matt Fleming Reviewed-by: Peter Jones Cc: Ard Biesheuvel Cc: David Herrmann Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tomi Valkeinen Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1462454061-21561-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/sysfb_efi.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c index b285d4e8c68e..5da924bbf0a0 100644 --- a/arch/x86/kernel/sysfb_efi.c +++ b/arch/x86/kernel/sysfb_efi.c @@ -106,14 +106,24 @@ static int __init efifb_set_system(const struct dmi_system_id *id) continue; for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { resource_size_t start, end; + unsigned long flags; + + flags = pci_resource_flags(dev, i); + if (!(flags & IORESOURCE_MEM)) + continue; + + if (flags & IORESOURCE_UNSET) + continue; + + if (pci_resource_len(dev, i) == 0) + continue; start = pci_resource_start(dev, i); - if (start == 0) - break; end = pci_resource_end(dev, i); if (screen_info.lfb_base >= start && screen_info.lfb_base < end) { found_bar = 1; + break; } } } From ee3e27f14e40bc3c95a175af482d6bbf35ab78bc Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 4 May 2016 13:48:56 +0800 Subject: [PATCH 695/797] ACPICA: Dispatcher: Update thread ID for recursive method calls commit 93d68841a23a5779cef6fb9aa0ef32e7c5bd00da upstream. ACPICA commit 7a3bd2d962f221809f25ddb826c9e551b916eb25 Set the mutex owner thread ID. Original patch from: Prarit Bhargava Link: https://bugzilla.kernel.org/show_bug.cgi?id=115121 Link: https://github.com/acpica/acpica/commit/7a3bd2d9 Signed-off-by: Prarit Bhargava Tested-by: Andy Lutomirski # On a Dell XPS 13 9350 Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpica/dsmethod.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c index bc32f3194afe..28c50c6b5f45 100644 --- a/drivers/acpi/acpica/dsmethod.c +++ b/drivers/acpi/acpica/dsmethod.c @@ -417,6 +417,9 @@ acpi_ds_begin_method_execution(struct acpi_namespace_node *method_node, obj_desc->method.mutex->mutex. original_sync_level = obj_desc->method.mutex->mutex.sync_level; + + obj_desc->method.mutex->mutex.thread_id = + acpi_os_get_thread_id(); } } From beac678d0908ee0a14200e1412f98a89b765c0aa Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sat, 30 Apr 2016 08:29:27 +1000 Subject: [PATCH 696/797] powerpc: Fix bad inline asm constraint in create_zero_mask() commit b4c112114aab9aff5ed4568ca5e662bb02cdfe74 upstream. In create_zero_mask() we have: addi %1,%2,-1 andc %1,%1,%2 popcntd %0,%1 using the "r" constraint for %2. r0 is a valid register in the "r" set, but addi X,r0,X turns it into an li: li r7,-1 andc r7,r7,r0 popcntd r4,r7 Fix this by using the "b" constraint, for which r0 is not a valid register. This was found with a kernel build using gcc trunk, narrowed down to when -frename-registers was enabled at -O2. It is just luck however that we aren't seeing this on older toolchains. Thanks to Segher for working with me to find this issue. Fixes: d0cebfa650a0 ("powerpc: word-at-a-time optimization for 64-bit Little Endian") Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/word-at-a-time.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index e4396a7d0f7c..4afe66aa1400 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -82,7 +82,7 @@ static inline unsigned long create_zero_mask(unsigned long bits) "andc %1,%1,%2\n\t" "popcntd %0,%1" : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask) - : "r" (bits)); + : "b" (bits)); return leading_zero_bits; } From a7fa0a478a625039ef0852e5606d1248cba093e4 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 1 Apr 2016 08:52:56 +0100 Subject: [PATCH 697/797] libahci: save port map for forced port map commit 2fd0f46cb1b82587c7ae4a616d69057fb9bd0af7 upstream. In usecases where force_port_map is used saved_port_map is never set, resulting in not programming the PORTS_IMPL register as part of initial config. This patch fixes this by setting it to port_map even in case where force_port_map is used, making it more inline with other parts of the code. Fixes: 566d1827df2e ("libata: disable forced PORTS_IMPL for >= AHCI 1.3") Signed-off-by: Srinivas Kandagatla Acked-by: Tejun Heo Reviewed-by: Andy Gross Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libahci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 998c6a85ad89..9628fa131757 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -467,6 +467,7 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv) dev_info(dev, "forcing port_map 0x%x -> 0x%x\n", port_map, hpriv->force_port_map); port_map = hpriv->force_port_map; + hpriv->saved_port_map = port_map; } if (hpriv->mask_port_map) { From 6e337a05df8adfc54540ca2a2b9d621836697796 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 1 Apr 2016 08:52:57 +0100 Subject: [PATCH 698/797] ata: ahci-platform: Add ports-implemented DT bindings. commit 17dcc37e3e847bc0e67a5b1ec52471fcc6c18682 upstream. On some SOCs PORTS_IMPL register value is never programmed by the firmware and left at zero value. Which means that no sata ports are available for software. AHCI driver used to cope up with this by fabricating the port_map if the PORTS_IMPL register is read zero, but recent patch broke this workaround as zero value was valid for NVMe disks. This patch adds ports-implemented DT bindings as workaround for this issue in a way that DT can can override the PORTS_IMPL register in cases where the firmware did not program it already. Fixes: 566d1827df2e ("libata: disable forced PORTS_IMPL for >= AHCI 1.3") Signed-off-by: Srinivas Kandagatla Acked-by: Tejun Heo Reviewed-by: Andy Gross Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/ata/ahci-platform.txt | 4 ++++ drivers/ata/ahci_platform.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt index c2340eeeb97f..c000832a7fb9 100644 --- a/Documentation/devicetree/bindings/ata/ahci-platform.txt +++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt @@ -30,6 +30,10 @@ Optional properties: - target-supply : regulator for SATA target power - phys : reference to the SATA PHY node - phy-names : must be "sata-phy" +- ports-implemented : Mask that indicates which ports that the HBA supports + are available for software to use. Useful if PORTS_IMPL + is not programmed by the BIOS, which is true with + some embedded SOC's. Required properties when using sub-nodes: - #address-cells : number of cells to encode an address diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 04975b851c23..639adb1f8abd 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -51,6 +51,9 @@ static int ahci_probe(struct platform_device *pdev) if (rc) return rc; + of_property_read_u32(dev->of_node, + "ports-implemented", &hpriv->force_port_map); + if (of_device_is_compatible(dev->of_node, "hisilicon,hisi-ahci")) hpriv->flags |= AHCI_HFLAG_NO_FBS | AHCI_HFLAG_NO_NCQ; From c8f8a515ae418498e73fff210d0b0c23e2193e6a Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Mon, 18 Apr 2016 12:13:23 +0000 Subject: [PATCH 699/797] USB: serial: cp210x: add ID for Link ECU commit 1d377f4d690637a0121eac8701f84a0aa1e69a69 upstream. The Link ECU is an aftermarket ECU computer for vehicles that provides full tuning abilities as well as datalogging and displaying capabilities via the USB to Serial adapter built into the device. Signed-off-by: Mike Manning Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index bdc0f2f24f19..7f45d00bf2ff 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -140,6 +140,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */ { USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon Labs MobiData GPRS USB Modem */ { USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */ + { USB_DEVICE(0x12B8, 0xEC60) }, /* Link G4 ECU */ + { USB_DEVICE(0x12B8, 0xEC62) }, /* Link G4+ ECU */ { USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */ { USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */ { USB_DEVICE(0x166A, 0x0201) }, /* Clipsal 5500PACA C-Bus Pascal Automation Controller */ From e5dd50f5729d6c94a0732fdfacac6ad7a1c0eb64 Mon Sep 17 00:00:00 2001 From: Jasem Mutlaq Date: Tue, 19 Apr 2016 10:38:27 +0300 Subject: [PATCH 700/797] USB: serial: cp210x: add Straizona Focusers device ids commit 613ac23a46e10d4d4339febdd534fafadd68e059 upstream. Adding VID:PID for Straizona Focusers to cp210x driver. Signed-off-by: Jasem Mutlaq Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 7f45d00bf2ff..a2b43a6e7fa7 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -108,6 +108,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demonstration module */ { USB_DEVICE(0x10C4, 0x8281) }, /* Nanotec Plug & Drive */ { USB_DEVICE(0x10C4, 0x8293) }, /* Telegesis ETRX2USB */ + { USB_DEVICE(0x10C4, 0x82F4) }, /* Starizona MicroTouch */ { USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */ { USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */ { USB_DEVICE(0x10C4, 0x8382) }, /* Cygnal Integrated Products, Inc. */ @@ -117,6 +118,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8418) }, /* IRZ Automation Teleport SG-10 GSM/GPRS Modem */ { USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */ { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */ + { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */ { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */ { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */ { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ From 6e9544fb236325423d5066ebdeed577fb92be315 Mon Sep 17 00:00:00 2001 From: Stanislav Meduna Date: Mon, 2 May 2016 16:05:11 +0100 Subject: [PATCH 701/797] nvmem: mxs-ocotp: fix buffer overflow in read commit d1306eb675ad7a9a760b6b8e8e189824b8db89e7 upstream. This patch fixes the issue where the mxs_ocotp_read is reading the ocotp in reg_size steps but decrements the remaining size by 1. The number of iterations is thus four times higher, overwriting the area behind the output buffer. Fixes: c01e9a11ab6f ("nvmem: add driver for ocotp in i.MX23 and i.MX28") Tested-by: Stefan Wahren Signed-off-by: Stanislav Meduna Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/mxs-ocotp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvmem/mxs-ocotp.c b/drivers/nvmem/mxs-ocotp.c index 8ba19bba3156..2bb3c5799ac4 100644 --- a/drivers/nvmem/mxs-ocotp.c +++ b/drivers/nvmem/mxs-ocotp.c @@ -94,7 +94,7 @@ static int mxs_ocotp_read(void *context, const void *reg, size_t reg_size, if (ret) goto close_banks; - while (val_size) { + while (val_size >= reg_size) { if ((offset < OCOTP_DATA_OFFSET) || (offset % 16)) { /* fill up non-data register */ *buf = 0; @@ -103,7 +103,7 @@ static int mxs_ocotp_read(void *context, const void *reg, size_t reg_size, } buf++; - val_size--; + val_size -= reg_size; offset += reg_size; } From c04e6e9730e5613ae2d5bd75ead2493eee0dabde Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Wed, 27 Apr 2016 10:17:51 +0200 Subject: [PATCH 702/797] gpu: ipu-v3: Fix imx-ipuv3-crtc module autoloading commit 503fe87bd0a8346ba9d8b7f49115dcd0a4185226 upstream. If of_node is set before calling platform_device_add, the driver core will try to use of: modalias matching, which fails because the device tree nodes don't have a compatible property set. This patch fixes imx-ipuv3-crtc module autoloading by setting the of_node property only after the platform modalias is set. Fixes: 304e6be652e2 ("gpu: ipu-v3: Assign of_node of child platform devices to corresponding ports") Reported-by: Dennis Gilmore Signed-off-by: Philipp Zabel Tested-By: Dennis Gilmore Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/ipu-v3/ipu-common.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index a0e28f3a278d..0585fd2031dd 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -1068,7 +1068,6 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base) goto err_register; } - pdev->dev.of_node = of_node; pdev->dev.parent = dev; ret = platform_device_add_data(pdev, ®->pdata, @@ -1079,6 +1078,12 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base) platform_device_put(pdev); goto err_register; } + + /* + * Set of_node only after calling platform_device_add. Otherwise + * the platform:imx-ipuv3-crtc modalias won't be used. + */ + pdev->dev.of_node = of_node; } return 0; From 3d2ef4c1a725f185db7c25d186567f207813e74d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 2 May 2016 18:54:39 -0400 Subject: [PATCH 703/797] drm/amdgpu: make sure vertical front porch is at least 1 commit 0126d4b9a516256f2432ca0dc78ab293a8255378 upstream. hw doesn't like a 0 value. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 1e0bba29e167..1cd6de575305 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -298,6 +298,10 @@ bool amdgpu_atombios_encoder_mode_fixup(struct drm_encoder *encoder, && (mode->crtc_vsync_start < (mode->crtc_vdisplay + 2))) adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + 2; + /* vertical FP must be at least 1 */ + if (mode->crtc_vsync_start == mode->crtc_vdisplay) + adjusted_mode->crtc_vsync_start++; + /* get the native mode for scaling */ if (amdgpu_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT)) amdgpu_panel_mode_fixup(encoder, adjusted_mode); From a71718ded5b74876097822f31416e6210795879a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 3 May 2016 12:44:29 +1000 Subject: [PATCH 704/797] drm/amdgpu: set metadata pointer to NULL after freeing. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0092d3edcb23fcdb8cbe4159ba94a534290ff982 upstream. Without this there was a double free of the metadata, which ended up freeing the fd table for me here, and taking out the machine more often than not. I reproduced with X.org + modesetting DDX + latest llvm/mesa, also required using dri3. Reviewed-by: Christian König Signed-off-by: Dave Airlie Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index b8fbbd7699e4..73628c7599e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -540,6 +540,7 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, if (!metadata_size) { if (bo->metadata_size) { kfree(bo->metadata); + bo->metadata = NULL; bo->metadata_size = 0; } return 0; From d3cd04a8a94ab3fc02eef4f861aac0f494b2366e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 4 Apr 2016 14:54:59 +0900 Subject: [PATCH 705/797] iio: ak8975: Fix NULL pointer exception on early interrupt commit 07d2390e36ee5b3265e9cc8305f2a106c8721e16 upstream. In certain probe conditions the interrupt came right after registering the handler causing a NULL pointer exception because of uninitialized waitqueue: $ udevadm trigger i2c-gpio i2c-gpio-1: using pins 143 (SDA) and 144 (SCL) i2c-gpio i2c-gpio-3: using pins 53 (SDA) and 52 (SCL) Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = e8b38000 [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] SMP ARM Modules linked in: snd_soc_i2s(+) i2c_gpio(+) snd_soc_idma snd_soc_s3c_dma snd_soc_core snd_pcm_dmaengine snd_pcm snd_timer snd soundcore ac97_bus spi_s3c64xx pwm_samsung dwc2 exynos_adc phy_exynos_usb2 exynosdrm exynos_rng rng_core rtc_s3c CPU: 0 PID: 717 Comm: data-provider-m Not tainted 4.6.0-rc1-next-20160401-00011-g1b8d87473b9e-dirty #101 Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) (...) (__wake_up_common) from [] (__wake_up+0x38/0x4c) (__wake_up) from [] (ak8975_irq_handler+0x28/0x30) (ak8975_irq_handler) from [] (handle_irq_event_percpu+0x88/0x140) (handle_irq_event_percpu) from [] (handle_irq_event+0x44/0x68) (handle_irq_event) from [] (handle_edge_irq+0xf0/0x19c) (handle_edge_irq) from [] (generic_handle_irq+0x24/0x34) (generic_handle_irq) from [] (exynos_eint_gpio_irq+0x50/0x68) (exynos_eint_gpio_irq) from [] (handle_irq_event_percpu+0x88/0x140) (handle_irq_event_percpu) from [] (handle_irq_event+0x44/0x68) (handle_irq_event) from [] (handle_fasteoi_irq+0xb4/0x194) (handle_fasteoi_irq) from [] (generic_handle_irq+0x24/0x34) (generic_handle_irq) from [] (__handle_domain_irq+0x5c/0xb4) (__handle_domain_irq) from [] (gic_handle_irq+0x54/0x94) (gic_handle_irq) from [] (__irq_usr+0x50/0x80) The bug was reproduced on exynos4412-trats2 (with a max77693 device also using i2c-gpio) after building max77693 as a module. Fixes: 94a6d5cf7caa ("iio:ak8975 Implement data ready interrupt handling") Signed-off-by: Krzysztof Kozlowski Tested-by: Gregor Boirie Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/magnetometer/ak8975.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c index b13936dacc78..fd780bbcd07e 100644 --- a/drivers/iio/magnetometer/ak8975.c +++ b/drivers/iio/magnetometer/ak8975.c @@ -462,6 +462,8 @@ static int ak8975_setup_irq(struct ak8975_data *data) int rc; int irq; + init_waitqueue_head(&data->data_ready_queue); + clear_bit(0, &data->flags); if (client->irq) irq = client->irq; else @@ -477,8 +479,6 @@ static int ak8975_setup_irq(struct ak8975_data *data) return rc; } - init_waitqueue_head(&data->data_ready_queue); - clear_bit(0, &data->flags); data->eoc_irq = irq; return rc; From 0f5c3afc750715fb644d9b234a7b05afb11dfe54 Mon Sep 17 00:00:00 2001 From: Richard Leitner Date: Tue, 5 Apr 2016 15:03:48 +0200 Subject: [PATCH 706/797] iio: ak8975: fix maybe-uninitialized warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 05be8d4101d960bad271d32b4f6096af1ccb1534 upstream. If i2c_device_id *id is NULL and acpi_match_device returns NULL too, then chipset may be unitialized when accessing &ak_def_array[chipset] in ak8975_probe. Therefore initialize chipset to AK_MAX_TYPE, which will return an error when not changed. This patch fixes the following maybe-uninitialized warning: drivers/iio/magnetometer/ak8975.c: In function ‘ak8975_probe’: drivers/iio/magnetometer/ak8975.c:788:14: warning: ‘chipset’ may be used uninitialized in this function [-Wmaybe-uninitialized] data->def = &ak_def_array[chipset]; Signed-off-by: Richard Leitner Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/magnetometer/ak8975.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c index fd780bbcd07e..f2a7f72f7aa6 100644 --- a/drivers/iio/magnetometer/ak8975.c +++ b/drivers/iio/magnetometer/ak8975.c @@ -732,7 +732,7 @@ static int ak8975_probe(struct i2c_client *client, int eoc_gpio; int err; const char *name = NULL; - enum asahi_compass_chipset chipset; + enum asahi_compass_chipset chipset = AK_MAX_TYPE; /* Grab and set up the supplied GPIO. */ if (client->dev.platform_data) From 6b5f7a680d9804f0f441229ce1278efe6f22f8a5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 2 May 2016 18:53:27 -0400 Subject: [PATCH 707/797] drm/radeon: make sure vertical front porch is at least 1 commit 3104b8128d4d646a574ed9d5b17c7d10752cd70b upstream. hw doesn't like a 0 value. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atombios_encoders.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index adf74f4366bb..0b04b9282f56 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -310,6 +310,10 @@ static bool radeon_atom_mode_fixup(struct drm_encoder *encoder, && (mode->crtc_vsync_start < (mode->crtc_vdisplay + 2))) adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + 2; + /* vertical FP must be at least 1 */ + if (mode->crtc_vsync_start == mode->crtc_vdisplay) + adjusted_mode->crtc_vsync_start++; + /* get the native mode for scaling */ if (radeon_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT)) { radeon_panel_mode_fixup(encoder, adjusted_mode); From cf26f675dbd9369a2f28555a6d241208cdc71c6e Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 18 Apr 2016 10:04:21 +0300 Subject: [PATCH 708/797] drm/i915/ddi: Fix eDP VDD handling during booting and suspend/resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5eaa60c7109b40f17ac81090bc8b90482da76cd1 upstream. The driver's VDD on/off logic assumes that whenever the VDD is on we also hold an AUX power domain reference. Since BIOS can leave the VDD on during booting and resuming and on DDI platforms we won't take a corresponding power reference, the above assumption won't hold on those platforms and an eventual delayed VDD off work will do an extraneous AUX power domain put resulting in a refcount underflow. Fix this the same way we did this for non-DDI DP encoders: commit 6d93c0c41760c0 ("drm/i915: fix VDD state tracking after system resume") At the same time call the DP encoder suspend handler the same way as the non-DDI DP encoders do to flush any pending VDD off work. Leaving the work running may cause a HW access where we don't expect this (at a point where power domains are suspended already). While at it remove an unnecessary function call indirection. This fixed for me AUX refcount underflow problems on BXT during suspend/resume. CC: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1460963062-13211-4-git-send-email-imre.deak@intel.com (cherry picked from commit bf93ba67e9c05882f05b7ca2d773cfc8bf462c2a) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_ddi.c | 10 +++------- drivers/gpu/drm/i915/intel_dp.c | 4 ++-- drivers/gpu/drm/i915/intel_drv.h | 2 ++ 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 7e6158b889da..241252de7186 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -3188,12 +3188,6 @@ void intel_ddi_get_config(struct intel_encoder *encoder, intel_ddi_clock_get(encoder, pipe_config); } -static void intel_ddi_destroy(struct drm_encoder *encoder) -{ - /* HDMI has nothing special to destroy, so we can go with this. */ - intel_dp_encoder_destroy(encoder); -} - static bool intel_ddi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { @@ -3212,7 +3206,8 @@ static bool intel_ddi_compute_config(struct intel_encoder *encoder, } static const struct drm_encoder_funcs intel_ddi_funcs = { - .destroy = intel_ddi_destroy, + .reset = intel_dp_encoder_reset, + .destroy = intel_dp_encoder_destroy, }; static struct intel_connector * @@ -3284,6 +3279,7 @@ void intel_ddi_init(struct drm_device *dev, enum port port) intel_encoder->post_disable = intel_ddi_post_disable; intel_encoder->get_hw_state = intel_ddi_get_hw_state; intel_encoder->get_config = intel_ddi_get_config; + intel_encoder->suspend = intel_dp_encoder_suspend; intel_dig_port->port = port; intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 78b8ec84d576..e55a82a99e7f 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5035,7 +5035,7 @@ void intel_dp_encoder_destroy(struct drm_encoder *encoder) kfree(intel_dig_port); } -static void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) +void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) { struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base); @@ -5077,7 +5077,7 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) edp_panel_vdd_schedule_off(intel_dp); } -static void intel_dp_encoder_reset(struct drm_encoder *encoder) +void intel_dp_encoder_reset(struct drm_encoder *encoder) { struct intel_dp *intel_dp; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 0d00f07b7163..f34a219ec5c4 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1204,6 +1204,8 @@ void intel_dp_set_link_params(struct intel_dp *intel_dp, void intel_dp_start_link_train(struct intel_dp *intel_dp); void intel_dp_stop_link_train(struct intel_dp *intel_dp); void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode); +void intel_dp_encoder_reset(struct drm_encoder *encoder); +void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder); void intel_dp_encoder_destroy(struct drm_encoder *encoder); int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc); bool intel_dp_compute_config(struct intel_encoder *encoder, From fa26a3c6c25bceed402055e06c7e0a2e4e13ebe5 Mon Sep 17 00:00:00 2001 From: Mika Kahola Date: Wed, 20 Apr 2016 15:39:02 +0300 Subject: [PATCH 709/797] drm/i915: Fix eDP low vswing for Broadwell MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 992e7a41f9fcc7bcd10e7d346aee5ed7a2c241cb upstream. It was noticed on bug #94087 that module parameter i915.edp_vswing=2 that should override the VBT setting to use default voltage swing (400 mV) was not applied for Broadwell. This patch provides a fix for this by checking if default i.e. higher voltage swing is requested to be used and applies the DDI translations table for DP instead of eDP (low vswing) table. v2: Combine two if statements into one (Jani) v3: Change dev_priv->edp_low_vswing to use dev_priv->vbt.edp.low_vswing Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94087 Signed-off-by: Mika Kahola Link: http://patchwork.freedesktop.org/patch/msgid/1461155942-7749-1-git-send-email-mika.kahola@intel.com Signed-off-by: Ville Syrjälä (cherry picked from commit 00983519214b61c1b9371ec2ed55a4dde773e384) [Jani: s/dev_priv->vbt.edp.low_vswing/dev_priv->edp_low_vswing/ to backport] Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_ddi.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 241252de7186..3c6b07683bd9 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -464,9 +464,17 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port, } else if (IS_BROADWELL(dev)) { ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; - ddi_translations_edp = bdw_ddi_translations_edp; + + if (dev_priv->edp_low_vswing) { + ddi_translations_edp = bdw_ddi_translations_edp; + n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); + } else { + ddi_translations_edp = bdw_ddi_translations_dp; + n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); + } + ddi_translations_hdmi = bdw_ddi_translations_hdmi; - n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); + n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); hdmi_default_entry = 7; From bc631165a1b6583b3e96404fec4ddc8efb2f4392 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 20 Apr 2016 16:43:56 +0300 Subject: [PATCH 710/797] drm/i915: Make RPS EI/thresholds multiple of 25 on SNB-BDW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4ea3959018d09edfa36a9e7b5ccdbd4ec4b99e49 upstream. Somehow my SNB GT1 (Dell XPS 8300) gets very unhappy around GPU hangs if the RPS EI/thresholds aren't suitably aligned. It seems like scheduling/timer interupts stop working somehow and things get stuck eg. in usleep_range(). I bisected the problem down to commit 8a5864377b12 ("drm/i915/skl: Restructured the gen6_set_rps_thresholds function") I observed that before all the values were at least multiples of 25, but afterwards they are not. And rounding things up to the next multiple of 25 does seem to help, so lets' do that. I also tried roundup(..., 5) but that wasn't sufficient. Also I have no idea if we might need this sort of thing on gen9+ as well. These are the original EI/thresholds: LOW_POWER GEN6_RP_UP_EI 12500 GEN6_RP_UP_THRESHOLD 11800 GEN6_RP_DOWN_EI 25000 GEN6_RP_DOWN_THRESHOLD 21250 BETWEEN GEN6_RP_UP_EI 10250 GEN6_RP_UP_THRESHOLD 9225 GEN6_RP_DOWN_EI 25000 GEN6_RP_DOWN_THRESHOLD 18750 HIGH_POWER GEN6_RP_UP_EI 8000 GEN6_RP_UP_THRESHOLD 6800 GEN6_RP_DOWN_EI 25000 GEN6_RP_DOWN_THRESHOLD 15000 These are after 8a5864377b12: LOW_POWER GEN6_RP_UP_EI 12500 GEN6_RP_UP_THRESHOLD 11875 GEN6_RP_DOWN_EI 25000 GEN6_RP_DOWN_THRESHOLD 21250 BETWEEN GEN6_RP_UP_EI 10156 GEN6_RP_UP_THRESHOLD 9140 GEN6_RP_DOWN_EI 25000 GEN6_RP_DOWN_THRESHOLD 18750 HIGH_POWER GEN6_RP_UP_EI 7812 GEN6_RP_UP_THRESHOLD 6640 GEN6_RP_DOWN_EI 25000 GEN6_RP_DOWN_THRESHOLD 15000 And these are what we have after this patch: LOW_POWER GEN6_RP_UP_EI 12500 GEN6_RP_UP_THRESHOLD 11875 GEN6_RP_DOWN_EI 25000 GEN6_RP_DOWN_THRESHOLD 21250 BETWEEN GEN6_RP_UP_EI 10175 GEN6_RP_UP_THRESHOLD 9150 GEN6_RP_DOWN_EI 25000 GEN6_RP_DOWN_THRESHOLD 18750 HIGH_POWER GEN6_RP_UP_EI 7825 GEN6_RP_UP_THRESHOLD 6650 GEN6_RP_DOWN_EI 25000 GEN6_RP_DOWN_THRESHOLD 15000 Cc: Akash Goel Cc: Chris Wilson Testcase: igt/kms_pipe_crc_basic/hang-read-crc-pipe-B Fixes: 8a5864377b12 ("drm/i915/skl: Restructured the gen6_set_rps_thresholds function") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1461159836-9108-1-git-send-email-ville.syrjala@linux.intel.com Acked-by: Chris Wilson Reviewed-by: Patrik Jakobsson (cherry picked from commit 8a292d016d1cc4938ff14b4df25328230b08a408) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_reg.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index bc7b8faba84d..7e461dca564c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2838,7 +2838,14 @@ enum skl_disp_power_wells { #define GEN6_RP_STATE_CAP (MCHBAR_MIRROR_BASE_SNB + 0x5998) #define BXT_RP_STATE_CAP 0x138170 -#define INTERVAL_1_28_US(us) (((us) * 100) >> 7) +/* + * Make these a multiple of magic 25 to avoid SNB (eg. Dell XPS + * 8300) freezing up around GPU hangs. Looks as if even + * scheduling/timer interrupts start misbehaving if the RPS + * EI/thresholds are "bad", leading to a very sluggish or even + * frozen machine. + */ +#define INTERVAL_1_28_US(us) roundup(((us) * 100) >> 7, 25) #define INTERVAL_1_33_US(us) (((us) * 3) >> 2) #define INTERVAL_0_833_US(us) (((us) * 6) / 5) #define GT_INTERVAL_FROM_US(dev_priv, us) (IS_GEN9(dev_priv) ? \ From 8e1001c5638e244ab9a2ddddf5466b05ddf6af77 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Thu, 21 Apr 2016 16:48:32 +0530 Subject: [PATCH 711/797] drm/i915: Fake HDMI live status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 60b3143c7cac7e8d2ca65c0b347466c5776395d1 upstream. This patch does the following: - Fakes live status of HDMI as connected (even if that's not). While testing certain (monitor + cable) combinations with various intel platforms, it seems that live status register doesn't work reliably on some older devices. So limit the live_status check for HDMI detection, only for platforms from gen7 onwards. V2: restrict faking live_status to certain platforms V3: (Ville) - keep the debug message for !live_status case - fix indentation of comment - remove "warning" from the debug message (Jani) - Change format of fix details in the commit message Fixes: 237ed86c693d ("drm/i915: Check live status before reading edid") Suggested-by: Ville Syrjala Signed-off-by: Shashank Sharma Link: http://patchwork.freedesktop.org/patch/msgid/1461237606-16491-1-git-send-email-shashank.sharma@intel.com Signed-off-by: Ville Syrjälä (cherry picked from commit 4f4a8185011773f7520d9916c6857db946e7f9d1) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_hdmi.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index e6c035b0fc1c..4b8ed9f2dabc 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1388,8 +1388,16 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) hdmi_to_dig_port(intel_hdmi)); } - if (!live_status) - DRM_DEBUG_KMS("Live status not up!"); + if (!live_status) { + DRM_DEBUG_KMS("HDMI live status down\n"); + /* + * Live status register is not reliable on all intel platforms. + * So consider live_status only for certain platforms, for + * others, read EDID to determine presence of sink. + */ + if (INTEL_INFO(dev_priv)->gen < 7 || IS_IVYBRIDGE(dev_priv)) + live_status = true; + } intel_hdmi_unset_edid(connector); From dfa11d586248a21ce2c7fae02c02964c3a4a8379 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 23 Mar 2016 21:07:39 -0700 Subject: [PATCH 712/797] ACPI / processor: Request native thermal interrupt handling via _OSC commit a21211672c9a1d730a39aa65d4a5b3414700adfb upstream. There are several reports of freeze on enabling HWP (Hardware PStates) feature on Skylake-based systems by the Intel P-states driver. The root cause is identified as the HWP interrupts causing BIOS code to freeze. HWP interrupts use the thermal LVT which can be handled by Linux natively, but on the affected Skylake-based systems SMM will respond to it by default. This is a problem for several reasons: - On the affected systems the SMM thermal LVT handler is broken (it will crash when invoked) and a BIOS update is necessary to fix it. - With thermal interrupt handled in SMM we lose all of the reporting features of the arch/x86/kernel/cpu/mcheck/therm_throt driver. - Some thermal drivers like x86-package-temp depend on the thermal threshold interrupts signaled via the thermal LVT. - The HWP interrupts are useful for debugging and tuning performance (if the kernel can handle them). The native handling of thermal interrupts needs to be enabled because of that. This requires some way to tell SMM that the OS can handle thermal interrupts. That can be done by using _OSC/_PDC in processor scope very early during ACPI initialization. The meaning of _OSC/_PDC bit 12 in processor scope is whether or not the OS supports native handling of interrupts for Collaborative Processor Performance Control (CPPC) notifications. Since on HWP-capable systems CPPC is a firmware interface to HWP, setting this bit effectively tells the firmware that the OS will handle thermal interrupts natively going forward. For details on _OSC/_PDC refer to: http://www.intel.com/content/www/us/en/standards/processor-vendor-specific-acpi-specification.html To implement the _OSC/_PDC handshake as described, introduce a new function, acpi_early_processor_osc(), that walks the ACPI namespace looking for ACPI processor objects and invokes _OSC for them with bit 12 in the capabilities buffer set and terminates the namespace walk on the first success. Also modify intel_thermal_interrupt() to clear HWP status bits in the HWP_STATUS MSR to acknowledge HWP interrupts (which prevents them from firing continuously). Signed-off-by: Srinivas Pandruvada [ rjw: Subject & changelog, function rename ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/therm_throt.c | 3 ++ drivers/acpi/acpi_processor.c | 52 ++++++++++++++++++++++++ drivers/acpi/bus.c | 3 ++ drivers/acpi/internal.h | 6 +++ 4 files changed, 64 insertions(+) diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 2c5aaf8c2e2f..05538582a809 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -385,6 +385,9 @@ static void intel_thermal_interrupt(void) { __u64 msr_val; + if (static_cpu_has(X86_FEATURE_HWP)) + wrmsrl_safe(MSR_HWP_STATUS, 0); + rdmsrl(MSR_IA32_THERM_STATUS, msr_val); /* Check for violation of core thermal thresholds*/ diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 6979186dbd4b..9f77943653fb 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -491,6 +491,58 @@ static void acpi_processor_remove(struct acpi_device *device) } #endif /* CONFIG_ACPI_HOTPLUG_CPU */ +#ifdef CONFIG_X86 +static bool acpi_hwp_native_thermal_lvt_set; +static acpi_status __init acpi_hwp_native_thermal_lvt_osc(acpi_handle handle, + u32 lvl, + void *context, + void **rv) +{ + u8 sb_uuid_str[] = "4077A616-290C-47BE-9EBD-D87058713953"; + u32 capbuf[2]; + struct acpi_osc_context osc_context = { + .uuid_str = sb_uuid_str, + .rev = 1, + .cap.length = 8, + .cap.pointer = capbuf, + }; + + if (acpi_hwp_native_thermal_lvt_set) + return AE_CTRL_TERMINATE; + + capbuf[0] = 0x0000; + capbuf[1] = 0x1000; /* set bit 12 */ + + if (ACPI_SUCCESS(acpi_run_osc(handle, &osc_context))) { + if (osc_context.ret.pointer && osc_context.ret.length > 1) { + u32 *capbuf_ret = osc_context.ret.pointer; + + if (capbuf_ret[1] & 0x1000) { + acpi_handle_info(handle, + "_OSC native thermal LVT Acked\n"); + acpi_hwp_native_thermal_lvt_set = true; + } + } + kfree(osc_context.ret.pointer); + } + + return AE_OK; +} + +void __init acpi_early_processor_osc(void) +{ + if (boot_cpu_has(X86_FEATURE_HWP)) { + acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, + ACPI_UINT32_MAX, + acpi_hwp_native_thermal_lvt_osc, + NULL, NULL, NULL); + acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, + acpi_hwp_native_thermal_lvt_osc, + NULL, NULL); + } +} +#endif + /* * The following ACPI IDs are known to be suitable for representing as * processor devices. diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index a212cefae524..ca4f28432d87 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1004,6 +1004,9 @@ static int __init acpi_bus_init(void) goto error1; } + /* Set capability bits for _OSC under processor scope */ + acpi_early_processor_osc(); + /* * _OSC method may exist in module level code, * so it must be run after ACPI_FULL_INITIALIZATION diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 11d87bf67e73..0f3f41c13b38 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -130,6 +130,12 @@ void acpi_early_processor_set_pdc(void); static inline void acpi_early_processor_set_pdc(void) {} #endif +#ifdef CONFIG_X86 +void acpi_early_processor_osc(void); +#else +static inline void acpi_early_processor_osc(void) {} +#endif + /* -------------------------------------------------------------------------- Embedded Controller -------------------------------------------------------------------------- */ From f6ff7398220d7fda0f4d02b9c9755406d8169bc2 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 2 Feb 2016 16:57:18 -0800 Subject: [PATCH 713/797] lib/test-string_helpers.c: fix and improve string_get_size() tests commit 72676bb53f33fd0ef3a1484fc1ecfd306dc6ff40 upstream. Recently added commit 564b026fbd0d ("string_helpers: fix precision loss for some inputs") fixed precision issues for string_get_size() and broke tests. Fix and improve them: test both STRING_UNITS_2 and STRING_UNITS_10 at a time, better failure reporting, test small an huge values. Fixes: 564b026fbd0d28e9 ("string_helpers: fix precision loss for some inputs") Signed-off-by: Vitaly Kuznetsov Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: James Bottomley Cc: James Bottomley Cc: "James E.J. Bottomley" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/test-string_helpers.c | 67 ++++++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 18 deletions(-) diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c index 98866a770770..25b5cbfb7615 100644 --- a/lib/test-string_helpers.c +++ b/lib/test-string_helpers.c @@ -327,36 +327,67 @@ static __init void test_string_escape(const char *name, } #define string_get_size_maxbuf 16 -#define test_string_get_size_one(size, blk_size, units, exp_result) \ +#define test_string_get_size_one(size, blk_size, exp_result10, exp_result2) \ do { \ - BUILD_BUG_ON(sizeof(exp_result) >= string_get_size_maxbuf); \ - __test_string_get_size((size), (blk_size), (units), \ - (exp_result)); \ + BUILD_BUG_ON(sizeof(exp_result10) >= string_get_size_maxbuf); \ + BUILD_BUG_ON(sizeof(exp_result2) >= string_get_size_maxbuf); \ + __test_string_get_size((size), (blk_size), (exp_result10), \ + (exp_result2)); \ } while (0) -static __init void __test_string_get_size(const u64 size, const u64 blk_size, - const enum string_size_units units, - const char *exp_result) +static __init void test_string_get_size_check(const char *units, + const char *exp, + char *res, + const u64 size, + const u64 blk_size) { - char buf[string_get_size_maxbuf]; - - string_get_size(size, blk_size, units, buf, sizeof(buf)); - if (!memcmp(buf, exp_result, strlen(exp_result) + 1)) + if (!memcmp(res, exp, strlen(exp) + 1)) return; - buf[sizeof(buf) - 1] = '\0'; - pr_warn("Test 'test_string_get_size_one' failed!\n"); - pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %d\n", + res[string_get_size_maxbuf - 1] = '\0'; + + pr_warn("Test 'test_string_get_size' failed!\n"); + pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %s)\n", size, blk_size, units); - pr_warn("expected: '%s', got '%s'\n", exp_result, buf); + pr_warn("expected: '%s', got '%s'\n", exp, res); +} + +static __init void __test_string_get_size(const u64 size, const u64 blk_size, + const char *exp_result10, + const char *exp_result2) +{ + char buf10[string_get_size_maxbuf]; + char buf2[string_get_size_maxbuf]; + + string_get_size(size, blk_size, STRING_UNITS_10, buf10, sizeof(buf10)); + string_get_size(size, blk_size, STRING_UNITS_2, buf2, sizeof(buf2)); + + test_string_get_size_check("STRING_UNITS_10", exp_result10, buf10, + size, blk_size); + + test_string_get_size_check("STRING_UNITS_2", exp_result2, buf2, + size, blk_size); } static __init void test_string_get_size(void) { - test_string_get_size_one(16384, 512, STRING_UNITS_2, "8.00 MiB"); - test_string_get_size_one(8192, 4096, STRING_UNITS_10, "32.7 MB"); - test_string_get_size_one(1, 512, STRING_UNITS_10, "512 B"); + /* small values */ + test_string_get_size_one(0, 512, "0 B", "0 B"); + test_string_get_size_one(1, 512, "512 B", "512 B"); + test_string_get_size_one(1100, 1, "1.10 kB", "1.07 KiB"); + + /* normal values */ + test_string_get_size_one(16384, 512, "8.39 MB", "8.00 MiB"); + test_string_get_size_one(500118192, 512, "256 GB", "238 GiB"); + test_string_get_size_one(8192, 4096, "33.6 MB", "32.0 MiB"); + + /* weird block sizes */ + test_string_get_size_one(3000, 1900, "5.70 MB", "5.44 MiB"); + + /* huge values */ + test_string_get_size_one(U64_MAX, 4096, "75.6 ZB", "64.0 ZiB"); + test_string_get_size_one(4096, U64_MAX, "75.6 ZB", "64.0 ZiB"); } static int __init test_string_helpers_init(void) From 945b6ec05a475fc80bcb79ef006ee5c0263c7b3a Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 28 Jan 2016 15:19:23 -0800 Subject: [PATCH 714/797] drm/i915/skl: Fix DMC load on Skylake J0 and K0 commit a41c8882592fb80458959b10e37632ce030b68ca upstream. The driver does not load firmware for unknown steppings, so these new steppings must be added to the list. Cc: Rodrigo Vivi Signed-off-by: Mat Martineau Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1454023163-25469-1-git-send-email-mathew.j.martineau@linux.intel.com Cc: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_csr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 9e530a739354..fc28c512ece3 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -180,7 +180,8 @@ struct stepping_info { static const struct stepping_info skl_stepping_info[] = { {'A', '0'}, {'B', '0'}, {'C', '0'}, {'D', '0'}, {'E', '0'}, {'F', '0'}, - {'G', '0'}, {'H', '0'}, {'I', '0'} + {'G', '0'}, {'H', '0'}, {'I', '0'}, + {'J', '0'}, {'K', '0'} }; static struct stepping_info bxt_stepping_info[] = { From 4c2795dd50f98fa162cb53190eb557be44f92f58 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 May 2016 11:23:26 +0200 Subject: [PATCH 715/797] Linux 4.4.10 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0722cdf52152..5b5f462f834c 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 9 +SUBLEVEL = 10 EXTRAVERSION = NAME = Blurry Fish Butt From 32b06020f36dd2dcfd7832ffd34a84f254b14e46 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 15 Jan 2016 13:28:57 +0100 Subject: [PATCH 716/797] arm64: hide __efistub_ aliases from kallsyms Commit e8f3010f7326 ("arm64/efi: isolate EFI stub from the kernel proper") isolated the EFI stub code from the kernel proper by prefixing all of its symbols with __efistub_, and selectively allowing access to core kernel symbols from the stub by emitting __efistub_ aliases for functions and variables that the stub can access legally. As an unintended side effect, these aliases are emitted into the kallsyms symbol table, which means they may turn up in backtraces, e.g., ... PC is at __efistub_memset+0x108/0x200 LR is at fixup_init+0x3c/0x48 ... [] __efistub_memset+0x108/0x200 [] free_initmem+0x2c/0x40 [] kernel_init+0x20/0xe0 [] ret_from_fork+0x10/0x40 The backtrace in question has nothing to do with the EFI stub, but simply returns one of the several aliases of memset() that have been recorded in the kallsyms table. This is undesirable, since it may suggest to people who are not aware of this that the issue they are seeing is somehow EFI related. So hide the __efistub_ aliases from kallsyms, by emitting them as absolute linker symbols explicitly. The distinction between those and section relative symbols is completely irrelevant to these definitions, and to the final link we are performing when these definitions are being taken into account (the distinction is only relevant to symbols defined inside a section definition when performing a partial link), and so the resulting values are identical to the original ones. Since absolute symbols are ignored by kallsyms, this will result in these values to be omitted from its symbol table. After this patch, the backtrace generated from the same address looks like this: ... PC is at __memset+0x108/0x200 LR is at fixup_init+0x3c/0x48 ... [] __memset+0x108/0x200 [] free_initmem+0x2c/0x40 [] kernel_init+0x20/0xe0 [] ret_from_fork+0x10/0x40 Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon (cherry picked from commit 75feee3d9d51775072d3a04f47d4a439a4c4590e) Signed-off-by: Alex Shi --- arch/arm64/kernel/image.h | 40 ++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index bc2abb8b1599..999633bd7294 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -64,6 +64,16 @@ #ifdef CONFIG_EFI +/* + * Prevent the symbol aliases below from being emitted into the kallsyms + * table, by forcing them to be absolute symbols (which are conveniently + * ignored by scripts/kallsyms) rather than section relative symbols. + * The distinction is only relevant for partial linking, and only for symbols + * that are defined within a section declaration (which is not the case for + * the definitions below) so the resulting values will be identical. + */ +#define KALLSYMS_HIDE(sym) ABSOLUTE(sym) + /* * The EFI stub has its own symbol namespace prefixed by __efistub_, to * isolate it from the kernel proper. The following symbols are legally @@ -73,25 +83,25 @@ * linked at. The routines below are all implemented in assembler in a * position independent manner */ -__efistub_memcmp = __pi_memcmp; -__efistub_memchr = __pi_memchr; -__efistub_memcpy = __pi_memcpy; -__efistub_memmove = __pi_memmove; -__efistub_memset = __pi_memset; -__efistub_strlen = __pi_strlen; -__efistub_strcmp = __pi_strcmp; -__efistub_strncmp = __pi_strncmp; -__efistub___flush_dcache_area = __pi___flush_dcache_area; +__efistub_memcmp = KALLSYMS_HIDE(__pi_memcmp); +__efistub_memchr = KALLSYMS_HIDE(__pi_memchr); +__efistub_memcpy = KALLSYMS_HIDE(__pi_memcpy); +__efistub_memmove = KALLSYMS_HIDE(__pi_memmove); +__efistub_memset = KALLSYMS_HIDE(__pi_memset); +__efistub_strlen = KALLSYMS_HIDE(__pi_strlen); +__efistub_strcmp = KALLSYMS_HIDE(__pi_strcmp); +__efistub_strncmp = KALLSYMS_HIDE(__pi_strncmp); +__efistub___flush_dcache_area = KALLSYMS_HIDE(__pi___flush_dcache_area); #ifdef CONFIG_KASAN -__efistub___memcpy = __pi_memcpy; -__efistub___memmove = __pi_memmove; -__efistub___memset = __pi_memset; +__efistub___memcpy = KALLSYMS_HIDE(__pi_memcpy); +__efistub___memmove = KALLSYMS_HIDE(__pi_memmove); +__efistub___memset = KALLSYMS_HIDE(__pi_memset); #endif -__efistub__text = _text; -__efistub__end = _end; -__efistub__edata = _edata; +__efistub__text = KALLSYMS_HIDE(_text); +__efistub__end = KALLSYMS_HIDE(_end); +__efistub__edata = KALLSYMS_HIDE(_edata); #endif From b87cf8adbe0d3b1998a7dafd01b70e9b118f641d Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 15 Jan 2016 16:55:37 -0800 Subject: [PATCH 717/797] arch/arm64/include/asm/pgtable.h: add pmd_mkclean for THP MADV_FREE needs pmd_dirty and pmd_mkclean for detecting recent overwrite of the contents since MADV_FREE syscall is called for THP page. This patch adds pmd_mkclean for THP page MADV_FREE support. Signed-off-by: Minchan Kim Cc: "James E.J. Bottomley" Cc: "Kirill A. Shutemov" Cc: Shaohua Li Cc: Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chen Gang Cc: Chris Zankel Cc: Daniel Micay Cc: Darrick J. Wong Cc: David S. Miller Cc: Helge Deller Cc: Hugh Dickins Cc: Ivan Kokshaysky Cc: Jason Evans Cc: Johannes Weiner Cc: KOSAKI Motohiro Cc: Kirill A. Shutemov Cc: Matt Turner Cc: Max Filippov Cc: Mel Gorman Cc: Michael Kerrisk Cc: Michal Hocko Cc: Mika Penttil Cc: Ralf Baechle Cc: Richard Henderson Cc: Rik van Riel Cc: Roland Dreier Cc: Russell King Cc: Shaohua Li Cc: Will Deacon Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 05ee26d9e7e29ab026995eab79be3c6e8351908c) Signed-off-by: Alex Shi --- arch/arm64/include/asm/pgtable.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 76ff5d93c6c3..2daf88970731 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -369,6 +369,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, #define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd))) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) +#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) #define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK)) From 035fdc46d48ae8a7cbf7199c74bac1de36cca626 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 11 Jan 2016 14:50:21 +0100 Subject: [PATCH 718/797] arm64: kasan: ensure that the KASAN zero page is mapped read-only When switching from the early KASAN shadow region, which maps the entire shadow space read-write, to the permanent KASAN shadow region, which uses a zero page to shadow regions that are not subject to instrumentation, the lowest level table kasan_zero_pte[] may be reused unmodified, which means that the mappings of the zero page that it contains will still be read-write. So update it explicitly to map the zero page read only when we activate the permanent mapping. Acked-by: Andrey Ryabinin Acked-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon (cherry picked from commit 7b1af9795773d745c2a8c7d4ca5f2936e8b6adfb) Signed-off-by: Alex Shi --- arch/arm64/mm/kasan_init.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index cf038c7d9fa9..cab7a5be40aa 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -120,6 +120,7 @@ static void __init cpu_set_ttbr1(unsigned long ttbr1) void __init kasan_init(void) { struct memblock_region *reg; + int i; /* * We are going to perform proper setup of shadow memory. @@ -155,6 +156,14 @@ void __init kasan_init(void) pfn_to_nid(virt_to_pfn(start))); } + /* + * KAsan may reuse the contents of kasan_zero_pte directly, so we + * should make sure that it maps the zero page read-only. + */ + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(&kasan_zero_pte[i], + pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO)); + memset(kasan_zero_page, 0, PAGE_SIZE); cpu_set_ttbr1(__pa(swapper_pg_dir)); flush_tlb_all(); From 8c226342ae45c7a2029af59cf81edc9147a60d56 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sun, 24 Jan 2016 15:24:12 +0900 Subject: [PATCH 719/797] arm64: Fix an enum typo in mm/dump.c This patch fixes a typo in mm/dump.c: "MODUELS_END_NR" should be "MODULES_END_NR". Signed-off-by: Masanari Iida Signed-off-by: Will Deacon (cherry picked from commit b3122023df935cf14bf951da98ca598d71b9f826) Signed-off-by: Alex Shi --- arch/arm64/mm/dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 5a22a119a74c..0adbebbc2803 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -46,7 +46,7 @@ enum address_markers_idx { PCI_START_NR, PCI_END_NR, MODULES_START_NR, - MODUELS_END_NR, + MODULES_END_NR, KERNEL_SPACE_NR, }; From d2779548f7c7156686d4d88ce4ae904460952a8c Mon Sep 17 00:00:00 2001 From: William Cohen Date: Thu, 21 Jan 2016 22:56:26 -0500 Subject: [PATCH 720/797] Eliminate the .eh_frame sections from the aarch64 vmlinux and kernel modules By default the aarch64 gcc generates .eh_frame sections. Unlike .debug_frame sections, the .eh_frame sections are loaded into memory when the associated code is loaded. On an example kernel being built with this default the .eh_frame section in vmlinux used an extra 1.7MB of memory. The x86 disables the creation of the .eh_frame section. The aarch64 should probably do the same to save some memory. Signed-off-by: William Cohen Signed-off-by: Will Deacon (cherry picked from commit 728dabd6d1751cf5e0f8e0535891393da62396e9) Signed-off-by: Alex Shi Conflicts: pick 67dfa1751 arm64: errata: Add -mpc-relative-literal-loads in arch/arm64/Makefile --- arch/arm64/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b6c90e5006e4..548a2939d7e6 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -28,6 +28,7 @@ endif KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads) +KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_AFLAGS += $(lseinstr) ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) From 7c584b74f039645457bb762f5171e2de515720e4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:44:55 +0000 Subject: [PATCH 721/797] asm-generic: Fix local variable shadow in __set_fixmap_offset Currently __set_fixmap_offset is a macro function which has a local variable called 'addr'. If a caller passes a 'phys' parameter which is derived from a variable also called 'addr', the local variable will shadow this, and the compiler will complain about the use of an uninitialized variable. To avoid the issue with namespace clashes, 'addr' is prefixed with a liberal sprinkling of underscores. Turning __set_fixmap_offset into a static inline breaks the build for several architectures. Fixing this properly requires updates to a number of architectures to make them agree on the prototype of __set_fixmap (it could be done as a subsequent patch series). Signed-off-by: Mark Rutland Cc: Arnd Bergmann [catalin.marinas@arm.com: squashed the original function patch and macro fixup] Signed-off-by: Catalin Marinas (cherry picked from commit 3694bd76781b76c4f8d2ecd85018feeb1609f0e5) Signed-off-by: Alex Shi --- include/asm-generic/fixmap.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h index 1cbb8338edf3..827e4d3bbc7a 100644 --- a/include/asm-generic/fixmap.h +++ b/include/asm-generic/fixmap.h @@ -70,12 +70,12 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) #endif /* Return a pointer with offset calculated */ -#define __set_fixmap_offset(idx, phys, flags) \ -({ \ - unsigned long addr; \ - __set_fixmap(idx, phys, flags); \ - addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1)); \ - addr; \ +#define __set_fixmap_offset(idx, phys, flags) \ +({ \ + unsigned long ________addr; \ + __set_fixmap(idx, phys, flags); \ + ________addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1)); \ + ________addr; \ }) #define set_fixmap_offset(idx, phys) \ From c8403d828ed9741a4a9d820c829d71211685b659 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:44:56 +0000 Subject: [PATCH 722/797] arm64: mm: specialise pagetable allocators We pass a size parameter to early_alloc and late_alloc, but these are only ever used to allocate single pages. In late_alloc we always allocate a single page. Both allocators provide us with zeroed pages (such that all entries are invalid), but we have no barriers between allocating a page and adding that page to existing (live) tables. A concurrent page table walk may see stale data, leading to a number of issues. This patch specialises the two allocators for page tables. The size parameter is removed and the necessary dsb(ishst) is folded into each. To make it clear that the functions are intended for use for page table allocation, they are renamed to {early,late}_pgtable_alloc, with the related function pointed renamed to pgtable_alloc. As the dsb(ishst) is now in the allocator, the existing barrier for the zero page is redundant and thus is removed. The previously missing include of barrier.h is added. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 21ab99c289d350f4ae454bc069870009db6df20e) Signed-off-by: Alex Shi --- arch/arm64/mm/mmu.c | 52 +++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index c5bd5bca8e3d..3ed128c96618 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -62,15 +63,18 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } EXPORT_SYMBOL(phys_mem_access_prot); -static void __init *early_alloc(unsigned long sz) +static void __init *early_pgtable_alloc(void) { phys_addr_t phys; void *ptr; - phys = memblock_alloc(sz, sz); + phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE); BUG_ON(!phys); ptr = __va(phys); - memset(ptr, 0, sz); + memset(ptr, 0, PAGE_SIZE); + + /* Ensure the zeroed page is visible to the page table walker */ + dsb(ishst); return ptr; } @@ -95,12 +99,12 @@ static void split_pmd(pmd_t *pmd, pte_t *pte) static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot, - void *(*alloc)(unsigned long size)) + void *(*pgtable_alloc)(void)) { pte_t *pte; if (pmd_none(*pmd) || pmd_sect(*pmd)) { - pte = alloc(PTRS_PER_PTE * sizeof(pte_t)); + pte = pgtable_alloc(); if (pmd_sect(*pmd)) split_pmd(pmd, pte); __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE); @@ -130,7 +134,7 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd) static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - void *(*alloc)(unsigned long size)) + void *(*pgtable_alloc)(void)) { pmd_t *pmd; unsigned long next; @@ -139,7 +143,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, * Check for initial section mappings in the pgd/pud and remove them. */ if (pud_none(*pud) || pud_sect(*pud)) { - pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t)); + pmd = pgtable_alloc(); if (pud_sect(*pud)) { /* * need to have the 1G of mappings continue to be @@ -174,7 +178,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, } } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot, alloc); + prot, pgtable_alloc); } phys += next - addr; } while (pmd++, addr = next, addr != end); @@ -195,13 +199,13 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - void *(*alloc)(unsigned long size)) + void *(*pgtable_alloc)(void)) { pud_t *pud; unsigned long next; if (pgd_none(*pgd)) { - pud = alloc(PTRS_PER_PUD * sizeof(pud_t)); + pud = pgtable_alloc(); pgd_populate(mm, pgd, pud); } BUG_ON(pgd_bad(*pgd)); @@ -234,7 +238,8 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, } } } else { - alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc); + alloc_init_pmd(mm, pud, addr, next, phys, prot, + pgtable_alloc); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -247,7 +252,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - void *(*alloc)(unsigned long size)) + void *(*pgtable_alloc)(void)) { unsigned long addr, length, end, next; @@ -265,18 +270,18 @@ static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc); + alloc_init_pud(mm, pgd, addr, next, phys, prot, pgtable_alloc); phys += next - addr; } while (pgd++, addr = next, addr != end); } -static void *late_alloc(unsigned long size) +static void *late_pgtable_alloc(void) { - void *ptr; - - BUG_ON(size > PAGE_SIZE); - ptr = (void *)__get_free_page(PGALLOC_GFP); + void *ptr = (void *)__get_free_page(PGALLOC_GFP); BUG_ON(!ptr); + + /* Ensure the zeroed page is visible to the page table walker */ + dsb(ishst); return ptr; } @@ -289,7 +294,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, return; } __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt, - size, prot, early_alloc); + size, prot, early_pgtable_alloc); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, @@ -297,7 +302,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, pgprot_t prot) { __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot, - late_alloc); + late_pgtable_alloc); } static void create_mapping_late(phys_addr_t phys, unsigned long virt, @@ -310,7 +315,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, } return __create_mapping(&init_mm, pgd_offset_k(virt), - phys, virt, size, prot, late_alloc); + phys, virt, size, prot, late_pgtable_alloc); } #ifdef CONFIG_DEBUG_RODATA @@ -458,15 +463,12 @@ void __init paging_init(void) fixup_executable(); /* allocate the zero page. */ - zero_page = early_alloc(PAGE_SIZE); + zero_page = early_pgtable_alloc(); bootmem_init(); empty_zero_page = virt_to_page(zero_page); - /* Ensure the zero page is visible to the page table walker */ - dsb(ishst); - /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. From a4593c91bbb59e89df1aefe677493ff364a7ddb2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:44:57 +0000 Subject: [PATCH 723/797] arm64: mm: place empty_zero_page in bss Currently the zero page is set up in paging_init, and thus we cannot use the zero page earlier. We use the zero page as a reserved TTBR value from which no TLB entries may be allocated (e.g. when uninstalling the idmap). To enable such usage earlier (as may be required for invasive changes to the kernel page tables), and to minimise the time that the idmap is active, we need to be able to use the zero page before paging_init. This patch follows the example set by x86, by allocating the zero page at compile time, in .bss. This means that the zero page itself is available immediately upon entry to start_kernel (as we zero .bss before this), and also means that the zero page takes up no space in the raw Image binary. The associated struct page is allocated in bootmem_init, and remains unavailable until this time. Outside of arch code, the only users of empty_zero_page assume that the empty_zero_page symbol refers to the zeroed memory itself, and that ZERO_PAGE(x) must be used to acquire the associated struct page, following the example of x86. This patch also brings arm64 inline with these assumptions. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 5227cfa71f9e8574373f4d0e9e754942d76cdf67) Signed-off-by: Alex Shi --- arch/arm64/include/asm/mmu_context.h | 2 +- arch/arm64/include/asm/pgtable.h | 4 ++-- arch/arm64/kernel/head.S | 1 + arch/arm64/mm/mmu.c | 9 +-------- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 24165784b803..600eacb9f7d5 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -48,7 +48,7 @@ static inline void contextidr_thread_switch(struct task_struct *next) */ static inline void cpu_set_reserved_ttbr0(void) { - unsigned long ttbr = page_to_phys(empty_zero_page); + unsigned long ttbr = virt_to_phys(empty_zero_page); asm( " msr ttbr0_el1, %0 // set TTBR0\n" diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 2daf88970731..8a76e603d737 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -123,8 +123,8 @@ extern void __pgd_error(const char *file, int line, unsigned long val); * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -extern struct page *empty_zero_page; -#define ZERO_PAGE(vaddr) (empty_zero_page) +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; +#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 917d98108b3f..53b9f9f128c2 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -421,6 +421,7 @@ __mmap_switched: adr_l x2, __bss_stop sub x2, x2, x0 bl __pi_memset + dsb ishst // Make zero page visible to PTW adr_l sp, initial_sp, x4 mov x4, sp diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 3ed128c96618..e4932aa6c6e9 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -49,7 +49,7 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS); * Empty_zero_page is a special page that is used for zero-initialized data * and COW. */ -struct page *empty_zero_page; +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, @@ -457,18 +457,11 @@ void fixup_init(void) */ void __init paging_init(void) { - void *zero_page; - map_mem(); fixup_executable(); - /* allocate the zero page. */ - zero_page = early_pgtable_alloc(); - bootmem_init(); - empty_zero_page = virt_to_page(zero_page); - /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. From 0dedc3948e315aff0f382a58c0d387547d8a35b5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:44:58 +0000 Subject: [PATCH 724/797] arm64: unify idmap removal We currently open-code the removal of the idmap and restoration of the current task's MMU state in a few places. Before introducing yet more copies of this sequence, unify these to call a new helper, cpu_uninstall_idmap. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Lorenzo Pieralisi Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 9e8e865bbe294a69666a1996bda3e87825b258c0) Signed-off-by: Alex Shi --- arch/arm64/include/asm/mmu_context.h | 25 +++++++++++++++++++++++++ arch/arm64/kernel/setup.c | 1 + arch/arm64/kernel/smp.c | 4 +--- arch/arm64/kernel/suspend.c | 20 ++++---------------- arch/arm64/mm/mmu.c | 4 +--- 5 files changed, 32 insertions(+), 22 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 600eacb9f7d5..b1b2514d8883 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef CONFIG_PID_IN_CONTEXTIDR static inline void contextidr_thread_switch(struct task_struct *next) @@ -89,6 +90,30 @@ static inline void cpu_set_default_tcr_t0sz(void) : "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); } +/* + * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm. + * + * The idmap lives in the same VA range as userspace, but uses global entries + * and may use a different TCR_EL1.T0SZ. To avoid issues resulting from + * speculative TLB fetches, we must temporarily install the reserved page + * tables while we invalidate the TLBs and set up the correct TCR_EL1.T0SZ. + * + * If current is a not a user task, the mm covers the TTBR1_EL1 page tables, + * which should not be installed in TTBR0_EL1. In this case we can leave the + * reserved page tables in place. + */ +static inline void cpu_uninstall_idmap(void) +{ + struct mm_struct *mm = current->active_mm; + + cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + cpu_set_default_tcr_t0sz(); + + if (mm != &init_mm) + cpu_switch_mm(mm->pgd, mm); +} + /* * It would be nice to return ASIDs back to the allocator, but unfortunately * that introduces a race with a generation rollover where we could erroneously diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 8119479147db..f6621ba071f9 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -62,6 +62,7 @@ #include #include #include +#include phys_addr_t __fdt_pointer __initdata; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index b1adc51b2c2e..68e7f79630d4 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -149,9 +149,7 @@ asmlinkage void secondary_start_kernel(void) * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. */ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - cpu_set_default_tcr_t0sz(); + cpu_uninstall_idmap(); preempt_disable(); trace_hardirqs_off(); diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 1095aa483a1c..66055392f445 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -60,7 +60,6 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) */ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { - struct mm_struct *mm = current->active_mm; int ret; unsigned long flags; @@ -87,22 +86,11 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) ret = __cpu_suspend_enter(arg, fn); if (ret == 0) { /* - * We are resuming from reset with TTBR0_EL1 set to the - * idmap to enable the MMU; set the TTBR0 to the reserved - * page tables to prevent speculative TLB allocations, flush - * the local tlb and set the default tcr_el1.t0sz so that - * the TTBR0 address space set-up is properly restored. - * If the current active_mm != &init_mm we entered cpu_suspend - * with mappings in TTBR0 that must be restored, so we switch - * them back to complete the address space configuration - * restoration before returning. + * We are resuming from reset with the idmap active in TTBR0_EL1. + * We must uninstall the idmap and restore the expected MMU + * state before we can possibly return to userspace. */ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - cpu_set_default_tcr_t0sz(); - - if (mm != &init_mm) - cpu_switch_mm(mm->pgd, mm); + cpu_uninstall_idmap(); /* * Restore per-cpu offset before any kernel diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e4932aa6c6e9..dcc06b23b37f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -466,9 +466,7 @@ void __init paging_init(void) * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. */ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - cpu_set_default_tcr_t0sz(); + cpu_uninstall_idmap(); } /* From 7ed029beef4adcab0ad59356579f6fea71655895 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:44:59 +0000 Subject: [PATCH 725/797] arm64: unmap idmap earlier During boot we leave the idmap in place until paging_init, as we previously had to wait for the zero page to become allocated and accessible. Now that we have a statically-allocated zero page, we can uninstall the idmap much earlier in the boot process, making it far easier to spot accidental use of physical addresses. This also brings the cold boot path in line with the secondary boot path. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 86ccce896cb0aa800a7a6dcd29b41ffc4eeb1a75) Signed-off-by: Alex Shi --- arch/arm64/kernel/setup.c | 6 ++++++ arch/arm64/mm/mmu.c | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index f6621ba071f9..cfed56f0ad26 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -314,6 +314,12 @@ void __init setup_arch(char **cmdline_p) */ local_async_enable(); + /* + * TTBR0 is only used for the identity mapping at this stage. Make it + * point to zero page to avoid speculatively fetching new entries. + */ + cpu_uninstall_idmap(); + efi_init(); arm64_memblock_init(); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index dcc06b23b37f..8587ed9d81b6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -461,12 +461,6 @@ void __init paging_init(void) fixup_executable(); bootmem_init(); - - /* - * TTBR0 is only used for the identity mapping at this stage. Make it - * point to zero page to avoid speculatively fetching new entries. - */ - cpu_uninstall_idmap(); } /* From 34903bb8c69405ec6eb2b2d437fabd0571df94bb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:00 +0000 Subject: [PATCH 726/797] arm64: add function to install the idmap In some cases (e.g. when making invasive changes to the kernel page tables) we will need to execute code from the idmap. Add a new helper which may be used to install the idmap, complementing the existing cpu_uninstall_idmap. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 609116d202a8c5fd3fe393eb85373cbee906df68) Signed-off-by: Alex Shi --- arch/arm64/include/asm/mmu_context.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index b1b2514d8883..944f2730a940 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -74,7 +74,7 @@ static inline bool __cpu_uses_extended_idmap(void) /* * Set TCR.T0SZ to its default value (based on VA_BITS) */ -static inline void cpu_set_default_tcr_t0sz(void) +static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) { unsigned long tcr; @@ -87,9 +87,12 @@ static inline void cpu_set_default_tcr_t0sz(void) " msr tcr_el1, %0 ;" " isb" : "=&r" (tcr) - : "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); + : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); } +#define cpu_set_default_tcr_t0sz() __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS)) +#define cpu_set_idmap_tcr_t0sz() __cpu_set_tcr_t0sz(idmap_t0sz) + /* * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm. * @@ -114,6 +117,15 @@ static inline void cpu_uninstall_idmap(void) cpu_switch_mm(mm->pgd, mm); } +static inline void cpu_install_idmap(void) +{ + cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + cpu_set_idmap_tcr_t0sz(); + + cpu_switch_mm(idmap_pg_dir, &init_mm); +} + /* * It would be nice to return ASIDs back to the allocator, but unfortunately * that introduces a race with a generation rollover where we could erroneously From 34fc059805c7dfa19d0d9d1e008fe83f7744c0ed Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:01 +0000 Subject: [PATCH 727/797] arm64: mm: add code to safely replace TTBR1_EL1 If page tables are modified without suitable TLB maintenance, the ARM architecture permits multiple TLB entries to be allocated for the same VA. When this occurs, it is permitted that TLB conflict aborts are raised in response to synchronous data/instruction accesses, and/or and amalgamation of the TLB entries may be used as a result of a TLB lookup. The presence of conflicting TLB entries may result in a variety of behaviours detrimental to the system (e.g. erroneous physical addresses may be used by I-cache fetches and/or page table walks). Some of these cases may result in unexpected changes of hardware state, and/or result in the (asynchronous) delivery of SError. To avoid these issues, we must avoid situations where conflicting entries may be allocated into TLBs. For user and module mappings we can follow a strict break-before-make approach, but this cannot work for modifications to the swapper page tables that cover the kernel text and data. Instead, this patch adds code which is intended to be executed from the idmap, which can safely unmap the swapper page tables as it only requires the idmap to be active. This enables us to uninstall the active TTBR1_EL1 entry, invalidate TLBs, then install a new TTBR1_EL1 entry without potentially unmapping code or data required for the sequence. This avoids the risk of conflict, but requires that updates are staged in a copy of the swapper page tables prior to being installed. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 50e1881ddde2a986c7d0d2150985239e5e3d7d96) Signed-off-by: Alex Shi --- arch/arm64/include/asm/mmu_context.h | 19 +++++++++++++++++++ arch/arm64/mm/proc.S | 28 ++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 944f2730a940..a00f7cf35bbd 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -126,6 +126,25 @@ static inline void cpu_install_idmap(void) cpu_switch_mm(idmap_pg_dir, &init_mm); } +/* + * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, + * avoiding the possibility of conflicting TLB entries being allocated. + */ +static inline void cpu_replace_ttbr1(pgd_t *pgd) +{ + typedef void (ttbr_replace_func)(phys_addr_t); + extern ttbr_replace_func idmap_cpu_replace_ttbr1; + ttbr_replace_func *replace_phys; + + phys_addr_t pgd_phys = virt_to_phys(pgd); + + replace_phys = (void *)virt_to_phys(idmap_cpu_replace_ttbr1); + + cpu_install_idmap(); + replace_phys(pgd_phys); + cpu_uninstall_idmap(); +} + /* * It would be nice to return ASIDs back to the allocator, but unfortunately * that introduces a race with a generation rollover where we could erroneously diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c164d2cb35c0..0c19534a901e 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -140,6 +140,34 @@ ENTRY(cpu_do_switch_mm) ret ENDPROC(cpu_do_switch_mm) + .pushsection ".idmap.text", "ax" +/* + * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd) + * + * This is the low-level counterpart to cpu_replace_ttbr1, and should not be + * called by anything else. It can only be executed from a TTBR0 mapping. + */ +ENTRY(idmap_cpu_replace_ttbr1) + mrs x2, daif + msr daifset, #0xf + + adrp x1, empty_zero_page + msr ttbr1_el1, x1 + isb + + tlbi vmalle1 + dsb nsh + isb + + msr ttbr1_el1, x0 + isb + + msr daif, x2 + + ret +ENDPROC(idmap_cpu_replace_ttbr1) + .popsection + /* * __cpu_setup * From 8b8513de45f823e10fce3b1563953be70483941b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:02 +0000 Subject: [PATCH 728/797] arm64: kasan: avoid TLB conflicts The page table modification performed during the KASAN init risks the allocation of conflicting TLB entries, as it swaps a set of valid global entries for another without suitable TLB maintenance. The presence of conflicting TLB entries can result in the delivery of synchronous TLB conflict aborts, or may result in the use of erroneous data being returned in response to a TLB lookup. This can affect explicit data accesses from software as well as translations performed asynchronously (e.g. as part of page table walks or speculative I-cache fetches), and can therefore result in a wide variety of problems. To avoid this, use cpu_replace_ttbr1 to swap the page tables. This ensures that when the new tables are installed there are no stale entries from the old tables which may conflict. As all updates are made to the tables while they are not active, the updates themselves are safe. At the same time, add the missing barrier to ensure that the tmp_pg_dir entries updated via memcpy are visible to the page table walkers at the point the tmp_pg_dir is installed. All other page table updates made as part of KASAN initialisation have the requisite barriers due to the use of the standard page table accessors. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Cc: Andrey Ryabinin Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit c1a88e9124a499939ebd8069d5e4d3937f019157) Signed-off-by: Alex Shi --- arch/arm64/mm/kasan_init.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index cab7a5be40aa..263b59020500 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -108,15 +109,6 @@ static void __init clear_pgds(unsigned long start, set_pgd(pgd_offset_k(start), __pgd(0)); } -static void __init cpu_set_ttbr1(unsigned long ttbr1) -{ - asm( - " msr ttbr1_el1, %0\n" - " isb" - : - : "r" (ttbr1)); -} - void __init kasan_init(void) { struct memblock_region *reg; @@ -130,8 +122,8 @@ void __init kasan_init(void) * setup will be finished. */ memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir)); - cpu_set_ttbr1(__pa(tmp_pg_dir)); - flush_tlb_all(); + dsb(ishst); + cpu_replace_ttbr1(tmp_pg_dir); clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); @@ -165,8 +157,7 @@ void __init kasan_init(void) pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO)); memset(kasan_zero_page, 0, PAGE_SIZE); - cpu_set_ttbr1(__pa(swapper_pg_dir)); - flush_tlb_all(); + cpu_replace_ttbr1(swapper_pg_dir); /* At this point kasan is fully initialized. Enable error messages */ init_task.kasan_depth = 0; From febef18a360df3f8b4b364167e8fe050b24ccf65 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:03 +0000 Subject: [PATCH 729/797] arm64: mm: move pte_* macros For pmd, pud, and pgd levels of table, functions including p?d_index and p?d_offset are defined after the p?d_page_vaddr function for the immediately higher level of table. The pte functions however are defined much earlier, even though several rely on the later definition of pmd_page_vaddr. While this isn't currently a problem as these are macros, it prevents the logical grouping of later C functions (which cannot rely on prototypes for functions not yet defined). Move these definitions after pmd_page_vaddr, for consistency with the placement of these functions for other levels of table. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 053520f7d3923cc6d37afb28f9887cb1e7d77454) Signed-off-by: Alex Shi --- arch/arm64/include/asm/pgtable.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 8a76e603d737..d439523a7910 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -136,16 +136,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0)) #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) -/* Find an entry in the third-level page table. */ -#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) - -#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + pte_index(addr)) - -#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_unmap(pte) do { } while (0) -#define pte_unmap_nested(pte) do { } while (0) - /* * The following only work if pte_present(). Undefined behaviour otherwise. */ @@ -447,6 +437,16 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK); } +/* Find an entry in the third-level page table. */ +#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + +#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + pte_index(addr)) + +#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) +#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) + #define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) /* From 77ea11473be30e0b90b32deb650b6403ad291a12 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:04 +0000 Subject: [PATCH 730/797] arm64: mm: add functions to walk page tables by PA To allow us to walk tables allocated into the fixmap, we need to acquire the physical address of a page, rather than the virtual address in the linear map. This patch adds new p??_page_paddr and p??_offset_phys functions to acquire the physical address of a next-level table, and changes p??_offset* into macros which simply convert this to a linear map VA. This renders p??_page_vaddr unused, and hence they are removed. At the pgd level, a new pgd_offset_raw function is added to find the relevant PGD entry given the base of a PGD and a virtual address. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit dca56dca7124709f3dfca81afe61b4d98eb9cacf) Signed-off-by: Alex Shi --- arch/arm64/include/asm/pgtable.h | 39 +++++++++++++++++++------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d439523a7910..db608e7984c6 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -432,15 +432,16 @@ static inline void pmd_clear(pmd_t *pmdp) set_pmd(pmdp, __pmd(0)); } -static inline pte_t *pmd_page_vaddr(pmd_t pmd) +static inline phys_addr_t pmd_page_paddr(pmd_t pmd) { - return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK); + return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK; } /* Find an entry in the third-level page table. */ #define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + pte_index(addr)) +#define pte_offset_phys(dir,addr) (pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t)) +#define pte_offset_kernel(dir,addr) ((pte_t *)__va(pte_offset_phys((dir), (addr)))) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) @@ -475,21 +476,23 @@ static inline void pud_clear(pud_t *pudp) set_pud(pudp, __pud(0)); } -static inline pmd_t *pud_page_vaddr(pud_t pud) +static inline phys_addr_t pud_page_paddr(pud_t pud) { - return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK); + return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK; } /* Find an entry in the second-level page table. */ #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) -{ - return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr); -} +#define pmd_offset_phys(dir, addr) (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t)) +#define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr)))) #define pud_page(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK)) +#else + +#define pud_page_paddr(pud) ({ BUILD_BUG(); 0; }) + #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #if CONFIG_PGTABLE_LEVELS > 3 @@ -511,21 +514,23 @@ static inline void pgd_clear(pgd_t *pgdp) set_pgd(pgdp, __pgd(0)); } -static inline pud_t *pgd_page_vaddr(pgd_t pgd) +static inline phys_addr_t pgd_page_paddr(pgd_t pgd) { - return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK); + return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK; } /* Find an entry in the frst-level page table. */ #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) -static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) -{ - return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr); -} +#define pud_offset_phys(dir, addr) (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t)) +#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr)))) #define pgd_page(pgd) pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK)) +#else + +#define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;}) + #endif /* CONFIG_PGTABLE_LEVELS > 3 */ #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) @@ -533,7 +538,9 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) /* to find an entry in a page-table-directory */ #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) -#define pgd_offset(mm, addr) ((mm)->pgd+pgd_index(addr)) +#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr)) + +#define pgd_offset(mm, addr) (pgd_offset_raw((mm)->pgd, (addr))) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) From 6f74a379d1593fe728141747c6c594758cc5d328 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:05 +0000 Subject: [PATCH 731/797] arm64: mm: avoid redundant __pa(__va(x)) When we "upgrade" to a section mapping, we free any table we made redundant by giving it back to memblock. To get the PA, we acquire the physical address and convert this to a VA, then subsequently convert this back to a PA. This works currently, but will not work if the tables are not accessed via linear map VAs (e.g. is we use fixmap slots). This patch uses {pmd,pud}_page_paddr to acquire the PA. This avoids the __pa(__va()) round trip, saving some work and avoiding reliance on the linear mapping. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 316b39db06718d59d82736df9fc65cf05b467cc7) Signed-off-by: Alex Shi --- arch/arm64/mm/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 8587ed9d81b6..0b6c8727bcd1 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -171,7 +171,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, if (!pmd_none(old_pmd)) { flush_tlb_all(); if (pmd_table(old_pmd)) { - phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0)); + phys_addr_t table = pmd_page_paddr(old_pmd); if (!WARN_ON_ONCE(slab_is_available())) memblock_free(table, PAGE_SIZE); } @@ -232,7 +232,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, if (!pud_none(old_pud)) { flush_tlb_all(); if (pud_table(old_pud)) { - phys_addr_t table = __pa(pmd_offset(&old_pud, 0)); + phys_addr_t table = pud_page_paddr(old_pud); if (!WARN_ON_ONCE(slab_is_available())) memblock_free(table, PAGE_SIZE); } From 0061f781a0fe83adfe76f78f89653cbd41249a93 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:06 +0000 Subject: [PATCH 732/797] arm64: mm: add __{pud,pgd}_populate We currently have __pmd_populate for creating a pmd table entry given the physical address of a pte, but don't have equivalents for the pud or pgd levels of table. To enable us to manipulate tables which are mapped outside of the linear mapping (where we have a PA, but not a linear map VA), it is useful to have these functions. This patch adds __{pud,pgd}_populate. As these should not be called when the kernel uses folded {pmd,pud}s, in these cases they expand to BUILD_BUG(). So long as the appropriate checks are made on the {pud,pgd} entry prior to attempting population, these should be optimized out at compile time. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 1e531cce68c92b46c7d29f36a72f9a3e5886678f) Signed-off-by: Alex Shi --- arch/arm64/include/asm/pgalloc.h | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index c15053902942..ff98585d085a 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -42,11 +42,20 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) free_page((unsigned long)pmd); } -static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot) { - set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE)); + set_pud(pud, __pud(pmd | prot)); } +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + __pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE); +} +#else +static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot) +{ + BUILD_BUG(); +} #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #if CONFIG_PGTABLE_LEVELS > 3 @@ -62,11 +71,20 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) free_page((unsigned long)pud); } -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot) { - set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE)); + set_pgd(pgdp, __pgd(pud | prot)); } +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +{ + __pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE); +} +#else +static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot) +{ + BUILD_BUG(); +} #endif /* CONFIG_PGTABLE_LEVELS > 3 */ extern pgd_t *pgd_alloc(struct mm_struct *mm); From 9568281b80e91974c04f6aaac50ecfd4dcf31df1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:07 +0000 Subject: [PATCH 733/797] arm64: mm: add functions to walk tables in fixmap As a preparatory step to allow us to allocate early page tables from unmapped memory using memblock_alloc, add new p??_{set,clear}_fixmap* functions which can be used to walk page tables outside of the linear mapping by using fixmap slots. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 961faac114819a01e627fe9c9c82b830bb3849d4) Signed-off-by: Alex Shi --- arch/arm64/include/asm/fixmap.h | 10 ++++++++++ arch/arm64/include/asm/pgtable.h | 26 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 309704544d22..1a617d46fce9 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -62,6 +62,16 @@ enum fixed_addresses { FIX_BTMAP_END = __end_of_permanent_fixed_addresses, FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, + + /* + * Used for kernel page table creation, so unmapped memory may be used + * for tables. + */ + FIX_PTE, + FIX_PMD, + FIX_PUD, + FIX_PGD, + __end_of_fixed_addresses }; diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index db608e7984c6..c99dfc588deb 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -59,6 +59,7 @@ #ifndef __ASSEMBLY__ +#include #include extern void __pte_error(const char *file, int line, unsigned long val); @@ -448,6 +449,10 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pte_unmap(pte) do { } while (0) #define pte_unmap_nested(pte) do { } while (0) +#define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr)) +#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr)) +#define pte_clear_fixmap() clear_fixmap(FIX_PTE) + #define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) /* @@ -487,12 +492,21 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) #define pmd_offset_phys(dir, addr) (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t)) #define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr)))) +#define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr)) +#define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr)) +#define pmd_clear_fixmap() clear_fixmap(FIX_PMD) + #define pud_page(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK)) #else #define pud_page_paddr(pud) ({ BUILD_BUG(); 0; }) +/* Match pmd_offset folding in */ +#define pmd_set_fixmap(addr) NULL +#define pmd_set_fixmap_offset(pudp, addr) ((pmd_t *)pudp) +#define pmd_clear_fixmap() + #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #if CONFIG_PGTABLE_LEVELS > 3 @@ -525,12 +539,21 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd) #define pud_offset_phys(dir, addr) (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t)) #define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr)))) +#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr)) +#define pud_set_fixmap_offset(pgd, addr) pud_set_fixmap(pud_offset_phys(pgd, addr)) +#define pud_clear_fixmap() clear_fixmap(FIX_PUD) + #define pgd_page(pgd) pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK)) #else #define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;}) +/* Match pud_offset folding in */ +#define pud_set_fixmap(addr) NULL +#define pud_set_fixmap_offset(pgdp, addr) ((pud_t *)pgdp) +#define pud_clear_fixmap() + #endif /* CONFIG_PGTABLE_LEVELS > 3 */ #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) @@ -545,6 +568,9 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) +#define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr)) +#define pgd_clear_fixmap() clear_fixmap(FIX_PGD) + static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | From c790554bd459b56042e784162b5017c41120538e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:08 +0000 Subject: [PATCH 734/797] arm64: mm: use fixmap when creating page tables As a preparatory step to allow us to allocate early page tables from unmapped memory using memblock_alloc, modify the __create_mapping callees to map and unmap the tables they modify using fixmap entries. All but the top-level pgd initialisation is performed via the fixmap. Subsequent patches will inject the pgd physical address, and migrate to using the FIX_PGD slot. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit f4710445458c0a1bd1c3c014ada2e7d7dc7b882f) Signed-off-by: Alex Shi --- arch/arm64/mm/mmu.c | 61 ++++++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0b6c8727bcd1..4f5a5fa3f8f4 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -63,19 +63,30 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } EXPORT_SYMBOL(phys_mem_access_prot); -static void __init *early_pgtable_alloc(void) +static phys_addr_t __init early_pgtable_alloc(void) { phys_addr_t phys; void *ptr; phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE); BUG_ON(!phys); - ptr = __va(phys); + + /* + * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE + * slot will be free, so we can (ab)use the FIX_PTE slot to initialise + * any level of table. + */ + ptr = pte_set_fixmap(phys); + memset(ptr, 0, PAGE_SIZE); - /* Ensure the zeroed page is visible to the page table walker */ - dsb(ishst); - return ptr; + /* + * Implicit barriers also ensure the zeroed page is visible to the page + * table walker + */ + pte_clear_fixmap(); + + return phys; } /* @@ -99,24 +110,28 @@ static void split_pmd(pmd_t *pmd, pte_t *pte) static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot, - void *(*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void)) { pte_t *pte; if (pmd_none(*pmd) || pmd_sect(*pmd)) { - pte = pgtable_alloc(); + phys_addr_t pte_phys = pgtable_alloc(); + pte = pte_set_fixmap(pte_phys); if (pmd_sect(*pmd)) split_pmd(pmd, pte); - __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE); + __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); flush_tlb_all(); + pte_clear_fixmap(); } BUG_ON(pmd_bad(*pmd)); - pte = pte_offset_kernel(pmd, addr); + pte = pte_set_fixmap_offset(pmd, addr); do { set_pte(pte, pfn_pte(pfn, prot)); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); + + pte_clear_fixmap(); } static void split_pud(pud_t *old_pud, pmd_t *pmd) @@ -134,7 +149,7 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd) static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - void *(*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void)) { pmd_t *pmd; unsigned long next; @@ -143,7 +158,8 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, * Check for initial section mappings in the pgd/pud and remove them. */ if (pud_none(*pud) || pud_sect(*pud)) { - pmd = pgtable_alloc(); + phys_addr_t pmd_phys = pgtable_alloc(); + pmd = pmd_set_fixmap(pmd_phys); if (pud_sect(*pud)) { /* * need to have the 1G of mappings continue to be @@ -151,12 +167,13 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, */ split_pud(pud, pmd); } - pud_populate(mm, pud, pmd); + __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); flush_tlb_all(); + pmd_clear_fixmap(); } BUG_ON(pud_bad(*pud)); - pmd = pmd_offset(pud, addr); + pmd = pmd_set_fixmap_offset(pud, addr); do { next = pmd_addr_end(addr, end); /* try section mapping first */ @@ -182,6 +199,8 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, } phys += next - addr; } while (pmd++, addr = next, addr != end); + + pmd_clear_fixmap(); } static inline bool use_1G_block(unsigned long addr, unsigned long next, @@ -199,18 +218,18 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - void *(*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void)) { pud_t *pud; unsigned long next; if (pgd_none(*pgd)) { - pud = pgtable_alloc(); - pgd_populate(mm, pgd, pud); + phys_addr_t pud_phys = pgtable_alloc(); + __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE); } BUG_ON(pgd_bad(*pgd)); - pud = pud_offset(pgd, addr); + pud = pud_set_fixmap_offset(pgd, addr); do { next = pud_addr_end(addr, end); @@ -243,6 +262,8 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, } phys += next - addr; } while (pud++, addr = next, addr != end); + + pud_clear_fixmap(); } /* @@ -252,7 +273,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - void *(*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void)) { unsigned long addr, length, end, next; @@ -275,14 +296,14 @@ static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, } while (pgd++, addr = next, addr != end); } -static void *late_pgtable_alloc(void) +static phys_addr_t late_pgtable_alloc(void) { void *ptr = (void *)__get_free_page(PGALLOC_GFP); BUG_ON(!ptr); /* Ensure the zeroed page is visible to the page table walker */ dsb(ishst); - return ptr; + return __pa(ptr); } static void __init create_mapping(phys_addr_t phys, unsigned long virt, From 8f64994ff3068b62b867188a12e366ee237da3b5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:09 +0000 Subject: [PATCH 735/797] arm64: mm: allocate pagetables anywhere Now that create_mapping uses fixmap slots to modify pte, pmd, and pud entries, we can access page tables anywhere in physical memory, regardless of the extent of the linear mapping. Given that, we no longer need to limit memblock allocations during page table creation, and can leave the limit as its default MEMBLOCK_ALLOC_ANYWHERE. We never add memory which will fall outside of the linear map range given phys_offset and MAX_MEMBLOCK_ADDR are configured appropriately, so any tables we create will fall in the linear map of the final tables. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit cdef5f6e9e0e5ee397759b664a9f875ff59ccf01) Signed-off-by: Alex Shi --- arch/arm64/mm/mmu.c | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 4f5a5fa3f8f4..d50535a07c6e 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -384,20 +384,6 @@ static void __init __map_memblock(phys_addr_t start, phys_addr_t end) static void __init map_mem(void) { struct memblock_region *reg; - phys_addr_t limit; - - /* - * Temporarily limit the memblock range. We need to do this as - * create_mapping requires puds, pmds and ptes to be allocated from - * memory addressable from the initial direct kernel mapping. - * - * The initial direct kernel mapping, located at swapper_pg_dir, gives - * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps, - * memory starting from PHYS_OFFSET (which must be aligned to 2MB as - * per Documentation/arm64/booting.txt). - */ - limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE; - memblock_set_current_limit(limit); /* map all the memory banks */ for_each_memblock(memory, reg) { @@ -407,29 +393,8 @@ static void __init map_mem(void) if (start >= end) break; - if (ARM64_SWAPPER_USES_SECTION_MAPS) { - /* - * For the first memory bank align the start address and - * current memblock limit to prevent create_mapping() from - * allocating pte page tables from unmapped memory. With - * the section maps, if the first block doesn't end on section - * size boundary, create_mapping() will try to allocate a pte - * page, which may be returned from an unmapped area. - * When section maps are not used, the pte page table for the - * current limit is already present in swapper_pg_dir. - */ - if (start < limit) - start = ALIGN(start, SECTION_SIZE); - if (end < limit) { - limit = end & SECTION_MASK; - memblock_set_current_limit(limit); - } - } __map_memblock(start, end); } - - /* Limit no longer required. */ - memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } static void __init fixup_executable(void) From 55ce0af58717f7651d5a393f687f2ea9a109e4f5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:10 +0000 Subject: [PATCH 736/797] arm64: mm: allow passing a pgdir to alloc_init_* To allow us to initialise pgdirs which are fixmapped, allow explicitly passing a pgdir rather than an mm. A new __create_pgd_mapping function is added for this, with existing __create_mapping callers migrated to this. The mm argument was previously only used at the top level. Now that it is redundant at all levels, it is removed. To indicate its new found similarity to alloc_init_{pud,pmd,pte}, __create_mapping is renamed to init_pgd. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 11509a306bb6ea595878b2d246d2d56b1783e040) Signed-off-by: Alex Shi --- arch/arm64/mm/mmu.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index d50535a07c6e..570ba3e3d362 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -146,8 +146,7 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd) } while (pmd++, i++, i < PTRS_PER_PMD); } -static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, - unsigned long addr, unsigned long end, +static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void)) { @@ -215,8 +214,7 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, return true; } -static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, - unsigned long addr, unsigned long end, +static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void)) { @@ -257,7 +255,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, } } } else { - alloc_init_pmd(mm, pud, addr, next, phys, prot, + alloc_init_pmd(pud, addr, next, phys, prot, pgtable_alloc); } phys += next - addr; @@ -270,8 +268,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, * Create the page directory entries and any necessary page tables for the * mapping specified by 'md'. */ -static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, - phys_addr_t phys, unsigned long virt, +static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void)) { @@ -291,7 +288,7 @@ static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(mm, pgd, addr, next, phys, prot, pgtable_alloc); + alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc); phys += next - addr; } while (pgd++, addr = next, addr != end); } @@ -306,6 +303,14 @@ static phys_addr_t late_pgtable_alloc(void) return __pa(ptr); } +static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*alloc)(void)) +{ + init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc); +} + static void __init create_mapping(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { @@ -314,16 +319,16 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt, - size, prot, early_pgtable_alloc); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, + early_pgtable_alloc); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot, - late_pgtable_alloc); + __create_pgd_mapping(mm->pgd, phys, virt, size, prot, + late_pgtable_alloc); } static void create_mapping_late(phys_addr_t phys, unsigned long virt, @@ -335,8 +340,8 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, return; } - return __create_mapping(&init_mm, pgd_offset_k(virt), - phys, virt, size, prot, late_pgtable_alloc); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, + late_pgtable_alloc); } #ifdef CONFIG_DEBUG_RODATA From 0060e7a78b1a3b208d178b285fb3912f4fd4d9ee Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:11 +0000 Subject: [PATCH 737/797] arm64: ensure _stext and _etext are page-aligned Currently we have separate ALIGN_DEBUG_RO{,_MIN} directives to align _etext and __init_begin. While we ensure that __init_begin is page-aligned, we do not provide the same guarantee for _etext. This is not problematic currently as the alignment of __init_begin is sufficient to prevent issues when we modify permissions. Subsequent patches will assume page alignment of segments of the kernel we wish to map with different permissions. To ensure this, move _etext after the ALIGN_DEBUG_RO_MIN for the init section. This renders the prior ALIGN_DEBUG_RO irrelevant, and hence it is removed. Likewise, upgrade to ALIGN_DEBUG_RO_MIN(PAGE_SIZE) for _stext. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit fca082bfb543ccaaff864fc0892379ccaa1711cd) Signed-off-by: Alex Shi --- arch/arm64/kernel/vmlinux.lds.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index e3928f578891..b78a3c772294 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -95,7 +95,7 @@ SECTIONS _text = .; HEAD_TEXT } - ALIGN_DEBUG_RO + ALIGN_DEBUG_RO_MIN(PAGE_SIZE) .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ __exception_text_start = .; @@ -116,10 +116,9 @@ SECTIONS RO_DATA(PAGE_SIZE) EXCEPTION_TABLE(8) NOTES - ALIGN_DEBUG_RO - _etext = .; /* End of text and rodata section */ ALIGN_DEBUG_RO_MIN(PAGE_SIZE) + _etext = .; /* End of text and rodata section */ __init_begin = .; INIT_TEXT_SECTION(8) From a8a81d65140e0bdfe27e53ae3529546b5dce6bd4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:12 +0000 Subject: [PATCH 738/797] arm64: mm: create new fine-grained mappings at boot At boot we may change the granularity of the tables mapping the kernel (by splitting or making sections). This may happen when we create the linear mapping (in __map_memblock), or at any point we try to apply fine-grained permissions to the kernel (e.g. fixup_executable, mark_rodata_ro, fixup_init). Changing the active page tables in this manner may result in multiple entries for the same address being allocated into TLBs, risking problems such as TLB conflict aborts or issues derived from the amalgamation of TLB entries. Generally, a break-before-make (BBM) approach is necessary to avoid conflicts, but we cannot do this for the kernel tables as it risks unmapping text or data being used to do so. Instead, we can create a new set of tables from scratch in the safety of the existing mappings, and subsequently migrate over to these using the new cpu_replace_ttbr1 helper, which avoids the two sets of tables being active simultaneously. To avoid issues when we later modify permissions of the page tables (e.g. in fixup_init), we must create the page tables at a granularity such that later modification does not result in splitting of tables. This patch applies this strategy, creating a new set of fine-grained page tables from scratch, and safely migrating to them. The existing fixmap and kasan shadow page tables are reused in the new fine-grained tables. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Cc: Andrey Ryabinin Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 068a17a5805dfbca4bbf03e664ca6b19709cc7a8) Signed-off-by: Alex Shi --- arch/arm64/include/asm/kasan.h | 3 + arch/arm64/mm/kasan_init.c | 15 ++++ arch/arm64/mm/mmu.c | 155 +++++++++++++++++++-------------- 3 files changed, 110 insertions(+), 63 deletions(-) diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index 2774fa384c47..de0d21211c34 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -7,6 +7,7 @@ #include #include +#include /* * KASAN_SHADOW_START: beginning of the kernel virtual addresses. @@ -28,10 +29,12 @@ #define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3))) void kasan_init(void); +void kasan_copy_shadow(pgd_t *pgdir); asmlinkage void kasan_early_init(void); #else static inline void kasan_init(void) { } +static inline void kasan_copy_shadow(pgd_t *pgdir) { } #endif #endif diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 263b59020500..cc569a38bc76 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -97,6 +97,21 @@ asmlinkage void __init kasan_early_init(void) kasan_map_early_shadow(); } +/* + * Copy the current shadow region into a new pgdir. + */ +void __init kasan_copy_shadow(pgd_t *pgdir) +{ + pgd_t *pgd, *pgd_new, *pgd_end; + + pgd = pgd_offset_k(KASAN_SHADOW_START); + pgd_end = pgd_offset_k(KASAN_SHADOW_END); + pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START); + do { + set_pgd(pgd_new, *pgd); + } while (pgd++, pgd_new++, pgd != pgd_end); +} + static void __init clear_pgds(unsigned long start, unsigned long end) { diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 570ba3e3d362..4874d2fea1c9 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -344,49 +345,42 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, late_pgtable_alloc); } -#ifdef CONFIG_DEBUG_RODATA -static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) { - /* - * Set up the executable regions using the existing section mappings - * for now. This will get more fine grained later once all memory - * is mapped - */ - unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE); - unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE); - if (end < kernel_x_start) { - create_mapping(start, __phys_to_virt(start), - end - start, PAGE_KERNEL); - } else if (start >= kernel_x_end) { - create_mapping(start, __phys_to_virt(start), - end - start, PAGE_KERNEL); - } else { - if (start < kernel_x_start) - create_mapping(start, __phys_to_virt(start), - kernel_x_start - start, - PAGE_KERNEL); - create_mapping(kernel_x_start, - __phys_to_virt(kernel_x_start), - kernel_x_end - kernel_x_start, - PAGE_KERNEL_EXEC); - if (kernel_x_end < end) - create_mapping(kernel_x_end, - __phys_to_virt(kernel_x_end), - end - kernel_x_end, - PAGE_KERNEL); + unsigned long kernel_start = __pa(_stext); + unsigned long kernel_end = __pa(_end); + + /* + * The kernel itself is mapped at page granularity. Map all other + * memory, making sure we don't overwrite the existing kernel mappings. + */ + + /* No overlap with the kernel. */ + if (end < kernel_start || start >= kernel_end) { + __create_pgd_mapping(pgd, start, __phys_to_virt(start), + end - start, PAGE_KERNEL, + early_pgtable_alloc); + return; } + /* + * This block overlaps the kernel mapping. Map the portion(s) which + * don't overlap. + */ + if (start < kernel_start) + __create_pgd_mapping(pgd, start, + __phys_to_virt(start), + kernel_start - start, PAGE_KERNEL, + early_pgtable_alloc); + if (kernel_end < end) + __create_pgd_mapping(pgd, kernel_end, + __phys_to_virt(kernel_end), + end - kernel_end, PAGE_KERNEL, + early_pgtable_alloc); } -#else -static void __init __map_memblock(phys_addr_t start, phys_addr_t end) -{ - create_mapping(start, __phys_to_virt(start), end - start, - PAGE_KERNEL_EXEC); -} -#endif -static void __init map_mem(void) +static void __init map_mem(pgd_t *pgd) { struct memblock_region *reg; @@ -398,33 +392,10 @@ static void __init map_mem(void) if (start >= end) break; - __map_memblock(start, end); + __map_memblock(pgd, start, end); } } -static void __init fixup_executable(void) -{ -#ifdef CONFIG_DEBUG_RODATA - /* now that we are actually fully mapped, make the start/end more fine grained */ - if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) { - unsigned long aligned_start = round_down(__pa(_stext), - SWAPPER_BLOCK_SIZE); - - create_mapping(aligned_start, __phys_to_virt(aligned_start), - __pa(_stext) - aligned_start, - PAGE_KERNEL); - } - - if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) { - unsigned long aligned_end = round_up(__pa(__init_end), - SWAPPER_BLOCK_SIZE); - create_mapping(__pa(__init_end), (unsigned long)__init_end, - aligned_end - __pa(__init_end), - PAGE_KERNEL); - } -#endif -} - #ifdef CONFIG_DEBUG_RODATA void mark_rodata_ro(void) { @@ -442,14 +413,72 @@ void fixup_init(void) PAGE_KERNEL); } +static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end, + pgprot_t prot) +{ + phys_addr_t pa_start = __pa(va_start); + unsigned long size = va_end - va_start; + + BUG_ON(!PAGE_ALIGNED(pa_start)); + BUG_ON(!PAGE_ALIGNED(size)); + + __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, + early_pgtable_alloc); +} + +/* + * Create fine-grained mappings for the kernel. + */ +static void __init map_kernel(pgd_t *pgd) +{ + + map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC); + map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC); + map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL); + + /* + * The fixmap falls in a separate pgd to the kernel, and doesn't live + * in the carveout for the swapper_pg_dir. We can simply re-use the + * existing dir for the fixmap. + */ + set_pgd(pgd_offset_raw(pgd, FIXADDR_START), *pgd_offset_k(FIXADDR_START)); + + kasan_copy_shadow(pgd); +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps and sets up the zero page. */ void __init paging_init(void) { - map_mem(); - fixup_executable(); + phys_addr_t pgd_phys = early_pgtable_alloc(); + pgd_t *pgd = pgd_set_fixmap(pgd_phys); + + map_kernel(pgd); + map_mem(pgd); + + /* + * We want to reuse the original swapper_pg_dir so we don't have to + * communicate the new address to non-coherent secondaries in + * secondary_entry, and so cpu_switch_mm can generate the address with + * adrp+add rather than a load from some global variable. + * + * To do this we need to go via a temporary pgd. + */ + cpu_replace_ttbr1(__va(pgd_phys)); + memcpy(swapper_pg_dir, pgd, PAGE_SIZE); + cpu_replace_ttbr1(swapper_pg_dir); + + pgd_clear_fixmap(); + memblock_free(pgd_phys, PAGE_SIZE); + + /* + * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd + * allocated with it. + */ + memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE, + SWAPPER_DIR_SIZE - PAGE_SIZE); bootmem_init(); } From f00cf2ba83ca2b1ade50b27dd60d6f4294ddeef7 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 26 Jan 2016 11:10:38 +0000 Subject: [PATCH 739/797] arm64: kernel: implement ACPI parking protocol The SBBR and ACPI specifications allow ACPI based systems that do not implement PSCI (eg systems with no EL3) to boot through the ACPI parking protocol specification[1]. This patch implements the ACPI parking protocol CPU operations, and adds code that eases parsing the parking protocol data structures to the ARM64 SMP initializion carried out at the same time as cpus enumeration. To wake-up the CPUs from the parked state, this patch implements a wakeup IPI for ARM64 (ie arch_send_wakeup_ipi_mask()) that mirrors the ARM one, so that a specific IPI is sent for wake-up purpose in order to distinguish it from other IPI sources. Given the current ACPI MADT parsing API, the patch implements a glue layer that helps passing MADT GICC data structure from SMP initialization code to the parking protocol implementation somewhat overriding the CPU operations interfaces. This to avoid creating a completely trasparent DT/ACPI CPU operations layer that would require creating opaque structure handling for CPUs data (DT represents CPU through DT nodes, ACPI through static MADT table entries), which seems overkill given that ACPI on ARM64 mandates only two booting protocols (PSCI and parking protocol), so there is no need for further protocol additions. Based on the original work by Mark Salter [1] https://acpica.org/sites/acpica/files/MP%20Startup%20for%20ARM%20platforms.docx Signed-off-by: Lorenzo Pieralisi Tested-by: Loc Ho Cc: Will Deacon Cc: Hanjun Guo Cc: Sudeep Holla Cc: Mark Rutland Cc: Mark Salter Cc: Al Stone [catalin.marinas@arm.com: Added WARN_ONCE(!acpi_parking_protocol_valid() on the IPI] Signed-off-by: Catalin Marinas (cherry picked from commit 5e89c55e4ed81d7abb1ce8828db35fa389dc0e90) Signed-off-by: Alex Shi --- arch/arm64/Kconfig | 9 ++ arch/arm64/include/asm/acpi.h | 19 ++- arch/arm64/include/asm/hardirq.h | 2 +- arch/arm64/include/asm/smp.h | 9 ++ arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/acpi_parking_protocol.c | 153 ++++++++++++++++++++++ arch/arm64/kernel/cpu_ops.c | 27 +++- arch/arm64/kernel/smp.c | 28 ++++ 8 files changed, 242 insertions(+), 6 deletions(-) create mode 100644 arch/arm64/kernel/acpi_parking_protocol.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ffa3c549a4ba..4a1b665d90dc 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -706,6 +706,15 @@ endmenu menu "Boot options" +config ARM64_ACPI_PARKING_PROTOCOL + bool "Enable support for the ARM64 ACPI parking protocol" + depends on ACPI + help + Enable support for the ARM64 ACPI parking protocol. If disabled + the kernel will not allow booting through the ARM64 ACPI parking + protocol even if the corresponding data is present in the ACPI + MADT table. + config CMDLINE string "Default kernel command string" default "" diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index caafd63b8092..aee323b13802 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -87,9 +87,26 @@ void __init acpi_init_cpus(void); static inline void acpi_init_cpus(void) { } #endif /* CONFIG_ACPI */ +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL +bool acpi_parking_protocol_valid(int cpu); +void __init +acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor); +#else +static inline bool acpi_parking_protocol_valid(int cpu) { return false; } +static inline void +acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor) +{} +#endif + static inline const char *acpi_get_enable_method(int cpu) { - return acpi_psci_present() ? "psci" : NULL; + if (acpi_psci_present()) + return "psci"; + + if (acpi_parking_protocol_valid(cpu)) + return "parking-protocol"; + + return NULL; } #ifdef CONFIG_ACPI_APEI diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index a57601f9d17c..8740297dac77 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -20,7 +20,7 @@ #include #include -#define NR_IPI 5 +#define NR_IPI 6 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index d9c3d6a6100a..2013a4dc5124 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -64,6 +64,15 @@ extern void secondary_entry(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL +extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask); +#else +static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask) +{ + BUILD_BUG(); +} +#endif + extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 474691f8b13a..c4e2f70c0aa0 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -41,6 +41,7 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o +arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c new file mode 100644 index 000000000000..4b1e5a7a98da --- /dev/null +++ b/arch/arm64/kernel/acpi_parking_protocol.c @@ -0,0 +1,153 @@ +/* + * ARM64 ACPI Parking Protocol implementation + * + * Authors: Lorenzo Pieralisi + * Mark Salter + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include +#include + +#include + +struct cpu_mailbox_entry { + phys_addr_t mailbox_addr; + u8 version; + u8 gic_cpu_id; +}; + +static struct cpu_mailbox_entry cpu_mailbox_entries[NR_CPUS]; + +void __init acpi_set_mailbox_entry(int cpu, + struct acpi_madt_generic_interrupt *p) +{ + struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; + + cpu_entry->mailbox_addr = p->parked_address; + cpu_entry->version = p->parking_version; + cpu_entry->gic_cpu_id = p->cpu_interface_number; +} + +bool acpi_parking_protocol_valid(int cpu) +{ + struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; + + return cpu_entry->mailbox_addr && cpu_entry->version; +} + +static int acpi_parking_protocol_cpu_init(unsigned int cpu) +{ + pr_debug("%s: ACPI parked addr=%llx\n", __func__, + cpu_mailbox_entries[cpu].mailbox_addr); + + return 0; +} + +static int acpi_parking_protocol_cpu_prepare(unsigned int cpu) +{ + return 0; +} + +struct parking_protocol_mailbox { + __le32 cpu_id; + __le32 reserved; + __le64 entry_point; +}; + +static int acpi_parking_protocol_cpu_boot(unsigned int cpu) +{ + struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; + struct parking_protocol_mailbox __iomem *mailbox; + __le32 cpu_id; + + /* + * Map mailbox memory with attribute device nGnRE (ie ioremap - + * this deviates from the parking protocol specifications since + * the mailboxes are required to be mapped nGnRnE; the attribute + * discrepancy is harmless insofar as the protocol specification + * is concerned). + * If the mailbox is mistakenly allocated in the linear mapping + * by FW ioremap will fail since the mapping will be prevented + * by the kernel (it clashes with the linear mapping attributes + * specifications). + */ + mailbox = ioremap(cpu_entry->mailbox_addr, sizeof(*mailbox)); + if (!mailbox) + return -EIO; + + cpu_id = readl_relaxed(&mailbox->cpu_id); + /* + * Check if firmware has set-up the mailbox entry properly + * before kickstarting the respective cpu. + */ + if (cpu_id != ~0U) { + iounmap(mailbox); + return -ENXIO; + } + + /* + * We write the entry point and cpu id as LE regardless of the + * native endianness of the kernel. Therefore, any boot-loaders + * that read this address need to convert this address to the + * Boot-Loader's endianness before jumping. + */ + writeq_relaxed(__pa(secondary_entry), &mailbox->entry_point); + writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id); + + arch_send_wakeup_ipi_mask(cpumask_of(cpu)); + + iounmap(mailbox); + + return 0; +} + +static void acpi_parking_protocol_cpu_postboot(void) +{ + int cpu = smp_processor_id(); + struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; + struct parking_protocol_mailbox __iomem *mailbox; + __le64 entry_point; + + /* + * Map mailbox memory with attribute device nGnRE (ie ioremap - + * this deviates from the parking protocol specifications since + * the mailboxes are required to be mapped nGnRnE; the attribute + * discrepancy is harmless insofar as the protocol specification + * is concerned). + * If the mailbox is mistakenly allocated in the linear mapping + * by FW ioremap will fail since the mapping will be prevented + * by the kernel (it clashes with the linear mapping attributes + * specifications). + */ + mailbox = ioremap(cpu_entry->mailbox_addr, sizeof(*mailbox)); + if (!mailbox) + return; + + entry_point = readl_relaxed(&mailbox->entry_point); + /* + * Check if firmware has cleared the entry_point as expected + * by the protocol specification. + */ + WARN_ON(entry_point); + + iounmap(mailbox); +} + +const struct cpu_operations acpi_parking_protocol_ops = { + .name = "parking-protocol", + .cpu_init = acpi_parking_protocol_cpu_init, + .cpu_prepare = acpi_parking_protocol_cpu_prepare, + .cpu_boot = acpi_parking_protocol_cpu_boot, + .cpu_postboot = acpi_parking_protocol_cpu_postboot +}; diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index b6bd7d447768..c7cfb8fe06f9 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -25,19 +25,30 @@ #include extern const struct cpu_operations smp_spin_table_ops; +extern const struct cpu_operations acpi_parking_protocol_ops; extern const struct cpu_operations cpu_psci_ops; const struct cpu_operations *cpu_ops[NR_CPUS]; -static const struct cpu_operations *supported_cpu_ops[] __initconst = { +static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { &smp_spin_table_ops, &cpu_psci_ops, NULL, }; +static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL + &acpi_parking_protocol_ops, +#endif + &cpu_psci_ops, + NULL, +}; + static const struct cpu_operations * __init cpu_get_ops(const char *name) { - const struct cpu_operations **ops = supported_cpu_ops; + const struct cpu_operations **ops; + + ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops; while (*ops) { if (!strcmp(name, (*ops)->name)) @@ -75,8 +86,16 @@ static const char *__init cpu_read_enable_method(int cpu) } } else { enable_method = acpi_get_enable_method(cpu); - if (!enable_method) - pr_err("Unsupported ACPI enable-method\n"); + if (!enable_method) { + /* + * In ACPI systems the boot CPU does not require + * checking the enable method since for some + * boot protocol (ie parking protocol) it need not + * be initialized. Don't warn spuriously. + */ + if (cpu != 0) + pr_err("Unsupported ACPI enable-method\n"); + } } return enable_method; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 68e7f79630d4..24cb4f800033 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -70,6 +70,7 @@ enum ipi_msg_type { IPI_CPU_STOP, IPI_TIMER, IPI_IRQ_WORK, + IPI_WAKEUP }; /* @@ -443,6 +444,17 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) /* map the logical cpu id to cpu MPIDR */ cpu_logical_map(cpu_count) = hwid; + /* + * Set-up the ACPI parking protocol cpu entries + * while initializing the cpu_logical_map to + * avoid parsing MADT entries multiple times for + * nothing (ie a valid cpu_logical_map entry should + * contain a valid parking protocol data set to + * initialize the cpu if the parking protocol is + * the only available enable method). + */ + acpi_set_mailbox_entry(cpu_count, processor); + cpu_count++; } @@ -625,6 +637,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { S(IPI_CPU_STOP, "CPU stop interrupts"), S(IPI_TIMER, "Timer broadcast interrupts"), S(IPI_IRQ_WORK, "IRQ work interrupts"), + S(IPI_WAKEUP, "CPU wake-up interrupts"), }; static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) @@ -668,6 +681,13 @@ void arch_send_call_function_single_ipi(int cpu) smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC); } +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL +void arch_send_wakeup_ipi_mask(const struct cpumask *mask) +{ + smp_cross_call(mask, IPI_WAKEUP); +} +#endif + #ifdef CONFIG_IRQ_WORK void arch_irq_work_raise(void) { @@ -745,6 +765,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs) break; #endif +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL + case IPI_WAKEUP: + WARN_ONCE(!acpi_parking_protocol_valid(cpu), + "CPU%u: Wake-up IPI outside the ACPI parking protocol\n", + cpu); + break; +#endif + default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; From a0e40450cf255994501d3f84081f95b1fb41623d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 27 Jan 2016 10:50:19 +0100 Subject: [PATCH 740/797] arm64: allow vmalloc regions to be set with set_memory_* The range of set_memory_* is currently restricted to the module address range because of difficulties in breaking down larger block sizes. vmalloc maps PAGE_SIZE pages so it is safe to use as well. Update the function ranges and add a comment explaining why the range is restricted the way it is. Suggested-by: Laura Abbott Acked-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon (cherry picked from commit 95f5c80050ad723163aa80dc8bffd48ef4afc6d5) Signed-off-by: Alex Shi --- arch/arm64/mm/pageattr.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index cf6240741134..0795c3a36d8f 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -44,6 +45,7 @@ static int change_memory_common(unsigned long addr, int numpages, unsigned long end = start + size; int ret; struct page_change_data data; + struct vm_struct *area; if (!PAGE_ALIGNED(addr)) { start &= PAGE_MASK; @@ -51,10 +53,23 @@ static int change_memory_common(unsigned long addr, int numpages, WARN_ON_ONCE(1); } - if (start < MODULES_VADDR || start >= MODULES_END) - return -EINVAL; - - if (end < MODULES_VADDR || end >= MODULES_END) + /* + * Kernel VA mappings are always live, and splitting live section + * mappings into page mappings may cause TLB conflicts. This means + * we have to ensure that changing the permission bits of the range + * we are operating on does not result in such splitting. + * + * Let's restrict ourselves to mappings created by vmalloc (or vmap). + * Those are guaranteed to consist entirely of page mappings, and + * splitting is never needed. + * + * So check whether the [addr, addr + size) interval is entirely + * covered by precisely one VM area that has the VM_ALLOC flag set. + */ + area = find_vm_area((void *)addr); + if (!area || + end > (unsigned long)area->addr + area->size || + !(area->flags & VM_ALLOC)) return -EINVAL; if (!numpages) From 41cb2829d020e4fdaeb5eb9286153f4c7dc8e7dd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Feb 2016 12:46:23 +0000 Subject: [PATCH 741/797] arm64: prefetch: don't provide spin_lock_prefetch with LSE The LSE atomics rely on us not dirtying data at L1 if we can avoid it, otherwise many of the potential scalability benefits are lost. This patch replaces spin_lock_prefetch with a nop when the LSE atomics are in use, so that users don't shoot themselves in the foot by causing needless coherence traffic at L1. Signed-off-by: Will Deacon Tested-by: Andrew Pinski Signed-off-by: Catalin Marinas (cherry picked from commit cd5e10bdf3795d22f10787bb1991c43798c885d5) Signed-off-by: Alex Shi --- arch/arm64/include/asm/processor.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 4acb7ca94fcd..31b76fce4477 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -29,6 +29,7 @@ #include +#include #include #include #include @@ -177,9 +178,11 @@ static inline void prefetchw(const void *ptr) } #define ARCH_HAS_SPINLOCK_PREFETCH -static inline void spin_lock_prefetch(const void *x) +static inline void spin_lock_prefetch(const void *ptr) { - prefetchw(x); + asm volatile(ARM64_LSE_ATOMIC_INSN( + "prfm pstl1strm, %a0", + "nop") : : "p" (ptr)); } #define HAVE_ARCH_PICK_MMAP_LAYOUT From 742e490adaa444e9657528ef38dde35f2916c793 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Feb 2016 12:46:24 +0000 Subject: [PATCH 742/797] arm64: prefetch: add alternative pattern for CPUs without a prefetcher Most CPUs have a hardware prefetcher which generally performs better without explicit prefetch instructions issued by software, however some CPUs (e.g. Cavium ThunderX) rely solely on explicit prefetch instructions. This patch adds an alternative pattern (ARM64_HAS_NO_HW_PREFETCH) to allow our library code to make use of explicit prefetch instructions during things like copy routines only when the CPU does not have the capability to perform the prefetching itself. Signed-off-by: Will Deacon Tested-by: Andrew Pinski Signed-off-by: Catalin Marinas (cherry picked from commit d5370f754875460662abe8561388e019d90dd0c4) Signed-off-by: Alex Shi --- arch/arm64/include/asm/cpufeature.h | 3 ++- arch/arm64/include/asm/cputype.h | 17 ++++++++++++++++- arch/arm64/kernel/cpu_errata.c | 18 +++--------------- arch/arm64/kernel/cpufeature.c | 17 +++++++++++++++++ 4 files changed, 38 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8f271b83f910..8d56bd8550dc 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -30,8 +30,9 @@ #define ARM64_HAS_LSE_ATOMICS 5 #define ARM64_WORKAROUND_CAVIUM_23154 6 #define ARM64_WORKAROUND_834220 7 +#define ARM64_HAS_NO_HW_PREFETCH 8 -#define ARM64_NCAPS 8 +#define ARM64_NCAPS 9 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 1a5949364ed0..7540284a17fe 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -57,11 +57,22 @@ #define MIDR_IMPLEMENTOR(midr) \ (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) -#define MIDR_CPU_PART(imp, partnum) \ +#define MIDR_CPU_MODEL(imp, partnum) \ (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ (0xf << MIDR_ARCHITECTURE_SHIFT) | \ ((partnum) << MIDR_PARTNUM_SHIFT)) +#define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \ + MIDR_ARCHITECTURE_MASK) + +#define MIDR_IS_CPU_MODEL_RANGE(midr, model, rv_min, rv_max) \ +({ \ + u32 _model = (midr) & MIDR_CPU_MODEL_MASK; \ + u32 rv = (midr) & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK); \ + \ + _model == (model) && rv >= (rv_min) && rv <= (rv_max); \ + }) + #define ARM_CPU_IMP_ARM 0x41 #define ARM_CPU_IMP_APM 0x50 #define ARM_CPU_IMP_CAVIUM 0x43 @@ -75,6 +86,10 @@ #define CAVIUM_CPU_PART_THUNDERX 0x0A1 +#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) +#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) +#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) + #ifndef __ASSEMBLY__ /* diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index feb6b4efa641..e6bc988e8dbf 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -21,24 +21,12 @@ #include #include -#define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) -#define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) -#define MIDR_THUNDERX MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) - -#define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \ - MIDR_ARCHITECTURE_MASK) - static bool __maybe_unused is_affected_midr_range(const struct arm64_cpu_capabilities *entry) { - u32 midr = read_cpuid_id(); - - if ((midr & CPU_MODEL_MASK) != entry->midr_model) - return false; - - midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK; - - return (midr >= entry->midr_range_min && midr <= entry->midr_range_max); + return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model, + entry->midr_range_min, + entry->midr_range_max); } #define MIDR_RANGE(model, min, max) \ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5c90aa490a2b..3615d7d7c9af 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -621,6 +621,18 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry) return has_sre; } +static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry) +{ + u32 midr = read_cpuid_id(); + u32 rv_min, rv_max; + + /* Cavium ThunderX pass 1.x and 2.x */ + rv_min = 0; + rv_max = (1 << MIDR_VARIANT_SHIFT) | MIDR_REVISION_MASK; + + return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max); +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -651,6 +663,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = 2, }, #endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ + { + .desc = "Software prefetching using PRFM", + .capability = ARM64_HAS_NO_HW_PREFETCH, + .matches = has_no_hw_prefetch, + }, {}, }; From 93c384820cf3c1db51073e746980866d2bce8af9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Feb 2016 12:46:25 +0000 Subject: [PATCH 743/797] arm64: lib: improve copy_page to deal with 128 bytes at a time We want to avoid lots of different copy_page implementations, settling for something that is "good enough" everywhere and hopefully easy to understand and maintain whilst we're at it. This patch reworks our copy_page implementation based on discussions with Cavium on the list and benchmarking on Cortex-A processors so that: - The loop is unrolled to copy 128 bytes per iteration - The reads are offset so that we read from the next 128-byte block in the same iteration that we store the previous block - Explicit prefetch instructions are removed for now, since they hurt performance on CPUs with hardware prefetching - The loop exit condition is calculated at the start of the loop Signed-off-by: Will Deacon Tested-by: Andrew Pinski Signed-off-by: Catalin Marinas (cherry picked from commit 223e23e8aa26b0bb62c597637e77295e14f6a62c) Signed-off-by: Alex Shi --- arch/arm64/lib/copy_page.S | 46 +++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index 512b9a7b980e..2534533ceb1d 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -27,20 +27,50 @@ * x1 - src */ ENTRY(copy_page) - /* Assume cache line size is 64 bytes. */ - prfm pldl1strm, [x1, #64] -1: ldp x2, x3, [x1] + ldp x2, x3, [x1] ldp x4, x5, [x1, #16] ldp x6, x7, [x1, #32] ldp x8, x9, [x1, #48] - add x1, x1, #64 - prfm pldl1strm, [x1, #64] + ldp x10, x11, [x1, #64] + ldp x12, x13, [x1, #80] + ldp x14, x15, [x1, #96] + ldp x16, x17, [x1, #112] + + mov x18, #(PAGE_SIZE - 128) + add x1, x1, #128 +1: + subs x18, x18, #128 + + stnp x2, x3, [x0] + ldp x2, x3, [x1] + stnp x4, x5, [x0, #16] + ldp x4, x5, [x1, #16] + stnp x6, x7, [x0, #32] + ldp x6, x7, [x1, #32] + stnp x8, x9, [x0, #48] + ldp x8, x9, [x1, #48] + stnp x10, x11, [x0, #64] + ldp x10, x11, [x1, #64] + stnp x12, x13, [x0, #80] + ldp x12, x13, [x1, #80] + stnp x14, x15, [x0, #96] + ldp x14, x15, [x1, #96] + stnp x16, x17, [x0, #112] + ldp x16, x17, [x1, #112] + + add x0, x0, #128 + add x1, x1, #128 + + b.gt 1b + stnp x2, x3, [x0] stnp x4, x5, [x0, #16] stnp x6, x7, [x0, #32] stnp x8, x9, [x0, #48] - add x0, x0, #64 - tst x1, #(PAGE_SIZE - 1) - b.ne 1b + stnp x10, x11, [x0, #64] + stnp x12, x13, [x0, #80] + stnp x14, x15, [x0, #96] + stnp x16, x17, [x0, #112] + ret ENDPROC(copy_page) From e46018fe4fd741b5f58b6c3cf8b94ed34603c325 Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Tue, 2 Feb 2016 12:46:26 +0000 Subject: [PATCH 744/797] arm64: lib: patch in prfm for copy_page if requested On ThunderX T88 pass 1 and pass 2, there is no hardware prefetching so we need to patch in explicit software prefetching instructions Prefetching improves this code by 60% over the original code and 2x over the code without prefetching for the affected hardware using the benchmark code at https://github.com/apinski-cavium/copy_page_benchmark Signed-off-by: Andrew Pinski Signed-off-by: Will Deacon Tested-by: Andrew Pinski Signed-off-by: Catalin Marinas (cherry picked from commit 60e0a09db24adc8809696307e5d97cc4ba7cb3e0) Signed-off-by: Alex Shi --- arch/arm64/lib/copy_page.S | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index 2534533ceb1d..4c1e700840b6 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -18,6 +18,8 @@ #include #include #include +#include +#include /* * Copy a page from src to dest (both are page aligned) @@ -27,6 +29,15 @@ * x1 - src */ ENTRY(copy_page) +alternative_if_not ARM64_HAS_NO_HW_PREFETCH + nop + nop +alternative_else + # Prefetch two cache lines ahead. + prfm pldl1strm, [x1, #128] + prfm pldl1strm, [x1, #256] +alternative_endif + ldp x2, x3, [x1] ldp x4, x5, [x1, #16] ldp x6, x7, [x1, #32] @@ -41,6 +52,12 @@ ENTRY(copy_page) 1: subs x18, x18, #128 +alternative_if_not ARM64_HAS_NO_HW_PREFETCH + nop +alternative_else + prfm pldl1strm, [x1, #384] +alternative_endif + stnp x2, x3, [x0] ldp x2, x3, [x1] stnp x4, x5, [x0, #16] From a97b93b11bd9f76b0cba55b8ff04a7489b087caf Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 10 Feb 2016 10:07:30 +0000 Subject: [PATCH 745/797] arm64: prefetch: add missing #include for spin_lock_prefetch As of 52e662326e1e ("arm64: prefetch: don't provide spin_lock_prefetch with LSE"), spin_lock_prefetch is patched at runtime when the LSE atomics are in use. This relies on the ARM64_LSE_ATOMIC_INSN macro to drive the alternatives framework, but that macro is only available via asm/lse.h, which isn't explicitly included in processor.h. Consequently, drivers can run into build failures such as: In file included from include/linux/prefetch.h:14:0, from drivers/net/ethernet/intel/i40e/i40e_txrx.c:27: arch/arm64/include/asm/processor.h: In function 'spin_lock_prefetch': arch/arm64/include/asm/processor.h:183:15: error: expected string literal before 'ARM64_LSE_ATOMIC_INSN' asm volatile(ARM64_LSE_ATOMIC_INSN( This patch add the missing include and gets things building again. Reported-by: kbuild test robot Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit afb83cc3f0e4f86ea0e1cc3db7a90f58f1abd4d5) Signed-off-by: Alex Shi --- arch/arm64/include/asm/processor.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 31b76fce4477..5bb1d763d17a 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include From 65e670213029f7df5dd20407eecbe691aa078930 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 5 Feb 2016 16:24:46 -0800 Subject: [PATCH 746/797] arm64: Drop alloc function from create_mapping create_mapping is only used in fixmap_remap_fdt. All the create_mapping calls need to happen on existing translation table pages without additional allocations. Rather than have an alloc function be called and fail, just set it to NULL and catch its use. Also change the name to create_mapping_noalloc to better capture what exactly is going on. Reviewed-by: Ard Biesheuvel Reviewed-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas (cherry picked from commit 132233a759580f5ce9b1bfaac9073e47d03c460d) Signed-off-by: Alex Shi --- arch/arm64/mm/mmu.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 4874d2fea1c9..3096240e6eb8 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -116,7 +116,9 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, pte_t *pte; if (pmd_none(*pmd) || pmd_sect(*pmd)) { - phys_addr_t pte_phys = pgtable_alloc(); + phys_addr_t pte_phys; + BUG_ON(!pgtable_alloc); + pte_phys = pgtable_alloc(); pte = pte_set_fixmap(pte_phys); if (pmd_sect(*pmd)) split_pmd(pmd, pte); @@ -158,7 +160,9 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, * Check for initial section mappings in the pgd/pud and remove them. */ if (pud_none(*pud) || pud_sect(*pud)) { - phys_addr_t pmd_phys = pgtable_alloc(); + phys_addr_t pmd_phys; + BUG_ON(!pgtable_alloc); + pmd_phys = pgtable_alloc(); pmd = pmd_set_fixmap(pmd_phys); if (pud_sect(*pud)) { /* @@ -223,7 +227,9 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, unsigned long next; if (pgd_none(*pgd)) { - phys_addr_t pud_phys = pgtable_alloc(); + phys_addr_t pud_phys; + BUG_ON(!pgtable_alloc); + pud_phys = pgtable_alloc(); __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE); } BUG_ON(pgd_bad(*pgd)); @@ -312,7 +318,12 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc); } -static void __init create_mapping(phys_addr_t phys, unsigned long virt, +/* + * This function can only be used to modify existing table entries, + * without allocating new levels of table. Note that this permits the + * creation of new section or page entries. + */ +static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { if (virt < VMALLOC_START) { @@ -321,7 +332,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, return; } __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, - early_pgtable_alloc); + NULL); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, @@ -678,7 +689,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) /* * Make sure that the FDT region can be mapped without the need to * allocate additional translation table pages, so that it is safe - * to call create_mapping() this early. + * to call create_mapping_noalloc() this early. * * On 64k pages, the FDT will be mapped using PTEs, so we need to * be in the same PMD as the rest of the fixmap. @@ -694,8 +705,8 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) dt_virt = (void *)dt_virt_base + offset; /* map the first chunk so we can read the size from the header */ - create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, - SWAPPER_BLOCK_SIZE, prot); + create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), + dt_virt_base, SWAPPER_BLOCK_SIZE, prot); if (fdt_check_header(dt_virt) != 0) return NULL; @@ -705,7 +716,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) return NULL; if (offset + size > SWAPPER_BLOCK_SIZE) - create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, + create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, round_up(offset + size, SWAPPER_BLOCK_SIZE), prot); memblock_reserve(dt_phys, size); From e4d0298cdff23a21e532291df7a4fb6e0a908be4 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 5 Feb 2016 16:24:47 -0800 Subject: [PATCH 747/797] arm64: Add support for ARCH_SUPPORTS_DEBUG_PAGEALLOC ARCH_SUPPORTS_DEBUG_PAGEALLOC provides a hook to map and unmap pages for debugging purposes. This requires memory be mapped with PAGE_SIZE mappings since breaking down larger mappings at runtime will lead to TLB conflicts. Check if debug_pagealloc is enabled at runtime and if so, map everyting with PAGE_SIZE pages. Implement the functions to actually map/unmap the pages at runtime. Reviewed-by: Ard Biesheuvel Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott [catalin.marinas@arm.com: static annotation block_mappings_allowed() and #ifdef] Signed-off-by: Catalin Marinas (cherry picked from commit 83863f25e4b8214e994ef8b5647aad614d74b45d) Signed-off-by: Alex Shi --- arch/arm64/Kconfig | 3 +++ arch/arm64/mm/mmu.c | 26 +++++++++++++++++++++-- arch/arm64/mm/pageattr.c | 46 +++++++++++++++++++++++++++++++--------- 3 files changed, 63 insertions(+), 12 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4a1b665d90dc..98992dee9a29 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -507,6 +507,9 @@ config HOTPLUG_CPU source kernel/Kconfig.preempt source kernel/Kconfig.hz +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y + config ARCH_HAS_HOLES_MEMORYMODEL def_bool y if SPARSEMEM diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 3096240e6eb8..d1fa678355c9 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -149,6 +149,26 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd) } while (pmd++, i++, i < PTRS_PER_PMD); } +#ifdef CONFIG_DEBUG_PAGEALLOC +static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void)) +{ + + /* + * If debug_page_alloc is enabled we must map the linear map + * using pages. However, other mappings created by + * create_mapping_noalloc must use sections in some cases. Allow + * sections to be used in those cases, where no pgtable_alloc + * function is provided. + */ + return !pgtable_alloc || !debug_pagealloc_enabled(); +} +#else +static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void)) +{ + return true; +} +#endif + static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void)) @@ -181,7 +201,8 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, do { next = pmd_addr_end(addr, end); /* try section mapping first */ - if (((addr | next | phys) & ~SECTION_MASK) == 0) { + if (((addr | next | phys) & ~SECTION_MASK) == 0 && + block_mappings_allowed(pgtable_alloc)) { pmd_t old_pmd =*pmd; set_pmd(pmd, __pmd(phys | pgprot_val(mk_sect_prot(prot)))); @@ -241,7 +262,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, /* * For 4K granule only, attempt to put down a 1GB block */ - if (use_1G_block(addr, next, phys)) { + if (use_1G_block(addr, next, phys) && + block_mappings_allowed(pgtable_alloc)) { pud_t old_pud = *pud; set_pud(pud, __pud(phys | pgprot_val(mk_sect_prot(prot)))); diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 0795c3a36d8f..ca6d268e3313 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -37,14 +37,31 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr, return 0; } +/* + * This function assumes that the range is mapped with PAGE_SIZE pages. + */ +static int __change_memory_common(unsigned long start, unsigned long size, + pgprot_t set_mask, pgprot_t clear_mask) +{ + struct page_change_data data; + int ret; + + data.set_mask = set_mask; + data.clear_mask = clear_mask; + + ret = apply_to_page_range(&init_mm, start, size, change_page_range, + &data); + + flush_tlb_kernel_range(start, start + size); + return ret; +} + static int change_memory_common(unsigned long addr, int numpages, pgprot_t set_mask, pgprot_t clear_mask) { unsigned long start = addr; unsigned long size = PAGE_SIZE*numpages; unsigned long end = start + size; - int ret; - struct page_change_data data; struct vm_struct *area; if (!PAGE_ALIGNED(addr)) { @@ -75,14 +92,7 @@ static int change_memory_common(unsigned long addr, int numpages, if (!numpages) return 0; - data.set_mask = set_mask; - data.clear_mask = clear_mask; - - ret = apply_to_page_range(&init_mm, start, size, change_page_range, - &data); - - flush_tlb_kernel_range(start, end); - return ret; + return __change_memory_common(start, size, set_mask, clear_mask); } int set_memory_ro(unsigned long addr, int numpages) @@ -114,3 +124,19 @@ int set_memory_x(unsigned long addr, int numpages) __pgprot(PTE_PXN)); } EXPORT_SYMBOL_GPL(set_memory_x); + +#ifdef CONFIG_DEBUG_PAGEALLOC +void __kernel_map_pages(struct page *page, int numpages, int enable) +{ + unsigned long addr = (unsigned long) page_address(page); + + if (enable) + __change_memory_common(addr, PAGE_SIZE * numpages, + __pgprot(PTE_VALID), + __pgprot(0)); + else + __change_memory_common(addr, PAGE_SIZE * numpages, + __pgprot(0), + __pgprot(PTE_VALID)); +} +#endif From 5ca7d16080e32bb34f8d2ead86adde49d1c4c652 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 5 Feb 2016 16:24:48 -0800 Subject: [PATCH 748/797] arm64: ptdump: Indicate whether memory should be faulting With CONFIG_DEBUG_PAGEALLOC, pages do not have the valid bit set when free in the buddy allocator. Add an indiciation to the page table dumping code that the valid bit is not set, 'F' for fault, to make this easier to understand. Reviewed-by: Ard Biesheuvel Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas (cherry picked from commit d7e9d59494a9a5d83274f5af2148b82ca22dff3f) Signed-off-by: Alex Shi --- arch/arm64/mm/dump.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 0adbebbc2803..0841b2bf0e6a 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -90,6 +90,11 @@ struct prot_bits { static const struct prot_bits pte_bits[] = { { + .mask = PTE_VALID, + .val = PTE_VALID, + .set = " ", + .clear = "F", + }, { .mask = PTE_USER, .val = PTE_USER, .set = "USR", From 1d1e6a82d643537e9b80cf58f887b2be616aa515 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Fri, 5 Feb 2016 15:50:18 -0800 Subject: [PATCH 749/797] arm64: ubsan: select ARCH_HAS_UBSAN_SANITIZE_ALL To enable UBSAN on arm64, ARCH_HAS_UBSAN_SANITIZE_ALL need to be selected. Basic kernel bootup test is passed on arm64 with CONFIG_UBSAN_SANITIZE_ALL enabled. Signed-off-by: Yang Shi Acked-by: Andrey Ryabinin Tested-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit f0b7f8a4b44657386273a67179dd901c81cd11a6) Signed-off-by: Alex Shi --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 98992dee9a29..1420102341d0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -13,6 +13,7 @@ config ARM64 select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS + select ARCH_HAS_UBSAN_SANITIZE_ALL select ARM_AMBA select ARM_ARCH_TIMER select ARM_GIC From 391a428880ad64f0ab9dcf48f4666ba2a6aa7e76 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 10 Feb 2016 13:52:22 -0800 Subject: [PATCH 750/797] arm64: vdso: Mark vDSO code as read-only Although the arm64 vDSO is cleanly separated by code/data with the code being read-only in userspace mappings, the code page is still writable from the kernel. There have been exploits (such as http://itszn.com/blog/?p=21) that take advantage of this on x86 to go from a bad kernel write to full root. Prevent this specific exploit on arm64 by putting the vDSO code page in read-only memory as well. Before the change: [ 3.138366] vdso: 2 pages (1 code @ ffffffc000a71000, 1 data @ ffffffc000a70000) ---[ Kernel Mapping ]--- 0xffffffc000000000-0xffffffc000082000 520K RW NX SHD AF UXN MEM/NORMAL 0xffffffc000082000-0xffffffc000200000 1528K ro x SHD AF UXN MEM/NORMAL 0xffffffc000200000-0xffffffc000800000 6M ro x SHD AF BLK UXN MEM/NORMAL 0xffffffc000800000-0xffffffc0009b6000 1752K ro x SHD AF UXN MEM/NORMAL 0xffffffc0009b6000-0xffffffc000c00000 2344K RW NX SHD AF UXN MEM/NORMAL 0xffffffc000c00000-0xffffffc008000000 116M RW NX SHD AF BLK UXN MEM/NORMAL 0xffffffc00c000000-0xffffffc07f000000 1840M RW NX SHD AF BLK UXN MEM/NORMAL 0xffffffc800000000-0xffffffc840000000 1G RW NX SHD AF BLK UXN MEM/NORMAL 0xffffffc840000000-0xffffffc87ae00000 942M RW NX SHD AF BLK UXN MEM/NORMAL 0xffffffc87ae00000-0xffffffc87ae70000 448K RW NX SHD AF UXN MEM/NORMAL 0xffffffc87af80000-0xffffffc87af8a000 40K RW NX SHD AF UXN MEM/NORMAL 0xffffffc87af8b000-0xffffffc87b000000 468K RW NX SHD AF UXN MEM/NORMAL 0xffffffc87b000000-0xffffffc87fe00000 78M RW NX SHD AF BLK UXN MEM/NORMAL 0xffffffc87fe00000-0xffffffc87ff50000 1344K RW NX SHD AF UXN MEM/NORMAL 0xffffffc87ff90000-0xffffffc87ffa0000 64K RW NX SHD AF UXN MEM/NORMAL 0xffffffc87fff0000-0xffffffc880000000 64K RW NX SHD AF UXN MEM/NORMAL After: [ 3.138368] vdso: 2 pages (1 code @ ffffffc0006de000, 1 data @ ffffffc000a74000) ---[ Kernel Mapping ]--- 0xffffffc000000000-0xffffffc000082000 520K RW NX SHD AF UXN MEM/NORMAL 0xffffffc000082000-0xffffffc000200000 1528K ro x SHD AF UXN MEM/NORMAL 0xffffffc000200000-0xffffffc000800000 6M ro x SHD AF BLK UXN MEM/NORMAL 0xffffffc000800000-0xffffffc0009b8000 1760K ro x SHD AF UXN MEM/NORMAL 0xffffffc0009b8000-0xffffffc000c00000 2336K RW NX SHD AF UXN MEM/NORMAL 0xffffffc000c00000-0xffffffc008000000 116M RW NX SHD AF BLK UXN MEM/NORMAL 0xffffffc00c000000-0xffffffc07f000000 1840M RW NX SHD AF BLK UXN MEM/NORMAL 0xffffffc800000000-0xffffffc840000000 1G RW NX SHD AF BLK UXN MEM/NORMAL 0xffffffc840000000-0xffffffc87ae00000 942M RW NX SHD AF BLK UXN MEM/NORMAL 0xffffffc87ae00000-0xffffffc87ae70000 448K RW NX SHD AF UXN MEM/NORMAL 0xffffffc87af80000-0xffffffc87af8a000 40K RW NX SHD AF UXN MEM/NORMAL 0xffffffc87af8b000-0xffffffc87b000000 468K RW NX SHD AF UXN MEM/NORMAL 0xffffffc87b000000-0xffffffc87fe00000 78M RW NX SHD AF BLK UXN MEM/NORMAL 0xffffffc87fe00000-0xffffffc87ff50000 1344K RW NX SHD AF UXN MEM/NORMAL 0xffffffc87ff90000-0xffffffc87ffa0000 64K RW NX SHD AF UXN MEM/NORMAL 0xffffffc87fff0000-0xffffffc880000000 64K RW NX SHD AF UXN MEM/NORMAL Inspired by https://lkml.org/lkml/2016/1/19/494 based on work by the PaX Team, Brad Spengler, and Kees Cook. Signed-off-by: David Brown Acked-by: Will Deacon Acked-by: Ard Biesheuvel [catalin.marinas@arm.com: removed superfluous __PAGE_ALIGNED_DATA] Signed-off-by: Catalin Marinas (cherry picked from commit 88d8a7994e564d209d4b2583496631c2357d386b) Signed-off-by: Alex Shi --- arch/arm64/kernel/vdso/vdso.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S index 60c1db54b41a..82379a70ef03 100644 --- a/arch/arm64/kernel/vdso/vdso.S +++ b/arch/arm64/kernel/vdso/vdso.S @@ -21,9 +21,8 @@ #include #include - __PAGE_ALIGNED_DATA - .globl vdso_start, vdso_end + .section .rodata .balign PAGE_SIZE vdso_start: .incbin "arch/arm64/kernel/vdso/vdso.so" From 866817f9f155879d1d28f0944958a542bb70475c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 15 Feb 2016 09:51:49 +0100 Subject: [PATCH 751/797] arm64: use local label prefixes for __reg_num symbols The __reg_num_xNN symbols that are used to implement the msr_s and mrs_s macros are recorded in the ELF metadata of each object file. This does not affect the size of the final binary, but it does clutter the output of tools like readelf, i.e., $ readelf -a vmlinux |grep -c __reg_num_x 50976 So let's use symbols with the .L prefix, these are strictly local, and don't end up in the object files. $ readelf -a vmlinux |grep -c __reg_num_x 0 Acked-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 7abc7d833c9eb16efc8a59239d3771a6e30be367) Signed-off-by: Alex Shi --- arch/arm64/include/asm/sysreg.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d48ab5b41f52..76907c94b11f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -194,32 +194,32 @@ #ifdef __ASSEMBLY__ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 - .equ __reg_num_x\num, \num + .equ .L__reg_num_x\num, \num .endr - .equ __reg_num_xzr, 31 + .equ .L__reg_num_xzr, 31 .macro mrs_s, rt, sreg - .inst 0xd5200000|(\sreg)|(__reg_num_\rt) + .inst 0xd5200000|(\sreg)|(.L__reg_num_\rt) .endm .macro msr_s, sreg, rt - .inst 0xd5000000|(\sreg)|(__reg_num_\rt) + .inst 0xd5000000|(\sreg)|(.L__reg_num_\rt) .endm #else asm( " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" -" .equ __reg_num_x\\num, \\num\n" +" .equ .L__reg_num_x\\num, \\num\n" " .endr\n" -" .equ __reg_num_xzr, 31\n" +" .equ .L__reg_num_xzr, 31\n" "\n" " .macro mrs_s, rt, sreg\n" -" .inst 0xd5200000|(\\sreg)|(__reg_num_\\rt)\n" +" .inst 0xd5200000|(\\sreg)|(.L__reg_num_\\rt)\n" " .endm\n" "\n" " .macro msr_s, sreg, rt\n" -" .inst 0xd5000000|(\\sreg)|(__reg_num_\\rt)\n" +" .inst 0xd5000000|(\\sreg)|(.L__reg_num_\\rt)\n" " .endm\n" ); From 0b3419007e096a706ac4894a2cf28cd97b6028e1 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 5 Feb 2016 14:58:46 +0000 Subject: [PATCH 752/797] arm64: cpufeature: Change read_cpuid() to use sysreg's mrs_s macro Older assemblers may not have support for newer feature registers. To get round this, sysreg.h provides a 'mrs_s' macro that takes a register encoding and generates the raw instruction. Change read_cpuid() to use mrs_s in all cases so that new registers don't have to be a special case. Including sysreg.h means we need to move the include and definition of read_cpuid() after the #ifndef __ASSEMBLY__ to avoid syntax errors in vmlinux.lds. Signed-off-by: James Morse Acked-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit 0f54b14e76f5302afe164dc911b049b5df836ff5) Signed-off-by: Alex Shi --- arch/arm64/include/asm/cpufeature.h | 2 +- arch/arm64/include/asm/cputype.h | 20 ++++++----- arch/arm64/kernel/cpufeature.c | 54 ++++++++++++++--------------- arch/arm64/kernel/cpuinfo.c | 50 +++++++++++++------------- arch/arm64/mm/context.c | 2 +- 5 files changed, 65 insertions(+), 63 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8d56bd8550dc..8131abfabb0a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -177,7 +177,7 @@ u64 read_system_reg(u32 id); static inline bool cpu_supports_mixed_endian_el0(void) { - return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); + return id_aa64mmfr0_mixed_endian_el0(read_cpuid(SYS_ID_AA64MMFR0_EL1)); } static inline bool system_supports_mixed_endian_el0(void) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 7540284a17fe..b3a83da152a7 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -32,12 +32,6 @@ #define MPIDR_AFFINITY_LEVEL(mpidr, level) \ ((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK) -#define read_cpuid(reg) ({ \ - u64 __val; \ - asm("mrs %0, " #reg : "=r" (__val)); \ - __val; \ -}) - #define MIDR_REVISION_MASK 0xf #define MIDR_REVISION(midr) ((midr) & MIDR_REVISION_MASK) #define MIDR_PARTNUM_SHIFT 4 @@ -92,6 +86,14 @@ #ifndef __ASSEMBLY__ +#include + +#define read_cpuid(reg) ({ \ + u64 __val; \ + asm("mrs_s %0, " __stringify(reg) : "=r" (__val)); \ + __val; \ +}) + /* * The CPU ID never changes at run time, so we might as well tell the * compiler that it's constant. Use this function to read the CPU ID @@ -99,12 +101,12 @@ */ static inline u32 __attribute_const__ read_cpuid_id(void) { - return read_cpuid(MIDR_EL1); + return read_cpuid(SYS_MIDR_EL1); } static inline u64 __attribute_const__ read_cpuid_mpidr(void) { - return read_cpuid(MPIDR_EL1); + return read_cpuid(SYS_MPIDR_EL1); } static inline unsigned int __attribute_const__ read_cpuid_implementor(void) @@ -119,7 +121,7 @@ static inline unsigned int __attribute_const__ read_cpuid_part_number(void) static inline u32 __attribute_const__ read_cpuid_cachetype(void) { - return read_cpuid(CTR_EL0); + return read_cpuid(SYS_CTR_EL0); } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 3615d7d7c9af..1ef10e784031 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -808,35 +808,35 @@ static inline void set_sys_caps_initialised(void) static u64 __raw_read_system_reg(u32 sys_id) { switch (sys_id) { - case SYS_ID_PFR0_EL1: return (u64)read_cpuid(ID_PFR0_EL1); - case SYS_ID_PFR1_EL1: return (u64)read_cpuid(ID_PFR1_EL1); - case SYS_ID_DFR0_EL1: return (u64)read_cpuid(ID_DFR0_EL1); - case SYS_ID_MMFR0_EL1: return (u64)read_cpuid(ID_MMFR0_EL1); - case SYS_ID_MMFR1_EL1: return (u64)read_cpuid(ID_MMFR1_EL1); - case SYS_ID_MMFR2_EL1: return (u64)read_cpuid(ID_MMFR2_EL1); - case SYS_ID_MMFR3_EL1: return (u64)read_cpuid(ID_MMFR3_EL1); - case SYS_ID_ISAR0_EL1: return (u64)read_cpuid(ID_ISAR0_EL1); - case SYS_ID_ISAR1_EL1: return (u64)read_cpuid(ID_ISAR1_EL1); - case SYS_ID_ISAR2_EL1: return (u64)read_cpuid(ID_ISAR2_EL1); - case SYS_ID_ISAR3_EL1: return (u64)read_cpuid(ID_ISAR3_EL1); - case SYS_ID_ISAR4_EL1: return (u64)read_cpuid(ID_ISAR4_EL1); - case SYS_ID_ISAR5_EL1: return (u64)read_cpuid(ID_ISAR4_EL1); - case SYS_MVFR0_EL1: return (u64)read_cpuid(MVFR0_EL1); - case SYS_MVFR1_EL1: return (u64)read_cpuid(MVFR1_EL1); - case SYS_MVFR2_EL1: return (u64)read_cpuid(MVFR2_EL1); + case SYS_ID_PFR0_EL1: return read_cpuid(SYS_ID_PFR0_EL1); + case SYS_ID_PFR1_EL1: return read_cpuid(SYS_ID_PFR1_EL1); + case SYS_ID_DFR0_EL1: return read_cpuid(SYS_ID_DFR0_EL1); + case SYS_ID_MMFR0_EL1: return read_cpuid(SYS_ID_MMFR0_EL1); + case SYS_ID_MMFR1_EL1: return read_cpuid(SYS_ID_MMFR1_EL1); + case SYS_ID_MMFR2_EL1: return read_cpuid(SYS_ID_MMFR2_EL1); + case SYS_ID_MMFR3_EL1: return read_cpuid(SYS_ID_MMFR3_EL1); + case SYS_ID_ISAR0_EL1: return read_cpuid(SYS_ID_ISAR0_EL1); + case SYS_ID_ISAR1_EL1: return read_cpuid(SYS_ID_ISAR1_EL1); + case SYS_ID_ISAR2_EL1: return read_cpuid(SYS_ID_ISAR2_EL1); + case SYS_ID_ISAR3_EL1: return read_cpuid(SYS_ID_ISAR3_EL1); + case SYS_ID_ISAR4_EL1: return read_cpuid(SYS_ID_ISAR4_EL1); + case SYS_ID_ISAR5_EL1: return read_cpuid(SYS_ID_ISAR4_EL1); + case SYS_MVFR0_EL1: return read_cpuid(SYS_MVFR0_EL1); + case SYS_MVFR1_EL1: return read_cpuid(SYS_MVFR1_EL1); + case SYS_MVFR2_EL1: return read_cpuid(SYS_MVFR2_EL1); - case SYS_ID_AA64PFR0_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64PFR1_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64DFR0_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64DFR1_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64MMFR0_EL1: return (u64)read_cpuid(ID_AA64MMFR0_EL1); - case SYS_ID_AA64MMFR1_EL1: return (u64)read_cpuid(ID_AA64MMFR1_EL1); - case SYS_ID_AA64ISAR0_EL1: return (u64)read_cpuid(ID_AA64ISAR0_EL1); - case SYS_ID_AA64ISAR1_EL1: return (u64)read_cpuid(ID_AA64ISAR1_EL1); + case SYS_ID_AA64PFR0_EL1: return read_cpuid(SYS_ID_AA64PFR0_EL1); + case SYS_ID_AA64PFR1_EL1: return read_cpuid(SYS_ID_AA64PFR0_EL1); + case SYS_ID_AA64DFR0_EL1: return read_cpuid(SYS_ID_AA64DFR0_EL1); + case SYS_ID_AA64DFR1_EL1: return read_cpuid(SYS_ID_AA64DFR0_EL1); + case SYS_ID_AA64MMFR0_EL1: return read_cpuid(SYS_ID_AA64MMFR0_EL1); + case SYS_ID_AA64MMFR1_EL1: return read_cpuid(SYS_ID_AA64MMFR1_EL1); + case SYS_ID_AA64ISAR0_EL1: return read_cpuid(SYS_ID_AA64ISAR0_EL1); + case SYS_ID_AA64ISAR1_EL1: return read_cpuid(SYS_ID_AA64ISAR1_EL1); - case SYS_CNTFRQ_EL0: return (u64)read_cpuid(CNTFRQ_EL0); - case SYS_CTR_EL0: return (u64)read_cpuid(CTR_EL0); - case SYS_DCZID_EL0: return (u64)read_cpuid(DCZID_EL0); + case SYS_CNTFRQ_EL0: return read_cpuid(SYS_CNTFRQ_EL0); + case SYS_CTR_EL0: return read_cpuid(SYS_CTR_EL0); + case SYS_DCZID_EL0: return read_cpuid(SYS_DCZID_EL0); default: BUG(); return 0; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 212ae6361d8b..76df22272804 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -201,35 +201,35 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) { info->reg_cntfrq = arch_timer_get_cntfrq(); info->reg_ctr = read_cpuid_cachetype(); - info->reg_dczid = read_cpuid(DCZID_EL0); + info->reg_dczid = read_cpuid(SYS_DCZID_EL0); info->reg_midr = read_cpuid_id(); - info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1); - info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1); - info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1); - info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1); - info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); - info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); - info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); - info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); + info->reg_id_aa64dfr0 = read_cpuid(SYS_ID_AA64DFR0_EL1); + info->reg_id_aa64dfr1 = read_cpuid(SYS_ID_AA64DFR1_EL1); + info->reg_id_aa64isar0 = read_cpuid(SYS_ID_AA64ISAR0_EL1); + info->reg_id_aa64isar1 = read_cpuid(SYS_ID_AA64ISAR1_EL1); + info->reg_id_aa64mmfr0 = read_cpuid(SYS_ID_AA64MMFR0_EL1); + info->reg_id_aa64mmfr1 = read_cpuid(SYS_ID_AA64MMFR1_EL1); + info->reg_id_aa64pfr0 = read_cpuid(SYS_ID_AA64PFR0_EL1); + info->reg_id_aa64pfr1 = read_cpuid(SYS_ID_AA64PFR1_EL1); - info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1); - info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); - info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); - info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); - info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); - info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); - info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); - info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); - info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); - info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); - info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); - info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); - info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); + info->reg_id_dfr0 = read_cpuid(SYS_ID_DFR0_EL1); + info->reg_id_isar0 = read_cpuid(SYS_ID_ISAR0_EL1); + info->reg_id_isar1 = read_cpuid(SYS_ID_ISAR1_EL1); + info->reg_id_isar2 = read_cpuid(SYS_ID_ISAR2_EL1); + info->reg_id_isar3 = read_cpuid(SYS_ID_ISAR3_EL1); + info->reg_id_isar4 = read_cpuid(SYS_ID_ISAR4_EL1); + info->reg_id_isar5 = read_cpuid(SYS_ID_ISAR5_EL1); + info->reg_id_mmfr0 = read_cpuid(SYS_ID_MMFR0_EL1); + info->reg_id_mmfr1 = read_cpuid(SYS_ID_MMFR1_EL1); + info->reg_id_mmfr2 = read_cpuid(SYS_ID_MMFR2_EL1); + info->reg_id_mmfr3 = read_cpuid(SYS_ID_MMFR3_EL1); + info->reg_id_pfr0 = read_cpuid(SYS_ID_PFR0_EL1); + info->reg_id_pfr1 = read_cpuid(SYS_ID_PFR1_EL1); - info->reg_mvfr0 = read_cpuid(MVFR0_EL1); - info->reg_mvfr1 = read_cpuid(MVFR1_EL1); - info->reg_mvfr2 = read_cpuid(MVFR2_EL1); + info->reg_mvfr0 = read_cpuid(SYS_MVFR0_EL1); + info->reg_mvfr1 = read_cpuid(SYS_MVFR1_EL1); + info->reg_mvfr2 = read_cpuid(SYS_MVFR2_EL1); cpuinfo_detect_icache_policy(info); diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index e87f53ff5f58..7275628ba59f 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -187,7 +187,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) static int asids_init(void) { - int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4); + int fld = cpuid_feature_extract_field(read_cpuid(SYS_ID_AA64MMFR0_EL1), 4); switch (fld) { default: From f6c5d808273093c8220da4163c548932d18e8e36 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 5 Feb 2016 14:58:47 +0000 Subject: [PATCH 753/797] arm64: add ARMv8.2 id_aa64mmfr2 boiler plate ARMv8.2 adds a new feature register id_aa64mmfr2. This patch adds the cpu feature boiler plate used by the actual features in later patches. Signed-off-by: James Morse Reviewed-by: Suzuki K Poulose Signed-off-by: Catalin Marinas (cherry picked from commit 406e308770a92bd33995b2e5b681e86358328bb0) Signed-off-by: Alex Shi --- arch/arm64/include/asm/cpu.h | 1 + arch/arm64/include/asm/sysreg.h | 4 ++++ arch/arm64/kernel/cpufeature.c | 10 ++++++++++ arch/arm64/kernel/cpuinfo.c | 1 + 4 files changed, 16 insertions(+) diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index b5e9cee4b5f8..13a6103130cd 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -36,6 +36,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64isar1; u64 reg_id_aa64mmfr0; u64 reg_id_aa64mmfr1; + u64 reg_id_aa64mmfr2; u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 76907c94b11f..4bc8655529df 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -70,6 +70,7 @@ #define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) #define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) +#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) #define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) @@ -135,6 +136,9 @@ #define ID_AA64MMFR1_VMIDBITS_SHIFT 4 #define ID_AA64MMFR1_HADBS_SHIFT 0 +/* id_aa64mmfr2 */ +#define ID_AA64MMFR2_UAO_SHIFT 4 + /* id_aa64dfr0 */ #define ID_AA64DFR0_CTX_CMPS_SHIFT 28 #define ID_AA64DFR0_WRPS_SHIFT 20 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 1ef10e784031..42918c797e8e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -123,6 +123,11 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { ARM64_FTR_END, }; +static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static struct arm64_ftr_bits ftr_ctr[] = { U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0), @@ -284,6 +289,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = { /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1), + ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), /* Op1 = 3, CRn = 0, CRm = 0 */ ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr), @@ -408,6 +414,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1); init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); + init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); @@ -517,6 +524,8 @@ void update_cpu_features(int cpu, info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0); taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu, info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1); + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu, + info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2); /* * EL3 is not our concern. @@ -831,6 +840,7 @@ static u64 __raw_read_system_reg(u32 sys_id) case SYS_ID_AA64DFR1_EL1: return read_cpuid(SYS_ID_AA64DFR0_EL1); case SYS_ID_AA64MMFR0_EL1: return read_cpuid(SYS_ID_AA64MMFR0_EL1); case SYS_ID_AA64MMFR1_EL1: return read_cpuid(SYS_ID_AA64MMFR1_EL1); + case SYS_ID_AA64MMFR2_EL1: return read_cpuid(SYS_ID_AA64MMFR2_EL1); case SYS_ID_AA64ISAR0_EL1: return read_cpuid(SYS_ID_AA64ISAR0_EL1); case SYS_ID_AA64ISAR1_EL1: return read_cpuid(SYS_ID_AA64ISAR1_EL1); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 76df22272804..966fbd52550b 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -210,6 +210,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64isar1 = read_cpuid(SYS_ID_AA64ISAR1_EL1); info->reg_id_aa64mmfr0 = read_cpuid(SYS_ID_AA64MMFR0_EL1); info->reg_id_aa64mmfr1 = read_cpuid(SYS_ID_AA64MMFR1_EL1); + info->reg_id_aa64mmfr2 = read_cpuid(SYS_ID_AA64MMFR2_EL1); info->reg_id_aa64pfr0 = read_cpuid(SYS_ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(SYS_ID_AA64PFR1_EL1); From 13e05550e107e46ef982e5c4347e4986aeeee7ec Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 5 Feb 2016 14:58:48 +0000 Subject: [PATCH 754/797] arm64: kernel: Add support for User Access Override 'User Access Override' is a new ARMv8.2 feature which allows the unprivileged load and store instructions to be overridden to behave in the normal way. This patch converts {get,put}_user() and friends to use ldtr*/sttr* instructions - so that they can only access EL0 memory, then enables UAO when fs==KERNEL_DS so that these functions can access kernel memory. This allows user space's read/write permissions to be checked against the page tables, instead of testing addr [catalin.marinas@arm.com: move uao_thread_switch() above dsb()] Signed-off-by: Catalin Marinas (cherry picked from commit 57f4959bad0a154aeca125b7d38d1d9471a12422) Signed-off-by: Alex Shi --- arch/arm64/Kconfig | 21 ++++++++ arch/arm64/include/asm/alternative.h | 72 ++++++++++++++++++++++++++++ arch/arm64/include/asm/cpufeature.h | 3 +- arch/arm64/include/asm/processor.h | 1 + arch/arm64/include/asm/sysreg.h | 3 ++ arch/arm64/include/asm/thread_info.h | 6 +++ arch/arm64/include/asm/uaccess.h | 44 ++++++++++++----- arch/arm64/include/uapi/asm/ptrace.h | 1 + arch/arm64/kernel/cpufeature.c | 11 +++++ arch/arm64/kernel/process.c | 19 ++++++++ arch/arm64/lib/clear_user.S | 8 ++-- arch/arm64/lib/copy_from_user.S | 8 ++-- arch/arm64/lib/copy_in_user.S | 16 +++---- arch/arm64/lib/copy_to_user.S | 8 ++-- arch/arm64/mm/fault.c | 31 +++++++++--- 15 files changed, 213 insertions(+), 39 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1420102341d0..4df85b5a2045 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -706,6 +706,27 @@ config ARM64_LSE_ATOMICS endmenu +config ARM64_UAO + bool "Enable support for User Access Override (UAO)" + default y + help + User Access Override (UAO; part of the ARMv8.2 Extensions) + causes the 'unprivileged' variant of the load/store instructions to + be overriden to be privileged. + + This option changes get_user() and friends to use the 'unprivileged' + variant of the load/store instructions. This ensures that user-space + really did have access to the supplied memory. When addr_limit is + set to kernel memory the UAO bit will be set, allowing privileged + access to kernel memory. + + Choosing this option will cause copy_to_user() et al to use user-space + memory permissions. + + The feature is detected at runtime, the kernel will use the + regular load/store instructions if the cpu does not implement the + feature. + endmenu menu "Boot options" diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index e4962f04201e..a9fc24ec1aa9 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -1,6 +1,8 @@ #ifndef __ASM_ALTERNATIVE_H #define __ASM_ALTERNATIVE_H +#include + #ifndef __ASSEMBLY__ #include @@ -63,6 +65,8 @@ void apply_alternatives(void *start, size_t length); #else +#include + .macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len .word \orig_offset - . .word \alt_offset - . @@ -136,6 +140,74 @@ void apply_alternatives(void *start, size_t length); alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) +/* + * Generate the assembly for UAO alternatives with exception table entries. + * This is complicated as there is no post-increment or pair versions of the + * unprivileged instructions, and USER() only works for single instructions. + */ +#ifdef CONFIG_ARM64_UAO + .macro uao_ldp l, reg1, reg2, addr, post_inc + alternative_if_not ARM64_HAS_UAO +8888: ldp \reg1, \reg2, [\addr], \post_inc; +8889: nop; + nop; + alternative_else + ldtr \reg1, [\addr]; + ldtr \reg2, [\addr, #8]; + add \addr, \addr, \post_inc; + alternative_endif + + .section __ex_table,"a"; + .align 3; + .quad 8888b,\l; + .quad 8889b,\l; + .previous; + .endm + + .macro uao_stp l, reg1, reg2, addr, post_inc + alternative_if_not ARM64_HAS_UAO +8888: stp \reg1, \reg2, [\addr], \post_inc; +8889: nop; + nop; + alternative_else + sttr \reg1, [\addr]; + sttr \reg2, [\addr, #8]; + add \addr, \addr, \post_inc; + alternative_endif + + .section __ex_table,"a"; + .align 3; + .quad 8888b,\l; + .quad 8889b,\l; + .previous + .endm + + .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc + alternative_if_not ARM64_HAS_UAO +8888: \inst \reg, [\addr], \post_inc; + nop; + alternative_else + \alt_inst \reg, [\addr]; + add \addr, \addr, \post_inc; + alternative_endif + + .section __ex_table,"a"; + .align 3; + .quad 8888b,\l; + .previous + .endm +#else + .macro uao_ldp l, reg1, reg2, addr, post_inc + USER(\l, ldp \reg1, \reg2, [\addr], \post_inc) + .endm + .macro uao_stp l, reg1, reg2, addr, post_inc + USER(\l, stp \reg1, \reg2, [\addr], \post_inc) + .endm + .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc + USER(\l, \inst \reg, [\addr], \post_inc) + .endm +#endif + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8131abfabb0a..a5df7cde616b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -31,8 +31,9 @@ #define ARM64_WORKAROUND_CAVIUM_23154 6 #define ARM64_WORKAROUND_834220 7 #define ARM64_HAS_NO_HW_PREFETCH 8 +#define ARM64_HAS_UAO 9 -#define ARM64_NCAPS 9 +#define ARM64_NCAPS 10 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 5bb1d763d17a..cef1cf398356 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -191,5 +191,6 @@ static inline void spin_lock_prefetch(const void *ptr) #endif void cpu_enable_pan(void *__unused); +void cpu_enable_uao(void *__unused); #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 4bc8655529df..b9fd8ec79033 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -77,9 +77,12 @@ #define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) #define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) +#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\ (!!x)<<8 | 0x1f) +#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\ + (!!x)<<8 | 0x1f) /* SCTLR_EL1 */ #define SCTLR_EL1_CP15BEN (0x1 << 5) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index abd64bd1f6d9..eba8db6838af 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -85,6 +85,12 @@ static inline struct thread_info *current_thread_info(void) return (struct thread_info *)sp_el0; } +/* Access struct thread_info of another thread */ +static inline struct thread_info *get_thread_info(unsigned long thread_stack) +{ + return (struct thread_info *)(thread_stack & ~(THREAD_SIZE - 1)); +} + #define thread_saved_pc(tsk) \ ((unsigned long)(tsk->thread.cpu_context.pc)) #define thread_saved_sp(tsk) \ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index b2ede967fe7d..f973bdce8410 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -64,6 +64,16 @@ extern int fixup_exception(struct pt_regs *regs); static inline void set_fs(mm_segment_t fs) { current_thread_info()->addr_limit = fs; + + /* + * Enable/disable UAO so that copy_to_user() etc can access + * kernel memory with the unprivileged instructions. + */ + if (IS_ENABLED(CONFIG_ARM64_UAO) && fs == KERNEL_DS) + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO)); + else + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO, + CONFIG_ARM64_UAO)); } #define segment_eq(a, b) ((a) == (b)) @@ -113,9 +123,10 @@ static inline void set_fs(mm_segment_t fs) * The "__xxx_error" versions set the third argument to -EFAULT if an error * occurs, and leave it unchanged on success. */ -#define __get_user_asm(instr, reg, x, addr, err) \ +#define __get_user_asm(instr, alt_instr, reg, x, addr, err, feature) \ asm volatile( \ - "1: " instr " " reg "1, [%2]\n" \ + "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \ + alt_instr " " reg "1, [%2]\n", feature) \ "2:\n" \ " .section .fixup, \"ax\"\n" \ " .align 2\n" \ @@ -138,16 +149,20 @@ do { \ CONFIG_ARM64_PAN)); \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_user_asm("ldrb", "%w", __gu_val, (ptr), (err)); \ + __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 2: \ - __get_user_asm("ldrh", "%w", __gu_val, (ptr), (err)); \ + __get_user_asm("ldrh", "ldtrh", "%w", __gu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 4: \ - __get_user_asm("ldr", "%w", __gu_val, (ptr), (err)); \ + __get_user_asm("ldr", "ldtr", "%w", __gu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 8: \ - __get_user_asm("ldr", "%", __gu_val, (ptr), (err)); \ + __get_user_asm("ldr", "ldtr", "%", __gu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ default: \ BUILD_BUG(); \ @@ -181,9 +196,10 @@ do { \ ((x) = 0, -EFAULT); \ }) -#define __put_user_asm(instr, reg, x, addr, err) \ +#define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature) \ asm volatile( \ - "1: " instr " " reg "1, [%2]\n" \ + "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \ + alt_instr " " reg "1, [%2]\n", feature) \ "2:\n" \ " .section .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -205,16 +221,20 @@ do { \ CONFIG_ARM64_PAN)); \ switch (sizeof(*(ptr))) { \ case 1: \ - __put_user_asm("strb", "%w", __pu_val, (ptr), (err)); \ + __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 2: \ - __put_user_asm("strh", "%w", __pu_val, (ptr), (err)); \ + __put_user_asm("strh", "sttrh", "%w", __pu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 4: \ - __put_user_asm("str", "%w", __pu_val, (ptr), (err)); \ + __put_user_asm("str", "sttr", "%w", __pu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 8: \ - __put_user_asm("str", "%", __pu_val, (ptr), (err)); \ + __put_user_asm("str", "sttr", "%", __pu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ default: \ BUILD_BUG(); \ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 208db3df135a..b5c3933ed441 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -45,6 +45,7 @@ #define PSR_A_BIT 0x00000100 #define PSR_D_BIT 0x00000200 #define PSR_PAN_BIT 0x00400000 +#define PSR_UAO_BIT 0x00800000 #define PSR_Q_BIT 0x08000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 42918c797e8e..ae22edf9d3c9 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -677,6 +677,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_NO_HW_PREFETCH, .matches = has_no_hw_prefetch, }, +#ifdef CONFIG_ARM64_UAO + { + .desc = "User Access Override", + .capability = ARM64_HAS_UAO, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .field_pos = ID_AA64MMFR2_UAO_SHIFT, + .min_field_value = 1, + .enable = cpu_enable_uao, + }, +#endif /* CONFIG_ARM64_UAO */ {}, }; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 88d742ba19d5..c1ca4ea065d4 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -46,6 +46,7 @@ #include #include +#include #include #include #include @@ -280,6 +281,9 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, } else { memset(childregs, 0, sizeof(struct pt_regs)); childregs->pstate = PSR_MODE_EL1h; + if (IS_ENABLED(CONFIG_ARM64_UAO) && + cpus_have_cap(ARM64_HAS_UAO)) + childregs->pstate |= PSR_UAO_BIT; p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; } @@ -308,6 +312,20 @@ static void tls_thread_switch(struct task_struct *next) : : "r" (tpidr), "r" (tpidrro)); } +/* Restore the UAO state depending on next's addr_limit */ +static void uao_thread_switch(struct task_struct *next) +{ + unsigned long next_sp = next->thread.cpu_context.sp; + + if (IS_ENABLED(CONFIG_ARM64_UAO) && + get_thread_info(next_sp)->addr_limit == KERNEL_DS) + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO, + CONFIG_ARM64_UAO)); + else + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO, + CONFIG_ARM64_UAO)); +} + /* * Thread switching. */ @@ -320,6 +338,7 @@ struct task_struct *__switch_to(struct task_struct *prev, tls_thread_switch(next); hw_breakpoint_thread_switch(next); contextidr_thread_switch(next); + uao_thread_switch(next); /* * Complete any pending TLB or cache maintenance on this CPU in case diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index a9723c71c52b..3f950b677c07 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -39,20 +39,20 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ subs x1, x1, #8 b.mi 2f 1: -USER(9f, str xzr, [x0], #8 ) +uao_user_alternative 9f, str, sttr, xzr, x0, 8 subs x1, x1, #8 b.pl 1b 2: adds x1, x1, #4 b.mi 3f -USER(9f, str wzr, [x0], #4 ) +uao_user_alternative 9f, str, sttr, wzr, x0, 4 sub x1, x1, #4 3: adds x1, x1, #2 b.mi 4f -USER(9f, strh wzr, [x0], #2 ) +uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 sub x1, x1, #2 4: adds x1, x1, #1 b.mi 5f -USER(9f, strb wzr, [x0] ) +uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 4699cd74f87e..1d982d64f1a7 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -34,7 +34,7 @@ */ .macro ldrb1 ptr, regB, val - USER(9998f, ldrb \ptr, [\regB], \val) + uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val .endm .macro strb1 ptr, regB, val @@ -42,7 +42,7 @@ .endm .macro ldrh1 ptr, regB, val - USER(9998f, ldrh \ptr, [\regB], \val) + uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val .endm .macro strh1 ptr, regB, val @@ -50,7 +50,7 @@ .endm .macro ldr1 ptr, regB, val - USER(9998f, ldr \ptr, [\regB], \val) + uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val .endm .macro str1 ptr, regB, val @@ -58,7 +58,7 @@ .endm .macro ldp1 ptr, regB, regC, val - USER(9998f, ldp \ptr, \regB, [\regC], \val) + uao_ldp 9998f, \ptr, \regB, \regC, \val .endm .macro stp1 ptr, regB, regC, val diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 81c8fc93c100..feaad1520dc1 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -35,35 +35,35 @@ * x0 - bytes not copied */ .macro ldrb1 ptr, regB, val - USER(9998f, ldrb \ptr, [\regB], \val) + uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val .endm .macro strb1 ptr, regB, val - USER(9998f, strb \ptr, [\regB], \val) + uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val .endm .macro ldrh1 ptr, regB, val - USER(9998f, ldrh \ptr, [\regB], \val) + uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val .endm .macro strh1 ptr, regB, val - USER(9998f, strh \ptr, [\regB], \val) + uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val .endm .macro ldr1 ptr, regB, val - USER(9998f, ldr \ptr, [\regB], \val) + uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val .endm .macro str1 ptr, regB, val - USER(9998f, str \ptr, [\regB], \val) + uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val .endm .macro ldp1 ptr, regB, regC, val - USER(9998f, ldp \ptr, \regB, [\regC], \val) + uao_ldp 9998f, \ptr, \regB, \regC, \val .endm .macro stp1 ptr, regB, regC, val - USER(9998f, stp \ptr, \regB, [\regC], \val) + uao_stp 9998f, \ptr, \regB, \regC, \val .endm end .req x5 diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 7512bbbc07ac..2dae2cd2c481 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -37,7 +37,7 @@ .endm .macro strb1 ptr, regB, val - USER(9998f, strb \ptr, [\regB], \val) + uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val .endm .macro ldrh1 ptr, regB, val @@ -45,7 +45,7 @@ .endm .macro strh1 ptr, regB, val - USER(9998f, strh \ptr, [\regB], \val) + uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val .endm .macro ldr1 ptr, regB, val @@ -53,7 +53,7 @@ .endm .macro str1 ptr, regB, val - USER(9998f, str \ptr, [\regB], \val) + uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val .endm .macro ldp1 ptr, regB, regC, val @@ -61,7 +61,7 @@ .endm .macro stp1 ptr, regB, regC, val - USER(9998f, stp \ptr, \regB, [\regC], \val) + uao_stp 9998f, \ptr, \regB, \regC, \val .endm end .req x5 diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 92ddac1e8ca2..820d47353cf0 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -192,6 +192,14 @@ static int __do_page_fault(struct mm_struct *mm, unsigned long addr, return fault; } +static inline int permission_fault(unsigned int esr) +{ + unsigned int ec = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT; + unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; + + return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM); +} + static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -225,12 +233,10 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - /* - * PAN bit set implies the fault happened in kernel space, but not - * in the arch's user access functions. - */ - if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT)) - goto no_context; + if (permission_fault(esr) && (addr < USER_DS)) { + if (!search_exception_tables(regs->pc)) + panic("Accessing user space memory outside uaccess.h routines"); + } /* * As per x86, we may deadlock here. However, since the kernel only @@ -561,3 +567,16 @@ void cpu_enable_pan(void *__unused) config_sctlr_el1(SCTLR_EL1_SPAN, 0); } #endif /* CONFIG_ARM64_PAN */ + +#ifdef CONFIG_ARM64_UAO +/* + * Kernel threads have fs=KERNEL_DS by default, and don't need to call + * set_fs(), devtmpfs in particular relies on this behaviour. + * We need to enable the feature at runtime (instead of adding it to + * PSR_MODE_EL1h) as the feature may not be implemented by the cpu. + */ +void cpu_enable_uao(void *__unused) +{ + asm(SET_PSTATE_UAO(1)); +} +#endif /* CONFIG_ARM64_UAO */ From 31b28ec1f1051bf82515fe38d0ae1884f40783cf Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 5 Feb 2016 14:58:49 +0000 Subject: [PATCH 755/797] arm64: cpufeature: Test 'matches' pointer to find the end of the list CPU feature code uses the desc field as a test to find the end of the list, this means every entry must have a description. This generates noise for entries in the list that aren't really features, but combinations of them. e.g. > CPU features: detected feature: Privileged Access Never > CPU features: detected feature: PAN and not UAO These combination features are needed for corner cases with alternatives, where cpu features interact. Change all walkers of the arm64_features[] and arm64_hwcaps[] lists to test 'matches' not 'desc', and only print 'desc' if it is non-NULL. Signed-off-by: James Morse Reviewed-by : Suzuki K Poulose Signed-off-by: Catalin Marinas (cherry picked from commit 644c2ae198412c956700e55a2acf80b2541f6aa5) Signed-off-by: Alex Shi --- arch/arm64/kernel/cpufeature.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index ae22edf9d3c9..9cc8186cd14b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -771,7 +771,7 @@ static void __init setup_cpu_hwcaps(void) int i; const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps; - for (i = 0; hwcaps[i].desc; i++) + for (i = 0; hwcaps[i].matches; i++) if (hwcaps[i].matches(&hwcaps[i])) cap_set_hwcap(&hwcaps[i]); } @@ -781,11 +781,11 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, { int i; - for (i = 0; caps[i].desc; i++) { + for (i = 0; caps[i].matches; i++) { if (!caps[i].matches(&caps[i])) continue; - if (!cpus_have_cap(caps[i].capability)) + if (!cpus_have_cap(caps[i].capability) && caps[i].desc) pr_info("%s %s\n", info, caps[i].desc); cpus_set_cap(caps[i].capability); } @@ -800,7 +800,7 @@ enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { int i; - for (i = 0; caps[i].desc; i++) + for (i = 0; caps[i].matches; i++) if (caps[i].enable && cpus_have_cap(caps[i].capability)) on_each_cpu(caps[i].enable, NULL, true); } @@ -907,7 +907,7 @@ void verify_local_cpu_capabilities(void) return; caps = arm64_features; - for (i = 0; caps[i].desc; i++) { + for (i = 0; caps[i].matches; i++) { if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg) continue; /* @@ -920,7 +920,7 @@ void verify_local_cpu_capabilities(void) caps[i].enable(NULL); } - for (i = 0, caps = arm64_hwcaps; caps[i].desc; i++) { + for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) { if (!cpus_have_hwcap(&caps[i])) continue; if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) From cdfec5aaf4a886521b7f54dfe2db61735558d546 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 5 Feb 2016 14:58:50 +0000 Subject: [PATCH 756/797] arm64: kernel: Don't toggle PAN on systems with UAO If a CPU supports both Privileged Access Never (PAN) and User Access Override (UAO), we don't need to disable/re-enable PAN round all copy_to_user() like calls. UAO alternatives cause these calls to use the 'unprivileged' load/store instructions, which are overridden to be the privileged kind when fs==KERNEL_DS. This patch changes the copy_to_user() calls to have their PAN toggling depend on a new composite 'feature' ARM64_ALT_PAN_NOT_UAO. If both features are detected, PAN will be enabled, but the copy_to_user() alternatives will not be applied. This means PAN will be enabled all the time for these functions. If only PAN is detected, the toggling will be enabled as normal. This will save the time taken to disable/re-enable PAN, and allow us to catch copy_to_user() accesses that occur with fs==KERNEL_DS. Futex and swp-emulation code continue to hang their PAN toggling code on ARM64_HAS_PAN. Signed-off-by: James Morse Signed-off-by: Catalin Marinas (cherry picked from commit 705441960033e66b63524521f153fbb28c99ddbd) Signed-off-by: Alex Shi --- arch/arm64/include/asm/cpufeature.h | 3 ++- arch/arm64/include/asm/uaccess.h | 8 ++++---- arch/arm64/kernel/cpufeature.c | 16 ++++++++++++++++ arch/arm64/lib/clear_user.S | 4 ++-- arch/arm64/lib/copy_from_user.S | 4 ++-- arch/arm64/lib/copy_in_user.S | 4 ++-- arch/arm64/lib/copy_to_user.S | 4 ++-- arch/arm64/mm/fault.c | 3 +++ 8 files changed, 33 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index a5df7cde616b..37a53fc6b384 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -32,8 +32,9 @@ #define ARM64_WORKAROUND_834220 7 #define ARM64_HAS_NO_HW_PREFETCH 8 #define ARM64_HAS_UAO 9 +#define ARM64_ALT_PAN_NOT_UAO 10 -#define ARM64_NCAPS 10 +#define ARM64_NCAPS 11 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index f973bdce8410..16ba0d5c9740 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -145,7 +145,7 @@ static inline void set_fs(mm_segment_t fs) do { \ unsigned long __gu_val; \ __chk_user_ptr(ptr); \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\ CONFIG_ARM64_PAN)); \ switch (sizeof(*(ptr))) { \ case 1: \ @@ -168,7 +168,7 @@ do { \ BUILD_BUG(); \ } \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\ CONFIG_ARM64_PAN)); \ } while (0) @@ -217,7 +217,7 @@ do { \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\ CONFIG_ARM64_PAN)); \ switch (sizeof(*(ptr))) { \ case 1: \ @@ -239,7 +239,7 @@ do { \ default: \ BUILD_BUG(); \ } \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\ CONFIG_ARM64_PAN)); \ } while (0) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9cc8186cd14b..7566cad9fa1d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -67,6 +67,10 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); .width = 0, \ } +/* meta feature for alternatives */ +static bool __maybe_unused +cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry); + static struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), @@ -688,6 +692,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .enable = cpu_enable_uao, }, #endif /* CONFIG_ARM64_UAO */ +#ifdef CONFIG_ARM64_PAN + { + .capability = ARM64_ALT_PAN_NOT_UAO, + .matches = cpufeature_pan_not_uao, + }, +#endif /* CONFIG_ARM64_PAN */ {}, }; @@ -966,3 +976,9 @@ void __init setup_cpu_features(void) pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", L1_CACHE_BYTES, cls); } + +static bool __maybe_unused +cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry) +{ + return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO)); +} diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 3f950b677c07..5d1cad3ce6d6 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -33,7 +33,7 @@ * Alignment fixed up by hardware. */ ENTRY(__clear_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x2, x1 // save the size for fixup return subs x1, x1, #8 @@ -54,7 +54,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 b.mi 5f uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) ret ENDPROC(__clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 1d982d64f1a7..17e8306dca29 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -67,11 +67,11 @@ end .req x5 ENTRY(__copy_from_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 // Nothing to copy ret diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index feaad1520dc1..f7292dd08c84 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -68,11 +68,11 @@ end .req x5 ENTRY(__copy_in_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 ret diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 2dae2cd2c481..21faae60f988 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -66,11 +66,11 @@ end .req x5 ENTRY(__copy_to_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 ret diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 820d47353cf0..d0762a729d01 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -234,6 +234,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, } if (permission_fault(esr) && (addr < USER_DS)) { + if (get_thread_info(regs->sp)->addr_limit == KERNEL_DS) + panic("Accessing user space memory with fs=KERNEL_DS"); + if (!search_exception_tables(regs->pc)) panic("Accessing user space memory outside uaccess.h routines"); } From 9193df45aa67b769ee8af97609537a876a81baca Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 18 Feb 2016 15:50:04 +0000 Subject: [PATCH 757/797] arm64: Remove the get_thread_info() function This function was introduced by previous commits implementing UAO. However, it can be replaced with task_thread_info() in uao_thread_switch() or get_fs() in do_page_fault() (the latter being called only on the current context, so no need for using the saved pt_regs). Signed-off-by: Catalin Marinas (cherry picked from commit e950631e84e7e38892ffbeee5e1816b270026b0e) Signed-off-by: Alex Shi --- arch/arm64/include/asm/thread_info.h | 6 ------ arch/arm64/kernel/process.c | 15 ++++++--------- arch/arm64/mm/fault.c | 2 +- 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index eba8db6838af..abd64bd1f6d9 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -85,12 +85,6 @@ static inline struct thread_info *current_thread_info(void) return (struct thread_info *)sp_el0; } -/* Access struct thread_info of another thread */ -static inline struct thread_info *get_thread_info(unsigned long thread_stack) -{ - return (struct thread_info *)(thread_stack & ~(THREAD_SIZE - 1)); -} - #define thread_saved_pc(tsk) \ ((unsigned long)(tsk->thread.cpu_context.pc)) #define thread_saved_sp(tsk) \ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index c1ca4ea065d4..80624829db61 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -315,15 +315,12 @@ static void tls_thread_switch(struct task_struct *next) /* Restore the UAO state depending on next's addr_limit */ static void uao_thread_switch(struct task_struct *next) { - unsigned long next_sp = next->thread.cpu_context.sp; - - if (IS_ENABLED(CONFIG_ARM64_UAO) && - get_thread_info(next_sp)->addr_limit == KERNEL_DS) - asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO, - CONFIG_ARM64_UAO)); - else - asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO, - CONFIG_ARM64_UAO)); + if (IS_ENABLED(CONFIG_ARM64_UAO)) { + if (task_thread_info(next)->addr_limit == KERNEL_DS) + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO)); + else + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO)); + } } /* diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index d0762a729d01..a8eafeceb08a 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -234,7 +234,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, } if (permission_fault(esr) && (addr < USER_DS)) { - if (get_thread_info(regs->sp)->addr_limit == KERNEL_DS) + if (get_fs() == KERNEL_DS) panic("Accessing user space memory with fs=KERNEL_DS"); if (!search_exception_tables(regs->pc)) From c73cfcaf47182e53c779181031f20e6d3793e07b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 16 Feb 2016 13:52:32 +0100 Subject: [PATCH 758/797] of/fdt: make memblock minimum physical address arch configurable By default, early_init_dt_add_memory_arch() ignores memory below the base of the kernel image since it won't be addressable via the linear mapping. However, this is not appropriate anymore once we decouple the kernel text mapping from the linear mapping, so archs may want to drop the low limit entirely. So allow the minimum to be overridden by setting MIN_MEMBLOCK_ADDR. Acked-by: Mark Rutland Acked-by: Rob Herring Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 270522a04f7a9911983878fa37da467f9ff1c938) Signed-off-by: Alex Shi --- drivers/of/fdt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 655f79db7899..1f98156f8996 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -976,13 +976,16 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, } #ifdef CONFIG_HAVE_MEMBLOCK +#ifndef MIN_MEMBLOCK_ADDR +#define MIN_MEMBLOCK_ADDR __pa(PAGE_OFFSET) +#endif #ifndef MAX_MEMBLOCK_ADDR #define MAX_MEMBLOCK_ADDR ((phys_addr_t)~0) #endif void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size) { - const u64 phys_offset = __pa(PAGE_OFFSET); + const u64 phys_offset = MIN_MEMBLOCK_ADDR; if (!PAGE_ALIGNED(base)) { if (size < PAGE_SIZE - (base & ~PAGE_MASK)) { From b01c68c7494903dca326579248d3757c715b84f8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 16 Feb 2016 13:52:33 +0100 Subject: [PATCH 759/797] of/fdt: factor out assignment of initrd_start/initrd_end Since architectures may not yet have their linear mapping up and running when the initrd address is discovered from the DT, factor out the assignment of initrd_start and initrd_end, so that an architecture can override it and use the translation it needs. Signed-off-by: Ard Biesheuvel Acked-by: Rob Herring Signed-off-by: Catalin Marinas (cherry picked from commit 369bc9abf22bf026e8645a4dd746b90649a2f6ee) Signed-off-by: Alex Shi --- drivers/of/fdt.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 1f98156f8996..3e90bce70545 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -760,6 +760,16 @@ const void * __init of_flat_dt_match_machine(const void *default_match, } #ifdef CONFIG_BLK_DEV_INITRD +#ifndef __early_init_dt_declare_initrd +static void __early_init_dt_declare_initrd(unsigned long start, + unsigned long end) +{ + initrd_start = (unsigned long)__va(start); + initrd_end = (unsigned long)__va(end); + initrd_below_start_ok = 1; +} +#endif + /** * early_init_dt_check_for_initrd - Decode initrd location from flat tree * @node: reference to node containing initrd location ('chosen') @@ -782,9 +792,7 @@ static void __init early_init_dt_check_for_initrd(unsigned long node) return; end = of_read_number(prop, len/4); - initrd_start = (unsigned long)__va(start); - initrd_end = (unsigned long)__va(end); - initrd_below_start_ok = 1; + __early_init_dt_declare_initrd(start, end); pr_debug("initrd_start=0x%llx initrd_end=0x%llx\n", (unsigned long long)start, (unsigned long long)end); From 2894f328e3bbb808606c82fa1e2ea300089df728 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 16 Feb 2016 13:52:34 +0100 Subject: [PATCH 760/797] arm64: prevent potential circular header dependencies in asm/bug.h Currently, using BUG_ON() in header files is cumbersome, due to the fact that asm/bug.h transitively includes a lot of other header files, resulting in the actual BUG_ON() invocation appearing before its definition in the preprocessor input. So let's reverse the #include dependency between asm/bug.h and asm/debug-monitors.h, by moving the definition of BUG_BRK_IMM from the latter to the former. Also fix up one user of asm/debug-monitors.h which relied on a transitive include. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 03336b1df9929e5d9c28fd9768948b6151cb046c) Signed-off-by: Alex Shi Conflicts: skip arch/arm64/kvm/hyp/debug-sr.c --- arch/arm64/include/asm/bug.h | 2 +- arch/arm64/include/asm/debug-monitors.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h index 4a748ce9ba1a..679d49221998 100644 --- a/arch/arm64/include/asm/bug.h +++ b/arch/arm64/include/asm/bug.h @@ -18,7 +18,7 @@ #ifndef _ARCH_ARM64_ASM_BUG_H #define _ARCH_ARM64_ASM_BUG_H -#include +#define BUG_BRK_IMM 0x800 #ifdef CONFIG_GENERIC_BUG #define HAVE_ARCH_BUG diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 279c85b5ec09..e893a1fca9c2 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -57,7 +58,6 @@ #define FAULT_BRK_IMM 0x100 #define KGDB_DYN_DBG_BRK_IMM 0x400 #define KGDB_COMPILED_DBG_BRK_IMM 0x401 -#define BUG_BRK_IMM 0x800 /* * BRK instruction encoding From 37cbc7db8e4fa9b66e15cf8661383a6b51c9a3e7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 16 Feb 2016 13:52:35 +0100 Subject: [PATCH 761/797] arm64: add support for ioremap() block mappings This wires up the existing generic huge-vmap feature, which allows ioremap() to use PMD or PUD sized block mappings. It also adds support to the unmap path for dealing with block mappings, which will allow us to unmap the __init region using unmap_kernel_range() in a subsequent patch. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 324420bf91f60582bb481133db9547111768ef17) Signed-off-by: Alex Shi --- .../features/vm/huge-vmap/arch-support.txt | 2 +- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/memory.h | 6 +++ arch/arm64/mm/mmu.c | 41 +++++++++++++++++++ 4 files changed, 49 insertions(+), 1 deletion(-) diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt index af6816bccb43..df1d1f3c9af2 100644 --- a/Documentation/features/vm/huge-vmap/arch-support.txt +++ b/Documentation/features/vm/huge-vmap/arch-support.txt @@ -9,7 +9,7 @@ | alpha: | TODO | | arc: | TODO | | arm: | TODO | - | arm64: | TODO | + | arm64: | ok | | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4df85b5a2045..8cd8d06ece4a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -49,6 +49,7 @@ config ARM64 select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_BITREVERSE + select HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48) select HAVE_ARCH_KGDB diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 853953cd1f08..c65aad7b13dc 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -100,6 +100,12 @@ #define MT_S2_NORMAL 0xf #define MT_S2_DEVICE_nGnRE 0x1 +#ifdef CONFIG_ARM64_4K_PAGES +#define IOREMAP_MAX_ORDER (PUD_SHIFT) +#else +#define IOREMAP_MAX_ORDER (PMD_SHIFT) +#endif + #ifndef __ASSEMBLY__ extern phys_addr_t memstart_addr; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index d1fa678355c9..b4afa9fbb00f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -745,3 +745,44 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) return dt_virt; } + +int __init arch_ioremap_pud_supported(void) +{ + /* only 4k granule supports level 1 block mappings */ + return IS_ENABLED(CONFIG_ARM64_4K_PAGES); +} + +int __init arch_ioremap_pmd_supported(void) +{ + return 1; +} + +int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) +{ + BUG_ON(phys & ~PUD_MASK); + set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); + return 1; +} + +int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot) +{ + BUG_ON(phys & ~PMD_MASK); + set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); + return 1; +} + +int pud_clear_huge(pud_t *pud) +{ + if (!pud_sect(*pud)) + return 0; + pud_clear(pud); + return 1; +} + +int pmd_clear_huge(pmd_t *pmd) +{ + if (!pmd_sect(*pmd)) + return 0; + pmd_clear(pmd); + return 1; +} From 1dd59fe47656335cd3c913e378718eb49b7b1b38 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 16 Feb 2016 13:52:36 +0100 Subject: [PATCH 762/797] arm64: introduce KIMAGE_VADDR as the virtual base of the kernel region This introduces the preprocessor symbol KIMAGE_VADDR which will serve as the symbolic virtual base of the kernel region, i.e., the kernel's virtual offset will be KIMAGE_VADDR + TEXT_OFFSET. For now, we define it as being equal to PAGE_OFFSET, but in the future, it will be moved below it once we move the kernel virtual mapping out of the linear mapping. Reviewed-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit ab893fb9f1b17f02139bce547bb4b69e96b9ae16) Signed-off-by: Alex Shi --- arch/arm64/include/asm/memory.h | 10 ++++++++-- arch/arm64/kernel/head.S | 2 +- arch/arm64/kernel/vmlinux.lds.S | 4 ++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index c65aad7b13dc..aebc739f5a11 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -51,7 +51,8 @@ #define VA_BITS (CONFIG_ARM64_VA_BITS) #define VA_START (UL(0xffffffffffffffff) << VA_BITS) #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) -#define MODULES_END (PAGE_OFFSET) +#define KIMAGE_VADDR (PAGE_OFFSET) +#define MODULES_END (KIMAGE_VADDR) #define MODULES_VADDR (MODULES_END - SZ_64M) #define PCI_IO_END (MODULES_VADDR - SZ_2M) #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) @@ -75,8 +76,13 @@ * private definitions which should NOT be used outside memory.h * files. Use virt_to_phys/phys_to_virt/__pa/__va instead. */ -#define __virt_to_phys(x) (((phys_addr_t)(x) - PAGE_OFFSET + PHYS_OFFSET)) +#define __virt_to_phys(x) ({ \ + phys_addr_t __x = (phys_addr_t)(x); \ + __x >= PAGE_OFFSET ? (__x - PAGE_OFFSET + PHYS_OFFSET) : \ + (__x - KIMAGE_VADDR + PHYS_OFFSET); }) + #define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET)) +#define __phys_to_kimg(x) ((unsigned long)((x) - PHYS_OFFSET + KIMAGE_VADDR)) /* * Convert a page to/from a physical address diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 53b9f9f128c2..04d38a058b19 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -389,7 +389,7 @@ __create_page_tables: * Map the kernel image (starting with PHYS_OFFSET). */ mov x0, x26 // swapper_pg_dir - mov x5, #PAGE_OFFSET + ldr x5, =KIMAGE_VADDR create_pgd_entry x0, x5, x3, x6 ldr x6, =KERNEL_END // __va(KERNEL_END) mov x3, x24 // phys offset diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index b78a3c772294..282e3e64a17e 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -89,7 +89,7 @@ SECTIONS *(.discard.*) } - . = PAGE_OFFSET + TEXT_OFFSET; + . = KIMAGE_VADDR + TEXT_OFFSET; .head.text : { _text = .; @@ -186,4 +186,4 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, /* * If padding is applied before .head.text, virt<->phys conversions will fail. */ -ASSERT(_text == (PAGE_OFFSET + TEXT_OFFSET), "HEAD is misaligned") +ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned") From 4545faf8e5b81592ba597141d432ca7e2b52a43e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 16 Feb 2016 13:52:37 +0100 Subject: [PATCH 763/797] arm64: pgtable: implement static [pte|pmd|pud]_offset variants The page table accessors pte_offset(), pud_offset() and pmd_offset() rely on __va translations, so they can only be used after the linear mapping has been installed. For the early fixmap and kasan init routines, whose page tables are allocated statically in the kernel image, these functions will return bogus values. So implement pte_offset_kimg(), pmd_offset_kimg() and pud_offset_kimg(), which can be used instead before any page tables have been allocated dynamically. Reviewed-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 6533945a32c762c5db70d7a3ec251a040b2d9661) Signed-off-by: Alex Shi --- arch/arm64/include/asm/pgtable.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c99dfc588deb..9a560b368910 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -455,6 +455,9 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) +/* use ONLY for statically allocated translation tables */ +#define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr)))) + /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. @@ -498,6 +501,9 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) #define pud_page(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK)) +/* use ONLY for statically allocated translation tables */ +#define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr)))) + #else #define pud_page_paddr(pud) ({ BUILD_BUG(); 0; }) @@ -507,6 +513,8 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) #define pmd_set_fixmap_offset(pudp, addr) ((pmd_t *)pudp) #define pmd_clear_fixmap() +#define pmd_offset_kimg(dir,addr) ((pmd_t *)dir) + #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #if CONFIG_PGTABLE_LEVELS > 3 @@ -545,6 +553,9 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd) #define pgd_page(pgd) pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK)) +/* use ONLY for statically allocated translation tables */ +#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr)))) + #else #define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;}) @@ -554,6 +565,8 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd) #define pud_set_fixmap_offset(pgdp, addr) ((pud_t *)pgdp) #define pud_clear_fixmap() +#define pud_offset_kimg(dir,addr) ((pud_t *)dir) + #endif /* CONFIG_PGTABLE_LEVELS > 3 */ #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) From ade984b5fcdda640ca25a1606f9cdaf279e8b4c7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 16 Feb 2016 13:52:38 +0100 Subject: [PATCH 764/797] arm64: decouple early fixmap init from linear mapping Since the early fixmap page tables are populated using pages that are part of the static footprint of the kernel, they are covered by the initial kernel mapping, and we can refer to them without using __va/__pa translations, which are tied to the linear mapping. Since the fixmap page tables are disjoint from the kernel mapping up to the top level pgd entry, we can refer to bm_pte[] directly, and there is no need to walk the page tables and perform __pa()/__va() translations at each step. Reviewed-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 157962f5a8f236cab898b68bdaa69ce68922f0bf) Signed-off-by: Alex Shi --- arch/arm64/mm/mmu.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b4afa9fbb00f..0f58a45df1f3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -614,7 +614,7 @@ static inline pud_t * fixmap_pud(unsigned long addr) BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); - return pud_offset(pgd, addr); + return pud_offset_kimg(pgd, addr); } static inline pmd_t * fixmap_pmd(unsigned long addr) @@ -623,16 +623,12 @@ static inline pmd_t * fixmap_pmd(unsigned long addr) BUG_ON(pud_none(*pud) || pud_bad(*pud)); - return pmd_offset(pud, addr); + return pmd_offset_kimg(pud, addr); } static inline pte_t * fixmap_pte(unsigned long addr) { - pmd_t *pmd = fixmap_pmd(addr); - - BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd)); - - return pte_offset_kernel(pmd, addr); + return &bm_pte[pte_index(addr)]; } void __init early_fixmap_init(void) @@ -644,14 +640,14 @@ void __init early_fixmap_init(void) pgd = pgd_offset_k(addr); pgd_populate(&init_mm, pgd, bm_pud); - pud = pud_offset(pgd, addr); + pud = fixmap_pud(addr); pud_populate(&init_mm, pud, bm_pmd); - pmd = pmd_offset(pud, addr); + pmd = fixmap_pmd(addr); pmd_populate_kernel(&init_mm, pmd, bm_pte); /* * The boot-ioremap range spans multiple pmds, for which - * we are not preparted: + * we are not prepared: */ BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); From 44b9620e6822e2acb0d65507d89cd0658055843c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 16 Feb 2016 13:52:39 +0100 Subject: [PATCH 765/797] arm64: kvm: deal with kernel symbols outside of linear mapping KVM on arm64 uses a fixed offset between the linear mapping at EL1 and the HYP mapping at EL2. Before we can move the kernel virtual mapping out of the linear mapping, we have to make sure that references to kernel symbols that are accessed via the HYP mapping are translated to their linear equivalent. Reviewed-by: Mark Rutland Acked-by: Marc Zyngier Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit a0bf9776cd0be4490d4675d4108e13379849fc7f) Signed-off-by: Alex Shi Conflicts: skip new funcs create_hyp_mappings(__start_rodata, in arch/arm/kvm/arm.c and keep funcs in arch/arm64/kvm/hyp.S --- arch/arm/include/asm/kvm_asm.h | 2 ++ arch/arm/kvm/arm.c | 5 +++-- arch/arm64/include/asm/kvm_asm.h | 17 +++++++++++++++++ arch/arm64/include/asm/kvm_host.h | 8 +++++--- arch/arm64/kvm/hyp.S | 6 +++--- 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 194c91b610ff..c35c349da069 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -79,6 +79,8 @@ #define rr_lo_hi(a1, a2) a1, a2 #endif +#define kvm_ksym_ref(kva) (kva) + #ifndef __ASSEMBLY__ struct kvm; struct kvm_vcpu; diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index e06fd299de08..70e6d557c75f 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -969,7 +969,7 @@ static void cpu_init_hyp_mode(void *dummy) pgd_ptr = kvm_mmu_get_httbr(); stack_page = __this_cpu_read(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; - vector_ptr = (unsigned long)__kvm_hyp_vector; + vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector); __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); @@ -1061,7 +1061,8 @@ static int init_hyp_mode(void) /* * Map the Hyp-code called directly from the host */ - err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end); + err = create_hyp_mappings(kvm_ksym_ref(__kvm_hyp_code_start), + kvm_ksym_ref(__kvm_hyp_code_end)); if (err) { kvm_err("Cannot map world-switch code\n"); goto out_free_mappings; diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 5e377101f919..e95c39543629 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -102,7 +102,24 @@ #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) +#define kvm_ksym_ref(sym) ((void *)&sym + kvm_ksym_shift) + #ifndef __ASSEMBLY__ +#if __GNUC__ > 4 +#define kvm_ksym_shift (PAGE_OFFSET - KIMAGE_VADDR) +#else +/* + * GCC versions 4.9 and older will fold the constant below into the addend of + * the reference to 'sym' above if kvm_ksym_shift is declared static or if the + * constant is used directly. However, since we use the small code model for + * the core kernel, the reference to 'sym' will be emitted as a adrp/add pair, + * with a +/- 4 GB range, resulting in linker relocation errors if the shift + * is sufficiently large. So prevent the compiler from folding the shift into + * the addend, by making the shift a variable with external linkage. + */ +__weak u64 kvm_ksym_shift = PAGE_OFFSET - KIMAGE_VADDR; +#endif + struct kvm; struct kvm_vcpu; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a35ce7266aac..90c6368ad7c8 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -222,7 +222,7 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); -u64 kvm_call_hyp(void *hypfn, ...); +u64 __kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); @@ -243,8 +243,8 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, * Call initialization code, and switch to the full blown * HYP code. */ - kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr, - hyp_stack_ptr, vector_ptr); + __kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr, + hyp_stack_ptr, vector_ptr); } static inline void kvm_arch_hardware_disable(void) {} @@ -258,4 +258,6 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); +#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 86c289832272..309e3479dc2c 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -923,7 +923,7 @@ __hyp_panic_str: .align 2 /* - * u64 kvm_call_hyp(void *hypfn, ...); + * u64 __kvm_call_hyp(void *hypfn, ...); * * This is not really a variadic function in the classic C-way and care must * be taken when calling this to ensure parameters are passed in registers @@ -940,10 +940,10 @@ __hyp_panic_str: * used to implement __hyp_get_vectors in the same way as in * arch/arm64/kernel/hyp_stub.S. */ -ENTRY(kvm_call_hyp) +ENTRY(__kvm_call_hyp) hvc #0 ret -ENDPROC(kvm_call_hyp) +ENDPROC(__kvm_call_hyp) .macro invalid_vector label, target .align 2 From 49d5b2c298815fcaee00779e69a741ad1e80e740 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 16 Feb 2016 13:52:40 +0100 Subject: [PATCH 766/797] arm64: move kernel image to base of vmalloc area This moves the module area to right before the vmalloc area, and moves the kernel image to the base of the vmalloc area. This is an intermediate step towards implementing KASLR, which allows the kernel image to be located anywhere in the vmalloc area. Since other subsystems such as hibernate may still need to refer to the kernel text or data segments via their linears addresses, both are mapped in the linear region as well. The linear alias of the text region is mapped read-only/non-executable to prevent inadvertent modification or execution. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit f9040773b7bbbd9e98eb6184a263512a7cfc133f) Signed-off-by: Alex Shi --- arch/arm64/include/asm/kasan.h | 2 +- arch/arm64/include/asm/memory.h | 21 ++++-- arch/arm64/include/asm/pgtable.h | 10 +-- arch/arm64/mm/dump.c | 12 ++-- arch/arm64/mm/init.c | 23 +++---- arch/arm64/mm/kasan_init.c | 27 +++++++- arch/arm64/mm/mmu.c | 110 +++++++++++++++++++++---------- 7 files changed, 137 insertions(+), 68 deletions(-) diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index de0d21211c34..71ad0f93eb71 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -14,7 +14,7 @@ * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses. */ #define KASAN_SHADOW_START (VA_START) -#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1UL << (VA_BITS - 3))) +#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) /* * This value is used to map an address to the corresponding shadow diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index aebc739f5a11..4388651d1f0d 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -45,16 +45,15 @@ * VA_START - the first kernel virtual address. * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. - * The module space lives between the addresses given by TASK_SIZE - * and PAGE_OFFSET - it must be within 128MB of the kernel text. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) #define VA_START (UL(0xffffffffffffffff) << VA_BITS) #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) -#define KIMAGE_VADDR (PAGE_OFFSET) -#define MODULES_END (KIMAGE_VADDR) -#define MODULES_VADDR (MODULES_END - SZ_64M) -#define PCI_IO_END (MODULES_VADDR - SZ_2M) +#define KIMAGE_VADDR (MODULES_END) +#define MODULES_END (MODULES_VADDR + MODULES_VSIZE) +#define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE) +#define MODULES_VSIZE (SZ_64M) +#define PCI_IO_END (PAGE_OFFSET - SZ_2M) #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) #define FIXADDR_TOP (PCI_IO_START - SZ_2M) #define TASK_SIZE_64 (UL(1) << VA_BITS) @@ -71,6 +70,16 @@ #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) +/* + * The size of the KASAN shadow region. This should be 1/8th of the + * size of the entire kernel virtual address space. + */ +#ifdef CONFIG_KASAN +#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - 3)) +#else +#define KASAN_SHADOW_SIZE (0) +#endif + /* * Physical vs virtual RAM address space conversion. These are * private definitions which should NOT be used outside memory.h diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 9a560b368910..c3c2518eecfe 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -36,19 +36,13 @@ * * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array * (rounded up to PUD_SIZE). - * VMALLOC_START: beginning of the kernel VA space + * VMALLOC_START: beginning of the kernel vmalloc space * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space, * fixed mappings and modules */ #define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE) -#ifndef CONFIG_KASAN -#define VMALLOC_START (VA_START) -#else -#include -#define VMALLOC_START (KASAN_SHADOW_END + SZ_64K) -#endif - +#define VMALLOC_START (MODULES_END) #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) #define VMEMMAP_START (VMALLOC_END + SZ_64K) diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 0841b2bf0e6a..6be918478f85 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -35,7 +35,9 @@ struct addr_marker { }; enum address_markers_idx { - VMALLOC_START_NR = 0, + MODULES_START_NR = 0, + MODULES_END_NR, + VMALLOC_START_NR, VMALLOC_END_NR, #ifdef CONFIG_SPARSEMEM_VMEMMAP VMEMMAP_START_NR, @@ -45,12 +47,12 @@ enum address_markers_idx { FIXADDR_END_NR, PCI_START_NR, PCI_END_NR, - MODULES_START_NR, - MODULES_END_NR, KERNEL_SPACE_NR, }; static struct addr_marker address_markers[] = { + { MODULES_VADDR, "Modules start" }, + { MODULES_END, "Modules end" }, { VMALLOC_START, "vmalloc() Area" }, { VMALLOC_END, "vmalloc() End" }, #ifdef CONFIG_SPARSEMEM_VMEMMAP @@ -61,9 +63,7 @@ static struct addr_marker address_markers[] = { { FIXADDR_TOP, "Fixmap end" }, { PCI_IO_START, "PCI I/O start" }, { PCI_IO_END, "PCI I/O end" }, - { MODULES_VADDR, "Modules start" }, - { MODULES_END, "Modules end" }, - { PAGE_OFFSET, "Kernel Mapping" }, + { PAGE_OFFSET, "Linear Mapping" }, { -1, NULL }, }; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index dba32ceff17a..ac4d8159d6f3 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -302,22 +303,26 @@ void __init mem_init(void) #ifdef CONFIG_KASAN " kasan : 0x%16lx - 0x%16lx (%6ld GB)\n" #endif + " modules : 0x%16lx - 0x%16lx (%6ld MB)\n" " vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n" + " .init : 0x%p" " - 0x%p" " (%6ld KB)\n" + " .text : 0x%p" " - 0x%p" " (%6ld KB)\n" + " .data : 0x%p" " - 0x%p" " (%6ld KB)\n" #ifdef CONFIG_SPARSEMEM_VMEMMAP " vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n" " 0x%16lx - 0x%16lx (%6ld MB actual)\n" #endif " fixed : 0x%16lx - 0x%16lx (%6ld KB)\n" " PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n" - " modules : 0x%16lx - 0x%16lx (%6ld MB)\n" - " memory : 0x%16lx - 0x%16lx (%6ld MB)\n" - " .init : 0x%p" " - 0x%p" " (%6ld KB)\n" - " .text : 0x%p" " - 0x%p" " (%6ld KB)\n" - " .data : 0x%p" " - 0x%p" " (%6ld KB)\n", + " memory : 0x%16lx - 0x%16lx (%6ld MB)\n", #ifdef CONFIG_KASAN MLG(KASAN_SHADOW_START, KASAN_SHADOW_END), #endif + MLM(MODULES_VADDR, MODULES_END), MLG(VMALLOC_START, VMALLOC_END), + MLK_ROUNDUP(__init_begin, __init_end), + MLK_ROUNDUP(_text, _etext), + MLK_ROUNDUP(_sdata, _edata), #ifdef CONFIG_SPARSEMEM_VMEMMAP MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE), @@ -326,11 +331,7 @@ void __init mem_init(void) #endif MLK(FIXADDR_START, FIXADDR_TOP), MLM(PCI_IO_START, PCI_IO_END), - MLM(MODULES_VADDR, MODULES_END), - MLM(PAGE_OFFSET, (unsigned long)high_memory), - MLK_ROUNDUP(__init_begin, __init_end), - MLK_ROUNDUP(_text, _etext), - MLK_ROUNDUP(_sdata, _edata)); + MLM(PAGE_OFFSET, (unsigned long)high_memory)); #undef MLK #undef MLM @@ -358,8 +359,8 @@ void __init mem_init(void) void free_initmem(void) { - fixup_init(); free_initmem_default(0); + fixup_init(); } #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index cc569a38bc76..7f10cc91fa8a 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -17,9 +17,11 @@ #include #include +#include #include #include #include +#include #include static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE); @@ -33,7 +35,7 @@ static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr, if (pmd_none(*pmd)) pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); - pte = pte_offset_kernel(pmd, addr); + pte = pte_offset_kimg(pmd, addr); do { next = addr + PAGE_SIZE; set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page), @@ -51,7 +53,7 @@ static void __init kasan_early_pmd_populate(pud_t *pud, if (pud_none(*pud)) pud_populate(&init_mm, pud, kasan_zero_pmd); - pmd = pmd_offset(pud, addr); + pmd = pmd_offset_kimg(pud, addr); do { next = pmd_addr_end(addr, end); kasan_early_pte_populate(pmd, addr, next); @@ -68,7 +70,7 @@ static void __init kasan_early_pud_populate(pgd_t *pgd, if (pgd_none(*pgd)) pgd_populate(&init_mm, pgd, kasan_zero_pud); - pud = pud_offset(pgd, addr); + pud = pud_offset_kimg(pgd, addr); do { next = pud_addr_end(addr, end); kasan_early_pmd_populate(pud, addr, next); @@ -126,9 +128,13 @@ static void __init clear_pgds(unsigned long start, void __init kasan_init(void) { + u64 kimg_shadow_start, kimg_shadow_end; struct memblock_region *reg; int i; + kimg_shadow_start = (u64)kasan_mem_to_shadow(_text); + kimg_shadow_end = (u64)kasan_mem_to_shadow(_end); + /* * We are going to perform proper setup of shadow memory. * At first we should unmap early shadow (clear_pgds() call bellow). @@ -142,8 +148,23 @@ void __init kasan_init(void) clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + vmemmap_populate(kimg_shadow_start, kimg_shadow_end, NUMA_NO_NODE); + + /* + * vmemmap_populate() has populated the shadow region that covers the + * kernel image with SWAPPER_BLOCK_SIZE mappings, so we have to round + * the start and end addresses to SWAPPER_BLOCK_SIZE as well, to prevent + * kasan_populate_zero_shadow() from replacing the PMD block mappings + * with PMD table mappings at the edges of the shadow region for the + * kernel image. + */ + if (ARM64_SWAPPER_USES_SECTION_MAPS) + kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE); + kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, kasan_mem_to_shadow((void *)MODULES_VADDR)); + kasan_populate_zero_shadow((void *)kimg_shadow_end, + kasan_mem_to_shadow((void *)PAGE_OFFSET)); for_each_memblock(memory, reg) { void *start = (void *)__phys_to_virt(reg->base); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0f58a45df1f3..895a8457259c 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -53,6 +53,10 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS); unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); +static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; +static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; +static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -380,16 +384,15 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) { - unsigned long kernel_start = __pa(_stext); - unsigned long kernel_end = __pa(_end); + unsigned long kernel_end = __pa(_etext); /* - * The kernel itself is mapped at page granularity. Map all other - * memory, making sure we don't overwrite the existing kernel mappings. + * Take care not to create a writable alias for the + * read-only text and rodata sections of the kernel image. */ - /* No overlap with the kernel. */ + /* No overlap with the kernel text */ if (end < kernel_start || start >= kernel_end) { __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start, PAGE_KERNEL, @@ -398,8 +401,8 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end } /* - * This block overlaps the kernel mapping. Map the portion(s) which - * don't overlap. + * This block overlaps the kernel text mapping. + * Map the portion(s) which don't overlap. */ if (start < kernel_start) __create_pgd_mapping(pgd, start, @@ -411,6 +414,16 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end __phys_to_virt(kernel_end), end - kernel_end, PAGE_KERNEL, early_pgtable_alloc); + + /* + * Map the linear alias of the [_stext, _etext) interval as + * read-only/non-executable. This makes the contents of the + * region accessible to subsystems such as hibernate, but + * protects it from inadvertent modification or execution. + */ + __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start), + kernel_end - kernel_start, PAGE_KERNEL_RO, + early_pgtable_alloc); } static void __init map_mem(pgd_t *pgd) @@ -429,25 +442,28 @@ static void __init map_mem(pgd_t *pgd) } } -#ifdef CONFIG_DEBUG_RODATA void mark_rodata_ro(void) { + if (!IS_ENABLED(CONFIG_DEBUG_RODATA)) + return; + create_mapping_late(__pa(_stext), (unsigned long)_stext, (unsigned long)_etext - (unsigned long)_stext, PAGE_KERNEL_ROX); - } -#endif void fixup_init(void) { - create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin, - (unsigned long)__init_end - (unsigned long)__init_begin, - PAGE_KERNEL); + /* + * Unmap the __init region but leave the VM area in place. This + * prevents the region from being reused for kernel modules, which + * is not supported by kallsyms. + */ + unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin)); } static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end, - pgprot_t prot) + pgprot_t prot, struct vm_struct *vma) { phys_addr_t pa_start = __pa(va_start); unsigned long size = va_end - va_start; @@ -457,6 +473,14 @@ static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end, __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, early_pgtable_alloc); + + vma->addr = va_start; + vma->phys_addr = pa_start; + vma->size = size; + vma->flags = VM_MAP; + vma->caller = __builtin_return_address(0); + + vm_area_add_early(vma); } /* @@ -464,17 +488,35 @@ static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end, */ static void __init map_kernel(pgd_t *pgd) { + static struct vm_struct vmlinux_text, vmlinux_init, vmlinux_data; - map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC); - map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC); - map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL); + map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC, &vmlinux_text); + map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC, + &vmlinux_init); + map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data); - /* - * The fixmap falls in a separate pgd to the kernel, and doesn't live - * in the carveout for the swapper_pg_dir. We can simply re-use the - * existing dir for the fixmap. - */ - set_pgd(pgd_offset_raw(pgd, FIXADDR_START), *pgd_offset_k(FIXADDR_START)); + if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) { + /* + * The fixmap falls in a separate pgd to the kernel, and doesn't + * live in the carveout for the swapper_pg_dir. We can simply + * re-use the existing dir for the fixmap. + */ + set_pgd(pgd_offset_raw(pgd, FIXADDR_START), + *pgd_offset_k(FIXADDR_START)); + } else if (CONFIG_PGTABLE_LEVELS > 3) { + /* + * The fixmap shares its top level pgd entry with the kernel + * mapping. This can really only occur when we are running + * with 16k/4 levels, so we can simply reuse the pud level + * entry instead. + */ + BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); + set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START), + __pud(__pa(bm_pmd) | PUD_TYPE_TABLE)); + pud_clear_fixmap(); + } else { + BUG(); + } kasan_copy_shadow(pgd); } @@ -600,14 +642,6 @@ void vmemmap_free(unsigned long start, unsigned long end) } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ -static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; -#if CONFIG_PGTABLE_LEVELS > 2 -static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; -#endif -#if CONFIG_PGTABLE_LEVELS > 3 -static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; -#endif - static inline pud_t * fixmap_pud(unsigned long addr) { pgd_t *pgd = pgd_offset_k(addr); @@ -639,8 +673,18 @@ void __init early_fixmap_init(void) unsigned long addr = FIXADDR_START; pgd = pgd_offset_k(addr); - pgd_populate(&init_mm, pgd, bm_pud); - pud = fixmap_pud(addr); + if (CONFIG_PGTABLE_LEVELS > 3 && !pgd_none(*pgd)) { + /* + * We only end up here if the kernel mapping and the fixmap + * share the top level pgd entry, which should only happen on + * 16k/4 levels configurations. + */ + BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); + pud = pud_offset_kimg(pgd, addr); + } else { + pgd_populate(&init_mm, pgd, bm_pud); + pud = fixmap_pud(addr); + } pud_populate(&init_mm, pud, bm_pmd); pmd = fixmap_pmd(addr); pmd_populate_kernel(&init_mm, pmd, bm_pte); From 368a063148f5d4e5b40e65b9954a9b98b1a7cc3c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 16 Feb 2016 13:52:41 +0100 Subject: [PATCH 767/797] arm64: defer __va translation of initrd_start and initrd_end Before deferring the assignment of memstart_addr in a subsequent patch, to the moment where all memory has been discovered and possibly clipped based on the size of the linear region and the presence of a mem= command line parameter, we need to ensure that memstart_addr is not used to perform __va translations before it is assigned. One such use is in the generic early DT discovery of the initrd location, which is recorded as a virtual address in the globals initrd_start and initrd_end. So wire up the generic support to declare the initrd addresses, and implement it without __va() translations, and perform the translation after memstart_addr has been assigned. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit a89dea585371a9d5d85499db47c93f129be8e0c4) Signed-off-by: Alex Shi --- arch/arm64/include/asm/memory.h | 8 ++++++++ arch/arm64/mm/init.c | 13 +++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 4388651d1f0d..18b7e77c7495 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -121,6 +121,14 @@ #define IOREMAP_MAX_ORDER (PMD_SHIFT) #endif +#ifdef CONFIG_BLK_DEV_INITRD +#define __early_init_dt_declare_initrd(__start, __end) \ + do { \ + initrd_start = (__start); \ + initrd_end = (__end); \ + } while (0) +#endif + #ifndef __ASSEMBLY__ extern phys_addr_t memstart_addr; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index ac4d8159d6f3..92acbee2bb8b 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -59,8 +59,8 @@ static int __init early_initrd(char *p) if (*endp == ',') { size = memparse(endp + 1, NULL); - initrd_start = (unsigned long)__va(start); - initrd_end = (unsigned long)__va(start + size); + initrd_start = start; + initrd_end = start + size; } return 0; } @@ -168,8 +168,13 @@ void __init arm64_memblock_init(void) */ memblock_reserve(__pa(_text), _end - _text); #ifdef CONFIG_BLK_DEV_INITRD - if (initrd_start) - memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start); + if (initrd_start) { + memblock_reserve(initrd_start, initrd_end - initrd_start); + + /* the generic initrd code expects virtual addresses */ + initrd_start = __phys_to_virt(initrd_start); + initrd_end = __phys_to_virt(initrd_end); + } #endif early_init_fdt_scan_reserved_mem(); From 72b991537db5f4c361b540cbf0059c7268d848c4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 16 Feb 2016 13:52:42 +0100 Subject: [PATCH 768/797] arm64: allow kernel Image to be loaded anywhere in physical memory This relaxes the kernel Image placement requirements, so that it may be placed at any 2 MB aligned offset in physical memory. This is accomplished by ignoring PHYS_OFFSET when installing memblocks, and accounting for the apparent virtual offset of the kernel Image. As a result, virtual address references below PAGE_OFFSET are correctly mapped onto physical references into the kernel Image regardless of where it sits in memory. Special care needs to be taken for dealing with memory limits passed via mem=, since the generic implementation clips memory top down, which may clip the kernel image itself if it is loaded high up in memory. To deal with this case, we simply add back the memory covering the kernel image, which may result in more memory to be retained than was passed as a mem= parameter. Since mem= should not be considered a production feature, a panic notifier handler is installed that dumps the memory limit at panic time if one was set. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit a7f8de168ace487fa7b88cb154e413cf40e87fc6) Signed-off-by: Alex Shi --- Documentation/arm64/booting.txt | 20 +++++--- arch/arm64/include/asm/boot.h | 6 +++ arch/arm64/include/asm/kernel-pgtable.h | 12 +++++ arch/arm64/include/asm/kvm_asm.h | 17 +------ arch/arm64/include/asm/memory.h | 18 +++---- arch/arm64/kernel/head.S | 6 ++- arch/arm64/kernel/image.h | 13 +++-- arch/arm64/mm/init.c | 63 ++++++++++++++++++++++++- arch/arm64/mm/mmu.c | 3 ++ 9 files changed, 119 insertions(+), 39 deletions(-) diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt index 701d39d3171a..56d6d8b796db 100644 --- a/Documentation/arm64/booting.txt +++ b/Documentation/arm64/booting.txt @@ -109,7 +109,13 @@ Header notes: 1 - 4K 2 - 16K 3 - 64K - Bits 3-63: Reserved. + Bit 3: Kernel physical placement + 0 - 2MB aligned base should be as close as possible + to the base of DRAM, since memory below it is not + accessible via the linear mapping + 1 - 2MB aligned base may be anywhere in physical + memory + Bits 4-63: Reserved. - When image_size is zero, a bootloader should attempt to keep as much memory as possible free for use by the kernel immediately after the @@ -117,14 +123,14 @@ Header notes: depending on selected features, and is effectively unbound. The Image must be placed text_offset bytes from a 2MB aligned base -address near the start of usable system RAM and called there. Memory -below that base address is currently unusable by Linux, and therefore it -is strongly recommended that this location is the start of system RAM. -The region between the 2 MB aligned base address and the start of the -image has no special significance to the kernel, and may be used for -other purposes. +address anywhere in usable system RAM and called there. The region +between the 2 MB aligned base address and the start of the image has no +special significance to the kernel, and may be used for other purposes. At least image_size bytes from the start of the image must be free for use by the kernel. +NOTE: versions prior to v4.6 cannot make use of memory below the +physical offset of the Image so it is recommended that the Image be +placed as close as possible to the start of system RAM. Any memory described to the kernel (even that below the start of the image) which is not marked as reserved from the kernel (e.g., with a diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h index 81151b67b26b..ebf2481889c3 100644 --- a/arch/arm64/include/asm/boot.h +++ b/arch/arm64/include/asm/boot.h @@ -11,4 +11,10 @@ #define MIN_FDT_ALIGN 8 #define MAX_FDT_SIZE SZ_2M +/* + * arm64 requires the kernel image to placed + * TEXT_OFFSET bytes beyond a 2 MB aligned base + */ +#define MIN_KIMG_ALIGN SZ_2M + #endif diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index a459714ee29e..5c6375d8528b 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -79,5 +79,17 @@ #define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) #endif +/* + * To make optimal use of block mappings when laying out the linear + * mapping, round down the base of physical memory to a size that can + * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE + * (64k granule), or a multiple that can be mapped using contiguous bits + * in the page tables: 32 * PMD_SIZE (16k granule) + */ +#ifdef CONFIG_ARM64_64K_PAGES +#define ARM64_MEMSTART_ALIGN SZ_512M +#else +#define ARM64_MEMSTART_ALIGN SZ_1G +#endif #endif /* __ASM_KERNEL_PGTABLE_H */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index e95c39543629..419bc6661b5c 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -102,24 +102,9 @@ #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) -#define kvm_ksym_ref(sym) ((void *)&sym + kvm_ksym_shift) +#define kvm_ksym_ref(sym) phys_to_virt((u64)&sym - kimage_voffset) #ifndef __ASSEMBLY__ -#if __GNUC__ > 4 -#define kvm_ksym_shift (PAGE_OFFSET - KIMAGE_VADDR) -#else -/* - * GCC versions 4.9 and older will fold the constant below into the addend of - * the reference to 'sym' above if kvm_ksym_shift is declared static or if the - * constant is used directly. However, since we use the small code model for - * the core kernel, the reference to 'sym' will be emitted as a adrp/add pair, - * with a +/- 4 GB range, resulting in linker relocation errors if the shift - * is sufficiently large. So prevent the compiler from folding the shift into - * the addend, by making the shift a variable with external linkage. - */ -__weak u64 kvm_ksym_shift = PAGE_OFFSET - KIMAGE_VADDR; -#endif - struct kvm; struct kvm_vcpu; diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 18b7e77c7495..3239e4d78e0d 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -24,6 +24,7 @@ #include #include #include +#include #include /* @@ -88,10 +89,10 @@ #define __virt_to_phys(x) ({ \ phys_addr_t __x = (phys_addr_t)(x); \ __x >= PAGE_OFFSET ? (__x - PAGE_OFFSET + PHYS_OFFSET) : \ - (__x - KIMAGE_VADDR + PHYS_OFFSET); }) + (__x - kimage_voffset); }) #define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET)) -#define __phys_to_kimg(x) ((unsigned long)((x) - PHYS_OFFSET + KIMAGE_VADDR)) +#define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset)) /* * Convert a page to/from a physical address @@ -133,15 +134,16 @@ extern phys_addr_t memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ -#define PHYS_OFFSET ({ memstart_addr; }) +#define PHYS_OFFSET ({ BUG_ON(memstart_addr & 1); memstart_addr; }) + +/* the offset between the kernel virtual and physical mappings */ +extern u64 kimage_voffset; /* - * The maximum physical address that the linear direct mapping - * of system RAM can cover. (PAGE_OFFSET can be interpreted as - * a 2's complement signed quantity and negated to derive the - * maximum size of the linear mapping.) + * Allow all memory at the discovery stage. We will clip it later. */ -#define MAX_MEMBLOCK_ADDR ({ memstart_addr - PAGE_OFFSET - 1; }) +#define MIN_MEMBLOCK_ADDR 0 +#define MAX_MEMBLOCK_ADDR U64_MAX /* * PFNs are used to describe any physical page; this means diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 04d38a058b19..05b98289093e 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -428,7 +428,11 @@ __mmap_switched: and x4, x4, #~(THREAD_SIZE - 1) msr sp_el0, x4 // Save thread_info str_l x21, __fdt_pointer, x5 // Save FDT pointer - str_l x24, memstart_addr, x6 // Save PHYS_OFFSET + + ldr x4, =KIMAGE_VADDR // Save the offset between + sub x4, x4, x24 // the kernel virtual and + str_l x4, kimage_voffset, x5 // physical mappings + mov x29, #0 #ifdef CONFIG_KASAN bl kasan_early_init diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 999633bd7294..c9c62cab25a4 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -42,15 +42,18 @@ #endif #ifdef CONFIG_CPU_BIG_ENDIAN -#define __HEAD_FLAG_BE 1 +#define __HEAD_FLAG_BE 1 #else -#define __HEAD_FLAG_BE 0 +#define __HEAD_FLAG_BE 0 #endif -#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2) +#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2) -#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \ - (__HEAD_FLAG_PAGE_SIZE << 1)) +#define __HEAD_FLAG_PHYS_BASE 1 + +#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \ + (__HEAD_FLAG_PAGE_SIZE << 1) | \ + (__HEAD_FLAG_PHYS_BASE << 3)) /* * These will output as part of the Image header, which should be little-endian diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 92acbee2bb8b..2c7a3c2868e4 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -35,8 +35,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -46,7 +48,13 @@ #include "mm.h" -phys_addr_t memstart_addr __read_mostly = 0; +/* + * We need to be able to catch inadvertent references to memstart_addr + * that occur (potentially in generic code) before arm64_memblock_init() + * executes, which assigns it its actual value. So use a default value + * that cannot be mistaken for a real physical address. + */ +phys_addr_t memstart_addr __read_mostly = ~0ULL; phys_addr_t arm64_dma_phys_limit __read_mostly; #ifdef CONFIG_BLK_DEV_INITRD @@ -160,7 +168,33 @@ early_param("mem", early_mem); void __init arm64_memblock_init(void) { - memblock_enforce_memory_limit(memory_limit); + const s64 linear_region_size = -(s64)PAGE_OFFSET; + + /* + * Select a suitable value for the base of physical memory. + */ + memstart_addr = round_down(memblock_start_of_DRAM(), + ARM64_MEMSTART_ALIGN); + + /* + * Remove the memory that we will not be able to cover with the + * linear mapping. Take care not to clip the kernel which may be + * high in memory. + */ + memblock_remove(max(memstart_addr + linear_region_size, __pa(_end)), + ULLONG_MAX); + if (memblock_end_of_DRAM() > linear_region_size) + memblock_remove(0, memblock_end_of_DRAM() - linear_region_size); + + /* + * Apply the memory limit if it was set. Since the kernel may be loaded + * high up in memory, add back the kernel region that must be accessible + * via the linear mapping. + */ + if (memory_limit != (phys_addr_t)ULLONG_MAX) { + memblock_enforce_memory_limit(memory_limit); + memblock_add(__pa(_text), (u64)(_end - _text)); + } /* * Register the kernel text, kernel data, initrd, and initial @@ -386,3 +420,28 @@ static int __init keepinitrd_setup(char *__unused) __setup("keepinitrd", keepinitrd_setup); #endif + +/* + * Dump out memory limit information on panic. + */ +static int dump_mem_limit(struct notifier_block *self, unsigned long v, void *p) +{ + if (memory_limit != (phys_addr_t)ULLONG_MAX) { + pr_emerg("Memory Limit: %llu MB\n", memory_limit >> 20); + } else { + pr_emerg("Memory Limit: none\n"); + } + return 0; +} + +static struct notifier_block mem_limit_notifier = { + .notifier_call = dump_mem_limit, +}; + +static int __init register_mem_limit_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &mem_limit_notifier); + return 0; +} +__initcall(register_mem_limit_dumper); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 895a8457259c..fb5c872fe3d6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -46,6 +46,9 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS); +u64 kimage_voffset __read_mostly; +EXPORT_SYMBOL(kimage_voffset); + /* * Empty_zero_page is a special page that is used for zero-initialized data * and COW. From a67099df67dec4550a650aa3b871e2b0ecd20957 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 19 Feb 2016 14:28:58 +0000 Subject: [PATCH 769/797] arm64: User die() instead of panic() in do_page_fault() The former gives better error reporting on unhandled permission faults (introduced by the UAO patches). Signed-off-by: Catalin Marinas (cherry picked from commit 70c8abc28762d04e36c92e07eee2ce6ab41049cb) Signed-off-by: Alex Shi --- arch/arm64/mm/fault.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index a8eafeceb08a..44e56de23f79 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -235,10 +235,10 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (permission_fault(esr) && (addr < USER_DS)) { if (get_fs() == KERNEL_DS) - panic("Accessing user space memory with fs=KERNEL_DS"); + die("Accessing user space memory with fs=KERNEL_DS", regs, esr); if (!search_exception_tables(regs->pc)) - panic("Accessing user space memory outside uaccess.h routines"); + die("Accessing user space memory outside uaccess.h routines", regs, esr); } /* From bf7cb966b2f5ef5a456a25d84d2b64c79730a07a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 22 Feb 2016 18:46:03 +0100 Subject: [PATCH 770/797] arm64: mm: only perform memstart_addr sanity check if DEBUG_VM Checking whether memstart_addr has been assigned every time it is referenced adds a branch instruction that may hurt performance if the reference in question occurs on a hot path. So only perform the check if CONFIG_DEBUG_VM=y. Signed-off-by: Ard Biesheuvel [catalin.marinas@arm.com: replaced #ifdef with VM_BUG_ON] Signed-off-by: Catalin Marinas (cherry picked from commit a92405f082d43267575444a6927085e4c8a69e4e) Signed-off-by: Alex Shi --- arch/arm64/include/asm/memory.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 3239e4d78e0d..460d09bf9442 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -132,9 +132,11 @@ #ifndef __ASSEMBLY__ +#include + extern phys_addr_t memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ -#define PHYS_OFFSET ({ BUG_ON(memstart_addr & 1); memstart_addr; }) +#define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) /* the offset between the kernel virtual and physical mappings */ extern u64 kimage_voffset; From d3bb0180b3ecbcff076ca7e41d4ad4fa0ee4c9d7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 22 Feb 2016 18:46:04 +0100 Subject: [PATCH 771/797] arm64: mm: use bit ops rather than arithmetic in pa/va translations Since PAGE_OFFSET is chosen such that it cuts the kernel VA space right in half, and since the size of the kernel VA space itself is always a power of 2, we can treat PAGE_OFFSET as a bitmask and replace the additions/subtractions with 'or' and 'and-not' operations. For the comparison against PAGE_OFFSET, a mov/cmp/branch sequence ends up getting replaced with a single tbz instruction. For the additions and subtractions, we save a mov instruction since the mask is folded into the instruction's immediate field. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 8439e62a15614e8fcd43835d57b7245cd9870dc5) Signed-off-by: Alex Shi --- arch/arm64/include/asm/memory.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 460d09bf9442..eb798156cf56 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -88,10 +88,10 @@ */ #define __virt_to_phys(x) ({ \ phys_addr_t __x = (phys_addr_t)(x); \ - __x >= PAGE_OFFSET ? (__x - PAGE_OFFSET + PHYS_OFFSET) : \ - (__x - kimage_voffset); }) + __x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET : \ + (__x - kimage_voffset); }) -#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET)) +#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET) #define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset)) /* @@ -132,6 +132,7 @@ #ifndef __ASSEMBLY__ +#include #include extern phys_addr_t memstart_addr; From 11e7d3ccfae5510815c72cec4066d5d5acbfa718 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 23 Feb 2016 08:56:45 +0100 Subject: [PATCH 772/797] arm64: move brk immediate argument definitions to separate header Instead of reversing the header dependency between asm/bug.h and asm/debug-monitors.h, split off the brk instruction immediate value defines into a new header asm/brk-imm.h, and include it from both. This solves the circular dependency issue that prevents BUG() from being used in some header files, and keeps the definitions together. Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit f98deee9a9f8c47d05a0f64d86440882dca772ff) Signed-off-by: Alex Shi --- arch/arm64/include/asm/brk-imm.h | 25 +++++++++++++++++++++++++ arch/arm64/include/asm/bug.h | 2 +- arch/arm64/include/asm/debug-monitors.h | 14 +------------- 3 files changed, 27 insertions(+), 14 deletions(-) create mode 100644 arch/arm64/include/asm/brk-imm.h diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h new file mode 100644 index 000000000000..ed693c5bcec0 --- /dev/null +++ b/arch/arm64/include/asm/brk-imm.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_BRK_IMM_H +#define __ASM_BRK_IMM_H + +/* + * #imm16 values used for BRK instruction generation + * Allowed values for kgdb are 0x400 - 0x7ff + * 0x100: for triggering a fault on purpose (reserved) + * 0x400: for dynamic BRK instruction + * 0x401: for compile time BRK instruction + * 0x800: kernel-mode BUG() and WARN() traps + */ +#define FAULT_BRK_IMM 0x100 +#define KGDB_DYN_DBG_BRK_IMM 0x400 +#define KGDB_COMPILED_DBG_BRK_IMM 0x401 +#define BUG_BRK_IMM 0x800 + +#endif diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h index 679d49221998..561190d15881 100644 --- a/arch/arm64/include/asm/bug.h +++ b/arch/arm64/include/asm/bug.h @@ -18,7 +18,7 @@ #ifndef _ARCH_ARM64_ASM_BUG_H #define _ARCH_ARM64_ASM_BUG_H -#define BUG_BRK_IMM 0x800 +#include #ifdef CONFIG_GENERIC_BUG #define HAVE_ARCH_BUG diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index e893a1fca9c2..2fcb9b7c876c 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include @@ -47,18 +47,6 @@ */ #define BREAK_INSTR_SIZE AARCH64_INSN_SIZE -/* - * #imm16 values used for BRK instruction generation - * Allowed values for kgbd are 0x400 - 0x7ff - * 0x100: for triggering a fault on purpose (reserved) - * 0x400: for dynamic BRK instruction - * 0x401: for compile time BRK instruction - * 0x800: kernel-mode BUG() and WARN() traps - */ -#define FAULT_BRK_IMM 0x100 -#define KGDB_DYN_DBG_BRK_IMM 0x400 -#define KGDB_COMPILED_DBG_BRK_IMM 0x401 - /* * BRK instruction encoding * The #imm16 value should be placed at bits[20:5] within BRK ins From 6537906675622c231257664593ed3174b117b3ef Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Nov 2015 12:37:35 +0100 Subject: [PATCH 773/797] arm64: add support for module PLTs This adds support for emitting PLTs at module load time for relative branches that are out of range. This is a prerequisite for KASLR, which may place the kernel and the modules anywhere in the vmalloc area, making it more likely that branch target offsets exceed the maximum range of +/- 128 MB. In this version, I removed the distinction between relocations against .init executable sections and ordinary executable sections. The reason is that it is hardly worth the trouble, given that .init.text usually does not contain that many far branches, and this version now only reserves PLT entry space for jump and call relocations against undefined symbols (since symbols defined in the same module can be assumed to be within +/- 128 MB) For example, the mac80211.ko module (which is fairly sizable at ~400 KB) built with -mcmodel=large gives the following relocation counts: relocs branches unique !local .text 3925 3347 518 219 .init.text 11 8 7 1 .exit.text 4 4 4 1 .text.unlikely 81 67 36 17 ('unique' means branches to unique type/symbol/addend combos, of which !local is the subset referring to undefined symbols) IOW, we are only emitting a single PLT entry for the .init sections, and we are better off just adding it to the core PLT section instead. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit fd045f6cd98ec4953147b318418bd45e441e52a3) Signed-off-by: Alex Shi --- arch/arm64/Kconfig | 9 ++ arch/arm64/Makefile | 6 +- arch/arm64/include/asm/module.h | 11 ++ arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/module-plts.c | 201 ++++++++++++++++++++++++++++++++ arch/arm64/kernel/module.c | 22 ++++ arch/arm64/kernel/module.lds | 3 + 7 files changed, 252 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kernel/module-plts.c create mode 100644 arch/arm64/kernel/module.lds diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8cd8d06ece4a..22db20491733 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -365,6 +365,7 @@ config ARM64_ERRATUM_843419 bool "Cortex-A53: 843419: A load or store might access an incorrect address" depends on MODULES default y + select ARM64_MODULE_CMODEL_LARGE help This option builds kernel modules using the large memory model in order to avoid the use of the ADRP instruction, which can cause @@ -728,6 +729,14 @@ config ARM64_UAO regular load/store instructions if the cpu does not implement the feature. +config ARM64_MODULE_CMODEL_LARGE + bool + +config ARM64_MODULE_PLTS + bool + select ARM64_MODULE_CMODEL_LARGE + select HAVE_MOD_ARCH_SPECIFIC + endmenu menu "Boot options" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 548a2939d7e6..71054a38decf 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -43,10 +43,14 @@ endif CHECKFLAGS += -D__aarch64__ -ifeq ($(CONFIG_ARM64_ERRATUM_843419), y) +ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y) KBUILD_CFLAGS_MODULE += -mcmodel=large endif +ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) +KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds +endif + # Default value head-y := arch/arm64/kernel/head.o diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index e80e232b730e..8652fb613304 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -20,4 +20,15 @@ #define MODULE_ARCH_VERMAGIC "aarch64" +#ifdef CONFIG_ARM64_MODULE_PLTS +struct mod_arch_specific { + struct elf64_shdr *plt; + int plt_num_entries; + int plt_max_entries; +}; +#endif + +u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, + Elf64_Sym *sym); + #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index c4e2f70c0aa0..8d971f9c6ed5 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -30,6 +30,7 @@ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ ../../arm/kernel/opcodes.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o +arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c new file mode 100644 index 000000000000..1ce90d8450ae --- /dev/null +++ b/arch/arm64/kernel/module-plts.c @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2014-2016 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +struct plt_entry { + /* + * A program that conforms to the AArch64 Procedure Call Standard + * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or + * IP1 (x17) may be inserted at any branch instruction that is + * exposed to a relocation that supports long branches. Since that + * is exactly what we are dealing with here, we are free to use x16 + * as a scratch register in the PLT veneers. + */ + __le32 mov0; /* movn x16, #0x.... */ + __le32 mov1; /* movk x16, #0x...., lsl #16 */ + __le32 mov2; /* movk x16, #0x...., lsl #32 */ + __le32 br; /* br x16 */ +}; + +u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, + Elf64_Sym *sym) +{ + struct plt_entry *plt = (struct plt_entry *)mod->arch.plt->sh_addr; + int i = mod->arch.plt_num_entries; + u64 val = sym->st_value + rela->r_addend; + + /* + * We only emit PLT entries against undefined (SHN_UNDEF) symbols, + * which are listed in the ELF symtab section, but without a type + * or a size. + * So, similar to how the module loader uses the Elf64_Sym::st_value + * field to store the resolved addresses of undefined symbols, let's + * borrow the Elf64_Sym::st_size field (whose value is never used by + * the module loader, even for symbols that are defined) to record + * the address of a symbol's associated PLT entry as we emit it for a + * zero addend relocation (which is the only kind we have to deal with + * in practice). This allows us to find duplicates without having to + * go through the table every time. + */ + if (rela->r_addend == 0 && sym->st_size != 0) { + BUG_ON(sym->st_size < (u64)plt || sym->st_size >= (u64)&plt[i]); + return sym->st_size; + } + + mod->arch.plt_num_entries++; + BUG_ON(mod->arch.plt_num_entries > mod->arch.plt_max_entries); + + /* + * MOVK/MOVN/MOVZ opcode: + * +--------+------------+--------+-----------+-------------+---------+ + * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | + * +--------+------------+--------+-----------+-------------+---------+ + * + * Rd := 0x10 (x16) + * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) + * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) + * sf := 1 (64-bit variant) + */ + plt[i] = (struct plt_entry){ + cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), + cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), + cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), + cpu_to_le32(0xd61f0200) + }; + + if (rela->r_addend == 0) + sym->st_size = (u64)&plt[i]; + + return (u64)&plt[i]; +} + +#define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b)) + +static int cmp_rela(const void *a, const void *b) +{ + const Elf64_Rela *x = a, *y = b; + int i; + + /* sort by type, symbol index and addend */ + i = cmp_3way(ELF64_R_TYPE(x->r_info), ELF64_R_TYPE(y->r_info)); + if (i == 0) + i = cmp_3way(ELF64_R_SYM(x->r_info), ELF64_R_SYM(y->r_info)); + if (i == 0) + i = cmp_3way(x->r_addend, y->r_addend); + return i; +} + +static bool duplicate_rel(const Elf64_Rela *rela, int num) +{ + /* + * Entries are sorted by type, symbol index and addend. That means + * that, if a duplicate entry exists, it must be in the preceding + * slot. + */ + return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0; +} + +static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num) +{ + unsigned int ret = 0; + Elf64_Sym *s; + int i; + + for (i = 0; i < num; i++) { + switch (ELF64_R_TYPE(rela[i].r_info)) { + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + /* + * We only have to consider branch targets that resolve + * to undefined symbols. This is not simply a heuristic, + * it is a fundamental limitation, since the PLT itself + * is part of the module, and needs to be within 128 MB + * as well, so modules can never grow beyond that limit. + */ + s = syms + ELF64_R_SYM(rela[i].r_info); + if (s->st_shndx != SHN_UNDEF) + break; + + /* + * Jump relocations with non-zero addends against + * undefined symbols are supported by the ELF spec, but + * do not occur in practice (e.g., 'jump n bytes past + * the entry point of undefined function symbol f'). + * So we need to support them, but there is no need to + * take them into consideration when trying to optimize + * this code. So let's only check for duplicates when + * the addend is zero: this allows us to record the PLT + * entry address in the symbol table itself, rather than + * having to search the list for duplicates each time we + * emit one. + */ + if (rela[i].r_addend != 0 || !duplicate_rel(rela, i)) + ret++; + break; + } + } + return ret; +} + +int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *mod) +{ + unsigned long plt_max_entries = 0; + Elf64_Sym *syms = NULL; + int i; + + /* + * Find the empty .plt section so we can expand it to store the PLT + * entries. Record the symtab address as well. + */ + for (i = 0; i < ehdr->e_shnum; i++) { + if (strcmp(".plt", secstrings + sechdrs[i].sh_name) == 0) + mod->arch.plt = sechdrs + i; + else if (sechdrs[i].sh_type == SHT_SYMTAB) + syms = (Elf64_Sym *)sechdrs[i].sh_addr; + } + + if (!mod->arch.plt) { + pr_err("%s: module PLT section missing\n", mod->name); + return -ENOEXEC; + } + if (!syms) { + pr_err("%s: module symtab section missing\n", mod->name); + return -ENOEXEC; + } + + for (i = 0; i < ehdr->e_shnum; i++) { + Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset; + int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela); + Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info; + + if (sechdrs[i].sh_type != SHT_RELA) + continue; + + /* ignore relocations that operate on non-exec sections */ + if (!(dstsec->sh_flags & SHF_EXECINSTR)) + continue; + + /* sort by type, symbol index and addend */ + sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL); + + plt_max_entries += count_plts(syms, rels, numrels); + } + + mod->arch.plt->sh_type = SHT_NOBITS; + mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.plt->sh_size = plt_max_entries * sizeof(struct plt_entry); + mod->arch.plt_num_entries = 0; + mod->arch.plt_max_entries = plt_max_entries; + return 0; +} diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 93e970231ca9..a9dde97f5ca5 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -38,6 +38,21 @@ void *module_alloc(unsigned long size) GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); + if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && + !IS_ENABLED(CONFIG_KASAN)) + /* + * KASAN can only deal with module allocations being served + * from the reserved module region, since the remainder of + * the vmalloc region is already backed by zero shadow pages, + * and punching holes into it is non-trivial. Since the module + * region is not randomized when KASAN is enabled, it is even + * less likely that the module region gets exhausted, so we + * can simply omit this fallback in that case. + */ + p = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START, + VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, + NUMA_NO_NODE, __builtin_return_address(0)); + if (p && (kasan_module_alloc(p, size) < 0)) { vfree(p); return NULL; @@ -361,6 +376,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_CALL26: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, AARCH64_INSN_IMM_26); + + if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && + ovf == -ERANGE) { + val = module_emit_plt_entry(me, &rel[i], sym); + ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, + 26, AARCH64_INSN_IMM_26); + } break; default: diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds new file mode 100644 index 000000000000..8949f6c6f729 --- /dev/null +++ b/arch/arm64/kernel/module.lds @@ -0,0 +1,3 @@ +SECTIONS { + .plt (NOLOAD) : { BYTE(0) } +} From 9bd7f88a1dd7b6c513b6f7cf39154e69d0cbf62e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 26 Dec 2015 13:48:02 +0100 Subject: [PATCH 774/797] arm64: avoid R_AARCH64_ABS64 relocations for Image header fields Unfortunately, the current way of using the linker to emit build time constants into the Image header will no longer work once we switch to the use of PIE executables. The reason is that such constants are emitted into the binary using R_AARCH64_ABS64 relocations, which are resolved at runtime, not at build time, and the places targeted by those relocations will contain zeroes before that. So refactor the endian swapping linker script constant generation code so that it emits the upper and lower 32-bit words separately. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 6ad1fe5d9077a1ab40bf74b61994d2e770b00b14) Signed-off-by: Alex Shi --- arch/arm64/include/asm/assembler.h | 11 ++++++++++ arch/arm64/kernel/head.S | 6 +++--- arch/arm64/kernel/image.h | 32 ++++++++++++++++++------------ 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index bb7b72734c24..ba5aff6c830e 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -215,4 +215,15 @@ lr .req x30 // link register .size __pi_##x, . - x; \ ENDPROC(x) + /* + * Emit a 64-bit absolute little endian symbol reference in a way that + * ensures that it will be resolved at build time, even when building a + * PIE binary. This requires cooperation from the linker script, which + * must emit the lo32/hi32 halves individually. + */ + .macro le64sym, sym + .long \sym\()_lo32 + .long \sym\()_hi32 + .endm + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 05b98289093e..f076debf392d 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -83,9 +83,9 @@ efi_head: b stext // branch to kernel start, magic .long 0 // reserved #endif - .quad _kernel_offset_le // Image load offset from start of RAM, little-endian - .quad _kernel_size_le // Effective size of kernel image, little-endian - .quad _kernel_flags_le // Informative flags, little-endian + le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian + le64sym _kernel_size_le // Effective size of kernel image, little-endian + le64sym _kernel_flags_le // Informative flags, little-endian .quad 0 // reserved .quad 0 // reserved .quad 0 // reserved diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index c9c62cab25a4..db1bf57948f1 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -26,21 +26,27 @@ * There aren't any ELF relocations we can use to endian-swap values known only * at link time (e.g. the subtraction of two symbol addresses), so we must get * the linker to endian-swap certain values before emitting them. + * + * Note that, in order for this to work when building the ELF64 PIE executable + * (for KASLR), these values should not be referenced via R_AARCH64_ABS64 + * relocations, since these are fixed up at runtime rather than at build time + * when PIE is in effect. So we need to split them up in 32-bit high and low + * words. */ #ifdef CONFIG_CPU_BIG_ENDIAN -#define DATA_LE64(data) \ - ((((data) & 0x00000000000000ff) << 56) | \ - (((data) & 0x000000000000ff00) << 40) | \ - (((data) & 0x0000000000ff0000) << 24) | \ - (((data) & 0x00000000ff000000) << 8) | \ - (((data) & 0x000000ff00000000) >> 8) | \ - (((data) & 0x0000ff0000000000) >> 24) | \ - (((data) & 0x00ff000000000000) >> 40) | \ - (((data) & 0xff00000000000000) >> 56)) +#define DATA_LE32(data) \ + ((((data) & 0x000000ff) << 24) | \ + (((data) & 0x0000ff00) << 8) | \ + (((data) & 0x00ff0000) >> 8) | \ + (((data) & 0xff000000) >> 24)) #else -#define DATA_LE64(data) ((data) & 0xffffffffffffffff) +#define DATA_LE32(data) ((data) & 0xffffffff) #endif +#define DEFINE_IMAGE_LE64(sym, data) \ + sym##_lo32 = DATA_LE32((data) & 0xffffffff); \ + sym##_hi32 = DATA_LE32((data) >> 32) + #ifdef CONFIG_CPU_BIG_ENDIAN #define __HEAD_FLAG_BE 1 #else @@ -61,9 +67,9 @@ * endian swapped in head.S, all are done here for consistency. */ #define HEAD_SYMBOLS \ - _kernel_size_le = DATA_LE64(_end - _text); \ - _kernel_offset_le = DATA_LE64(TEXT_OFFSET); \ - _kernel_flags_le = DATA_LE64(__HEAD_FLAGS); + DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text); \ + DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \ + DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS); #ifdef CONFIG_EFI From 63f9fbe469f17deee50491bc293bb3d2843f3e4a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 26 Dec 2015 12:46:40 +0100 Subject: [PATCH 775/797] arm64: avoid dynamic relocations in early boot code Before implementing KASLR for arm64 by building a self-relocating PIE executable, we have to ensure that values we use before the relocation routine is executed are not subject to dynamic relocation themselves. This applies not only to virtual addresses, but also to values that are supplied by the linker at build time and relocated using R_AARCH64_ABS64 relocations. So instead, use assemble time constants, or force the use of static relocations by folding the constants into the instructions. Reviewed-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 2bf31a4a05f5b00f37d65ba029d36a0230286cb7) Signed-off-by: Alex Shi --- arch/arm64/kernel/efi-entry.S | 2 +- arch/arm64/kernel/head.S | 39 +++++++++++++++++++++++------------ 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index a773db92908b..f82036e02485 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -61,7 +61,7 @@ ENTRY(entry) */ mov x20, x0 // DTB address ldr x0, [sp, #16] // relocated _text address - ldr x21, =stext_offset + movz x21, #:abs_g0:stext_offset add x21, x0, x21 /* diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index f076debf392d..4cad8f9f2268 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -67,12 +67,11 @@ * in the entry routines. */ __HEAD - +_head: /* * DO NOT MODIFY. Image header expected by Linux boot-loaders. */ #ifdef CONFIG_EFI -efi_head: /* * This add instruction has no meaningful effect except that * its opcode forms the magic "MZ" signature required by UEFI. @@ -94,14 +93,14 @@ efi_head: .byte 0x4d .byte 0x64 #ifdef CONFIG_EFI - .long pe_header - efi_head // Offset to the PE header. + .long pe_header - _head // Offset to the PE header. #else .word 0 // reserved #endif #ifdef CONFIG_EFI .globl __efistub_stext_offset - .set __efistub_stext_offset, stext - efi_head + .set __efistub_stext_offset, stext - _head .align 3 pe_header: .ascii "PE" @@ -124,7 +123,7 @@ optional_header: .long _end - stext // SizeOfCode .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData - .long __efistub_entry - efi_head // AddressOfEntryPoint + .long __efistub_entry - _head // AddressOfEntryPoint .long __efistub_stext_offset // BaseOfCode extra_header_fields: @@ -139,7 +138,7 @@ extra_header_fields: .short 0 // MinorSubsystemVersion .long 0 // Win32VersionValue - .long _end - efi_head // SizeOfImage + .long _end - _head // SizeOfImage // Everything before the kernel image is considered part of the header .long __efistub_stext_offset // SizeOfHeaders @@ -219,11 +218,13 @@ ENTRY(stext) * On return, the CPU will be ready for the MMU to be turned on and * the TCR will have been set. */ - ldr x27, =__mmap_switched // address to jump to after + ldr x27, 0f // address to jump to after // MMU has been enabled adr_l lr, __enable_mmu // return (PIC) address b __cpu_setup // initialise processor ENDPROC(stext) + .align 3 +0: .quad __mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR /* * Preserve the arguments passed by the bootloader in x0 .. x3 @@ -391,7 +392,8 @@ __create_page_tables: mov x0, x26 // swapper_pg_dir ldr x5, =KIMAGE_VADDR create_pgd_entry x0, x5, x3, x6 - ldr x6, =KERNEL_END // __va(KERNEL_END) + ldr w6, kernel_img_size + add x6, x6, x5 mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 @@ -408,6 +410,9 @@ __create_page_tables: mov lr, x27 ret ENDPROC(__create_page_tables) + +kernel_img_size: + .long _end - (_head - TEXT_OFFSET) .ltorg /* @@ -415,6 +420,10 @@ ENDPROC(__create_page_tables) */ .set initial_sp, init_thread_union + THREAD_START_SP __mmap_switched: + adr_l x8, vectors // load VBAR_EL1 with virtual + msr vbar_el1, x8 // vector table address + isb + // Clear BSS adr_l x0, __bss_start mov x1, xzr @@ -610,13 +619,19 @@ ENTRY(secondary_startup) adrp x26, swapper_pg_dir bl __cpu_setup // initialise processor - ldr x21, =secondary_data - ldr x27, =__secondary_switched // address to jump to after enabling the MMU + ldr x8, =KIMAGE_VADDR + ldr w9, 0f + sub x27, x8, w9, sxtw // address to jump to after enabling the MMU b __enable_mmu ENDPROC(secondary_startup) +0: .long (_text - TEXT_OFFSET) - __secondary_switched ENTRY(__secondary_switched) - ldr x0, [x21] // get secondary_data.stack + adr_l x5, vectors + msr vbar_el1, x5 + isb + + ldr_l x0, secondary_data // get secondary_data.stack mov sp, x0 and x0, x0, #~(THREAD_SIZE - 1) msr sp_el0, x0 // save thread_info @@ -641,8 +656,6 @@ __enable_mmu: ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED b.ne __no_granule_support - ldr x5, =vectors - msr vbar_el1, x5 msr ttbr0_el1, x25 // load TTBR0 msr ttbr1_el1, x26 // load TTBR1 isb From 632fd2f00cb569f65adc7d2c67282b70a6af8634 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 11 Jan 2016 17:08:26 +0100 Subject: [PATCH 776/797] arm64: make asm/elf.h available to asm files This reshuffles some code in asm/elf.h and puts a #ifndef __ASSEMBLY__ around its C definitions so that the CPP defines can be used in asm source files as well. Acked-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 4a2e034e5cdadde4c712f79bdd57d1455c76a3db) Signed-off-by: Alex Shi --- arch/arm64/include/asm/elf.h | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index faad6df49e5b..435f55952e1f 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -24,15 +24,6 @@ #include #include -typedef unsigned long elf_greg_t; - -#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t)) -#define ELF_CORE_COPY_REGS(dest, regs) \ - *(struct user_pt_regs *)&(dest) = (regs)->user_regs; - -typedef elf_greg_t elf_gregset_t[ELF_NGREG]; -typedef struct user_fpsimd_state elf_fpregset_t; - /* * AArch64 static relocation types. */ @@ -127,6 +118,17 @@ typedef struct user_fpsimd_state elf_fpregset_t; */ #define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) +#ifndef __ASSEMBLY__ + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t)) +#define ELF_CORE_COPY_REGS(dest, regs) \ + *(struct user_pt_regs *)&(dest) = (regs)->user_regs; + +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; +typedef struct user_fpsimd_state elf_fpregset_t; + /* * When the program starts, a1 contains a pointer to a function to be * registered with atexit, as per the SVR4 ABI. A value of 0 means we have no @@ -186,4 +188,6 @@ extern int aarch32_setup_vectors_page(struct linux_binprm *bprm, #endif /* CONFIG_COMPAT */ +#endif /* !__ASSEMBLY__ */ + #endif From 6ef77fd5962d90de7957ef9eafddd659c7375a4f Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 2 Feb 2016 15:53:59 +0000 Subject: [PATCH 777/797] arm64: futex.h: Add missing PAN toggling futex.h's futex_atomic_cmpxchg_inatomic() does not use the __futex_atomic_op() macro and needs its own PAN toggling. This was missed when the feature was implemented. Fixes: 338d4f49d6f ("arm64: kernel: Add support for Privileged Access Never") Signed-off-by: James Morse Signed-off-by: Will Deacon (cherry picked from commit 811d61e384e24759372bb3f01772f3744b0a8327) Signed-off-by: Alex Shi --- arch/arm64/include/asm/futex.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 007a69fc4f40..5f3ab8c1db55 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -121,6 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; asm volatile("// futex_atomic_cmpxchg_inatomic\n" +ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN) " prfm pstl1strm, %2\n" "1: ldxr %w1, %2\n" " sub %w3, %w1, %w4\n" @@ -137,6 +138,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, " .align 3\n" " .quad 1b, 4b, 2b, 4b\n" " .popsection\n" +ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) : "r" (oldval), "r" (newval), "Ir" (-EFAULT) : "memory"); From afc69bdc2ac79942a59f67296738ba8e85e784fb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 10 Jan 2016 11:42:28 +0100 Subject: [PATCH 778/797] scripts/sortextable: add support for ET_DYN binaries Add support to scripts/sortextable for handling relocatable (PIE) executables, whose ELF type is ET_DYN, not ET_EXEC. Other than adding support for the new type, no changes are needed. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 7b957b6e603623ef8b2e8222fa94b976df613fa2) Signed-off-by: Alex Shi --- scripts/sortextable.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/sortextable.c b/scripts/sortextable.c index c2423d913b46..ecefa0a634f8 100644 --- a/scripts/sortextable.c +++ b/scripts/sortextable.c @@ -266,9 +266,9 @@ do_file(char const *const fname) break; } /* end switch */ if (memcmp(ELFMAG, ehdr->e_ident, SELFMAG) != 0 - || r2(&ehdr->e_type) != ET_EXEC + || (r2(&ehdr->e_type) != ET_EXEC && r2(&ehdr->e_type) != ET_DYN) || ehdr->e_ident[EI_VERSION] != EV_CURRENT) { - fprintf(stderr, "unrecognized ET_EXEC file %s\n", fname); + fprintf(stderr, "unrecognized ET_EXEC/ET_DYN file %s\n", fname); fail_file(); } @@ -304,7 +304,7 @@ do_file(char const *const fname) if (r2(&ehdr->e_ehsize) != sizeof(Elf32_Ehdr) || r2(&ehdr->e_shentsize) != sizeof(Elf32_Shdr)) { fprintf(stderr, - "unrecognized ET_EXEC file: %s\n", fname); + "unrecognized ET_EXEC/ET_DYN file: %s\n", fname); fail_file(); } do32(ehdr, fname, custom_sort); @@ -314,7 +314,7 @@ do_file(char const *const fname) if (r2(&ghdr->e_ehsize) != sizeof(Elf64_Ehdr) || r2(&ghdr->e_shentsize) != sizeof(Elf64_Shdr)) { fprintf(stderr, - "unrecognized ET_EXEC file: %s\n", fname); + "unrecognized ET_EXEC/ET_DYN file: %s\n", fname); fail_file(); } do64(ghdr, fname, custom_sort); From 5f1dcb6d02a6f6dd104539a19f7db8a189ba4bb8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 1 Jan 2016 12:39:09 +0100 Subject: [PATCH 779/797] extable: add support for relative extables to search and sort routines This adds support to the generic search_extable() and sort_extable() implementations for dealing with exception table entries whose fields contain relative offsets rather than absolute addresses. Acked-by: Helge Deller Acked-by: Heiko Carstens Acked-by: H. Peter Anvin Acked-by: Tony Luck Acked-by: Will Deacon Acked-by: Richard Henderson Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit a272858a3c1ecd4a935ba23c66668f81214bd110) Signed-off-by: Alex Shi --- lib/extable.c | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/lib/extable.c b/lib/extable.c index 4cac81ec225e..0be02ad561e9 100644 --- a/lib/extable.c +++ b/lib/extable.c @@ -14,7 +14,37 @@ #include #include +#ifndef ARCH_HAS_RELATIVE_EXTABLE +#define ex_to_insn(x) ((x)->insn) +#else +static inline unsigned long ex_to_insn(const struct exception_table_entry *x) +{ + return (unsigned long)&x->insn + x->insn; +} +#endif + #ifndef ARCH_HAS_SORT_EXTABLE +#ifndef ARCH_HAS_RELATIVE_EXTABLE +#define swap_ex NULL +#else +static void swap_ex(void *a, void *b, int size) +{ + struct exception_table_entry *x = a, *y = b, tmp; + int delta = b - a; + + tmp = *x; + x->insn = y->insn + delta; + y->insn = tmp.insn - delta; + +#ifdef swap_ex_entry_fixup + swap_ex_entry_fixup(x, y, tmp, delta); +#else + x->fixup = y->fixup + delta; + y->fixup = tmp.fixup - delta; +#endif +} +#endif /* ARCH_HAS_RELATIVE_EXTABLE */ + /* * The exception table needs to be sorted so that the binary * search that we use to find entries in it works properly. @@ -26,9 +56,9 @@ static int cmp_ex(const void *a, const void *b) const struct exception_table_entry *x = a, *y = b; /* avoid overflow */ - if (x->insn > y->insn) + if (ex_to_insn(x) > ex_to_insn(y)) return 1; - if (x->insn < y->insn) + if (ex_to_insn(x) < ex_to_insn(y)) return -1; return 0; } @@ -37,7 +67,7 @@ void sort_extable(struct exception_table_entry *start, struct exception_table_entry *finish) { sort(start, finish - start, sizeof(struct exception_table_entry), - cmp_ex, NULL); + cmp_ex, swap_ex); } #ifdef CONFIG_MODULES @@ -48,13 +78,15 @@ void sort_extable(struct exception_table_entry *start, void trim_init_extable(struct module *m) { /*trim the beginning*/ - while (m->num_exentries && within_module_init(m->extable[0].insn, m)) { + while (m->num_exentries && + within_module_init(ex_to_insn(&m->extable[0]), m)) { m->extable++; m->num_exentries--; } /*trim the end*/ while (m->num_exentries && - within_module_init(m->extable[m->num_exentries-1].insn, m)) + within_module_init(ex_to_insn(&m->extable[m->num_exentries - 1]), + m)) m->num_exentries--; } #endif /* CONFIG_MODULES */ @@ -81,13 +113,13 @@ search_extable(const struct exception_table_entry *first, * careful, the distance between value and insn * can be larger than MAX_LONG: */ - if (mid->insn < value) + if (ex_to_insn(mid) < value) first = mid + 1; - else if (mid->insn > value) + else if (ex_to_insn(mid) > value) last = mid - 1; else return mid; - } - return NULL; + } + return NULL; } #endif From 525787eea48f8c6a1630e6dab07313896cfc6b8d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 1 Jan 2016 15:02:12 +0100 Subject: [PATCH 780/797] arm64: switch to relative exception tables Instead of using absolute addresses for both the exception location and the fixup, use offsets relative to the exception table entry values. Not only does this cut the size of the exception table in half, it is also a prerequisite for KASLR, since absolute exception table entries are subject to dynamic relocation, which is incompatible with the sorting of the exception table that occurs at build time. This patch also introduces the _ASM_EXTABLE preprocessor macro (which exists on x86 as well) and its _asm_extable assembly counterpart, as shorthands to emit exception table entries. Acked-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 6c94f27ac847ff8ef15b3da5b200574923bd6287) Signed-off-by: Alex Shi --- arch/arm64/include/asm/alternative.h | 19 +++++----------- arch/arm64/include/asm/assembler.h | 15 +++++++++---- arch/arm64/include/asm/futex.h | 12 ++++------ arch/arm64/include/asm/uaccess.h | 30 +++++++++++++------------ arch/arm64/include/asm/word-at-a-time.h | 7 +++--- arch/arm64/kernel/armv8_deprecated.c | 7 ++---- arch/arm64/mm/extable.c | 2 +- scripts/sortextable.c | 2 +- 8 files changed, 43 insertions(+), 51 deletions(-) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index a9fc24ec1aa9..beccbdefa106 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -157,11 +157,8 @@ void apply_alternatives(void *start, size_t length); add \addr, \addr, \post_inc; alternative_endif - .section __ex_table,"a"; - .align 3; - .quad 8888b,\l; - .quad 8889b,\l; - .previous; + _asm_extable 8888b,\l; + _asm_extable 8889b,\l; .endm .macro uao_stp l, reg1, reg2, addr, post_inc @@ -175,11 +172,8 @@ void apply_alternatives(void *start, size_t length); add \addr, \addr, \post_inc; alternative_endif - .section __ex_table,"a"; - .align 3; - .quad 8888b,\l; - .quad 8889b,\l; - .previous + _asm_extable 8888b,\l; + _asm_extable 8889b,\l; .endm .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc @@ -191,10 +185,7 @@ void apply_alternatives(void *start, size_t length); add \addr, \addr, \post_inc; alternative_endif - .section __ex_table,"a"; - .align 3; - .quad 8888b,\l; - .previous + _asm_extable 8888b,\l; .endm #else .macro uao_ldp l, reg1, reg2, addr, post_inc diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ba5aff6c830e..70f7b9e04598 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -94,12 +94,19 @@ dmb \opt .endm +/* + * Emit an entry into the exception table + */ + .macro _asm_extable, from, to + .pushsection __ex_table, "a" + .align 3 + .long (\from - .), (\to - .) + .popsection + .endm + #define USER(l, x...) \ 9999: x; \ - .section __ex_table,"a"; \ - .align 3; \ - .quad 9999b,l; \ - .previous + _asm_extable 9999b, l /* * Register aliases. diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 5f3ab8c1db55..f2585cdd32c2 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -42,10 +42,8 @@ "4: mov %w0, %w5\n" \ " b 3b\n" \ " .popsection\n" \ -" .pushsection __ex_table,\"a\"\n" \ -" .align 3\n" \ -" .quad 1b, 4b, 2b, 4b\n" \ -" .popsection\n" \ + _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE(2b, 4b) \ ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ @@ -134,10 +132,8 @@ ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN) "4: mov %w0, %w6\n" " b 3b\n" " .popsection\n" -" .pushsection __ex_table,\"a\"\n" -" .align 3\n" -" .quad 1b, 4b, 2b, 4b\n" -" .popsection\n" + _ASM_EXTABLE(1b, 4b) + _ASM_EXTABLE(2b, 4b) ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) : "r" (oldval), "r" (newval), "Ir" (-EFAULT) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 16ba0d5c9740..0685d74572af 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -36,11 +36,11 @@ #define VERIFY_WRITE 1 /* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is - * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. + * The exception table consists of pairs of relative offsets: the first + * is the relative offset to an instruction that is allowed to fault, + * and the second is the relative offset at which the program should + * continue. No registers are modified, so it is entirely up to the + * continuation code to figure out what to do. * * All the routines below use bits of fixup code that are out of line * with the main instruction path. This means when everything is well, @@ -50,9 +50,11 @@ struct exception_table_entry { - unsigned long insn, fixup; + int insn, fixup; }; +#define ARCH_HAS_RELATIVE_EXTABLE + extern int fixup_exception(struct pt_regs *regs); #define KERNEL_DS (-1UL) @@ -115,6 +117,12 @@ static inline void set_fs(mm_segment_t fs) #define access_ok(type, addr, size) __range_ok(addr, size) #define user_addr_max get_fs +#define _ASM_EXTABLE(from, to) \ + " .pushsection __ex_table, \"a\"\n" \ + " .align 3\n" \ + " .long (" #from " - .), (" #to " - .)\n" \ + " .popsection\n" + /* * The "__xxx" versions of the user access functions do not verify the address * space - it must have been done previously with a separate "access_ok()" @@ -134,10 +142,7 @@ static inline void set_fs(mm_segment_t fs) " mov %1, #0\n" \ " b 2b\n" \ " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .quad 1b, 3b\n" \ - " .previous" \ + _ASM_EXTABLE(1b, 3b) \ : "+r" (err), "=&r" (x) \ : "r" (addr), "i" (-EFAULT)) @@ -206,10 +211,7 @@ do { \ "3: mov %w0, %3\n" \ " b 2b\n" \ " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .quad 1b, 3b\n" \ - " .previous" \ + _ASM_EXTABLE(1b, 3b) \ : "+r" (err) \ : "r" (x), "r" (addr), "i" (-EFAULT)) diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index aab5bf09e9d9..2b79b8a89457 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -16,6 +16,8 @@ #ifndef __ASM_WORD_AT_A_TIME_H #define __ASM_WORD_AT_A_TIME_H +#include + #ifndef __AARCH64EB__ #include @@ -81,10 +83,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) #endif " b 2b\n" " .popsection\n" - " .pushsection __ex_table,\"a\"\n" - " .align 3\n" - " .quad 1b, 3b\n" - " .popsection" + _ASM_EXTABLE(1b, 3b) : "=&r" (ret), "=&r" (offset) : "r" (addr), "Q" (*(unsigned long *)addr)); diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 3e01207917b1..c37202c0c838 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -297,11 +297,8 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table) "4: mov %w0, %w5\n" \ " b 3b\n" \ " .popsection" \ - " .pushsection __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .quad 0b, 4b\n" \ - " .quad 1b, 4b\n" \ - " .popsection\n" \ + _ASM_EXTABLE(0b, 4b) \ + _ASM_EXTABLE(1b, 4b) \ ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) \ : "=&r" (res), "+r" (data), "=&r" (temp) \ diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index 79444279ba8c..81acd4706878 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs) fixup = search_exception_tables(instruction_pointer(regs)); if (fixup) - regs->pc = fixup->fixup; + regs->pc = (unsigned long)&fixup->fixup + fixup->fixup; return fixup != NULL; } diff --git a/scripts/sortextable.c b/scripts/sortextable.c index ecefa0a634f8..19d83647846c 100644 --- a/scripts/sortextable.c +++ b/scripts/sortextable.c @@ -282,12 +282,12 @@ do_file(char const *const fname) case EM_386: case EM_X86_64: case EM_S390: + case EM_AARCH64: custom_sort = sort_relative_table; break; case EM_ARCOMPACT: case EM_ARCV2: case EM_ARM: - case EM_AARCH64: case EM_MICROBLAZE: case EM_MIPS: case EM_XTENSA: From 89328d41aa99df071dadb43c722cd88ffafc77e2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 26 Jan 2016 09:13:44 +0100 Subject: [PATCH 781/797] arm64: add support for building vmlinux as a relocatable PIE binary This implements CONFIG_RELOCATABLE, which links the final vmlinux image with a dynamic relocation section, allowing the early boot code to perform a relocation to a different virtual address at runtime. This is a prerequisite for KASLR (CONFIG_RANDOMIZE_BASE). Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 1e48ef7fcc374051730381a2a05da77eb4eafdb0) Signed-off-by: Alex Shi --- arch/arm64/Kconfig | 11 +++++++++++ arch/arm64/Makefile | 4 ++++ arch/arm64/include/asm/elf.h | 2 ++ arch/arm64/kernel/head.S | 32 ++++++++++++++++++++++++++++++++ arch/arm64/kernel/vmlinux.lds.S | 16 ++++++++++++++++ 5 files changed, 65 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 22db20491733..ac1475f559e6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -737,6 +737,17 @@ config ARM64_MODULE_PLTS select ARM64_MODULE_CMODEL_LARGE select HAVE_MOD_ARCH_SPECIFIC +config RELOCATABLE + bool + help + This builds the kernel as a Position Independent Executable (PIE), + which retains all relocation metadata required to relocate the + kernel binary at runtime to a different virtual address than the + address it was linked at. + Since AArch64 uses the RELA relocation format, this requires a + relocation pass at runtime even if the kernel is loaded at the + same address it was linked at. + endmenu menu "Boot options" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 71054a38decf..304dcc3da06f 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -15,6 +15,10 @@ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S GZFLAGS :=-9 +ifneq ($(CONFIG_RELOCATABLE),) +LDFLAGS_vmlinux += -pie +endif + KBUILD_DEFCONFIG := defconfig # Check for binutils support for specific extensions diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 435f55952e1f..24ed037f09fd 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -77,6 +77,8 @@ #define R_AARCH64_MOVW_PREL_G2_NC 292 #define R_AARCH64_MOVW_PREL_G3 293 +#define R_AARCH64_RELATIVE 1027 + /* * These are used to set parameters in the core dumps. */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4cad8f9f2268..4e69412a7323 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -432,6 +433,37 @@ __mmap_switched: bl __pi_memset dsb ishst // Make zero page visible to PTW +#ifdef CONFIG_RELOCATABLE + + /* + * Iterate over each entry in the relocation table, and apply the + * relocations in place. + */ + adr_l x8, __dynsym_start // start of symbol table + adr_l x9, __reloc_start // start of reloc table + adr_l x10, __reloc_end // end of reloc table + +0: cmp x9, x10 + b.hs 2f + ldp x11, x12, [x9], #24 + ldr x13, [x9, #-8] + cmp w12, #R_AARCH64_RELATIVE + b.ne 1f + str x13, [x11] + b 0b + +1: cmp w12, #R_AARCH64_ABS64 + b.ne 0b + add x12, x12, x12, lsl #1 // symtab offset: 24x top word + add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word + ldr x15, [x12, #8] // Elf64_Sym::st_value + add x15, x13, x15 + str x15, [x11] + b 0b + +2: +#endif + adr_l sp, initial_sp, x4 mov x4, sp and x4, x4, #~(THREAD_SIZE - 1) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 282e3e64a17e..e3f6cd740ea3 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -87,6 +87,7 @@ SECTIONS EXIT_CALL *(.discard) *(.discard.*) + *(.interp .dynamic) } . = KIMAGE_VADDR + TEXT_OFFSET; @@ -149,6 +150,21 @@ SECTIONS .altinstr_replacement : { *(.altinstr_replacement) } + .rela : ALIGN(8) { + __reloc_start = .; + *(.rela .rela*) + __reloc_end = .; + } + .dynsym : ALIGN(8) { + __dynsym_start = .; + *(.dynsym) + } + .dynstr : { + *(.dynstr) + } + .hash : { + *(.hash) + } . = ALIGN(PAGE_SIZE); __init_end = .; From d0a12e9199c75cad71361f746ac40e4612945a43 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 26 Jan 2016 14:12:01 +0100 Subject: [PATCH 782/797] arm64: add support for kernel ASLR This adds support for KASLR is implemented, based on entropy provided by the bootloader in the /chosen/kaslr-seed DT property. Depending on the size of the address space (VA_BITS) and the page size, the entropy in the virtual displacement is up to 13 bits (16k/2 levels) and up to 25 bits (all 4 levels), with the sidenote that displacements that result in the kernel image straddling a 1GB/32MB/512MB alignment boundary (for 4KB/16KB/64KB granule kernels, respectively) are not allowed, and will be rounded up to an acceptable value. If CONFIG_RANDOMIZE_MODULE_REGION_FULL is enabled, the module region is randomized independently from the core kernel. This makes it less likely that the location of core kernel data structures can be determined by an adversary, but causes all function calls from modules into the core kernel to be resolved via entries in the module PLTs. If CONFIG_RANDOMIZE_MODULE_REGION_FULL is not enabled, the module region is randomized by choosing a page aligned 128 MB region inside the interval [_etext - 128 MB, _stext + 128 MB). This gives between 10 and 14 bits of entropy (depending on page size), independently of the kernel randomization, but still guarantees that modules are within the range of relative branch and jump instructions (with the caveat that, since the module region is shared with other uses of the vmalloc area, modules may need to be loaded further away if the module region is exhausted) Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit f80fb3a3d50843a401dac4b566b3b131da8077a2) Signed-off-by: Alex Shi --- arch/arm64/Kconfig | 29 ++++++ arch/arm64/include/asm/memory.h | 5 +- arch/arm64/include/asm/module.h | 6 ++ arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/head.S | 59 +++++++++-- arch/arm64/kernel/kaslr.c | 173 ++++++++++++++++++++++++++++++++ arch/arm64/kernel/module.c | 3 +- arch/arm64/kernel/setup.c | 29 ++++++ arch/arm64/mm/kasan_init.c | 17 +++- arch/arm64/mm/mmu.c | 29 ++++-- 10 files changed, 329 insertions(+), 22 deletions(-) create mode 100644 arch/arm64/kernel/kaslr.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ac1475f559e6..b311ac23c989 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -748,6 +748,35 @@ config RELOCATABLE relocation pass at runtime even if the kernel is loaded at the same address it was linked at. +config RANDOMIZE_BASE + bool "Randomize the address of the kernel image" + select ARM64_MODULE_PLTS + select RELOCATABLE + help + Randomizes the virtual address at which the kernel image is + loaded, as a security feature that deters exploit attempts + relying on knowledge of the location of kernel internals. + + It is the bootloader's job to provide entropy, by passing a + random u64 value in /chosen/kaslr-seed at kernel entry. + + If unsure, say N. + +config RANDOMIZE_MODULE_REGION_FULL + bool "Randomize the module region independently from the core kernel" + depends on RANDOMIZE_BASE + default y + help + Randomizes the location of the module region without considering the + location of the core kernel. This way, it is impossible for modules + to leak information about the location of core kernel data structures + but it does imply that function calls between modules and the core + kernel will need to be resolved via veneers in the module PLT. + + When this option is not set, the module region will be randomized over + a limited range that contains the [_stext, _etext] interval of the + core kernel, so branch relocations are always in range. + endmenu menu "Boot options" diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index eb798156cf56..5f8667a99e41 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -53,7 +53,7 @@ #define KIMAGE_VADDR (MODULES_END) #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) #define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE) -#define MODULES_VSIZE (SZ_64M) +#define MODULES_VSIZE (SZ_128M) #define PCI_IO_END (PAGE_OFFSET - SZ_2M) #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) #define FIXADDR_TOP (PCI_IO_START - SZ_2M) @@ -139,6 +139,9 @@ extern phys_addr_t memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) +/* the virtual base of the kernel image (minus TEXT_OFFSET) */ +extern u64 kimage_vaddr; + /* the offset between the kernel virtual and physical mappings */ extern u64 kimage_voffset; diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 8652fb613304..e12af6754634 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -31,4 +31,10 @@ struct mod_arch_specific { u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, Elf64_Sym *sym); +#ifdef CONFIG_RANDOMIZE_BASE +extern u64 module_alloc_base; +#else +#define module_alloc_base ((u64)_etext - MODULES_VSIZE) +#endif + #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 8d971f9c6ed5..49a2430b0786 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -43,6 +43,7 @@ arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o +arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4e69412a7323..319f896c6e74 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -210,6 +210,7 @@ section_table: ENTRY(stext) bl preserve_boot_args bl el2_setup // Drop to EL1, w20=cpu_boot_mode + mov x23, xzr // KASLR offset, defaults to 0 adrp x24, __PHYS_OFFSET bl set_cpu_boot_mode_flag bl __create_page_tables // x25=TTBR0, x26=TTBR1 @@ -313,7 +314,7 @@ ENDPROC(preserve_boot_args) __create_page_tables: adrp x25, idmap_pg_dir adrp x26, swapper_pg_dir - mov x27, lr + mov x28, lr /* * Invalidate the idmap and swapper page tables to avoid potential @@ -392,6 +393,7 @@ __create_page_tables: */ mov x0, x26 // swapper_pg_dir ldr x5, =KIMAGE_VADDR + add x5, x5, x23 // add KASLR displacement create_pgd_entry x0, x5, x3, x6 ldr w6, kernel_img_size add x6, x6, x5 @@ -408,8 +410,7 @@ __create_page_tables: dmb sy bl __inval_cache_range - mov lr, x27 - ret + ret x28 ENDPROC(__create_page_tables) kernel_img_size: @@ -421,6 +422,7 @@ kernel_img_size: */ .set initial_sp, init_thread_union + THREAD_START_SP __mmap_switched: + mov x28, lr // preserve LR adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address isb @@ -449,19 +451,26 @@ __mmap_switched: ldr x13, [x9, #-8] cmp w12, #R_AARCH64_RELATIVE b.ne 1f - str x13, [x11] + add x13, x13, x23 // relocate + str x13, [x11, x23] b 0b 1: cmp w12, #R_AARCH64_ABS64 b.ne 0b add x12, x12, x12, lsl #1 // symtab offset: 24x top word add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word + ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx ldr x15, [x12, #8] // Elf64_Sym::st_value + cmp w14, #-0xf // SHN_ABS (0xfff1) ? + add x14, x15, x23 // relocate + csel x15, x14, x15, ne add x15, x13, x15 - str x15, [x11] + str x15, [x11, x23] b 0b -2: +2: adr_l x8, kimage_vaddr // make relocated kimage_vaddr + dc cvac, x8 // value visible to secondaries + dsb sy // with MMU off #endif adr_l sp, initial_sp, x4 @@ -470,13 +479,23 @@ __mmap_switched: msr sp_el0, x4 // Save thread_info str_l x21, __fdt_pointer, x5 // Save FDT pointer - ldr x4, =KIMAGE_VADDR // Save the offset between + ldr_l x4, kimage_vaddr // Save the offset between sub x4, x4, x24 // the kernel virtual and str_l x4, kimage_voffset, x5 // physical mappings mov x29, #0 #ifdef CONFIG_KASAN bl kasan_early_init +#endif +#ifdef CONFIG_RANDOMIZE_BASE + cbnz x23, 0f // already running randomized? + mov x0, x21 // pass FDT address in x0 + bl kaslr_early_init // parse FDT for KASLR options + cbz x0, 0f // KASLR disabled? just proceed + mov x23, x0 // record KASLR offset + ret x28 // we must enable KASLR, return + // to __enable_mmu() +0: #endif b start_kernel ENDPROC(__mmap_switched) @@ -486,6 +505,10 @@ ENDPROC(__mmap_switched) * hotplug and needs to have the same protections as the text region */ .section ".text","ax" + +ENTRY(kimage_vaddr) + .quad _text - TEXT_OFFSET + /* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. @@ -651,7 +674,7 @@ ENTRY(secondary_startup) adrp x26, swapper_pg_dir bl __cpu_setup // initialise processor - ldr x8, =KIMAGE_VADDR + ldr x8, kimage_vaddr ldr w9, 0f sub x27, x8, w9, sxtw // address to jump to after enabling the MMU b __enable_mmu @@ -684,6 +707,7 @@ ENDPROC(__secondary_switched) */ .section ".idmap.text", "ax" __enable_mmu: + mrs x18, sctlr_el1 // preserve old SCTLR_EL1 value mrs x1, ID_AA64MMFR0_EL1 ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED @@ -701,6 +725,25 @@ __enable_mmu: ic iallu dsb nsh isb +#ifdef CONFIG_RANDOMIZE_BASE + mov x19, x0 // preserve new SCTLR_EL1 value + blr x27 + + /* + * If we return here, we have a KASLR displacement in x23 which we need + * to take into account by discarding the current kernel mapping and + * creating a new one. + */ + msr sctlr_el1, x18 // disable the MMU + isb + bl __create_page_tables // recreate kernel mapping + + msr sctlr_el1, x19 // re-enable the MMU + isb + ic ialluis // flush instructions fetched + isb // via old mapping + add x27, x27, x23 // relocated __mmap_switched +#endif br x27 ENDPROC(__enable_mmu) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c new file mode 100644 index 000000000000..8b32a1f8f09f --- /dev/null +++ b/arch/arm64/kernel/kaslr.c @@ -0,0 +1,173 @@ +/* + * Copyright (C) 2016 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +u64 __read_mostly module_alloc_base; + +static __init u64 get_kaslr_seed(void *fdt) +{ + int node, len; + u64 *prop; + u64 ret; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + return 0; + + prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len); + if (!prop || len != sizeof(u64)) + return 0; + + ret = fdt64_to_cpu(*prop); + *prop = 0; + return ret; +} + +static __init const u8 *get_cmdline(void *fdt) +{ + static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE; + + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { + int node; + const u8 *prop; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + goto out; + + prop = fdt_getprop(fdt, node, "bootargs", NULL); + if (!prop) + goto out; + return prop; + } +out: + return default_cmdline; +} + +extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, + pgprot_t prot); + +/* + * This routine will be executed with the kernel mapped at its default virtual + * address, and if it returns successfully, the kernel will be remapped, and + * start_kernel() will be executed from a randomized virtual offset. The + * relocation will result in all absolute references (e.g., static variables + * containing function pointers) to be reinitialized, and zero-initialized + * .bss variables will be reset to 0. + */ +u64 __init kaslr_early_init(u64 dt_phys) +{ + void *fdt; + u64 seed, offset, mask, module_range; + const u8 *cmdline, *str; + int size; + + /* + * Set a reasonable default for module_alloc_base in case + * we end up running with module randomization disabled. + */ + module_alloc_base = (u64)_etext - MODULES_VSIZE; + + /* + * Try to map the FDT early. If this fails, we simply bail, + * and proceed with KASLR disabled. We will make another + * attempt at mapping the FDT in setup_machine() + */ + early_fixmap_init(); + fdt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); + if (!fdt) + return 0; + + /* + * Retrieve (and wipe) the seed from the FDT + */ + seed = get_kaslr_seed(fdt); + if (!seed) + return 0; + + /* + * Check if 'nokaslr' appears on the command line, and + * return 0 if that is the case. + */ + cmdline = get_cmdline(fdt); + str = strstr(cmdline, "nokaslr"); + if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) + return 0; + + /* + * OK, so we are proceeding with KASLR enabled. Calculate a suitable + * kernel image offset from the seed. Let's place the kernel in the + * lower half of the VMALLOC area (VA_BITS - 2). + * Even if we could randomize at page granularity for 16k and 64k pages, + * let's always round to 2 MB so we don't interfere with the ability to + * map using contiguous PTEs + */ + mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1); + offset = seed & mask; + + /* + * The kernel Image should not extend across a 1GB/32MB/512MB alignment + * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this + * happens, increase the KASLR offset by the size of the kernel image. + */ + if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) != + (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) + offset = (offset + (u64)(_end - _text)) & mask; + + if (IS_ENABLED(CONFIG_KASAN)) + /* + * KASAN does not expect the module region to intersect the + * vmalloc region, since shadow memory is allocated for each + * module at load time, whereas the vmalloc region is shadowed + * by KASAN zero pages. So keep modules out of the vmalloc + * region if KASAN is enabled. + */ + return offset; + + if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { + /* + * Randomize the module region independently from the core + * kernel. This prevents modules from leaking any information + * about the address of the kernel itself, but results in + * branches between modules and the core kernel that are + * resolved via PLTs. (Branches between modules will be + * resolved normally.) + */ + module_range = VMALLOC_END - VMALLOC_START - MODULES_VSIZE; + module_alloc_base = VMALLOC_START; + } else { + /* + * Randomize the module region by setting module_alloc_base to + * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, + * _stext) . This guarantees that the resulting region still + * covers [_stext, _etext], and that all relative branches can + * be resolved without veneers. + */ + module_range = MODULES_VSIZE - (u64)(_etext - _stext); + module_alloc_base = (u64)_etext + offset - MODULES_VSIZE; + } + + /* use the lower 21 bits to randomize the base of the module region */ + module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; + module_alloc_base &= PAGE_MASK; + + return offset; +} diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index a9dde97f5ca5..7f316982ce00 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -34,7 +34,8 @@ void *module_alloc(unsigned long size) { void *p; - p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END, + p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, + module_alloc_base + MODULES_VSIZE, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index cfed56f0ad26..42371f69def3 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -388,3 +388,32 @@ static int __init topology_init(void) return 0; } subsys_initcall(topology_init); + +/* + * Dump out kernel offset information on panic. + */ +static int dump_kernel_offset(struct notifier_block *self, unsigned long v, + void *p) +{ + u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR; + + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) { + pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n", + kaslr_offset, KIMAGE_VADDR); + } else { + pr_emerg("Kernel Offset: disabled\n"); + } + return 0; +} + +static struct notifier_block kernel_offset_notifier = { + .notifier_call = dump_kernel_offset +}; + +static int __init register_kernel_offset_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &kernel_offset_notifier); + return 0; +} +__initcall(register_kernel_offset_dumper); diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 7f10cc91fa8a..56e19d150c21 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -129,12 +129,16 @@ static void __init clear_pgds(unsigned long start, void __init kasan_init(void) { u64 kimg_shadow_start, kimg_shadow_end; + u64 mod_shadow_start, mod_shadow_end; struct memblock_region *reg; int i; kimg_shadow_start = (u64)kasan_mem_to_shadow(_text); kimg_shadow_end = (u64)kasan_mem_to_shadow(_end); + mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR); + mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END); + /* * We are going to perform proper setup of shadow memory. * At first we should unmap early shadow (clear_pgds() call bellow). @@ -158,13 +162,20 @@ void __init kasan_init(void) * with PMD table mappings at the edges of the shadow region for the * kernel image. */ - if (ARM64_SWAPPER_USES_SECTION_MAPS) + if (ARM64_SWAPPER_USES_SECTION_MAPS) { + kimg_shadow_start = round_down(kimg_shadow_start, + SWAPPER_BLOCK_SIZE); kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE); + } kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, - kasan_mem_to_shadow((void *)MODULES_VADDR)); + (void *)mod_shadow_start); kasan_populate_zero_shadow((void *)kimg_shadow_end, - kasan_mem_to_shadow((void *)PAGE_OFFSET)); + kasan_mem_to_shadow((void *)PAGE_OFFSET)); + + if (kimg_shadow_start > mod_shadow_end) + kasan_populate_zero_shadow((void *)mod_shadow_end, + (void *)kimg_shadow_start); for_each_memblock(memory, reg) { void *start = (void *)__phys_to_virt(reg->base); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index fb5c872fe3d6..ff0f5a46b552 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -676,7 +676,8 @@ void __init early_fixmap_init(void) unsigned long addr = FIXADDR_START; pgd = pgd_offset_k(addr); - if (CONFIG_PGTABLE_LEVELS > 3 && !pgd_none(*pgd)) { + if (CONFIG_PGTABLE_LEVELS > 3 && + !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa(bm_pud))) { /* * We only end up here if the kernel mapping and the fixmap * share the top level pgd entry, which should only happen on @@ -733,11 +734,10 @@ void __set_fixmap(enum fixed_addresses idx, } } -void *__init fixmap_remap_fdt(phys_addr_t dt_phys) +void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) { const u64 dt_virt_base = __fix_to_virt(FIX_FDT); - pgprot_t prot = PAGE_KERNEL_RO; - int size, offset; + int offset; void *dt_virt; /* @@ -776,16 +776,27 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) if (fdt_check_header(dt_virt) != 0) return NULL; - size = fdt_totalsize(dt_virt); - if (size > MAX_FDT_SIZE) + *size = fdt_totalsize(dt_virt); + if (*size > MAX_FDT_SIZE) return NULL; - if (offset + size > SWAPPER_BLOCK_SIZE) + if (offset + *size > SWAPPER_BLOCK_SIZE) create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, - round_up(offset + size, SWAPPER_BLOCK_SIZE), prot); + round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot); + + return dt_virt; +} + +void *__init fixmap_remap_fdt(phys_addr_t dt_phys) +{ + void *dt_virt; + int size; + + dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO); + if (!dt_virt) + return NULL; memblock_reserve(dt_phys, size); - return dt_virt; } From 98e23ea3a3dd23269a69282291f9bef53e262bc2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 29 Jan 2016 11:59:03 +0100 Subject: [PATCH 783/797] arm64: kaslr: randomize the linear region When KASLR is enabled (CONFIG_RANDOMIZE_BASE=y), and entropy has been provided by the bootloader, randomize the placement of RAM inside the linear region if sufficient space is available. For instance, on a 4KB granule/3 levels kernel, the linear region is 256 GB in size, and we can choose any 1 GB aligned offset that is far enough from the top of the address space to fit the distance between the start of the lowest memblock and the top of the highest memblock. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit c031a4213c11a5db475f528c182f7b3858df11db) Signed-off-by: Alex Shi --- arch/arm64/kernel/kaslr.c | 4 ++++ arch/arm64/mm/init.c | 22 ++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 8b32a1f8f09f..582983920054 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -21,6 +21,7 @@ #include u64 __read_mostly module_alloc_base; +u16 __initdata memstart_offset_seed; static __init u64 get_kaslr_seed(void *fdt) { @@ -123,6 +124,9 @@ u64 __init kaslr_early_init(u64 dt_phys) mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1); offset = seed & mask; + /* use the top 16 bits to randomize the linear region */ + memstart_offset_seed = seed >> 48; + /* * The kernel Image should not extend across a 1GB/32MB/512MB alignment * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 2c7a3c2868e4..58a6d3f7525c 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -196,6 +196,23 @@ void __init arm64_memblock_init(void) memblock_add(__pa(_text), (u64)(_end - _text)); } + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { + extern u16 memstart_offset_seed; + u64 range = linear_region_size - + (memblock_end_of_DRAM() - memblock_start_of_DRAM()); + + /* + * If the size of the linear region exceeds, by a sufficient + * margin, the size of the region that the available physical + * memory spans, randomize the linear region as well. + */ + if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) { + range = range / ARM64_MEMSTART_ALIGN + 1; + memstart_addr -= ARM64_MEMSTART_ALIGN * + ((range * memstart_offset_seed) >> 16); + } + } + /* * Register the kernel text, kernel data, initrd, and initial * pagetables with memblock. @@ -365,12 +382,13 @@ void __init mem_init(void) #ifdef CONFIG_SPARSEMEM_VMEMMAP MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE), - MLM((unsigned long)virt_to_page(PAGE_OFFSET), + MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()), (unsigned long)virt_to_page(high_memory)), #endif MLK(FIXADDR_START, FIXADDR_TOP), MLM(PCI_IO_START, PCI_IO_END), - MLM(PAGE_OFFSET, (unsigned long)high_memory)); + MLM(__phys_to_virt(memblock_start_of_DRAM()), + (unsigned long)high_memory)); #undef MLK #undef MLM From ee6457583818600e4c9b7f3a09d358d6ae3727b8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 10 Jan 2016 11:29:07 +0100 Subject: [PATCH 784/797] efi: stub: implement efi_get_random_bytes() based on EFI_RNG_PROTOCOL This exposes the firmware's implementation of EFI_RNG_PROTOCOL via a new function efi_get_random_bytes(). Reviewed-by: Matt Fleming Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit e4fbf4767440472f9d23b0f25a2b905e1c63b6a8) Signed-off-by: Alex Shi --- drivers/firmware/efi/libstub/Makefile | 3 ++- drivers/firmware/efi/libstub/efistub.h | 3 +++ drivers/firmware/efi/libstub/random.c | 35 ++++++++++++++++++++++++++ include/linux/efi.h | 6 ++++- 4 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 drivers/firmware/efi/libstub/random.c diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index c0ddd1b8dca3..c4098748e1fe 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -34,7 +34,8 @@ $(obj)/lib-%.o: $(srctree)/lib/%.c FORCE lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o string.o \ $(patsubst %.c,lib-%.o,$(arm-deps)) -lib-$(CONFIG_ARM64) += arm64-stub.o +lib-$(CONFIG_ARM) += arm32-stub.o +lib-$(CONFIG_ARM64) += arm64-stub.o random.o CFLAGS_arm64-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) # diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 6b6548fda089..206b7252b9d1 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -43,4 +43,7 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, unsigned long desc_size, efi_memory_desc_t *runtime_map, int *count); +efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table, + unsigned long size, u8 *out); + #endif diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c new file mode 100644 index 000000000000..97941ee5954f --- /dev/null +++ b/drivers/firmware/efi/libstub/random.c @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2016 Linaro Ltd; + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include + +#include "efistub.h" + +struct efi_rng_protocol { + efi_status_t (*get_info)(struct efi_rng_protocol *, + unsigned long *, efi_guid_t *); + efi_status_t (*get_rng)(struct efi_rng_protocol *, + efi_guid_t *, unsigned long, u8 *out); +}; + +efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table_arg, + unsigned long size, u8 *out) +{ + efi_guid_t rng_proto = EFI_RNG_PROTOCOL_GUID; + efi_status_t status; + struct efi_rng_protocol *rng; + + status = efi_call_early(locate_protocol, &rng_proto, NULL, + (void **)&rng); + if (status != EFI_SUCCESS) + return status; + + return rng->get_rng(rng, NULL, size, out); +} diff --git a/include/linux/efi.h b/include/linux/efi.h index 47be3ad7d3e5..333d0ca6940f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -299,7 +299,7 @@ typedef struct { void *open_protocol_information; void *protocols_per_handle; void *locate_handle_buffer; - void *locate_protocol; + efi_status_t (*locate_protocol)(efi_guid_t *, void *, void **); void *install_multiple_protocol_interfaces; void *uninstall_multiple_protocol_interfaces; void *calculate_crc32; @@ -599,6 +599,10 @@ void efi_native_runtime_setup(void); #define EFI_PROPERTIES_TABLE_GUID \ EFI_GUID( 0x880aaca3, 0x4adc, 0x4a04, 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5 ) +#define EFI_RNG_PROTOCOL_GUID \ + EFI_GUID(0x3152bca5, 0xeade, 0x433d, \ + 0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44) + typedef struct { efi_guid_t guid; u64 table; From 0f01a865b4feb17ff014717ed2745a845e0c0ee3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 11 Jan 2016 10:43:16 +0100 Subject: [PATCH 785/797] efi: stub: add implementation of efi_random_alloc() This implements efi_random_alloc(), which allocates a chunk of memory of a certain size at a certain alignment, and uses the random_seed argument it receives to randomize the address of the allocation. This is implemented by iterating over the UEFI memory map, counting the number of suitable slots (aligned offsets) within each region, and picking a random number between 0 and 'number of slots - 1' to select the slot, This should guarantee that each possible offset is chosen equally likely. Suggested-by: Kees Cook Reviewed-by: Matt Fleming Reviewed-by: Kees Cook Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 2ddbfc81eac84a299cb4747a8764bc43f23e9008) Signed-off-by: Alex Shi --- drivers/firmware/efi/libstub/efistub.h | 4 + drivers/firmware/efi/libstub/random.c | 100 +++++++++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 206b7252b9d1..5ed3d3f38166 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -46,4 +46,8 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table, unsigned long size, u8 *out); +efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg, + unsigned long size, unsigned long align, + unsigned long *addr, unsigned long random_seed); + #endif diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c index 97941ee5954f..53f6d3fe6d86 100644 --- a/drivers/firmware/efi/libstub/random.c +++ b/drivers/firmware/efi/libstub/random.c @@ -33,3 +33,103 @@ efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table_arg, return rng->get_rng(rng, NULL, size, out); } + +/* + * Return the number of slots covered by this entry, i.e., the number of + * addresses it covers that are suitably aligned and supply enough room + * for the allocation. + */ +static unsigned long get_entry_num_slots(efi_memory_desc_t *md, + unsigned long size, + unsigned long align) +{ + u64 start, end; + + if (md->type != EFI_CONVENTIONAL_MEMORY) + return 0; + + start = round_up(md->phys_addr, align); + end = round_down(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - size, + align); + + if (start > end) + return 0; + + return (end - start + 1) / align; +} + +/* + * The UEFI memory descriptors have a virtual address field that is only used + * when installing the virtual mapping using SetVirtualAddressMap(). Since it + * is unused here, we can reuse it to keep track of each descriptor's slot + * count. + */ +#define MD_NUM_SLOTS(md) ((md)->virt_addr) + +efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg, + unsigned long size, + unsigned long align, + unsigned long *addr, + unsigned long random_seed) +{ + unsigned long map_size, desc_size, total_slots = 0, target_slot; + efi_status_t status; + efi_memory_desc_t *memory_map; + int map_offset; + + status = efi_get_memory_map(sys_table_arg, &memory_map, &map_size, + &desc_size, NULL, NULL); + if (status != EFI_SUCCESS) + return status; + + if (align < EFI_ALLOC_ALIGN) + align = EFI_ALLOC_ALIGN; + + /* count the suitable slots in each memory map entry */ + for (map_offset = 0; map_offset < map_size; map_offset += desc_size) { + efi_memory_desc_t *md = (void *)memory_map + map_offset; + unsigned long slots; + + slots = get_entry_num_slots(md, size, align); + MD_NUM_SLOTS(md) = slots; + total_slots += slots; + } + + /* find a random number between 0 and total_slots */ + target_slot = (total_slots * (u16)random_seed) >> 16; + + /* + * target_slot is now a value in the range [0, total_slots), and so + * it corresponds with exactly one of the suitable slots we recorded + * when iterating over the memory map the first time around. + * + * So iterate over the memory map again, subtracting the number of + * slots of each entry at each iteration, until we have found the entry + * that covers our chosen slot. Use the residual value of target_slot + * to calculate the randomly chosen address, and allocate it directly + * using EFI_ALLOCATE_ADDRESS. + */ + for (map_offset = 0; map_offset < map_size; map_offset += desc_size) { + efi_memory_desc_t *md = (void *)memory_map + map_offset; + efi_physical_addr_t target; + unsigned long pages; + + if (target_slot >= MD_NUM_SLOTS(md)) { + target_slot -= MD_NUM_SLOTS(md); + continue; + } + + target = round_up(md->phys_addr, align) + target_slot * align; + pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; + + status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS, + EFI_LOADER_DATA, pages, &target); + if (status == EFI_SUCCESS) + *addr = target; + break; + } + + efi_call_early(free_pool, memory_map); + + return status; +} From 4a9c1460b2b904c4a9b6438a14d10c56e0e9ab78 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 11 Jan 2016 11:47:49 +0100 Subject: [PATCH 786/797] efi: stub: use high allocation for converted command line Before we can move the command line processing before the allocation of the kernel, which is required for detecting the 'nokaslr' option which controls that allocation, move the converted command line higher up in memory, to prevent it from interfering with the kernel itself. Since x86 needs the address to fit in 32 bits, use UINT_MAX as the upper bound there. Otherwise, use ULONG_MAX (i.e., no limit) Reviewed-by: Matt Fleming Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 48fcb2d0216103d15306caa4814e2381104df6d8) Signed-off-by: Alex Shi --- arch/x86/include/asm/efi.h | 2 ++ drivers/firmware/efi/libstub/efi-stub-helper.c | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 0010c78c4998..08b1f2f6ea50 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -25,6 +25,8 @@ #define EFI32_LOADER_SIGNATURE "EL32" #define EFI64_LOADER_SIGNATURE "EL64" +#define MAX_CMDLINE_ADDRESS UINT_MAX + #ifdef CONFIG_X86_32 diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index f07d4a67fa76..29ed2f9b218c 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -649,6 +649,10 @@ static u8 *efi_utf16_to_utf8(u8 *dst, const u16 *src, int n) return dst; } +#ifndef MAX_CMDLINE_ADDRESS +#define MAX_CMDLINE_ADDRESS ULONG_MAX +#endif + /* * Convert the unicode UEFI command line to ASCII to pass to kernel. * Size of memory allocated return in *cmd_line_len. @@ -684,7 +688,8 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, options_bytes++; /* NUL termination */ - status = efi_low_alloc(sys_table_arg, options_bytes, 0, &cmdline_addr); + status = efi_high_alloc(sys_table_arg, options_bytes, 0, + &cmdline_addr, MAX_CMDLINE_ADDRESS); if (status != EFI_SUCCESS) return NULL; From e009472925ee90986397518ef6796e6f8d12e1da Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 26 Jan 2016 14:48:29 +0100 Subject: [PATCH 787/797] arm64: efi: invoke EFI_RNG_PROTOCOL to supply KASLR randomness Since arm64 does not use a decompressor that supplies an execution environment where it is feasible to some extent to provide a source of randomness, the arm64 KASLR kernel depends on the bootloader to supply some random bits in the /chosen/kaslr-seed DT property upon kernel entry. On UEFI systems, we can use the EFI_RNG_PROTOCOL, if supplied, to obtain some random bits. At the same time, use it to randomize the offset of the kernel Image in physical memory. Reviewed-by: Matt Fleming Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 2b5fe07a78a09a32002642b8a823428ade611f16) Signed-off-by: Alex Shi --- arch/arm64/Kconfig | 5 ++ drivers/firmware/efi/libstub/arm-stub.c | 40 +++++++---- drivers/firmware/efi/libstub/arm64-stub.c | 84 ++++++++++++++++------- drivers/firmware/efi/libstub/fdt.c | 14 ++++ 4 files changed, 105 insertions(+), 38 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b311ac23c989..97583a1878db 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -760,6 +760,11 @@ config RANDOMIZE_BASE It is the bootloader's job to provide entropy, by passing a random u64 value in /chosen/kaslr-seed at kernel entry. + When booting via the UEFI stub, it will invoke the firmware's + EFI_RNG_PROTOCOL implementation (if available) to supply entropy + to the kernel proper. In addition, it will randomise the physical + location of the kernel Image as well. + If unsure, say N. config RANDOMIZE_MODULE_REGION_FULL diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 950c87f5d279..d5aa1d16154f 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -18,6 +18,8 @@ #include "efistub.h" +bool __nokaslr; + static int efi_secureboot_enabled(efi_system_table_t *sys_table_arg) { static efi_guid_t const var_guid = EFI_GLOBAL_VARIABLE_GUID; @@ -207,14 +209,6 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, pr_efi_err(sys_table, "Failed to find DRAM base\n"); goto fail; } - status = handle_kernel_image(sys_table, image_addr, &image_size, - &reserve_addr, - &reserve_size, - dram_base, image); - if (status != EFI_SUCCESS) { - pr_efi_err(sys_table, "Failed to relocate kernel\n"); - goto fail; - } /* * Get the command line from EFI, using the LOADED_IMAGE @@ -224,7 +218,28 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, cmdline_ptr = efi_convert_cmdline(sys_table, image, &cmdline_size); if (!cmdline_ptr) { pr_efi_err(sys_table, "getting command line via LOADED_IMAGE_PROTOCOL\n"); - goto fail_free_image; + goto fail; + } + + /* check whether 'nokaslr' was passed on the command line */ + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { + static const u8 default_cmdline[] = CONFIG_CMDLINE; + const u8 *str, *cmdline = cmdline_ptr; + + if (IS_ENABLED(CONFIG_CMDLINE_FORCE)) + cmdline = default_cmdline; + str = strstr(cmdline, "nokaslr"); + if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) + __nokaslr = true; + } + + status = handle_kernel_image(sys_table, image_addr, &image_size, + &reserve_addr, + &reserve_size, + dram_base, image); + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table, "Failed to relocate kernel\n"); + goto fail_free_cmdline; } status = efi_parse_options(cmdline_ptr); @@ -244,7 +259,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, if (status != EFI_SUCCESS) { pr_efi_err(sys_table, "Failed to load device tree!\n"); - goto fail_free_cmdline; + goto fail_free_image; } } @@ -286,12 +301,11 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, efi_free(sys_table, initrd_size, initrd_addr); efi_free(sys_table, fdt_size, fdt_addr); -fail_free_cmdline: - efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr); - fail_free_image: efi_free(sys_table, image_size, *image_addr); efi_free(sys_table, reserve_size, reserve_addr); +fail_free_cmdline: + efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr); fail: return EFI_ERROR; } diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 78dfbd34b6bf..e0e6b74fef8f 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -13,6 +13,10 @@ #include #include +#include "efistub.h" + +extern bool __nokaslr; + efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg, unsigned long *image_addr, unsigned long *image_size, @@ -23,26 +27,52 @@ efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg, { efi_status_t status; unsigned long kernel_size, kernel_memsize = 0; - unsigned long nr_pages; void *old_image_addr = (void *)*image_addr; unsigned long preferred_offset; + u64 phys_seed = 0; + + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { + if (!__nokaslr) { + status = efi_get_random_bytes(sys_table_arg, + sizeof(phys_seed), + (u8 *)&phys_seed); + if (status == EFI_NOT_FOUND) { + pr_efi(sys_table_arg, "EFI_RNG_PROTOCOL unavailable, no randomness supplied\n"); + } else if (status != EFI_SUCCESS) { + pr_efi_err(sys_table_arg, "efi_get_random_bytes() failed\n"); + return status; + } + } else { + pr_efi(sys_table_arg, "KASLR disabled on kernel command line\n"); + } + } /* * The preferred offset of the kernel Image is TEXT_OFFSET bytes beyond * a 2 MB aligned base, which itself may be lower than dram_base, as * long as the resulting offset equals or exceeds it. */ - preferred_offset = round_down(dram_base, SZ_2M) + TEXT_OFFSET; + preferred_offset = round_down(dram_base, MIN_KIMG_ALIGN) + TEXT_OFFSET; if (preferred_offset < dram_base) - preferred_offset += SZ_2M; + preferred_offset += MIN_KIMG_ALIGN; - /* Relocate the image, if required. */ kernel_size = _edata - _text; - if (*image_addr != preferred_offset) { - kernel_memsize = kernel_size + (_end - _edata); + kernel_memsize = kernel_size + (_end - _edata); + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) { /* - * First, try a straight allocation at the preferred offset. + * If KASLR is enabled, and we have some randomness available, + * locate the kernel at a randomized offset in physical memory. + */ + *reserve_size = kernel_memsize + TEXT_OFFSET; + status = efi_random_alloc(sys_table_arg, *reserve_size, + MIN_KIMG_ALIGN, reserve_addr, + phys_seed); + + *image_addr = *reserve_addr + TEXT_OFFSET; + } else { + /* + * Else, try a straight allocation at the preferred offset. * This will work around the issue where, if dram_base == 0x0, * efi_low_alloc() refuses to allocate at 0x0 (to prevent the * address of the allocation to be mistaken for a FAIL return @@ -52,27 +82,31 @@ efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg, * Mustang), we can still place the kernel at the address * 'dram_base + TEXT_OFFSET'. */ - *image_addr = *reserve_addr = preferred_offset; - nr_pages = round_up(kernel_memsize, EFI_ALLOC_ALIGN) / - EFI_PAGE_SIZE; - status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS, - EFI_LOADER_DATA, nr_pages, - (efi_physical_addr_t *)reserve_addr); - if (status != EFI_SUCCESS) { - kernel_memsize += TEXT_OFFSET; - status = efi_low_alloc(sys_table_arg, kernel_memsize, - SZ_2M, reserve_addr); + if (*image_addr == preferred_offset) + return EFI_SUCCESS; - if (status != EFI_SUCCESS) { - pr_efi_err(sys_table_arg, "Failed to relocate kernel\n"); - return status; - } - *image_addr = *reserve_addr + TEXT_OFFSET; - } - memcpy((void *)*image_addr, old_image_addr, kernel_size); - *reserve_size = kernel_memsize; + *image_addr = *reserve_addr = preferred_offset; + *reserve_size = round_up(kernel_memsize, EFI_ALLOC_ALIGN); + + status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS, + EFI_LOADER_DATA, + *reserve_size / EFI_PAGE_SIZE, + (efi_physical_addr_t *)reserve_addr); } + if (status != EFI_SUCCESS) { + *reserve_size = kernel_memsize + TEXT_OFFSET; + status = efi_low_alloc(sys_table_arg, *reserve_size, + MIN_KIMG_ALIGN, reserve_addr); + + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table_arg, "Failed to relocate kernel\n"); + *reserve_size = 0; + return status; + } + *image_addr = *reserve_addr + TEXT_OFFSET; + } + memcpy((void *)*image_addr, old_image_addr, kernel_size); return EFI_SUCCESS; } diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index b62e2f5dcab3..b1c22cf18f7d 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -147,6 +147,20 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, if (status) goto fdt_set_fail; + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { + efi_status_t efi_status; + + efi_status = efi_get_random_bytes(sys_table, sizeof(fdt_val64), + (u8 *)&fdt_val64); + if (efi_status == EFI_SUCCESS) { + status = fdt_setprop(fdt, node, "kaslr-seed", + &fdt_val64, sizeof(fdt_val64)); + if (status) + goto fdt_set_fail; + } else if (efi_status != EFI_NOT_FOUND) { + return efi_status; + } + } return EFI_SUCCESS; fdt_set_fail: From 885af350336f3e3256999234cd4948210df6c946 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 11 Feb 2016 13:53:10 -0800 Subject: [PATCH 788/797] arm64: make irq_stack_ptr more robust Switching between stacks is only valid if we are tracing ourselves while on the irq_stack, so it is only valid when in current and non-preemptible context, otherwise is is just zeroed off. Fixes: 132cd887b5c5 ("arm64: Modify stack trace and dump for use with irq_stack") Acked-by: James Morse Tested-by: James Morse Signed-off-by: Yang Shi Signed-off-by: Will Deacon (cherry picked from commit a80a0eb70c358f8c7dda4bb62b2278dc6285217b) Signed-off-by: Alex Shi --- arch/arm64/kernel/stacktrace.c | 13 ++++++------- arch/arm64/kernel/traps.c | 11 ++++++++++- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 4fad9787ab46..cfd46c227c8c 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -44,14 +44,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) unsigned long irq_stack_ptr; /* - * Use raw_smp_processor_id() to avoid false-positives from - * CONFIG_DEBUG_PREEMPT. get_wchan() calls unwind_frame() on sleeping - * task stacks, we can be pre-empted in this case, so - * {raw_,}smp_processor_id() may give us the wrong value. Sleeping - * tasks can't ever be on an interrupt stack, so regardless of cpu, - * the checks will always fail. + * Switching between stacks is valid when tracing current and in + * non-preemptible context. */ - irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id()); + if (tsk == current && !preemptible()) + irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + else + irq_stack_ptr = 0; low = frame->sp; /* irq stacks are not THREAD_SIZE aligned */ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index cbedd724f48e..c5392081b49b 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -146,9 +146,18 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; - unsigned long irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + unsigned long irq_stack_ptr; int skip; + /* + * Switching between stacks is valid when tracing current and in + * non-preemptible context. + */ + if (tsk == current && !preemptible()) + irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + else + irq_stack_ptr = 0; + pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); if (!tsk) From 37829fdb8c27a5a506cb535db156c917a6e0061a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Mar 2016 15:22:55 +0000 Subject: [PATCH 789/797] arm64: hugetlb: partial revert of 66b3923a1a0f Commit 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit") introduced support for huge pages using the contiguous bit in the PTE as opposed to block mappings, which may be slightly unwieldy (512M) in 64k page configurations. Unfortunately, this support has resulted in some late regressions when running the libhugetlbfs test suite with 64k pages and CONFIG_DEBUG_VM as a result of a BUG: | readback (2M: 64): ------------[ cut here ]------------ | kernel BUG at fs/hugetlbfs/inode.c:446! | Internal error: Oops - BUG: 0 [#1] SMP | Modules linked in: | CPU: 7 PID: 1448 Comm: readback Not tainted 4.5.0-rc7 #148 | Hardware name: linux,dummy-virt (DT) | task: fffffe0040964b00 ti: fffffe00c2668000 task.ti: fffffe00c2668000 | PC is at remove_inode_hugepages+0x44c/0x480 | LR is at remove_inode_hugepages+0x264/0x480 Rather than revert the entire patch, simply avoid advertising the contiguous huge page sizes for now while people are actively working on a fix. This patch can then be reverted once things have been sorted out. Cc: David Woods Reported-by: Steve Capper Signed-off-by: Will Deacon (cherry picked from commit ff7925848b50050732ac0401e0acf27e8b241d7b) Signed-off-by: Alex Shi --- arch/arm64/mm/hugetlbpage.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 82d607c3614e..da30529bb1f6 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -306,10 +306,6 @@ static __init int setup_hugepagesz(char *opt) hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); } else if (ps == PUD_SIZE) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); - } else if (ps == (PAGE_SIZE * CONT_PTES)) { - hugetlb_add_hstate(CONT_PTE_SHIFT); - } else if (ps == (PMD_SIZE * CONT_PMDS)) { - hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT); } else { pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); return 0; @@ -317,13 +313,3 @@ static __init int setup_hugepagesz(char *opt) return 1; } __setup("hugepagesz=", setup_hugepagesz); - -#ifdef CONFIG_ARM64_64K_PAGES -static __init int add_default_hugepagesz(void) -{ - if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL) - hugetlb_add_hstate(CONT_PMD_SHIFT); - return 0; -} -arch_initcall(add_default_hugepagesz); -#endif From f2971e0e6c42c2b0197e43280ef6a48d8a46097e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 15 Mar 2016 11:22:57 +0000 Subject: [PATCH 790/797] arm64: fix KASLR boot-time I-cache maintenance Commit f80fb3a3d50843a4 ("arm64: add support for kernel ASLR") missed a DSB necessary to complete I-cache maintenance in the primary boot path, and hence stale instructions may still be present in the I-cache and may be executed until the I-cache maintenance naturally completes. Since commit 8ec41987436d566f ("arm64: mm: ensure patched kernel text is fetched from PoU"), all CPUs invalidate their I-caches after their MMU is enabled. Prior a CPU's MMU having been enabled, arbitrary lines may have been fetched from the PoC into I-caches. We never patch text expected to be executed with the MMU off. Thus, it is unnecessary to perform broadcast I-cache maintenance in the primary boot path. This patch reduces the scope of the I-cache maintenance to the local CPU, and adds the missing DSB with similar scope, matching prior maintenance in the primary boot path. Signed-off-by: Mark Rutland Acked-by: Ard Biesehvuel Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit b90b4a608ea2401cc491828f7a385edd2e236e37) Signed-off-by: Alex Shi --- arch/arm64/kernel/head.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 319f896c6e74..a88a15447c3b 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -740,8 +740,9 @@ __enable_mmu: msr sctlr_el1, x19 // re-enable the MMU isb - ic ialluis // flush instructions fetched - isb // via old mapping + ic iallu // flush instructions fetched + dsb nsh // via old mapping + isb add x27, x27, x23 // relocated __mmap_switched #endif br x27 From 2426266ca6318722160645a720bd02bece8400c7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 22 Mar 2016 10:11:45 +0000 Subject: [PATCH 791/797] arm64: consistently use p?d_set_huge Commit 324420bf91f60582 ("arm64: add support for ioremap() block mappings") added new p?d_set_huge functions which do the hard work to generate and set a correct block entry. These differ from open-coded huge page creation in the early page table code by explicitly setting the P?D_TYPE_SECT bits (which are implicitly retained by mk_sect_prot() for any valid prot), but are otherwise identical (and cannot fail on arm64). For simplicity and consistency, make use of these in the initial page table creation code. Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit c661cb1c537e2364bfdabb298fb934fd77445e98) Signed-off-by: Alex Shi --- arch/arm64/mm/mmu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ff0f5a46b552..41421c724fb9 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -211,8 +211,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, if (((addr | next | phys) & ~SECTION_MASK) == 0 && block_mappings_allowed(pgtable_alloc)) { pmd_t old_pmd =*pmd; - set_pmd(pmd, __pmd(phys | - pgprot_val(mk_sect_prot(prot)))); + pmd_set_huge(pmd, phys, prot); /* * Check for previous table entries created during * boot (__create_page_tables) and flush them. @@ -272,8 +271,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, if (use_1G_block(addr, next, phys) && block_mappings_allowed(pgtable_alloc)) { pud_t old_pud = *pud; - set_pud(pud, __pud(phys | - pgprot_val(mk_sect_prot(prot)))); + pud_set_huge(pud, phys, prot); /* * If we have an old value for a pud, it will From 9ca29910090bc04686fbed05306131093da667f1 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 10 Mar 2016 18:41:16 +0000 Subject: [PATCH 792/797] arm64: kasan: Fix zero shadow mapping overriding kernel image shadow With the 16KB and 64KB page size configurations, SWAPPER_BLOCK_SIZE is PAGE_SIZE and ARM64_SWAPPER_USES_SECTION_MAPS is 0. Since kimg_shadow_end is not page aligned (_end shifted by KASAN_SHADOW_SCALE_SHIFT), the edges of previously mapped kernel image shadow via vmemmap_populate() may be overridden by subsequent calls to kasan_populate_zero_shadow(), leading to kernel panics like below: ------------------------------------------------------------------------------ Unable to handle kernel paging request at virtual address fffffc100135068c pgd = fffffc8009ac0000 [fffffc100135068c] *pgd=00000009ffee0003, *pud=00000009ffee0003, *pmd=00000009ffee0003, *pte=00e0000081a00793 Internal error: Oops: 9600004f [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.5.0-rc4+ #1984 Hardware name: Juno (DT) task: fffffe09001a0000 ti: fffffe0900200000 task.ti: fffffe0900200000 PC is at __memset+0x4c/0x200 LR is at kasan_unpoison_shadow+0x34/0x50 pc : [] lr : [] pstate: 00000245 sp : fffffe0900203db0 x29: fffffe0900203db0 x28: 0000000000000000 x27: 0000000000000000 x26: 0000000000000000 x25: fffffc80099b69d0 x24: 0000000000000001 x23: 0000000000000000 x22: 0000000000002000 x21: dffffc8000000000 x20: 1fffff9001350a8c x19: 0000000000002000 x18: 0000000000000008 x17: 0000000000000147 x16: ffffffffffffffff x15: 79746972100e041d x14: ffffff0000000000 x13: ffff000000000000 x12: 0000000000000000 x11: 0101010101010101 x10: 1fffffc11c000000 x9 : 0000000000000000 x8 : fffffc100135068c x7 : 0000000000000000 x6 : 000000000000003f x5 : 0000000000000040 x4 : 0000000000000004 x3 : fffffc100134f651 x2 : 0000000000000400 x1 : 0000000000000000 x0 : fffffc100135068c Process swapper/0 (pid: 1, stack limit = 0xfffffe0900200020) Call trace: [] __memset+0x4c/0x200 [] __asan_register_globals+0x5c/0xb0 [] _GLOBAL__sub_I_65535_1_sunrpc_cache_lookup+0x1c/0x28 [] kernel_init_freeable+0x104/0x274 [] kernel_init+0x10/0xf8 [] ret_from_fork+0x10/0x50 ------------------------------------------------------------------------------ This patch aligns kimg_shadow_start and kimg_shadow_end to SWAPPER_BLOCK_SIZE in all configurations. Fixes: f9040773b7bb ("arm64: move kernel image to base of vmalloc area") Signed-off-by: Catalin Marinas Acked-by: Mark Rutland Acked-by: Ard Biesheuvel (cherry picked from commit 2776e0e8ef683a42fe3e9a5facf576b73579700e) Signed-off-by: Alex Shi --- arch/arm64/mm/kasan_init.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 56e19d150c21..206dd95ea292 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -158,15 +158,12 @@ void __init kasan_init(void) * vmemmap_populate() has populated the shadow region that covers the * kernel image with SWAPPER_BLOCK_SIZE mappings, so we have to round * the start and end addresses to SWAPPER_BLOCK_SIZE as well, to prevent - * kasan_populate_zero_shadow() from replacing the PMD block mappings - * with PMD table mappings at the edges of the shadow region for the - * kernel image. + * kasan_populate_zero_shadow() from replacing the page table entries + * (PMD or PTE) at the edges of the shadow region for the kernel + * image. */ - if (ARM64_SWAPPER_USES_SECTION_MAPS) { - kimg_shadow_start = round_down(kimg_shadow_start, - SWAPPER_BLOCK_SIZE); - kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE); - } + kimg_shadow_start = round_down(kimg_shadow_start, SWAPPER_BLOCK_SIZE); + kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE); kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, (void *)mod_shadow_start); From 3d3fe7cf1fbc97ea285fd1ee3ed55b7527e4d1ae Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 2 Mar 2016 09:47:13 +0100 Subject: [PATCH 793/797] arm64: mm: check at build time that PAGE_OFFSET divides the VA space evenly Commit 8439e62a1561 ("arm64: mm: use bit ops rather than arithmetic in pa/va translations") changed the boundary check against PAGE_OFFSET from an arithmetic comparison to a bit test. This means we now silently assume that PAGE_OFFSET is a power of 2 that divides the kernel virtual address space into two equal halves. So make that assumption explicit. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 6d2aa549de1fc998581d216de3853aa131aa4446) Signed-off-by: Alex Shi --- arch/arm64/mm/init.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 58a6d3f7525c..19ccdb73c680 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -170,6 +170,13 @@ void __init arm64_memblock_init(void) { const s64 linear_region_size = -(s64)PAGE_OFFSET; + /* + * Ensure that the linear region takes up exactly half of the kernel + * virtual address space. This way, we can distinguish a linear address + * from a kernel/module/vmalloc address by testing a single bit. + */ + BUILD_BUG_ON(linear_region_size != BIT(VA_BITS - 1)); + /* * Select a suitable value for the base of physical memory. */ From 27fa6e51b8ddfd0224dad957b85fd8097caa5978 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 25 Feb 2016 20:48:53 +0100 Subject: [PATCH 794/797] arm64: lse: deal with clobbered IP registers after branch via PLT The LSE atomics implementation uses runtime patching to patch in calls to out of line non-LSE atomics implementations on cores that lack hardware support for LSE. To avoid paying the overhead cost of a function call even if no call ends up being made, the bl instruction is kept invisible to the compiler, and the out of line implementations preserve all registers, not just the ones that they are required to preserve as per the AAPCS64. However, commit fd045f6cd98e ("arm64: add support for module PLTs") added support for routing branch instructions via veneers if the branch target offset exceeds the range of the ordinary relative branch instructions. Since this deals with jump and call instructions that are exposed to ELF relocations, the PLT code uses x16 to hold the address of the branch target when it performs an indirect branch-to-register, something which is explicitly allowed by the AAPCS64 (and ordinary compiler generated code does not expect register x16 or x17 to retain their values across a bl instruction). Since the lse runtime patched bl instructions don't adhere to the AAPCS64, they don't deal with this clobbering of registers x16 and x17. So add them to the clobber list of the asm() statements that perform the call instructions, and drop x16 and x17 from the list of registers that are callee saved in the out of line non-LSE implementations. In addition, since we have given these functions two scratch registers, they no longer need to stack/unstack temp registers. Signed-off-by: Ard Biesheuvel [will: factored clobber list into #define, updated Makefile comment] Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 5be8b70af1ca78cefb8b756d157532360a5fd663) Signed-off-by: Alex Shi --- arch/arm64/include/asm/atomic_lse.h | 38 ++++++++++++++--------------- arch/arm64/include/asm/lse.h | 1 + arch/arm64/lib/Makefile | 13 +++++----- 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 197e06afbf71..39c1d340fec5 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -36,7 +36,7 @@ static inline void atomic_andnot(int i, atomic_t *v) " stclr %w[i], %[v]\n") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic_or(int i, atomic_t *v) @@ -48,7 +48,7 @@ static inline void atomic_or(int i, atomic_t *v) " stset %w[i], %[v]\n") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic_xor(int i, atomic_t *v) @@ -60,7 +60,7 @@ static inline void atomic_xor(int i, atomic_t *v) " steor %w[i], %[v]\n") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic_add(int i, atomic_t *v) @@ -72,7 +72,7 @@ static inline void atomic_add(int i, atomic_t *v) " stadd %w[i], %[v]\n") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \ @@ -90,7 +90,7 @@ static inline int atomic_add_return##name(int i, atomic_t *v) \ " add %w[i], %w[i], w30") \ : [i] "+r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS, ##cl); \ \ return w0; \ } @@ -116,7 +116,7 @@ static inline void atomic_and(int i, atomic_t *v) " stclr %w[i], %[v]") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic_sub(int i, atomic_t *v) @@ -133,7 +133,7 @@ static inline void atomic_sub(int i, atomic_t *v) " stadd %w[i], %[v]") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } #define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \ @@ -153,7 +153,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \ " add %w[i], %w[i], w30") \ : [i] "+r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS , ##cl); \ \ return w0; \ } @@ -177,7 +177,7 @@ static inline void atomic64_andnot(long i, atomic64_t *v) " stclr %[i], %[v]\n") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic64_or(long i, atomic64_t *v) @@ -189,7 +189,7 @@ static inline void atomic64_or(long i, atomic64_t *v) " stset %[i], %[v]\n") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic64_xor(long i, atomic64_t *v) @@ -201,7 +201,7 @@ static inline void atomic64_xor(long i, atomic64_t *v) " steor %[i], %[v]\n") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic64_add(long i, atomic64_t *v) @@ -213,7 +213,7 @@ static inline void atomic64_add(long i, atomic64_t *v) " stadd %[i], %[v]\n") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ @@ -231,7 +231,7 @@ static inline long atomic64_add_return##name(long i, atomic64_t *v) \ " add %[i], %[i], x30") \ : [i] "+r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS, ##cl); \ \ return x0; \ } @@ -257,7 +257,7 @@ static inline void atomic64_and(long i, atomic64_t *v) " stclr %[i], %[v]") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic64_sub(long i, atomic64_t *v) @@ -274,7 +274,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) " stadd %[i], %[v]") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \ @@ -294,7 +294,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \ " add %[i], %[i], x30") \ : [i] "+r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS, ##cl); \ \ return x0; \ } @@ -330,7 +330,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) "2:") : [ret] "+&r" (x0), [v] "+Q" (v->counter) : - : "x30", "cc", "memory"); + : __LL_SC_CLOBBERS, "cc", "memory"); return x0; } @@ -359,7 +359,7 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \ " mov %" #w "[ret], " #w "30") \ : [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr) \ : [old] "r" (x1), [new] "r" (x2) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS, ##cl); \ \ return x0; \ } @@ -416,7 +416,7 @@ static inline long __cmpxchg_double##name(unsigned long old1, \ [v] "+Q" (*(unsigned long *)ptr) \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS, ##cl); \ \ return x0; \ } diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 3de42d68611d..23acc00be32d 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -26,6 +26,7 @@ __asm__(".arch_extension lse"); /* Macro for constructing calls to out-of-line ll/sc atomics */ #define __LL_SC_CALL(op) "bl\t" __stringify(__LL_SC_PREFIX(op)) "\n" +#define __LL_SC_CLOBBERS "x16", "x17", "x30" /* In-line patching at runtime */ #define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 1a811ecf71da..c86b7909ef31 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -4,15 +4,16 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \ strchr.o strrchr.o -# Tell the compiler to treat all general purpose registers as -# callee-saved, which allows for efficient runtime patching of the bl -# instruction in the caller with an atomic instruction when supported by -# the CPU. Result and argument registers are handled correctly, based on -# the function prototype. +# Tell the compiler to treat all general purpose registers (with the +# exception of the IP registers, which are already handled by the caller +# in case of a PLT) as callee-saved, which allows for efficient runtime +# patching of the bl instruction in the caller with an atomic instruction +# when supported by the CPU. Result and argument registers are handled +# correctly, based on the function prototype. lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \ -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \ - -fcall-saved-x16 -fcall-saved-x17 -fcall-saved-x18 + -fcall-saved-x18 From b78c702db9fefface6f68cfade7a1afbadd829ab Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 26 Feb 2016 17:57:14 +0100 Subject: [PATCH 795/797] arm64: mm: treat memstart_addr as a signed quantity Commit c031a4213c11 ("arm64: kaslr: randomize the linear region") implements randomization of the linear region, by subtracting a random multiple of PUD_SIZE from memstart_addr. This causes the virtual mapping of system RAM to move upwards in the linear region, and at the same time causes memstart_addr to assume a value which may be negative if the offset of system RAM in the physical space is smaller than its offset relative to PAGE_OFFSET in the virtual space. Since memstart_addr is effectively an offset now, redefine its type as s64 so that expressions involving shifting or division preserve its sign. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas (cherry picked from commit 020d044f66874eba058ce8264fc550f3eca67879) Signed-off-by: Alex Shi --- arch/arm64/include/asm/memory.h | 2 +- arch/arm64/mm/init.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 5f8667a99e41..12f8a00fb3f1 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -135,7 +135,7 @@ #include #include -extern phys_addr_t memstart_addr; +extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 19ccdb73c680..9db46dfb6afb 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -54,7 +54,7 @@ * executes, which assigns it its actual value. So use a default value * that cannot be mistaken for a real physical address. */ -phys_addr_t memstart_addr __read_mostly = ~0ULL; +s64 memstart_addr __read_mostly = -1; phys_addr_t arm64_dma_phys_limit __read_mostly; #ifdef CONFIG_BLK_DEV_INITRD @@ -188,7 +188,7 @@ void __init arm64_memblock_init(void) * linear mapping. Take care not to clip the kernel which may be * high in memory. */ - memblock_remove(max(memstart_addr + linear_region_size, __pa(_end)), + memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)), ULLONG_MAX); if (memblock_end_of_DRAM() > linear_region_size) memblock_remove(0, memblock_end_of_DRAM() - linear_region_size); From 200d9e78dba04ae2a5ee4b847f389758db5152dd Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 10 Mar 2016 18:30:56 +0000 Subject: [PATCH 796/797] arm64: kasan: Use actual memory node when populating the kernel image shadow With the 16KB or 64KB page configurations, the generic vmemmap_populate() implementation warns on potential offnode page_structs via vmemmap_verify() because the arm64 kasan_init() passes NUMA_NO_NODE instead of the actual node for the kernel image memory. Fixes: f9040773b7bb ("arm64: move kernel image to base of vmalloc area") Signed-off-by: Catalin Marinas Reported-by: James Morse Acked-by: Ard Biesheuvel Acked-by: Mark Rutland (cherry picked from commit 2f76969f2eef051bdd63d38b08d78e790440b0ad) Signed-off-by: Alex Shi --- arch/arm64/mm/kasan_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 206dd95ea292..757009daa9ed 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -152,7 +152,8 @@ void __init kasan_init(void) clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); - vmemmap_populate(kimg_shadow_start, kimg_shadow_end, NUMA_NO_NODE); + vmemmap_populate(kimg_shadow_start, kimg_shadow_end, + pfn_to_nid(virt_to_pfn(_text))); /* * vmemmap_populate() has populated the shadow region that covers the From 5dd612ebfad71f5463d89ff92d1bc307cd286b5d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 23 Mar 2016 16:00:46 +0100 Subject: [PATCH 797/797] parisc: Use generic extable search and sort routines Switch to the generic extable search and sort routines which were introduced with commit a272858 from Ard Biesheuvel. This saves quite some memory in the vmlinux binary with the 64bit kernel. Signed-off-by: Helge Deller (cherry picked from commit 0de798584bdedfdad19db21e3c7aec84f252f4f3) Signed-off-by: Alex Shi --- arch/parisc/Kconfig | 1 + arch/parisc/include/asm/assembly.h | 2 +- arch/parisc/include/asm/uaccess.h | 7 ++++--- arch/parisc/mm/fault.c | 9 ++------- scripts/sortextable.c | 1 + 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 729f89163bc3..d2256fa97ea0 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -11,6 +11,7 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG + select BUILDTIME_EXTABLE_SORT select HAVE_PERF_EVENTS select GENERIC_ATOMIC64 if !64BIT select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index b3069fd83468..60e6f07b7e32 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -523,7 +523,7 @@ */ #define ASM_EXCEPTIONTABLE_ENTRY(fault_addr, except_addr) \ .section __ex_table,"aw" ! \ - ASM_ULONG_INSN fault_addr, except_addr ! \ + .word (fault_addr - .), (except_addr - .) ! \ .previous diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 1960b87c1c8b..6f893d29f1b2 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -60,14 +60,15 @@ static inline long access_ok(int type, const void __user * addr, * use a 32bit (unsigned int) address here. */ +#define ARCH_HAS_RELATIVE_EXTABLE struct exception_table_entry { - unsigned long insn; /* address of insn that is allowed to fault. */ - unsigned long fixup; /* fixup routine */ + int insn; /* relative address of insn that is allowed to fault. */ + int fixup; /* relative address of fixup routine */ }; #define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ ".section __ex_table,\"aw\"\n" \ - ASM_WORD_INSN #fault_addr ", " #except_addr "\n\t" \ + ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ ".previous\n" /* diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index f9064449908a..16dbe81c97c9 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -140,12 +140,6 @@ int fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *fix; - /* If we only stored 32bit addresses in the exception table we can drop - * out if we faulted on a 64bit address. */ - if ((sizeof(regs->iaoq[0]) > sizeof(fix->insn)) - && (regs->iaoq[0] >> 32)) - return 0; - fix = search_exception_tables(regs->iaoq[0]); if (fix) { struct exception_data *d; @@ -155,7 +149,8 @@ int fixup_exception(struct pt_regs *regs) d->fault_space = regs->isr; d->fault_addr = regs->ior; - regs->iaoq[0] = ((fix->fixup) & ~3); + regs->iaoq[0] = (unsigned long)&fix->fixup + fix->fixup; + regs->iaoq[0] &= ~3; /* * NOTE: In some cases the faulting instruction * may be in the delay slot of a branch. We diff --git a/scripts/sortextable.c b/scripts/sortextable.c index 19d83647846c..a2c0d620ca80 100644 --- a/scripts/sortextable.c +++ b/scripts/sortextable.c @@ -283,6 +283,7 @@ do_file(char const *const fname) case EM_X86_64: case EM_S390: case EM_AARCH64: + case EM_PARISC: custom_sort = sort_relative_table; break; case EM_ARCOMPACT: